* [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing
@ 2021-02-08 10:52 Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 02/31] drm/i915: Move context revocation to scheduler Chris Wilson
                   ` (33 more replies)
  0 siblings, 34 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

The heartbeat runs through a few phases that we expect to complete
within a certain number of heartbeat intervals. First we must submit the
heartbeat to the queue, and if the queue is occupied it may take a
couple of intervals before the heartbeat preempts the workload and is
submitted to HW. Once running on HW, completion is not instantaneous as
it may have to first reset the current workload before it itself runs
through the empty request and signals completion. As such, we know that
the heartbeat must take at least the preempt reset timeout, and until we
have had a chance to reset the engine we do not want to issue a global
reset ourselves (simply so that we only attempt one reset at a time and
do not confuse ourselves by resetting twice and hitting an innocent
context).

So by taking into consideration that once running the request must take
a finite amount of time, we can delay the final completion check to
accommodate that and avoid checking too early (before we've had a chance
to handle any engine resets required).

v2: Attach a callback to flush the work immediately upon the heartbeat
completion and insert the delay before the next.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2853
Suggested-by: CQ Tang <cq.tang@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
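As an illustrative summary of the timing rule added below (the helper
name here is made up; HEARTBEAT_COMPLETION, the heartbeat.systole
tracking and engine->props.preempt_timeout_ms are the ones used in the
patch): once the heartbeat request is running on HW, the completion
check is pushed out past the preempt-reset window plus a small margin.

static long heartbeat_check_delay(struct intel_engine_cs *engine,
				  long delay)
{
	/* Margin for the heartbeat to complete once the hog is reset */
	unsigned long timeout_ms = HEARTBEAT_COMPLETION;

	/* On a hung engine, first wait out the preempt-reset timeout */
	if (intel_engine_has_preempt_reset(engine))
		timeout_ms += READ_ONCE(engine->props.preempt_timeout_ms);

	/* Only relax the check once the heartbeat is actually on HW */
	if (engine->heartbeat.systole &&
	    i915_request_is_active(engine->heartbeat.systole))
		delay = max_t(long, delay, msecs_to_jiffies(timeout_ms));

	return delay;
}

Before the heartbeat has been submitted, the interval is left untouched
so that heartbeat promotion and preemption still happen within a
deterministic time frame.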
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 95 ++++++++++++++++---
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  1 +
 .../drm/i915/gt/selftest_engine_heartbeat.c   | 65 ++++++-------
 drivers/gpu/drm/i915/gt/selftest_execlists.c  |  5 +-
 4 files changed, 117 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 0b062fad1837..209a477af412 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -20,6 +20,18 @@
  * issue a reset -- in the hope that restores progress.
  */
 
+#define HEARTBEAT_COMPLETION 50u /* milliseconds */
+
+static long completion_timeout(const struct intel_engine_cs *engine)
+{
+	long timeout = HEARTBEAT_COMPLETION;
+
+	if (intel_engine_has_preempt_reset(engine))
+		timeout += READ_ONCE(engine->props.preempt_timeout_ms);
+
+	return msecs_to_jiffies(timeout);
+}
+
 static bool next_heartbeat(struct intel_engine_cs *engine)
 {
 	long delay;
@@ -29,6 +41,26 @@ static bool next_heartbeat(struct intel_engine_cs *engine)
 		return false;
 
 	delay = msecs_to_jiffies_timeout(delay);
+
+	/*
+	 * Once we submit a heartbeat to the HW, we know that it will take
+	 * at least a certain amount of time to complete. On a hanging system
+	 * it will first have to wait for the preempt reset timeout, and
+	 * then it will take some time for the reset to resume with the
+	 * heartbeat and for it to complete. So once we have submitted the
+	 * heartbeat to HW, we can wait a while longer before declaring the
+	 * engine stuck and forcing a reset ourselves. If we do a reset
+	 * and the engine is also doing a reset, it is possible that we
+	 * reset the engine twice, harming an innocent.
+	 *
+	 * Before we have submitted the heartbeat, we do not want to change
+	 * the interval, as we want to promote the heartbeat and trigger
+	 * preemption in a deterministic time frame.
+	 */
+	if (engine->heartbeat.systole &&
+	    i915_request_is_active(engine->heartbeat.systole))
+		delay = max(delay, completion_timeout(engine));
+
 	if (delay >= HZ)
 		delay = round_jiffies_up_relative(delay);
 	mod_delayed_work(system_highpri_wq, &engine->heartbeat.work, delay + 1);
@@ -48,12 +80,49 @@ heartbeat_create(struct intel_context *ce, gfp_t gfp)
 	return rq;
 }
 
+static void defibrillator(struct dma_fence *f, struct dma_fence_cb *cb)
+{
+	struct intel_engine_cs *engine =
+		container_of(cb, typeof(*engine), heartbeat.cb);
+
+	if (READ_ONCE(engine->heartbeat.systole))
+		mod_delayed_work(system_highpri_wq, &engine->heartbeat.work, 0);
+}
+
+static void
+untrack_heartbeat(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	rq = fetch_and_zero(&engine->heartbeat.systole);
+	if (!rq)
+		return;
+
+	ENGINE_TRACE(engine, "heartbeat " RQ_FMT "completed\n", RQ_ARG(rq));
+
+	dma_fence_remove_callback(&rq->fence, &engine->heartbeat.cb);
+	i915_request_put(rq);
+}
+
+static void
+track_heartbeat(struct intel_engine_cs *engine, struct i915_request *rq)
+{
+	ENGINE_TRACE(engine, "heartbeat " RQ_FMT "started\n", RQ_ARG(rq));
+
+	dma_fence_add_callback(&rq->fence,
+			       &engine->heartbeat.cb,
+			       defibrillator);
+	engine->heartbeat.systole = i915_request_get(rq);
+	if (!next_heartbeat(engine))
+		untrack_heartbeat(engine);
+}
+
 static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
 {
 	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
 	i915_request_add_active_barriers(rq);
 	if (!engine->heartbeat.systole && intel_engine_has_heartbeat(engine))
-		engine->heartbeat.systole = i915_request_get(rq);
+		track_heartbeat(engine, rq);
 }
 
 static void heartbeat_commit(struct i915_request *rq,
@@ -106,13 +175,8 @@ static void heartbeat(struct work_struct *wrk)
 	intel_engine_flush_scheduler(engine);
 
 	rq = engine->heartbeat.systole;
-	if (rq && i915_request_completed(rq)) {
-		ENGINE_TRACE(engine,
-			     "heartbeat " RQ_FMT "completed\n",
-			     RQ_ARG(rq));
-		i915_request_put(rq);
-		engine->heartbeat.systole = NULL;
-	}
+	if (rq && i915_request_completed(rq))
+		untrack_heartbeat(engine);
 
 	if (!intel_engine_pm_get_if_awake(engine))
 		return;
@@ -180,6 +244,11 @@ static void heartbeat(struct work_struct *wrk)
 		goto out;
 	}
 
+	/* Just completed one heartbeat, wait a tick before the next */
+	if (rq)
+		goto out;
+
+	/* The engine is parking. We can rest until the next user */
 	serial = READ_ONCE(engine->serial);
 	if (engine->wakeref_serial == serial)
 		goto out;
@@ -198,14 +267,14 @@ static void heartbeat(struct work_struct *wrk)
 	if (IS_ERR(rq))
 		goto unlock;
 
-	ENGINE_TRACE(engine, "heartbeat " RQ_FMT "started\n", RQ_ARG(rq));
 	heartbeat_commit(rq, &attr);
 
 unlock:
 	mutex_unlock(&ce->timeline->mutex);
 out:
+	intel_engine_flush_scheduler(engine);
 	if (!engine->i915->params.enable_hangcheck || !next_heartbeat(engine))
-		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
+		untrack_heartbeat(engine);
 	intel_engine_pm_put(engine);
 }
 
@@ -219,8 +288,10 @@ void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
 
 void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
 {
-	if (cancel_delayed_work(&engine->heartbeat.work))
-		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
+	/* completion may rearm work */
+	while (cancel_delayed_work(&engine->heartbeat.work))
+		;
+	untrack_heartbeat(engine);
 }
 
 void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 7efa6290cc3e..d27a44070cb1 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -322,6 +322,7 @@ struct intel_engine_cs {
 	struct {
 		struct delayed_work work;
 		struct i915_request *systole;
+		struct dma_fence_cb cb;
 		unsigned long blocked;
 	} heartbeat;
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index b2c369317bf1..812c4a168b01 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -202,47 +202,44 @@ static int cmp_u32(const void *_a, const void *_b)
 
 static int __live_heartbeat_fast(struct intel_engine_cs *engine)
 {
-	const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6));
-	struct intel_context *ce;
-	struct i915_request *rq;
-	ktime_t t0, t1;
+	const unsigned int error_threshold =
+		max(3 * HEARTBEAT_COMPLETION * 1000, jiffies_to_usecs(6));
+	struct intel_context *ce = engine->kernel_context;
 	u32 times[5];
 	int err;
 	int i;
 
-	ce = intel_context_create(engine);
-	if (IS_ERR(ce))
-		return PTR_ERR(ce);
-
 	intel_engine_pm_get(engine);
 
 	err = intel_engine_set_heartbeat(engine, 1);
 	if (err)
 		goto err_pm;
 
+	flush_delayed_work(&engine->heartbeat.work);
+	while (engine->heartbeat.systole)
+		intel_engine_park_heartbeat(engine);
+
 	for (i = 0; i < ARRAY_SIZE(times); i++) {
-		do {
-			/* Manufacture a tick */
-			intel_engine_park_heartbeat(engine);
-			GEM_BUG_ON(engine->heartbeat.systole);
-			engine->serial++; /*  pretend we are not idle! */
-			intel_engine_unpark_heartbeat(engine);
+		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN };
+		struct i915_request *rq;
+		ktime_t t0, t1;
 
-			flush_delayed_work(&engine->heartbeat.work);
-			if (!delayed_work_pending(&engine->heartbeat.work)) {
-				pr_err("%s: heartbeat %d did not start\n",
-				       engine->name, i);
-				err = -EINVAL;
-				goto err_pm;
-			}
+		GEM_BUG_ON(READ_ONCE(engine->heartbeat.systole));
 
-			rcu_read_lock();
-			rq = READ_ONCE(engine->heartbeat.systole);
-			if (rq)
-				rq = i915_request_get_rcu(rq);
-			rcu_read_unlock();
-		} while (!rq);
+		/* Manufacture a tick */
+		mutex_lock(&ce->timeline->mutex);
+		rq = heartbeat_create(ce, GFP_KERNEL);
+		if (!IS_ERR(rq)) {
+			i915_request_get(rq);
+			heartbeat_commit(rq, &attr);
+		}
+		mutex_unlock(&ce->timeline->mutex);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_reset;
+		}
 
+		/* Time how long before the heartbeat monitor checks */
 		t0 = ktime_get();
 		while (rq == READ_ONCE(engine->heartbeat.systole))
 			yield(); /* work is on the local cpu! */
@@ -275,10 +272,10 @@ static int __live_heartbeat_fast(struct intel_engine_cs *engine)
 		err = -EINVAL;
 	}
 
+err_reset:
 	reset_heartbeat(engine);
 err_pm:
 	intel_engine_pm_put(engine);
-	intel_context_put(ce);
 	return err;
 }
 
@@ -308,20 +305,16 @@ static int __live_heartbeat_off(struct intel_engine_cs *engine)
 
 	intel_engine_pm_get(engine);
 
+	/* Kick once, so that we change an active heartbeat */
 	engine->serial++;
-	flush_delayed_work(&engine->heartbeat.work);
-	if (!delayed_work_pending(&engine->heartbeat.work)) {
-		pr_err("%s: heartbeat not running\n",
-		       engine->name);
-		err = -EINVAL;
-		goto err_pm;
-	}
+	intel_engine_unpark_heartbeat(engine);
 
 	err = intel_engine_set_heartbeat(engine, 0);
 	if (err)
 		goto err_pm;
 
-	engine->serial++;
+	/* The next heartbeat work should cancel the heartbeat */
+	engine->serial++; /* pretend the engine is still active */
 	flush_delayed_work(&engine->heartbeat.work);
 	if (delayed_work_pending(&engine->heartbeat.work)) {
 		pr_err("%s: heartbeat still running\n",
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index f625c29023ea..04ded3a2d491 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -2325,13 +2325,16 @@ static int __cancel_fail(struct live_preempt_cancel *arg)
 	del_timer_sync(&engine->execlists.preempt);
 	intel_engine_flush_scheduler(engine);
 
+	engine->props.preempt_timeout_ms = 0;
 	cancel_reset_timeout(engine);
 
-	/* after failure, require heartbeats to reset device */
+	/* after failure, require fast heartbeats to reset device */
 	intel_engine_set_heartbeat(engine, 1);
 	err = wait_for_reset(engine, rq, HZ / 2);
 	intel_engine_set_heartbeat(engine,
 				   engine->defaults.heartbeat_interval_ms);
+
+	engine->props.preempt_timeout_ms = engine->defaults.preempt_timeout_ms;
 	if (err) {
 		pr_err("Cancelled inflight0 request did not reset\n");
 		goto out;
-- 
2.20.1


* [Intel-gfx] [PATCH 02/31] drm/i915: Move context revocation to scheduler
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 11:18   ` Tvrtko Ursulin
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 03/31] drm/i915: Introduce the scheduling mode Chris Wilson
                   ` (32 subsequent siblings)
  33 siblings, 1 reply; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Centralise the means of removing a context from execution into the
scheduler, allowing the backends to specialise as necessary. Note that
without backend support, the procedure simplifies to forcibly resetting
the HW to remove the context.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
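An illustrative sketch of the resulting call flow (the function name is
hypothetical; kill_engines() below passes the GEM context name only when
banning, and i915_sched_default_revoke_context() is installed as the
fallback for backends without specialised support):

static void revoke_sketch(struct intel_context *ce,
			  const char *ctx_name, bool ban)
{
	struct i915_sched *se = intel_engine_get_scheduler(ce->engine);

	/*
	 * The GEM layer now only chooses the error code and, when
	 * banning, names the culprit; how the context is evicted
	 * (preemptive pulse first, engine reset as a last resort) is
	 * left to the backend's revoke_context() implementation.
	 */
	se->revoke_context(ce, ban ? ctx_name : NULL, ban ? -EIO : -EAGAIN);
}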
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 117 +-----------------
 .../drm/i915/gt/intel_execlists_submission.c  |  47 +++++++
 drivers/gpu/drm/i915/i915_scheduler.c         |  20 +++
 drivers/gpu/drm/i915/i915_scheduler_types.h   |   5 +
 4 files changed, 75 insertions(+), 114 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index ca37d93ef5e7..be75f861db67 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -382,104 +382,9 @@ __context_engines_static(const struct i915_gem_context *ctx)
 	return rcu_dereference_protected(ctx->engines, true);
 }
 
-static void __reset_context(struct i915_gem_context *ctx,
-			    struct intel_engine_cs *engine)
-{
-	intel_gt_handle_error(engine->gt, engine->mask, 0,
-			      "context closure in %s", ctx->name);
-}
-
-static bool __cancel_engine(struct intel_engine_cs *engine)
-{
-	/*
-	 * Send a "high priority pulse" down the engine to cause the
-	 * current request to be momentarily preempted. (If it fails to
-	 * be preempted, it will be reset). As we have marked our context
-	 * as banned, any incomplete request, including any running, will
-	 * be skipped following the preemption.
-	 *
-	 * If there is no hangchecking (one of the reasons why we try to
-	 * cancel the context) and no forced preemption, there may be no
-	 * means by which we reset the GPU and evict the persistent hog.
-	 * Ergo if we are unable to inject a preemptive pulse that can
-	 * kill the banned context, we fallback to doing a local reset
-	 * instead.
-	 */
-	return intel_engine_pulse(engine) == 0;
-}
-
-static bool
-__active_engine(struct i915_request *rq, struct intel_engine_cs **active)
-{
-	struct intel_engine_cs *engine, *locked;
-	bool ret = false;
-
-	/*
-	 * Serialise with __i915_request_submit() so that it sees
-	 * is-banned?, or we know the request is already inflight.
-	 *
-	 * Note that rq->engine is unstable, and so we double
-	 * check that we have acquired the lock on the final engine.
-	 */
-	locked = READ_ONCE(rq->engine);
-	spin_lock_irq(&locked->sched.lock);
-	while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
-		spin_unlock(&locked->sched.lock);
-		locked = engine;
-		spin_lock(&locked->sched.lock);
-	}
-
-	if (i915_request_is_active(rq)) {
-		if (!__i915_request_is_complete(rq))
-			*active = locked;
-		ret = true;
-	}
-
-	spin_unlock_irq(&locked->sched.lock);
-
-	return ret;
-}
-
-static struct intel_engine_cs *active_engine(struct intel_context *ce)
-{
-	struct intel_engine_cs *engine = NULL;
-	struct i915_request *rq;
-
-	if (intel_context_has_inflight(ce))
-		return intel_context_inflight(ce);
-
-	if (!ce->timeline)
-		return NULL;
-
-	/*
-	 * rq->link is only SLAB_TYPESAFE_BY_RCU, we need to hold a reference
-	 * to the request to prevent it being transferred to a new timeline
-	 * (and onto a new timeline->requests list).
-	 */
-	rcu_read_lock();
-	list_for_each_entry_reverse(rq, &ce->timeline->requests, link) {
-		bool found;
-
-		/* timeline is already completed upto this point? */
-		if (!i915_request_get_rcu(rq))
-			break;
-
-		/* Check with the backend if the request is inflight */
-		found = true;
-		if (likely(rcu_access_pointer(rq->timeline) == ce->timeline))
-			found = __active_engine(rq, &engine);
-
-		i915_request_put(rq);
-		if (found)
-			break;
-	}
-	rcu_read_unlock();
-
-	return engine;
-}
-
 static void kill_engines(struct i915_gem_engines *engines, bool ban)
 {
+	const int error = ban ? -EIO : -EAGAIN;
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
 
@@ -491,28 +396,12 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 	 * engines on which there are incomplete requests.
 	 */
 	for_each_gem_engine(ce, engines, it) {
-		struct intel_engine_cs *engine;
+		struct i915_sched *se = intel_engine_get_scheduler(ce->engine);
 
 		if (ban && intel_context_set_banned(ce))
 			continue;
 
-		/*
-		 * Check the current active state of this context; if we
-		 * are currently executing on the GPU we need to evict
-		 * ourselves. On the other hand, if we haven't yet been
-		 * submitted to the GPU or if everything is complete,
-		 * we have nothing to do.
-		 */
-		engine = active_engine(ce);
-
-		/* First attempt to gracefully cancel the context */
-		if (engine && !__cancel_engine(engine) && ban)
-			/*
-			 * If we are unable to send a preemptive pulse to bump
-			 * the context from the GPU, we have to resort to a full
-			 * reset. We hope the collateral damage is worth it.
-			 */
-			__reset_context(engines->ctx, engine);
+		se->revoke_context(ce, ban ? engines->ctx->name : NULL, error);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 85ff5fe861b4..e51112302fb8 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -114,6 +114,7 @@
 #include "gen8_engine_cs.h"
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
+#include "intel_engine_heartbeat.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_stats.h"
 #include "intel_execlists_submission.h"
@@ -2774,6 +2775,50 @@ execlists_active_request(const struct i915_sched *se)
 	return rq;
 }
 
+static bool __cancel_engine(struct intel_engine_cs *engine)
+{
+	/*
+	 * Send a "high priority pulse" down the engine to cause the
+	 * current request to be momentarily preempted. (If it fails to
+	 * be preempted, it will be reset). As we have marked our context
+	 * as banned, any incomplete request, including any running, will
+	 * be skipped following the preemption.
+	 *
+	 * If there is no hangchecking (one of the reasons why we try to
+	 * cancel the context) and no forced preemption, there may be no
+	 * means by which we reset the GPU and evict the persistent hog.
+	 * Ergo if we are unable to inject a preemptive pulse that can
+	 * kill the banned context, we fallback to doing a local reset
+	 * instead.
+	 */
+	return intel_engine_pulse(engine) == 0;
+}
+
+static void
+execlists_revoke_context(struct intel_context *ce, const char *force, int error)
+{
+	struct intel_engine_cs *engine;
+
+	/*
+	 * Check the current active state of this context; if we
+	 * are currently executing on the GPU we need to evict
+	 * ourselves. On the other hand, if we haven't yet been
+	 * submitted to the GPU or if everything is complete,
+	 * we have nothing to do.
+	 */
+	engine = intel_context_inflight(ce);
+
+	/* First attempt to gracefully cancel the context */
+	if (engine && !__cancel_engine(engine) && force)
+		/*
+		 * If we are unable to send a preemptive pulse to bump
+		 * the context from the GPU, we have to resort to a full
+		 * reset. We hope the collateral damage is worth it.
+		 */
+		intel_gt_handle_error(engine->gt, engine->mask, 0,
+				      "context revoked from %s", force);
+}
+
 static bool can_preempt(struct intel_engine_cs *engine)
 {
 	if (INTEL_GEN(engine->i915) > 8)
@@ -2911,6 +2956,7 @@ static void init_execlists(struct intel_engine_cs *engine)
 	u32 base = engine->mmio_base;
 
 	engine->sched.active_request = execlists_active_request;
+	engine->sched.revoke_context = execlists_revoke_context;
 	engine->sched.show = execlists_show;
 	tasklet_setup(&engine->sched.tasklet, execlists_submission_tasklet);
 
@@ -3454,6 +3500,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 			ENGINE_VIRTUAL);
 
 	ve->base.sched.submit_request = virtual_submit_request;
+	ve->base.sched.revoke_context = execlists_revoke_context;
 	tasklet_setup(&ve->base.sched.tasklet, virtual_submission_tasklet);
 
 	virtual_engine_initial_hint(ve);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index a8fb787278e6..7855601a4958 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -135,6 +135,25 @@ i915_sched_default_active_request(const struct i915_sched *se)
 	return active;
 }
 
+static bool context_active(struct intel_context *ce)
+{
+	return i915_active_fence_isset(&ce->timeline->last_request);
+}
+
+static void
+i915_sched_default_revoke_context(struct intel_context *ce,
+				  const char *force,
+				  int error)
+{
+	/*
+	 * Without backend support, we cannot remove the context from the
+	 * HW gracefully. All we can do is force a reset, as a last resort.
+	 */
+	if (force && context_active(ce))
+		intel_gt_handle_error(ce->engine->gt, ce->engine->mask, 0,
+				      "context revoked from %s", force);
+}
+
 void i915_sched_init(struct i915_sched *se,
 		     struct device *dev,
 		     const char *name,
@@ -158,6 +177,7 @@ void i915_sched_init(struct i915_sched *se,
 
 	se->submit_request = i915_request_enqueue;
 	se->active_request = i915_sched_default_active_request;
+	se->revoke_context = i915_sched_default_revoke_context;
 }
 
 void i915_sched_park(struct i915_sched *se)
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index a8502c94d7c5..84232a07163f 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -15,6 +15,7 @@
 
 struct drm_printer;
 struct i915_request;
+struct intel_context;
 
 /**
  * struct i915_sched - funnels requests towards hardware
@@ -40,6 +41,10 @@ struct i915_sched {
 
 	struct i915_request *(*active_request)(const struct i915_sched *se);
 
+	void (*revoke_context)(struct intel_context *ce,
+			       const char *whom,
+			       int error);
+
 	void (*show)(struct drm_printer *m,
 		     struct i915_sched *se,
 		     void (*show_request)(struct drm_printer *m,
-- 
2.20.1


* [Intel-gfx] [PATCH 03/31] drm/i915: Introduce the scheduling mode
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 02/31] drm/i915: Move context revocation to scheduler Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 04/31] drm/i915: Move timeslicing flag to scheduler Chris Wilson
                   ` (31 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Start extracting the scheduling flags from the engine. We begin with the
scheduler's own existence, declaring whether or not it supports any task
reordering. This information can then be passed directly to the user
(via the SCHEDULER_CAPS) without having to infer it in the user
interface.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
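For reference, a minimal userspace sketch of how these capability bits
are read back (assuming an already-open DRM fd; the uAPI header include
path may vary with the libdrm installation):

#include <sys/ioctl.h>
#include <stdio.h>
#include <drm/i915_drm.h>

/* Query I915_PARAM_HAS_SCHEDULER and decode the capability bits */
static void print_scheduler_caps(int drm_fd)
{
	int caps = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HAS_SCHEDULER,
		.value = &caps,
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return; /* parameter not supported */

	printf("scheduler: %s, priority sorting: %s\n",
	       caps & I915_SCHEDULER_CAP_ENABLED ? "yes" : "no",
	       caps & I915_SCHEDULER_CAP_PRIORITY ? "yes" : "no");
}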
 drivers/gpu/drm/i915/gt/intel_engine.h        |  7 +++
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  1 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  | 19 +++-----
 drivers/gpu/drm/i915/gt/intel_engine_user.c   | 34 +++++++++------
 .../drm/i915/gt/intel_execlists_submission.c  |  7 ++-
 drivers/gpu/drm/i915/gt/intel_reset.c         |  8 +++-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  3 +-
 drivers/gpu/drm/i915/i915_request.h           |  3 +-
 drivers/gpu/drm/i915/i915_scheduler.c         | 16 ++++++-
 drivers/gpu/drm/i915/i915_scheduler.h         | 18 ++++++++
 drivers/gpu/drm/i915/i915_scheduler_types.h   | 43 +++++++++++++++++++
 11 files changed, 126 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index c530839627bb..875fde52bcb6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -261,6 +261,13 @@ intel_engine_has_heartbeat(const struct intel_engine_cs *engine)
 	return READ_ONCE(engine->props.heartbeat_interval_ms);
 }
 
+static inline bool
+intel_engine_has_scheduler(struct intel_engine_cs *engine)
+{
+	/* Is there an active scheduler attached to this engine? */
+	return i915_sched_is_active(intel_engine_get_scheduler(engine));
+}
+
 static inline void
 intel_engine_kick_scheduler(struct intel_engine_cs *engine)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 577ebd4a324f..da2447f18daa 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1245,6 +1245,7 @@ void intel_engines_reset_default_submission(struct intel_gt *gt)
 			engine->sanitize(engine);
 
 		engine->set_default_submission(engine);
+		i915_sched_enable(intel_engine_get_scheduler(engine));
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index d27a44070cb1..ce5732099815 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -442,13 +442,12 @@ struct intel_engine_cs {
 
 #define I915_ENGINE_USING_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
-#define I915_ENGINE_HAS_SCHEDULER    BIT(2)
-#define I915_ENGINE_HAS_PREEMPTION   BIT(3)
-#define I915_ENGINE_HAS_SEMAPHORES   BIT(4)
-#define I915_ENGINE_HAS_TIMESLICES   BIT(5)
-#define I915_ENGINE_IS_VIRTUAL       BIT(6)
-#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
-#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
+#define I915_ENGINE_HAS_PREEMPTION   BIT(2)
+#define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
+#define I915_ENGINE_HAS_TIMESLICES   BIT(4)
+#define I915_ENGINE_IS_VIRTUAL       BIT(5)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
 	unsigned int flags;
 
 	/*
@@ -531,12 +530,6 @@ intel_engine_supports_stats(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
 }
 
-static inline bool
-intel_engine_has_scheduler(const struct intel_engine_cs *engine)
-{
-	return engine->flags & I915_ENGINE_HAS_SCHEDULER;
-}
-
 static inline bool
 intel_engine_has_preemption(const struct intel_engine_cs *engine)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 64eccdf32a22..3d3cdc080c32 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -90,13 +90,18 @@ static void sort_engines(struct drm_i915_private *i915,
 static void set_scheduler_caps(struct drm_i915_private *i915)
 {
 	static const struct {
-		u8 engine;
-		u8 sched;
-	} map[] = {
+		u8 flag;
+		u8 cap;
+	} engine_map[] = {
 #define MAP(x, y) { ilog2(I915_ENGINE_##x), ilog2(I915_SCHEDULER_CAP_##y) }
 		MAP(HAS_PREEMPTION, PREEMPTION),
 		MAP(HAS_SEMAPHORES, SEMAPHORES),
 		MAP(SUPPORTS_STATS, ENGINE_BUSY_STATS),
+#undef MAP
+	}, sched_map[] = {
+#define MAP(x, y) { I915_SCHED_##x, ilog2(I915_SCHEDULER_CAP_##y) }
+		MAP(ACTIVE_BIT, ENABLED),
+		MAP(PRIORITY_BIT, PRIORITY),
 #undef MAP
 	};
 	struct intel_engine_cs *engine;
@@ -105,20 +110,21 @@ static void set_scheduler_caps(struct drm_i915_private *i915)
 	enabled = 0;
 	disabled = 0;
 	for_each_uabi_engine(engine, i915) { /* all engines must agree! */
+		struct i915_sched *se = intel_engine_get_scheduler(engine);
 		int i;
 
-		if (intel_engine_has_scheduler(engine))
-			enabled |= (I915_SCHEDULER_CAP_ENABLED |
-				    I915_SCHEDULER_CAP_PRIORITY);
-		else
-			disabled |= (I915_SCHEDULER_CAP_ENABLED |
-				     I915_SCHEDULER_CAP_PRIORITY);
-
-		for (i = 0; i < ARRAY_SIZE(map); i++) {
-			if (engine->flags & BIT(map[i].engine))
-				enabled |= BIT(map[i].sched);
+		for (i = 0; i < ARRAY_SIZE(engine_map); i++) {
+			if (engine->flags & BIT(engine_map[i].flag))
+				enabled |= BIT(engine_map[i].cap);
 			else
-				disabled |= BIT(map[i].sched);
+				disabled |= BIT(engine_map[i].cap);
+		}
+
+		for (i = 0; i < ARRAY_SIZE(sched_map); i++) {
+			if (se->flags & BIT(sched_map[i].flag))
+				enabled |= BIT(sched_map[i].cap);
+			else
+				disabled |= BIT(sched_map[i].cap);
 		}
 	}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index e51112302fb8..0f2c3c62cac9 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2891,7 +2891,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	}
 	intel_engine_set_irq_handler(engine, execlists_irq_handler);
 
-	engine->flags |= I915_ENGINE_HAS_SCHEDULER;
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 	if (!intel_vgpu_active(engine->i915)) {
 		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
@@ -2960,6 +2959,8 @@ static void init_execlists(struct intel_engine_cs *engine)
 	engine->sched.show = execlists_show;
 	tasklet_setup(&engine->sched.tasklet, execlists_submission_tasklet);
 
+	i915_sched_select_mode(&engine->sched, I915_SCHED_MODE_PRIORITY);
+
 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
 	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
 
@@ -3370,6 +3371,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 			       unsigned int count)
 {
 	struct virtual_engine *ve;
+	unsigned long sched;
 	unsigned int n;
 	int err;
 
@@ -3428,6 +3430,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 		goto err_put;
 	}
 
+	sched = ~0U;
 	for (n = 0; n < count; n++) {
 		struct intel_engine_cs *sibling = siblings[n];
 
@@ -3457,6 +3460,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 
 		ve->siblings[ve->num_siblings++] = sibling;
 		ve->base.mask |= sibling->mask;
+		sched &= sibling->sched.flags;
 
 		/*
 		 * All physical engines must be compatible for their emission
@@ -3498,6 +3502,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 			ve->base.name,
 			ve->base.mask,
 			ENGINE_VIRTUAL);
+	ve->base.sched.flags = sched;
 
 	ve->base.sched.submit_request = virtual_submit_request;
 	ve->base.sched.revoke_context = execlists_revoke_context;
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 990cb4adbb9a..fbcebc7b4d77 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -819,8 +819,12 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
 	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
 		__intel_gt_reset(gt, ALL_ENGINES);
 
-	for_each_engine(engine, gt, id)
-		engine->sched.submit_request = nop_submit_request;
+	for_each_engine(engine, gt, id) {
+		struct i915_sched *se = intel_engine_get_scheduler(engine);
+
+		i915_sched_disable(se);
+		se->submit_request = nop_submit_request;
+	}
 
 	/*
 	 * Make sure no request can slip through without getting completed by
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index c66c867ada23..d14b9db77df8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -592,7 +592,6 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
 	}
 	engine->set_default_submission = guc_set_default_submission;
 
-	engine->flags |= I915_ENGINE_HAS_SCHEDULER;
 	engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	/*
@@ -643,6 +642,8 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
 
 	tasklet_setup(&engine->sched.tasklet, guc_submission_tasklet);
 
+	i915_sched_select_mode(&engine->sched, I915_SCHED_MODE_PRIORITY);
+
 	guc_default_vfuncs(engine);
 	guc_default_irqs(engine);
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index dd10a6db3d21..dde868e9ee5f 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -594,6 +594,7 @@ static inline void i915_request_clear_hold(struct i915_request *rq)
 static inline struct i915_sched *
 i915_request_get_scheduler(const struct i915_request *rq)
 {
+	/* Is there an active scheduler for this request? */
 	return intel_engine_get_scheduler(rq->engine);
 }
 
@@ -626,7 +627,7 @@ i915_request_active_timeline(const struct i915_request *rq)
 
 static inline bool i915_request_use_scheduler(const struct i915_request *rq)
 {
-	return intel_engine_has_scheduler(rq->engine);
+	return i915_sched_is_active(i915_request_get_scheduler(rq));
 }
 
 static inline bool i915_request_is_executing(const struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 7855601a4958..a42d8b5bf1f9 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -154,6 +154,20 @@ i915_sched_default_revoke_context(struct intel_context *ce,
 				      "context revoked from %s", force);
 }
 
+void i915_sched_select_mode(struct i915_sched *se, enum i915_sched_mode mode)
+{
+	switch (mode) {
+	case I915_SCHED_MODE_PRIORITY:
+		__set_bit(I915_SCHED_PRIORITY_BIT, &se->flags);
+		fallthrough;
+	case I915_SCHED_MODE_FIFO:
+		__set_bit(I915_SCHED_ACTIVE_BIT, &se->flags);
+		fallthrough;
+	case I915_SCHED_MODE_NONE:
+		break;
+	}
+}
+
 void i915_sched_init(struct i915_sched *se,
 		     struct device *dev,
 		     const char *name,
@@ -534,7 +548,7 @@ void i915_request_set_priority(struct i915_request *rq, int prio)
 	if (__i915_request_is_complete(rq))
 		goto unlock;
 
-	if (!intel_engine_has_scheduler(engine)) {
+	if (!i915_sched_has_priorities(&engine->sched)) {
 		rq->sched.attr.priority = prio;
 		goto unlock;
 	}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index a12083721c84..fe392109b112 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -49,6 +49,7 @@ void i915_sched_init(struct i915_sched *se,
 		     const char *name,
 		     unsigned long mask,
 		     unsigned int subclass);
+void i915_sched_select_mode(struct i915_sched *se, enum i915_sched_mode mode);
 void i915_sched_park(struct i915_sched *se);
 void i915_sched_fini(struct i915_sched *se);
 
@@ -73,6 +74,23 @@ void i915_sched_resume_request(struct intel_engine_cs *engine,
 
 void __i915_sched_cancel_queue(struct i915_sched *se);
 
+/*
+ * Control whether the scheduler accepts any more requests. While
+ * disabled all incoming [ready] requests will be dropped and marked
+ * as completed in error (-EIO).
+ * Typically used when the device fails to recover from a GPU hang
+ * and declared wedged.
+ */
+static inline void i915_sched_enable(struct i915_sched *se)
+{
+	set_bit(I915_SCHED_ENABLE_BIT, &se->flags);
+}
+
+static inline void i915_sched_disable(struct i915_sched *se)
+{
+	clear_bit(I915_SCHED_ENABLE_BIT, &se->flags);
+}
+
 void __i915_priolist_free(struct i915_priolist *p);
 static inline void i915_priolist_free(struct i915_priolist *p)
 {
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 84232a07163f..2cb46b2e1ac8 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -17,6 +17,38 @@ struct drm_printer;
 struct i915_request;
 struct intel_context;
 
+enum {
+	I915_SCHED_ENABLE_BIT = 0,
+	I915_SCHED_ACTIVE_BIT, /* can reorder the request flow */
+	I915_SCHED_PRIORITY_BIT, /* priority sorting of queue */
+};
+
+/**
+ * enum i915_sched_mode - how the scheduler may reorder requests
+ *
+ * The foundational principle of scheduling is that it controls the order
+ * of execution of tasks within the system. Given a set of ready requests,
+ * we may choose not to reorder them [FIFO] or submit them in a priority
+ * order. The basic ordering is by user assigned priority [PRIORITY], the
+ * highest priority requests are executed first. A revision of that is
+ * to execute them in order of a virtual deadline, which is a fair, budget
+ * conserving execution order [DEADLINE]. In both PRIORITY and DEADLINE, we
+ * have to consider priority inversion that may arise when higher priority
+ * work depends on low priority work, and so we track the work dependencies
+ * and when we need to execute higher priority work, we raise the priority
+ * of all of its dependencies so that it is not blocked.
+ *
+ * With no active scheduler [NONE], the execution order is fixed by order of
+ * submission, due to ordering via a global resource such as the legacy
+ * shared ringbuffer. Since we cannot reorder the requests at all, we
+ * mark this as an inactive scheduler, and do not track any dependencies.
+ */
+enum i915_sched_mode {
+	I915_SCHED_MODE_NONE = -1, /* inactive, no bubble prevention */
+	I915_SCHED_MODE_FIFO, /* pass-through of ready, first in first out */
+	I915_SCHED_MODE_PRIORITY, /* reorder strictly by priority */
+};
+
 /**
  * struct i915_sched - funnels requests towards hardware
  *
@@ -28,6 +60,7 @@ struct intel_context;
 struct i915_sched {
 	spinlock_t lock; /* protects the scheduling lists and queue */
 
+	unsigned long flags;
 	unsigned long mask; /* available scheduling channels */
 
 	/*
@@ -197,4 +230,14 @@ struct i915_dependency {
 				&(rq__)->sched.signalers_list, \
 				signal_link)
 
+static inline bool i915_sched_is_active(const struct i915_sched *se)
+{
+	return test_bit(I915_SCHED_ACTIVE_BIT, &se->flags);
+}
+
+static inline bool i915_sched_has_priorities(const struct i915_sched *se)
+{
+	return test_bit(I915_SCHED_PRIORITY_BIT, &se->flags);
+}
+
 #endif /* _I915_SCHEDULER_TYPES_H_ */
-- 
2.20.1


* [Intel-gfx] [PATCH 04/31] drm/i915: Move timeslicing flag to scheduler
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 02/31] drm/i915: Move context revocation to scheduler Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 03/31] drm/i915: Introduce the scheduling mode Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 11:43   ` Tvrtko Ursulin
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 05/31] drm/i915/gt: Declare when we enabled timeslicing Chris Wilson
                   ` (30 subsequent siblings)
  33 siblings, 1 reply; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Whether a scheduler chooses to implement timeslicing is up to it, and
not an underlying property of the HW engine. Timeslicing does, however,
depend on the HW supporting preemption.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_engine.h           |  6 ++++++
 drivers/gpu/drm/i915/gt/intel_engine_types.h     | 16 +++-------------
 .../gpu/drm/i915/gt/intel_execlists_submission.c |  8 +++++---
 drivers/gpu/drm/i915/gt/selftest_execlists.c     |  2 +-
 drivers/gpu/drm/i915/i915_scheduler_types.h      | 10 ++++++++++
 5 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 875fde52bcb6..5d3bcbfe8f6e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -280,4 +280,10 @@ intel_engine_flush_scheduler(struct intel_engine_cs *engine)
 	i915_sched_flush(intel_engine_get_scheduler(engine));
 }
 
+static inline bool
+intel_engine_has_timeslices(struct intel_engine_cs *engine)
+{
+	return i915_sched_has_timeslices(intel_engine_get_scheduler(engine));
+}
+
 #endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index ce5732099815..08bddc5263aa 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -444,10 +444,9 @@ struct intel_engine_cs {
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
-#define I915_ENGINE_HAS_TIMESLICES   BIT(4)
-#define I915_ENGINE_IS_VIRTUAL       BIT(5)
-#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
-#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
+#define I915_ENGINE_IS_VIRTUAL       BIT(4)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(5)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(6)
 	unsigned int flags;
 
 	/*
@@ -542,15 +541,6 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
 }
 
-static inline bool
-intel_engine_has_timeslices(const struct intel_engine_cs *engine)
-{
-	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
-		return false;
-
-	return engine->flags & I915_ENGINE_HAS_TIMESLICES;
-}
-
 static inline bool
 intel_engine_is_virtual(const struct intel_engine_cs *engine)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 0f2c3c62cac9..aa1816d28def 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1025,7 +1025,7 @@ static bool needs_timeslice(const struct intel_engine_cs *engine,
 {
 	const struct i915_sched *se = &engine->sched;
 
-	if (!intel_engine_has_timeslices(engine))
+	if (!i915_sched_has_timeslices(se))
 		return false;
 
 	/* If not currently active, or about to switch, wait for next event */
@@ -2896,8 +2896,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
 		if (can_preempt(engine)) {
 			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
-			if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
-				engine->flags |= I915_ENGINE_HAS_TIMESLICES;
 		}
 	}
 
@@ -2961,6 +2959,10 @@ static void init_execlists(struct intel_engine_cs *engine)
 
 	i915_sched_select_mode(&engine->sched, I915_SCHED_MODE_PRIORITY);
 
+	if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION) &&
+	    intel_engine_has_preemption(engine))
+		__set_bit(I915_SCHED_TIMESLICE_BIT, &engine->sched.flags);
+
 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
 	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 04ded3a2d491..be99fbd7cfab 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -3809,7 +3809,7 @@ static unsigned int
 __select_siblings(struct intel_gt *gt,
 		  unsigned int class,
 		  struct intel_engine_cs **siblings,
-		  bool (*filter)(const struct intel_engine_cs *))
+		  bool (*filter)(struct intel_engine_cs *))
 {
 	unsigned int n = 0;
 	unsigned int inst;
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 2cb46b2e1ac8..3c94378def52 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -12,6 +12,7 @@
 #include <linux/workqueue.h>
 
 #include "i915_priolist_types.h"
+#include "i915_utils.h"
 
 struct drm_printer;
 struct i915_request;
@@ -21,6 +22,7 @@ enum {
 	I915_SCHED_ENABLE_BIT = 0,
 	I915_SCHED_ACTIVE_BIT, /* can reorder the request flow */
 	I915_SCHED_PRIORITY_BIT, /* priority sorting of queue */
+	I915_SCHED_TIMESLICE_BIT, /* multitasking for long workloads */
 };
 
 /**
@@ -240,4 +242,12 @@ static inline bool i915_sched_has_priorities(const struct i915_sched *se)
 	return test_bit(I915_SCHED_PRIORITY_BIT, &se->flags);
 }
 
+static inline bool i915_sched_has_timeslices(const struct i915_sched *se)
+{
+	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+		return false;
+
+	return test_bit(I915_SCHED_TIMESLICE_BIT, &se->flags);
+}
+
 #endif /* _I915_SCHEDULER_TYPES_H_ */
-- 
2.20.1


* [Intel-gfx] [PATCH 05/31] drm/i915/gt: Declare when we enabled timeslicing
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (2 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 04/31] drm/i915: Move timeslicing flag to scheduler Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 11:44   ` Tvrtko Ursulin
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 06/31] drm/i915: Move busywaiting control to the scheduler Chris Wilson
                   ` (29 subsequent siblings)
  33 siblings, 1 reply; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Let userspace know whether it can rely on timeslicing by reporting it as
part of I915_PARAM_HAS_SCHEDULER::I915_SCHEDULER_CAP_TIMESLICING.

v2: Only declare timeslicing if we can safely preempt userspace.

Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_user.c | 1 +
 include/uapi/drm/i915_drm.h                 | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 3d3cdc080c32..3fab439ba22b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -102,6 +102,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915)
 #define MAP(x, y) { I915_SCHED_##x, ilog2(I915_SCHEDULER_CAP_##y) }
 		MAP(ACTIVE_BIT, ENABLED),
 		MAP(PRIORITY_BIT, PRIORITY),
+		MAP(TIMESLICE_BIT, TIMESLICING),
 #undef MAP
 	};
 	struct intel_engine_cs *engine;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 1987e2ea79a3..cda0f391d965 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -524,6 +524,7 @@ typedef struct drm_i915_irq_wait {
 #define   I915_SCHEDULER_CAP_PREEMPTION	(1ul << 2)
 #define   I915_SCHEDULER_CAP_SEMAPHORES	(1ul << 3)
 #define   I915_SCHEDULER_CAP_ENGINE_BUSY_STATS	(1ul << 4)
+#define   I915_SCHEDULER_CAP_TIMESLICING	(1ul << 5)
 
 #define I915_PARAM_HUC_STATUS		 42
 
-- 
2.20.1


* [Intel-gfx] [PATCH 06/31] drm/i915: Move busywaiting control to the scheduler
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (3 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 05/31] drm/i915/gt: Declare when we enabled timeslicing Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 07/31] drm/i915: Move preempt-reset flag " Chris Wilson
                   ` (28 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Busy-waiting is used for preempt-to-busy by schedulers, if they so
choose. Since it is not a property of the engine, but of the submission
backend, move the flag out of the engine and into the scheduler
(struct i915_sched).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c      |  4 ++--
 .../drm/i915/gt/intel_execlists_submission.c  |  5 ++++-
 drivers/gpu/drm/i915/gt/selftest_lrc.c        | 19 +++++++++++++------
 drivers/gpu/drm/i915/i915_request.h           |  5 +++++
 drivers/gpu/drm/i915/i915_scheduler_types.h   |  6 ++++++
 5 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index cac80af7ad1c..8791e03ebe61 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -507,7 +507,7 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
 	*cs++ = MI_USER_INTERRUPT;
 
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-	if (intel_engine_has_semaphores(rq->engine))
+	if (i915_request_use_busywait(rq))
 		cs = emit_preempt_busywait(rq, cs);
 
 	rq->tail = intel_ring_offset(rq, cs);
@@ -599,7 +599,7 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
 	*cs++ = MI_USER_INTERRUPT;
 
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-	if (intel_engine_has_semaphores(rq->engine))
+	if (i915_request_use_busywait(rq))
 		cs = gen12_emit_preempt_busywait(rq, cs);
 
 	rq->tail = intel_ring_offset(rq, cs);
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index aa1816d28def..0a93386ad15f 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -306,7 +306,7 @@ static bool need_preempt(const struct intel_engine_cs *engine,
 	const struct i915_sched *se = &engine->sched;
 	int last_prio;
 
-	if (!intel_engine_has_semaphores(engine))
+	if (!i915_sched_use_busywait(se))
 		return false;
 
 	/*
@@ -2963,6 +2963,9 @@ static void init_execlists(struct intel_engine_cs *engine)
 	    intel_engine_has_preemption(engine))
 		__set_bit(I915_SCHED_TIMESLICE_BIT, &engine->sched.flags);
 
+	if (intel_engine_has_preemption(engine))
+		__set_bit(I915_SCHED_BUSYWAIT_BIT, &engine->sched.flags);
+
 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
 	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 279091e41b41..6d73add47109 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -679,9 +679,11 @@ static int live_lrc_gpr(void *arg)
 		if (err)
 			goto err;
 
-		err = __live_lrc_gpr(engine, scratch, true);
-		if (err)
-			goto err;
+		if (intel_engine_has_preemption(engine)) {
+			err = __live_lrc_gpr(engine, scratch, true);
+			if (err)
+				goto err;
+		}
 
 err:
 		st_engine_heartbeat_enable(engine);
@@ -859,9 +861,11 @@ static int live_lrc_timestamp(void *arg)
 			if (err)
 				break;
 
-			err = __lrc_timestamp(&data, true);
-			if (err)
-				break;
+			if (intel_engine_has_preemption(data.engine)) {
+				err = __lrc_timestamp(&data, true);
+				if (err)
+					break;
+			}
 		}
 
 err:
@@ -1508,6 +1512,9 @@ static int live_lrc_isolation(void *arg)
 		    skip_isolation(engine))
 			continue;
 
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
 		intel_engine_pm_get(engine);
 		for (i = 0; i < ARRAY_SIZE(poison); i++) {
 			int result;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index dde868e9ee5f..843e6a873148 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -651,4 +651,9 @@ static inline bool i915_request_use_semaphores(const struct i915_request *rq)
 	return intel_engine_has_semaphores(rq->engine);
 }
 
+static inline bool i915_request_use_busywait(const struct i915_request *rq)
+{
+	return i915_sched_use_busywait(i915_request_get_scheduler(rq));
+}
+
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 3c94378def52..3aaf5b40b801 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -23,6 +23,7 @@ enum {
 	I915_SCHED_ACTIVE_BIT, /* can reorder the request flow */
 	I915_SCHED_PRIORITY_BIT, /* priority sorting of queue */
 	I915_SCHED_TIMESLICE_BIT, /* multitasking for long workloads */
+	I915_SCHED_BUSYWAIT_BIT, /* preempt-to-busy */
 };
 
 /**
@@ -250,4 +251,9 @@ static inline bool i915_sched_has_timeslices(const struct i915_sched *se)
 	return test_bit(I915_SCHED_TIMESLICE_BIT, &se->flags);
 }
 
+static inline bool i915_sched_use_busywait(const struct i915_sched *se)
+{
+	return test_bit(I915_SCHED_BUSYWAIT_BIT, &se->flags);
+}
+
 #endif /* _I915_SCHEDULER_TYPES_H_ */
-- 
2.20.1


* [Intel-gfx] [PATCH 07/31] drm/i915: Move preempt-reset flag to the scheduler
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (4 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 06/31] drm/i915: Move busywaiting control to the scheduler Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 08/31] drm/i915: Fix the iterative dfs for deferring requests Chris Wilson
                   ` (27 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

While the HW may support preemption, whether or not the scheduler
enforces preemption by forcibly resetting the current context is
ultimately up to the scheduler.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h               | 7 ++-----
 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c     | 4 ++--
 drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 4 +++-
 drivers/gpu/drm/i915/i915_scheduler_types.h          | 9 +++++++++
 4 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 5d3bcbfe8f6e..e4f390bba009 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -244,12 +244,9 @@ static inline bool intel_engine_uses_guc(const struct intel_engine_cs *engine)
 }
 
 static inline bool
-intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
+intel_engine_has_preempt_reset(struct intel_engine_cs *engine)
 {
-	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
-		return false;
-
-	return intel_engine_has_preemption(engine);
+	return i915_sched_has_preempt_reset(intel_engine_get_scheduler(engine));
 }
 
 static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 209a477af412..5ed263f36f93 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -22,11 +22,11 @@
 
 #define HEARTBEAT_COMPLETION 50u /* milliseconds */
 
-static long completion_timeout(const struct intel_engine_cs *engine)
+static long completion_timeout(struct intel_engine_cs *engine)
 {
 	long timeout = HEARTBEAT_COMPLETION;
 
-	if (intel_engine_has_preempt_reset(engine))
+	if (i915_sched_has_preempt_reset(intel_engine_get_scheduler(engine)))
 		timeout += READ_ONCE(engine->props.preempt_timeout_ms);
 
 	return msecs_to_jiffies(timeout);
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 0a93386ad15f..78fda9b4f626 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2963,8 +2963,10 @@ static void init_execlists(struct intel_engine_cs *engine)
 	    intel_engine_has_preemption(engine))
 		__set_bit(I915_SCHED_TIMESLICE_BIT, &engine->sched.flags);
 
-	if (intel_engine_has_preemption(engine))
+	if (intel_engine_has_preemption(engine)) {
 		__set_bit(I915_SCHED_BUSYWAIT_BIT, &engine->sched.flags);
+		__set_bit(I915_SCHED_PREEMPT_RESET_BIT, &engine->sched.flags);
+	}
 
 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
 	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 3aaf5b40b801..5ca2dc1b4fb5 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -23,6 +23,7 @@ enum {
 	I915_SCHED_ACTIVE_BIT, /* can reorder the request flow */
 	I915_SCHED_PRIORITY_BIT, /* priority sorting of queue */
 	I915_SCHED_TIMESLICE_BIT, /* multitasking for long workloads */
+	I915_SCHED_PREEMPT_RESET_BIT, /* reset if preemption times out */
 	I915_SCHED_BUSYWAIT_BIT, /* preempt-to-busy */
 };
 
@@ -256,4 +257,12 @@ static inline bool i915_sched_use_busywait(const struct i915_sched *se)
 	return test_bit(I915_SCHED_BUSYWAIT_BIT, &se->flags);
 }
 
+static inline bool i915_sched_has_preempt_reset(const struct i915_sched *se)
+{
+	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+		return false;
+
+	return test_bit(I915_SCHED_PREEMPT_RESET_BIT, &se->flags);
+}
+
 #endif /* _I915_SCHEDULER_TYPES_H_ */
-- 
2.20.1

* [Intel-gfx] [PATCH 08/31] drm/i915: Fix the iterative dfs for deferring requests
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (5 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 07/31] drm/i915: Move preempt-reset flag " Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 09/31] drm/i915: Replace priolist rbtree with a skiplist Chris Wilson
                   ` (26 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

The current implementation of walking the children of a deferred
request lacks the backtracking required to reduce the dfs to linear.
Having pulled it from execlists into the common layer, we can reuse the
dfs code for priority inheritance.
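
For reference, a self-contained userspace sketch of the idea (not part of
this patch; all names below are invented for illustration): each node keeps
a parent link plus a cursor into its child list, so descending consumes each
edge exactly once and backtracking resumes where the branch left off,
keeping the walk linear without recursion.

  #include <stdbool.h>
  #include <stdio.h>

  struct node {
      struct node *first_child;
      struct node *next_sibling;
      struct node *dfs_parent;   /* how to backtrack up this branch */
      struct node *dfs_cursor;   /* next child to examine on this branch */
      bool visited;
      int id;
  };

  static void iterative_dfs(struct node *root)
  {
      struct node *pos = root;

      root->dfs_parent = NULL;
      root->dfs_cursor = root->first_child;
      root->visited = true;

      while (pos) {
          struct node *child = pos->dfs_cursor;

          if (child) {
              /* Advance our cursor before descending, so backtracking
               * resumes exactly where this branch left off.
               */
              pos->dfs_cursor = child->next_sibling;
              if (!child->visited) {
                  child->visited = true;
                  child->dfs_parent = pos;
                  child->dfs_cursor = child->first_child;
                  pos = child;
              }
              continue;
          }

          /* Branch exhausted: emit the node, then backtrack. */
          printf("visit %d\n", pos->id);
          pos = pos->dfs_parent;
      }
  }

  int main(void)
  {
      struct node c = { .id = 2 };
      struct node b = { .id = 1, .next_sibling = &c };
      struct node a = { .id = 0, .first_child = &b };

      iterative_dfs(&a); /* prints "visit 1", "visit 2", "visit 0" */
      return 0;
  }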

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_scheduler.c | 56 +++++++++++++++++++--------
 1 file changed, 40 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index a42d8b5bf1f9..312e1538d001 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -565,9 +565,11 @@ void i915_request_set_priority(struct i915_request *rq, int prio)
 void __i915_sched_defer_request(struct intel_engine_cs *engine,
 				struct i915_request *rq)
 {
+	struct list_head *pos = &rq->sched.waiters_list;
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct list_head *pl;
-	LIST_HEAD(list);
+	const int prio = rq_prio(rq);
+	struct i915_request *rn;
+	LIST_HEAD(dfs);
 
 	SCHED_TRACE(se, "defer request " RQ_FMT "\n", RQ_ARG(rq));
 
@@ -579,14 +581,11 @@ void __i915_sched_defer_request(struct intel_engine_cs *engine,
 	 * to those that are waiting upon it. So we traverse its chain of
 	 * waiters and move any that are earlier than the request to after it.
 	 */
-	pl = lookup_priolist(se, rq_prio(rq));
+	rq->sched.dfs.prev = NULL;
 	do {
-		struct i915_dependency *p;
-
-		GEM_BUG_ON(i915_request_is_active(rq));
-		list_move_tail(&rq->sched.link, pl);
-
-		for_each_waiter(p, rq) {
+		list_for_each_continue(pos, &rq->sched.waiters_list) {
+			struct i915_dependency *p =
+				list_entry(pos, typeof(*p), wait_link);
 			struct i915_request *w =
 				container_of(p->waiter, typeof(*w), sched);
 
@@ -602,19 +601,44 @@ void __i915_sched_defer_request(struct intel_engine_cs *engine,
 				   __i915_request_has_started(w) &&
 				   !__i915_request_is_complete(rq));
 
-			if (!i915_request_is_ready(w))
+			if (!i915_request_in_priority_queue(w))
 				continue;
 
-			if (rq_prio(w) < rq_prio(rq))
+			/*
+			 * We also need to reorder within the same priority.
+			 *
+			 * This is unlike priority-inheritance, where if the
+			 * signaler already has a higher priority [earlier
+			 * deadline] than us, we can ignore as it will be
+			 * scheduled first. If a waiter already has the
+			 * same priority, we still have to push it to the end
+			 * of the list. This unfortunately means we cannot
+			 * use the rq_deadline() itself as a 'visited' bit.
+			 */
+			if (rq_prio(w) < prio)
 				continue;
 
-			GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
-			GEM_BUG_ON(i915_request_is_active(w));
-			list_move_tail(&w->sched.link, &list);
+			GEM_BUG_ON(rq_prio(w) != prio);
+
+			/* Remember our position along this branch */
+			rq = stack_push(w, rq, pos);
+			pos = &rq->sched.waiters_list;
 		}
 
-		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
-	} while (rq);
+		/* Note list is reversed for waiters wrt signal hierarchy */
+		GEM_BUG_ON(rq->engine != engine);
+		GEM_BUG_ON(!i915_request_in_priority_queue(rq));
+		list_move(&rq->sched.link, &dfs);
+
+		/* Track our visit, and prevent duplicate processing */
+		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+	} while ((rq = stack_pop(rq, &pos)));
+
+	pos = lookup_priolist(se, prio);
+	list_for_each_entry_safe(rq, rn, &dfs, sched.link) {
+		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+		list_add_tail(&rq->sched.link, pos);
+	}
 }
 
 static void queue_request(struct i915_sched *se, struct i915_request *rq)
-- 
2.20.1

* [Intel-gfx] [PATCH 09/31] drm/i915: Replace priolist rbtree with a skiplist
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (6 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 08/31] drm/i915: Fix the iterative dfs for deferring requests Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 12:29   ` Tvrtko Ursulin
  2021-02-08 15:23   ` Tvrtko Ursulin
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 10/31] drm/i915: Fair low-latency scheduling Chris Wilson
                   ` (25 subsequent siblings)
  33 siblings, 2 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Replace the priolist rbtree with a skiplist. The crucial difference is
that walking and removing the first element of a skiplist is O(1), but
O(lgN) for an rbtree, as we need to rebalance on remove. This is a
hindrance for submission latency as it occurs between picking a request
from the priolist and submitting it to hardware, as well as effectively
tripling the number of O(lgN) operations required under the irqoff lock.
This is critical to reducing the latency jitter with multiple clients.

The downsides to skiplists are that lookup/insertion is only
probabilistically O(lgN), and there is a significant memory penalty,
as each skip node is larger than the rbtree equivalent. Furthermore, we
don't use dynamic arrays for the skiplist, so the allocation is fixed,
and imposes an upper bound on the scalability wrt the number of
inflight requests.

In the following patches, we introduce a new sort key to the scheduler,
a virtual deadline. This imposes a different structure to the tree.
Using a priority sort, we have very few priority levels active at any
time, most likely just the default priority, and so the rbtree degenerates
to a single element containing the list of all ready requests. The
deadlines in contrast are very sparse, and typically each request has a
unique deadline. Instead of being able to simply walk the list during
dequeue, with the deadline scheduler we have to iterate through the bst
on the critical submission path. Skiplists are vastly superior in this
instance due to the O(1) iteration during dequeue, with very similar
characteristics [on average] to the rbtree for insertion.

This means that by using skiplists we can introduce a sparse sort key
without degrading latency on the critical submission path.
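
As a rough, self-contained illustration of why dequeue becomes O(1) (a toy
userspace skiplist keyed in ascending order; none of these names match the
driver structures): the smallest element is always sentinel.next[0], and
unlinking it only rewrites the sentinel's forward pointers with no
rebalancing, while insertion remains an expected-O(lgN) descent through
the levels.

  #include <stdio.h>
  #include <stdlib.h>

  #define HEIGHT 8

  struct sl_node {
      int key;
      int level;                 /* highest index used in next[] */
      struct sl_node *next[HEIGHT];
  };

  struct sl_list {
      struct sl_node sentinel;   /* next[i] == &sentinel means empty at level i */
  };

  static void sl_init(struct sl_list *sl)
  {
      for (int i = 0; i < HEIGHT; i++)
          sl->sentinel.next[i] = &sl->sentinel;
      sl->sentinel.level = 0;
  }

  static int random_level(void)
  {
      int lvl = 0;

      while (lvl < HEIGHT - 1 && (rand() & 1)) /* P(level >= n) ~= 0.5^n */
          lvl++;
      return lvl;
  }

  static void sl_insert(struct sl_list *sl, struct sl_node *node)
  {
      struct sl_node *update[HEIGHT], *cur = &sl->sentinel;

      node->level = random_level();
      if (node->level > sl->sentinel.level)
          sl->sentinel.level = node->level;

      /* Expected O(lgN): binary-search-like descent through the levels. */
      for (int lvl = sl->sentinel.level; lvl >= 0; lvl--) {
          while (cur->next[lvl] != &sl->sentinel &&
                 cur->next[lvl]->key < node->key)
              cur = cur->next[lvl];
          update[lvl] = cur;
      }

      for (int lvl = 0; lvl <= node->level; lvl++) {
          node->next[lvl] = update[lvl]->next[lvl];
          update[lvl]->next[lvl] = node;
      }
  }

  /* O(1): the smallest key is always sentinel.next[0]; unlinking it only
   * touches the sentinel's forward pointers, no rebalancing.
   */
  static struct sl_node *sl_pop_first(struct sl_list *sl)
  {
      struct sl_node *first = sl->sentinel.next[0];

      if (first == &sl->sentinel)
          return NULL;

      for (int lvl = 0; lvl <= first->level; lvl++)
          sl->sentinel.next[lvl] = first->next[lvl];
      return first;
  }

  int main(void)
  {
      struct sl_node n[4] = { { .key = 30 }, { .key = 10 }, { .key = 40 }, { .key = 20 } };
      struct sl_node *it;
      struct sl_list sl;

      sl_init(&sl);
      for (int i = 0; i < 4; i++)
          sl_insert(&sl, &n[i]);

      while ((it = sl_pop_first(&sl))) /* prints 10, 20, 30, 40 */
          printf("%d\n", it->key);
      return 0;
  }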

As an example, one simple case where we try to do lots of
semi-independent work without any priority management (gem_exec_parallel),
the lock hold times were:
[worst]        [total]    [avg]
 973.05     6301584.84     0.35 # plain rbtree
 559.82     5424915.25     0.33 # best rbtree with pruning
 208.21     3898784.09     0.24 # skiplist
  34.05     5784106.01     0.32 # rbtree without deadlines
  23.35     4152999.80     0.24 # skiplist without deadlines

Based on the skiplist implementation by Dr Con Kolivas for MuQSS.

References: https://en.wikipedia.org/wiki/Skip_list
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../drm/i915/gt/intel_execlists_submission.c  | 168 +++++-----
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  41 +--
 drivers/gpu/drm/i915/i915_priolist_types.h    |  64 +++-
 drivers/gpu/drm/i915/i915_scheduler.c         | 304 +++++++++++++-----
 drivers/gpu/drm/i915/i915_scheduler.h         |  16 +-
 drivers/gpu/drm/i915/i915_scheduler_types.h   |   2 +-
 .../drm/i915/selftests/i915_mock_selftests.h  |   1 +
 .../gpu/drm/i915/selftests/i915_scheduler.c   |  53 ++-
 8 files changed, 454 insertions(+), 195 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 78fda9b4f626..4a0258347c10 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -254,11 +254,6 @@ static void ring_set_paused(const struct intel_engine_cs *engine, int state)
 		wmb();
 }
 
-static struct i915_priolist *to_priolist(struct rb_node *rb)
-{
-	return rb_entry(rb, struct i915_priolist, node);
-}
-
 static int rq_prio(const struct i915_request *rq)
 {
 	return READ_ONCE(rq->sched.attr.priority);
@@ -282,15 +277,27 @@ static int effective_prio(const struct i915_request *rq)
 	return prio;
 }
 
+static struct i915_request *first_request(const struct i915_sched *se)
+{
+	struct i915_priolist *pl = se->queue.sentinel.next[0];
+
+	if (pl == &se->queue.sentinel)
+		return NULL;
+
+	return list_first_entry_or_null(&pl->requests,
+					struct i915_request,
+					sched.link);
+}
+
 static int queue_prio(const struct i915_sched *se)
 {
-	struct rb_node *rb;
+	struct i915_request *rq;
 
-	rb = rb_first_cached(&se->queue);
-	if (!rb)
+	rq = first_request(se);
+	if (!rq)
 		return INT_MIN;
 
-	return to_priolist(rb)->priority;
+	return rq_prio(rq);
 }
 
 static int virtual_prio(const struct intel_engine_execlists *el)
@@ -300,7 +307,7 @@ static int virtual_prio(const struct intel_engine_execlists *el)
 	return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
 }
 
-static bool need_preempt(const struct intel_engine_cs *engine,
+static bool need_preempt(struct intel_engine_cs *engine,
 			 const struct i915_request *rq)
 {
 	const struct i915_sched *se = &engine->sched;
@@ -1144,7 +1151,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	struct i915_request **port = execlists->pending;
 	struct i915_request ** const last_port = port + execlists->port_mask;
 	struct i915_request *last, * const *active;
+	struct i915_request *rq, *rn;
 	struct virtual_engine *ve;
+	struct i915_priolist *pl;
 	struct rb_node *rb;
 	bool submit = false;
 
@@ -1355,87 +1364,79 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			break;
 	}
 
-	while ((rb = rb_first_cached(&se->queue))) {
-		struct i915_priolist *p = to_priolist(rb);
-		struct i915_request *rq, *rn;
+	i915_sched_dequeue(se, pl, rq, rn) {
+		bool merge = true;
 
-		priolist_for_each_request_consume(rq, rn, p) {
-			bool merge = true;
+		/*
+		 * Can we combine this request with the current port?
+		 * It has to be the same context/ringbuffer and not
+		 * have any exceptions (e.g. GVT saying never to
+		 * combine contexts).
+		 *
+		 * If we can combine the requests, we can execute both
+		 * by updating the RING_TAIL to point to the end of the
+		 * second request, and so we never need to tell the
+		 * hardware about the first.
+		 */
+		if (last && !can_merge_rq(last, rq)) {
+			/*
+			 * If we are on the second port and cannot
+			 * combine this request with the last, then we
+			 * are done.
+			 */
+			if (port == last_port)
+				goto done;
 
 			/*
-			 * Can we combine this request with the current port?
-			 * It has to be the same context/ringbuffer and not
-			 * have any exceptions (e.g. GVT saying never to
-			 * combine contexts).
-			 *
-			 * If we can combine the requests, we can execute both
-			 * by updating the RING_TAIL to point to the end of the
-			 * second request, and so we never need to tell the
-			 * hardware about the first.
+			 * We must not populate both ELSP[] with the
+			 * same LRCA, i.e. we must submit 2 different
+			 * contexts if we submit 2 ELSP.
 			 */
-			if (last && !can_merge_rq(last, rq)) {
-				/*
-				 * If we are on the second port and cannot
-				 * combine this request with the last, then we
-				 * are done.
-				 */
-				if (port == last_port)
-					goto done;
+			if (last->context == rq->context)
+				goto done;
 
-				/*
-				 * We must not populate both ELSP[] with the
-				 * same LRCA, i.e. we must submit 2 different
-				 * contexts if we submit 2 ELSP.
-				 */
-				if (last->context == rq->context)
-					goto done;
+			if (i915_request_has_sentinel(last))
+				goto done;
 
-				if (i915_request_has_sentinel(last))
-					goto done;
+			/*
+			 * We avoid submitting virtual requests into
+			 * the secondary ports so that we can migrate
+			 * the request immediately to another engine
+			 * rather than wait for the primary request.
+			 */
+			if (rq->execution_mask != engine->mask)
+				goto done;
 
-				/*
-				 * We avoid submitting virtual requests into
-				 * the secondary ports so that we can migrate
-				 * the request immediately to another engine
-				 * rather than wait for the primary request.
-				 */
-				if (rq->execution_mask != engine->mask)
-					goto done;
+			/*
+			 * If GVT overrides us we only ever submit
+			 * port[0], leaving port[1] empty. Note that we
+			 * also have to be careful that we don't queue
+			 * the same context (even though a different
+			 * request) to the second port.
+			 */
+			if (ctx_single_port_submission(last->context) ||
+			    ctx_single_port_submission(rq->context))
+				goto done;
 
-				/*
-				 * If GVT overrides us we only ever submit
-				 * port[0], leaving port[1] empty. Note that we
-				 * also have to be careful that we don't queue
-				 * the same context (even though a different
-				 * request) to the second port.
-				 */
-				if (ctx_single_port_submission(last->context) ||
-				    ctx_single_port_submission(rq->context))
-					goto done;
-
-				merge = false;
-			}
-
-			if (__i915_request_submit(rq)) {
-				if (!merge) {
-					*port++ = i915_request_get(last);
-					last = NULL;
-				}
-
-				GEM_BUG_ON(last &&
-					   !can_merge_ctx(last->context,
-							  rq->context));
-				GEM_BUG_ON(last &&
-					   i915_seqno_passed(last->fence.seqno,
-							     rq->fence.seqno));
-
-				submit = true;
-				last = rq;
-			}
+			merge = false;
 		}
 
-		rb_erase_cached(&p->node, &se->queue);
-		i915_priolist_free(p);
+		if (__i915_request_submit(rq)) {
+			if (!merge) {
+				*port++ = i915_request_get(last);
+				last = NULL;
+			}
+
+			GEM_BUG_ON(last &&
+				   !can_merge_ctx(last->context,
+						  rq->context));
+			GEM_BUG_ON(last &&
+				   i915_seqno_passed(last->fence.seqno,
+						     rq->fence.seqno));
+
+			submit = true;
+			last = rq;
+		}
 	}
 done:
 	*port++ = i915_request_get(last);
@@ -1456,7 +1457,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * request triggering preemption on the next dequeue (or subsequent
 	 * interrupt for secondary ports).
 	 */
-	execlists->queue_priority_hint = queue_prio(se);
+	execlists->queue_priority_hint = pl->priority;
 	spin_unlock(&se->lock);
 
 	/*
@@ -2716,7 +2717,6 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
 	}
 
 	execlists->queue_priority_hint = INT_MIN;
-	se->queue = RB_ROOT_CACHED;
 
 	GEM_BUG_ON(__tasklet_is_enabled(&se->tasklet));
 	se->tasklet.callback = nop_submission_tasklet;
@@ -3173,6 +3173,8 @@ static void virtual_context_exit(struct intel_context *ce)
 
 	for (n = 0; n < ve->num_siblings; n++)
 		intel_engine_pm_put(ve->siblings[n]);
+
+	i915_sched_park(intel_engine_get_scheduler(&ve->base));
 }
 
 static const struct intel_context_ops virtual_context_ops = {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index d14b9db77df8..c16393df42a0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -60,11 +60,6 @@
 
 #define GUC_REQUEST_SIZE 64 /* bytes */
 
-static inline struct i915_priolist *to_priolist(struct rb_node *rb)
-{
-	return rb_entry(rb, struct i915_priolist, node);
-}
-
 static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id)
 {
 	struct guc_stage_desc *base = guc->stage_desc_pool_vaddr;
@@ -186,9 +181,10 @@ static void __guc_dequeue(struct intel_engine_cs *engine)
 	struct i915_request **first = execlists->inflight;
 	struct i915_request ** const last_port = first + execlists->port_mask;
 	struct i915_request *last = first[0];
+	struct i915_request *rq, *rn;
 	struct i915_request **port;
+	struct i915_priolist *pl;
 	bool submit = false;
-	struct rb_node *rb;
 
 	lockdep_assert_held(&se->lock);
 
@@ -205,32 +201,22 @@ static void __guc_dequeue(struct intel_engine_cs *engine)
 	 * event.
 	 */
 	port = first;
-	while ((rb = rb_first_cached(&se->queue))) {
-		struct i915_priolist *p = to_priolist(rb);
-		struct i915_request *rq, *rn;
+	i915_sched_dequeue(se, pl, rq, rn) {
+		if (last && rq->context != last->context) {
+			if (port == last_port)
+				goto done;
 
-		priolist_for_each_request_consume(rq, rn, p) {
-			if (last && rq->context != last->context) {
-				if (port == last_port)
-					goto done;
-
-				*port = schedule_in(last,
-						    port - execlists->inflight);
-				port++;
-			}
-
-			list_del_init(&rq->sched.link);
-			__i915_request_submit(rq);
-			submit = true;
-			last = rq;
+			*port = schedule_in(last, port - execlists->inflight);
+			port++;
 		}
 
-		rb_erase_cached(&p->node, &se->queue);
-		i915_priolist_free(p);
+		list_del_init(&rq->sched.link);
+		__i915_request_submit(rq);
+		submit = true;
+		last = rq;
 	}
 done:
-	execlists->queue_priority_hint =
-		rb ? to_priolist(rb)->priority : INT_MIN;
+	execlists->queue_priority_hint = pl->priority;
 	if (submit) {
 		*port = schedule_in(last, port - execlists->inflight);
 		*++port = NULL;
@@ -361,7 +347,6 @@ static void guc_reset_cancel(struct intel_engine_cs *engine)
 	__i915_sched_cancel_queue(se);
 
 	execlists->queue_priority_hint = INT_MIN;
-	se->queue = RB_ROOT_CACHED;
 
 	spin_unlock_irqrestore(&se->lock, flags);
 	intel_engine_signal_breadcrumbs(engine);
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index bc2fa84f98a8..ee7482b9c813 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -38,10 +38,72 @@ enum {
 #define I915_PRIORITY_UNPREEMPTABLE INT_MAX
 #define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1)
 
+/*
+ * The slab returns power-of-two chunks of memory, so fill out the
+ * node to the next cacheline.
+ *
+ * We can estimate how many requests the skiplist will scale to based
+ * on its height:
+ *   11 =>  4 million requests
+ *   12 => 16 million requests
+ */
+#ifdef CONFIG_64BIT
+#define I915_PRIOLIST_HEIGHT 12
+#else
+#define I915_PRIOLIST_HEIGHT 11
+#endif
+
+/*
+ * i915_priolist forms a skiplist. The skiplist is built in layers,
+ * starting at the base [0] is a singly linked list of all i915_priolist.
+ * Each higher layer contains a fraction of the i915_priolist from the
+ * previous layer:
+ *
+ * S[0] 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF S
+ * E[1] >1>3>5>7>9>B>D>F>1>3>5>7>9>B>D>F>1>3>5>7>9>B>D>F>1>3>5>7>9>B>D>F E
+ * N[2] -->3-->7-->B-->F-->3-->7-->B-->F-->3-->7-->B-->F-->3-->7-->B-->F N
+ * T[3] ------>7------>F-------7------>F------>7------>F------>7------>F T
+ * I[4] -------------->F-------------->F-------------->F-------------->F I
+ * N[5] ------------------------------>F------------------------------>F N
+ * E[6] ------------------------------>F-------------------------------> E
+ * L[7] ---------------------------------------------------------------> L
+ *
+ * To iterate through all active i915_priolist, we only need to follow
+ * the chain in i915_priolist.next[0] (see for_each_priolist()).
+ *
+ * To quickly find a specific key (or insert point), we can perform a binary
+ * search by starting at the highest level and following the linked list
+ * at that level until we either find the node, or have gone passed the key.
+ * Then we descend a level, and start walking the list again starting from
+ * the current position, until eventually we find our key, or we run out of
+ * levels.
+ *
+ * https://en.wikipedia.org/wiki/Skip_list
+ */
 struct i915_priolist {
 	struct list_head requests;
-	struct rb_node node;
 	int priority;
+
+	int level;
+	struct i915_priolist *next[I915_PRIOLIST_HEIGHT];
 };
 
+struct i915_priolist_root {
+	struct i915_priolist sentinel;
+	u32 prng;
+};
+
+#define i915_priolist_is_empty(root) ((root)->sentinel.level < 0)
+
+#define for_each_priolist(p, root) \
+	for ((p) = (root)->sentinel.next[0]; \
+	     (p) != &(root)->sentinel; \
+	     (p) = (p)->next[0])
+
+#define priolist_for_each_request(it, plist) \
+	list_for_each_entry(it, &(plist)->requests, sched.link)
+
+#define priolist_for_each_request_safe(it, n, plist) \
+	list_for_each_entry_safe(it, n, &(plist)->requests, sched.link)
+
 #endif /* _I915_PRIOLIST_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 312e1538d001..518eac67959e 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -4,7 +4,9 @@
  * Copyright © 2018 Intel Corporation
  */
 
+#include <linux/bitops.h>
 #include <linux/mutex.h>
+#include <linux/prandom.h>
 
 #include "gt/intel_ring.h"
 #include "gt/intel_lrc_reg.h"
@@ -168,6 +170,16 @@ void i915_sched_select_mode(struct i915_sched *se, enum i915_sched_mode mode)
 	}
 }
 
+static void init_priolist(struct i915_priolist_root *const root)
+{
+	struct i915_priolist *pl = &root->sentinel;
+
+	memset_p((void **)pl->next, pl, ARRAY_SIZE(pl->next));
+	pl->requests.prev = NULL;
+	pl->priority = INT_MIN;
+	pl->level = -1;
+}
+
 void i915_sched_init(struct i915_sched *se,
 		     struct device *dev,
 		     const char *name,
@@ -183,9 +195,9 @@ void i915_sched_init(struct i915_sched *se,
 
 	se->mask = mask;
 
+	init_priolist(&se->queue);
 	INIT_LIST_HEAD(&se->requests);
 	INIT_LIST_HEAD(&se->hold);
-	se->queue = RB_ROOT_CACHED;
 
 	init_ipi(&se->ipi);
 
@@ -194,8 +206,60 @@ void i915_sched_init(struct i915_sched *se,
 	se->revoke_context = i915_sched_default_revoke_context;
 }
 
+__maybe_unused static bool priolist_idle(struct i915_priolist_root *root)
+{
+	struct i915_priolist *pl = &root->sentinel;
+	int lvl;
+
+	for (lvl = 0; lvl < ARRAY_SIZE(pl->next); lvl++) {
+		if (pl->next[lvl] != pl) {
+			GEM_TRACE_ERR("root[%d] is not empty\n", lvl);
+			return false;
+		}
+	}
+
+	if (pl->level != -1) {
+		GEM_TRACE_ERR("root is not clear: %d\n", pl->level);
+		return false;
+	}
+
+	return true;
+}
+
+static bool pl_empty(struct list_head *st)
+{
+	return !st->prev;
+}
+
+static void pl_push(struct i915_priolist *pl, struct list_head *st)
+{
+	/* Keep list_empty(&pl->requests) valid for concurrent readers */
+	pl->requests.prev = st->prev;
+	st->prev = &pl->requests;
+	GEM_BUG_ON(pl_empty(st));
+}
+
+static struct i915_priolist *pl_pop(struct list_head *st)
+{
+	struct i915_priolist *pl;
+
+	GEM_BUG_ON(pl_empty(st));
+	pl = container_of(st->prev, typeof(*pl), requests);
+	st->prev = pl->requests.prev;
+
+	return pl;
+}
+
 void i915_sched_park(struct i915_sched *se)
 {
+	struct i915_priolist_root *root = &se->queue;
+	struct list_head *list = &root->sentinel.requests;
+
+	GEM_BUG_ON(!priolist_idle(root));
+
+	while (!pl_empty(list))
+		kmem_cache_free(global.slab_priorities, pl_pop(list));
+
 	GEM_BUG_ON(!i915_sched_is_idle(se));
 	se->no_priolist = false;
 }
@@ -251,70 +315,71 @@ static inline bool node_signaled(const struct i915_sched_node *node)
 	return i915_request_completed(node_to_request(node));
 }
 
-static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+static inline unsigned int random_level(struct i915_priolist_root *root)
 {
-	return rb_entry(rb, struct i915_priolist, node);
-}
-
-static void assert_priolists(struct i915_sched * const se)
-{
-	struct rb_node *rb;
-	long last_prio;
-
-	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
-		return;
-
-	GEM_BUG_ON(rb_first_cached(&se->queue) !=
-		   rb_first(&se->queue.rb_root));
-
-	last_prio = INT_MAX;
-	for (rb = rb_first_cached(&se->queue); rb; rb = rb_next(rb)) {
-		const struct i915_priolist *p = to_priolist(rb);
-
-		GEM_BUG_ON(p->priority > last_prio);
-		last_prio = p->priority;
-	}
+	/*
+	 * Given a uniform distribution of random numbers over the u32, then
+	 * the probability each bit being unset is P=0.5. The probability of a
+	 * successive sequence of bits being unset is P(n) = 0.5^n [n > 0].
+	 *   P(level:1) = 0.5
+	 *   P(level:2) = 0.25
+	 *   P(level:3) = 0.125
+	 *   P(level:4) = 0.0625
+	 *   ...
+	 * So we can use ffs() on a good random number generator to pick our
+	 * level. We divide by two to reduce the probability of choosing a
+	 * level to .25, as the cost of descending a level is the same as
+	 * following an extra link in the chain at that level (so we can
+	 * pack more nodes into fewer levels without incurring extra cost,
+	 * and allow scaling to higher volumes of requests without expanding
+	 * the height of the skiplist).
+	 */
+	root->prng = next_pseudo_random32(root->prng);
+	return  __ffs(root->prng) / 2;
 }
 
 static struct list_head *
 lookup_priolist(struct i915_sched *se, int prio)
 {
-	struct i915_priolist *p;
-	struct rb_node **parent, *rb;
-	bool first = true;
+	struct i915_priolist *update[I915_PRIOLIST_HEIGHT];
+	struct i915_priolist_root *const root = &se->queue;
+	struct i915_priolist *pl, *tmp;
+	int lvl;
 
 	lockdep_assert_held(&se->lock);
-	assert_priolists(se);
-
 	if (unlikely(se->no_priolist))
 		prio = I915_PRIORITY_NORMAL;
 
+	for_each_priolist(pl, root) { /* recycle any empty elements before us */
+		if (pl->priority <= prio || !list_empty(&pl->requests))
+			break;
+
+		__i915_sched_dequeue_next(se);
+	}
+
 find_priolist:
-	/* most positive priority is scheduled first, equal priorities fifo */
-	rb = NULL;
-	parent = &se->queue.rb_root.rb_node;
-	while (*parent) {
-		rb = *parent;
-		p = to_priolist(rb);
-		if (prio > p->priority) {
-			parent = &rb->rb_left;
-		} else if (prio < p->priority) {
-			parent = &rb->rb_right;
-			first = false;
-		} else {
-			return &p->requests;
-		}
+	pl = &root->sentinel;
+	lvl = pl->level;
+	while (lvl >= 0) {
+		while (tmp = pl->next[lvl], tmp->priority >= prio)
+			pl = tmp;
+		if (pl->priority == prio)
+			goto out;
+		update[lvl--] = pl;
 	}
 
 	if (prio == I915_PRIORITY_NORMAL) {
-		p = &se->default_priolist;
+		pl = &se->default_priolist;
+	} else if (!pl_empty(&root->sentinel.requests)) {
+		pl = pl_pop(&root->sentinel.requests);
 	} else {
-		p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
+		pl = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
 		/* Convert an allocation failure to a priority bump */
-		if (unlikely(!p)) {
+		if (unlikely(!pl)) {
 			prio = I915_PRIORITY_NORMAL; /* recurses just once */
 
-			/* To maintain ordering with all rendering, after an
+			/*
+			 * To maintain ordering with all rendering, after an
 			 * allocation failure we have to disable all scheduling.
 			 * Requests will then be executed in fifo, and schedule
 			 * will ensure that dependencies are emitted in fifo.
@@ -327,18 +392,123 @@ lookup_priolist(struct i915_sched *se, int prio)
 		}
 	}
 
-	p->priority = prio;
-	INIT_LIST_HEAD(&p->requests);
+	pl->priority = prio;
+	INIT_LIST_HEAD(&pl->requests);
 
-	rb_link_node(&p->node, rb, parent);
-	rb_insert_color_cached(&p->node, &se->queue, first);
+	lvl = random_level(root);
+	if (lvl > root->sentinel.level) {
+		if (root->sentinel.level < I915_PRIOLIST_HEIGHT - 1) {
+			lvl = ++root->sentinel.level;
+			update[lvl] = &root->sentinel;
+		} else {
+			lvl = I915_PRIOLIST_HEIGHT - 1;
+		}
+	}
+	GEM_BUG_ON(lvl < 0);
+	GEM_BUG_ON(lvl >= ARRAY_SIZE(pl->next));
 
-	return &p->requests;
+	pl->level = lvl;
+	do {
+		tmp = update[lvl];
+		pl->next[lvl] = tmp->next[lvl];
+		tmp->next[lvl] = pl;
+	} while (--lvl >= 0);
+
+	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
+		struct i915_priolist *chk;
+
+		chk = &root->sentinel;
+		lvl = chk->level;
+		do {
+			while (tmp = chk->next[lvl], tmp->priority >= prio)
+				chk = tmp;
+		} while (--lvl >= 0);
+
+		GEM_BUG_ON(chk != pl);
+	}
+
+out:
+	GEM_BUG_ON(pl == &root->sentinel);
+	return &pl->requests;
 }
 
-void __i915_priolist_free(struct i915_priolist *p)
+static void __remove_priolist(struct i915_sched *se, struct list_head *plist)
 {
-	kmem_cache_free(global.slab_priorities, p);
+	struct i915_priolist_root *root = &se->queue;
+	struct i915_priolist *pl, *tmp;
+	struct i915_priolist *old =
+		container_of(plist, struct i915_priolist, requests);
+	int prio = old->priority;
+	int lvl;
+
+	lockdep_assert_held(&se->lock);
+	GEM_BUG_ON(!list_empty(plist));
+
+	pl = &root->sentinel;
+	lvl = pl->level;
+	GEM_BUG_ON(lvl < 0);
+
+	if (prio != I915_PRIORITY_NORMAL)
+		pl_push(old, &pl->requests);
+
+	do {
+		while (tmp = pl->next[lvl], tmp->priority > prio)
+			pl = tmp;
+		if (lvl <= old->level) {
+			pl->next[lvl] = old->next[lvl];
+			if (pl == &root->sentinel && old->next[lvl] == pl) {
+				GEM_BUG_ON(pl->level != lvl);
+				pl->level--;
+			}
+		}
+	} while (--lvl >= 0);
+	GEM_BUG_ON(tmp != old);
+}
+
+static void remove_from_priolist(struct i915_sched *se,
+				 struct i915_request *rq,
+				 struct list_head *list,
+				 bool tail)
+{
+	struct list_head *prev = rq->sched.link.prev;
+
+	GEM_BUG_ON(!i915_request_in_priority_queue(rq));
+
+	__list_del_entry(&rq->sched.link);
+	if (tail)
+		list_add_tail(&rq->sched.link, list);
+	else
+		list_add(&rq->sched.link, list);
+
+	/* If we just removed the last element in the old plist, delete it */
+	if (list_empty(prev))
+		__remove_priolist(se, prev);
+}
+
+struct i915_priolist *__i915_sched_dequeue_next(struct i915_sched *se)
+{
+	struct i915_priolist * const s = &se->queue.sentinel;
+	struct i915_priolist *pl = s->next[0];
+	int lvl;
+
+	GEM_BUG_ON(!list_empty(&pl->requests));
+	GEM_BUG_ON(pl == s);
+
+	/* Keep pl->next[0] valid for for_each_priolist iteration */
+	if (pl->priority != I915_PRIORITY_NORMAL)
+		pl_push(pl, &s->requests);
+
+	lvl = pl->level;
+	GEM_BUG_ON(lvl < 0);
+	do {
+		s->next[lvl] = pl->next[lvl];
+		if (pl->next[lvl] == s) {
+			GEM_BUG_ON(s->level != lvl);
+			s->level--;
+		}
+	} while (--lvl >= 0);
+
+	return pl->next[0];
 }
 
 static struct i915_request *
@@ -491,7 +661,7 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio)
 
 		GEM_BUG_ON(rq->engine != engine);
 		if (i915_request_in_priority_queue(rq))
-			list_move_tail(&rq->sched.link, plist);
+			remove_from_priolist(se, rq, plist, true);
 
 		/* Defer (tasklet) submission until after all updates. */
 		kick_submission(engine, rq, prio);
@@ -627,8 +797,7 @@ void __i915_sched_defer_request(struct intel_engine_cs *engine,
 
 		/* Note list is reversed for waiters wrt signal hierarchy */
 		GEM_BUG_ON(rq->engine != engine);
-		GEM_BUG_ON(!i915_request_in_priority_queue(rq));
-		list_move(&rq->sched.link, &dfs);
+		remove_from_priolist(se, rq, &dfs, false);
 
 		/* Track our visit, and prevent duplicate processing */
 		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
@@ -927,7 +1096,7 @@ void i915_sched_resume_request(struct intel_engine_cs *engine,
 void __i915_sched_cancel_queue(struct i915_sched *se)
 {
 	struct i915_request *rq, *rn;
-	struct rb_node *rb;
+	struct i915_priolist *pl;
 
 	lockdep_assert_held(&se->lock);
 
@@ -936,16 +1105,9 @@ void __i915_sched_cancel_queue(struct i915_sched *se)
 		i915_request_put(i915_request_mark_eio(rq));
 
 	/* Flush the queued requests to the timeline list (for retiring). */
-	while ((rb = rb_first_cached(&se->queue))) {
-		struct i915_priolist *p = to_priolist(rb);
-
-		priolist_for_each_request_consume(rq, rn, p) {
-			i915_request_put(i915_request_mark_eio(rq));
-			__i915_request_submit(rq);
-		}
-
-		rb_erase_cached(&p->node, &se->queue);
-		i915_priolist_free(p);
+	i915_sched_dequeue(se, pl, rq, rn) {
+		i915_request_put(i915_request_mark_eio(rq));
+		__i915_request_submit(rq);
 	}
 	GEM_BUG_ON(!i915_sched_is_idle(se));
 
@@ -1225,9 +1387,9 @@ void i915_sched_show(struct drm_printer *m,
 		     unsigned int max)
 {
 	const struct i915_request *rq, *last;
+	struct i915_priolist *pl;
 	unsigned long flags;
 	unsigned int count;
-	struct rb_node *rb;
 
 	rcu_read_lock();
 	spin_lock_irqsave(&se->lock, flags);
@@ -1282,10 +1444,8 @@ void i915_sched_show(struct drm_printer *m,
 
 	last = NULL;
 	count = 0;
-	for (rb = rb_first_cached(&se->queue); rb; rb = rb_next(rb)) {
-		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
-
-		priolist_for_each_request(rq, p) {
+	for_each_priolist(pl, &se->queue) {
+		priolist_for_each_request(rq, pl) {
 			if (count++ < max - 1)
 				show_request(m, rq, "\t", 0);
 			else
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index fe392109b112..872d221f6ba7 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -24,12 +24,6 @@ struct intel_engine_cs;
 		  ##__VA_ARGS__);					\
 } while (0)
 
-#define priolist_for_each_request(it, plist) \
-	list_for_each_entry(it, &(plist)->requests, sched.link)
-
-#define priolist_for_each_request_consume(it, n, plist) \
-	list_for_each_entry_safe(it, n, &(plist)->requests, sched.link)
-
 void i915_sched_node_init(struct i915_sched_node *node);
 void i915_sched_node_reinit(struct i915_sched_node *node);
 
@@ -100,7 +94,7 @@ static inline void i915_priolist_free(struct i915_priolist *p)
 
 static inline bool i915_sched_is_idle(const struct i915_sched *se)
 {
-	return RB_EMPTY_ROOT(&se->queue.rb_root);
+	return i915_priolist_is_empty(&se->queue);
 }
 
 static inline bool
@@ -168,6 +162,14 @@ i915_sched_get_active_request(const struct i915_sched *se)
 	return NULL;
 }
 
+/* Walk the scheduler queue of requests (in submission order) and remove them */
+struct i915_priolist *__i915_sched_dequeue_next(struct i915_sched *se);
+#define i915_sched_dequeue(se, pl, rq, rn) \
+	for ((pl) = (se)->queue.sentinel.next[0]; \
+	     (pl) != &(se)->queue.sentinel; \
+	     (pl) = __i915_sched_dequeue_next(se)) \
+		priolist_for_each_request_safe(rq, rn, pl)
+
 void i915_request_show_with_schedule(struct drm_printer *m,
 				     const struct i915_request *rq,
 				     const char *prefix,
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 5ca2dc1b4fb5..bc668f375097 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -115,7 +115,7 @@ struct i915_sched {
 	 * @queue is only used to transfer requests from the scheduler
 	 * frontend to the back.
 	 */
-	struct rb_root_cached queue;
+	struct i915_priolist_root queue;
 
 	/**
 	 * @tasklet: softirq tasklet for bottom half
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 3db34d3eea58..946c93441c1f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -25,6 +25,7 @@ selftest(ring, intel_ring_mock_selftests)
 selftest(engine, intel_engine_cs_mock_selftests)
 selftest(timelines, intel_timeline_mock_selftests)
 selftest(requests, i915_request_mock_selftests)
+selftest(scheduler, i915_scheduler_mock_selftests)
 selftest(objects, i915_gem_object_mock_selftests)
 selftest(phys, i915_gem_phys_mock_selftests)
 selftest(dmabuf, i915_gem_dmabuf_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
index f54bdbeaa48b..2bb2d3d07d06 100644
--- a/drivers/gpu/drm/i915/selftests/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
@@ -12,6 +12,54 @@
 #include "selftests/igt_spinner.h"
 #include "selftests/i915_random.h"
 
+static int mock_skiplist_levels(void *dummy)
+{
+	struct i915_priolist_root root = {};
+	struct i915_priolist *pl = &root.sentinel;
+	IGT_TIMEOUT(end_time);
+	unsigned long total;
+	int count, lvl;
+
+	total = 0;
+	do {
+		for (count = 0; count < 16384; count++) {
+			lvl = random_level(&root);
+			if (lvl > pl->level) {
+				if (lvl < I915_PRIOLIST_HEIGHT - 1)
+					lvl = ++pl->level;
+				else
+					lvl = I915_PRIOLIST_HEIGHT - 1;
+			}
+
+			pl->next[lvl] = ptr_inc(pl->next[lvl]);
+		}
+		total += count;
+	} while (!__igt_timeout(end_time, NULL));
+
+	pr_info("Total %9lu\n", total);
+	for (lvl = 0; lvl <= pl->level; lvl++) {
+		int x = ilog2((unsigned long)pl->next[lvl]);
+		char row[80];
+
+		memset(row, '*', x);
+		row[x] = '\0';
+
+		pr_info(" [%2d] %9lu %s\n",
+			lvl, (unsigned long)pl->next[lvl], row);
+	}
+
+	return 0;
+}
+
+int i915_scheduler_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(mock_skiplist_levels),
+	};
+
+	return i915_subtests(tests, NULL);
+}
+
 static void scheduling_disable(struct intel_engine_cs *engine)
 {
 	engine->props.preempt_timeout_ms = 0;
@@ -80,9 +128,9 @@ static int all_engines(struct drm_i915_private *i915,
 static bool check_context_order(struct i915_sched *se)
 {
 	u64 last_seqno, last_context;
+	struct i915_priolist *p;
 	unsigned long count;
 	bool result = false;
-	struct rb_node *rb;
 	int last_prio;
 
 	/* We expect the execution order to follow ascending fence-context */
@@ -92,8 +140,7 @@ static bool check_context_order(struct i915_sched *se)
 	last_context = 0;
 	last_seqno = 0;
 	last_prio = 0;
-	for (rb = rb_first_cached(&se->queue); rb; rb = rb_next(rb)) {
-		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+	for_each_priolist(p, &se->queue) {
 		struct i915_request *rq;
 
 		priolist_for_each_request(rq, p) {
-- 
2.20.1

* [Intel-gfx] [PATCH 10/31] drm/i915: Fair low-latency scheduling
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (7 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 09/31] drm/i915: Replace priolist rbtree with a skiplist Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 14:56   ` Tvrtko Ursulin
  2021-02-09  9:37   ` Tvrtko Ursulin
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 11/31] drm/i915/gt: Specify a deadline for the heartbeat Chris Wilson
                   ` (24 subsequent siblings)
  33 siblings, 2 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

The first "scheduler" was a topographical sorting of requests into
priority order. The execution order was deterministic, the earliest
submitted, highest priority request would be executed first. Priority
inheritance ensured that inversions were kept at bay, and allowed us to
dynamically boost priorities (e.g. for interactive pageflips).

The minimalistic timeslicing scheme was an attempt to introduce fairness
between long running requests, by evicting the active request at the end
of a timeslice and moving it to the back of its priority queue (while
ensuring that dependencies were kept in order). For short running
requests from many clients of equal priority, the scheme is still very
much FIFO submission ordering, and as unfair as before.

To impose fairness, we need an external metric that ensures that clients
are interspersed, so we don't execute one long chain from client A before
executing any of client B. This could be imposed by the clients
themselves by using fences based on an external clock, that is, they only
submit work for a "frame" at frame-intervals, instead of submitting as
much work as they are able to. The standard SwapBuffers approach is akin
to double buffering, where, as one frame is being executed, the next is
being submitted, such that there is always a maximum of two frames per
client in the pipeline and so ideally maintains consistent input-output
latency. Even this scheme exhibits unfairness under load as a single
client will execute two frames back to back before the next, and with
enough clients, deadlines will be missed.

The idea introduced by BFS/MuQSS is that fairness is achieved by
metering with an external clock. Every request, when it becomes ready to
execute, is assigned a virtual deadline, and execution order is then
determined by earliest deadline. Priority is used as a hint, rather than
strict ordering, where high priority requests have earlier deadlines,
but not necessarily earlier than outstanding work. Thus work is executed
in order of 'readiness', with timeslicing to demote long running work.
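
As a rough sketch of the mechanism (illustrative userspace code only; the
scaling function and constants are invented here, not the values used by
this patch): a request's deadline is its readiness time plus the engine
timeslice scaled by its priority hint, and dispatch simply picks the
earliest deadline.

  #include <stdint.h>
  #include <stdio.h>

  #define TIMESLICE_NS (5 * 1000 * 1000ULL) /* hypothetical 5ms quota */

  static uint64_t prio_scale(uint64_t slice, int prio)
  {
      /* prio 0 keeps the full slice as the offset; each positive step
       * halves it (earlier deadline), each negative step doubles it,
       * clamped to 1/16x..16x.
       */
      if (prio >= 0)
          return slice >> (prio > 4 ? 4 : prio);
      else
          return slice << (-prio > 4 ? 4 : -prio);
  }

  static uint64_t virtual_deadline(uint64_t ready_ns, int prio)
  {
      return ready_ns + prio_scale(TIMESLICE_NS, prio);
  }

  int main(void)
  {
      uint64_t now = 1000000000ULL; /* pretend "ready" timestamp in ns */

      /* Earliest deadline runs first: prio 2 beats prio 0 when both become
       * ready together, but a lower priority request that became ready much
       * earlier can still come first -- priority is a hint, not strict order.
       */
      printf("prio  0 -> %llu\n", (unsigned long long)virtual_deadline(now, 0));
      printf("prio  2 -> %llu\n", (unsigned long long)virtual_deadline(now, 2));
      printf("prio -2 -> %llu\n", (unsigned long long)virtual_deadline(now, -2));
      return 0;
  }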

The Achilles' heel of this scheduler is its strong preference for
low-latency and favouring of new queues. Whereas it was easy to dominate
the old scheduler by flooding it with many requests over a short period
of time, the new scheduler can be dominated by a 'synchronous' client
that waits for each of its requests to complete before submitting the
next. As such a client has no history, it is always considered
ready-to-run and receives an earlier deadline than the long running
requests. This is compensated for by refreshing the current execution's
deadline and by disallowing preemption for timeslice shuffling.

In contrast, one key advantage of disconnecting the sort key from the
priority value is that we can freely adjust the deadline to compensate
for other factors. This is used in conjunction with submitting requests
ahead-of-schedule that then busywait on the GPU using semaphores. Since
we don't want to spend a timeslice busywaiting instead of doing real
work when available, we deprioritise work by giving the semaphore waits
a later virtual deadline. The priority deboost is applied to semaphore
workloads after they miss a semaphore wait and a new context is pending.
The request is then restored to its normal priority once the semaphores
are signaled so that it is not unfairly penalised under contention by
remaining at a far future deadline. This is a much improved and cleaner
version of commit f9e9e9de58c7 ("drm/i915: Prioritise non-busywait
semaphore workloads").

To check the impact on throughput (often the downfall of latency
sensitive schedulers), we used gem_wsim to simulate various transcode
workloads with different load balancers, and varying the number of
competing [heterogeneous] clients. On Kabylake gt3e running at fixed
cpu/gpu clocks,

+delta%------------------------------------------------------------------+
|       a                                                                |
|       a                                                                |
|       a                                                                |
|       a                                                                |
|       aa                                                               |
|      aaa                                                               |
|      aaaa                                                              |
|     aaaaaa                                                             |
|     aaaaaa                                                             |
|     aaaaaa   a                a                                        |
| aa  aaaaaa a a      a  a   aa a       a         a       a             a|
||______M__A__________|                                                  |
+------------------------------------------------------------------------+
    N           Min           Max        Median          Avg       Stddev
  108    -4.6326643     47.797855 -0.00069639128     2.116185   7.6764049

Each point is the relative percentage change in gem_wsim's work-per-second
score [using the median result of 120 25s runs, the relative change
computed as (B/A - 1) * 100]; 0 being no change.
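
For example, a workload whose baseline median is 200 work-per-second and
which scores 210 after the change is plotted as (210/200 - 1) * 100 = +5%.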

Reviewing the same workloads on Tigerlake,

+delta%------------------------------------------------------------------+
|       a                                                                |
|       a                                                                |
|       a                                                                |
|       aa a                                                             |
|       aaaa                                                             |
|       aaaa                                                             |
|    aaaaaaa                                                             |
|    aaaaaaa                                                             |
|    aaaaaaa      a   a   aa  a         a                         a      |
| aaaaaaaaaa a aa a a a aaaa aa   a     a        aa               a     a|
||_______M____A_____________|                                            |
+------------------------------------------------------------------------+
    N           Min           Max        Median          Avg       Stddev
  108     -4.258712      46.83081    0.36853159    4.1415662     9.461689

The expectation is that by deliberately increasing the number of context
switches to improve fairness between clients, throughput will be
diminished. What we do see are small fluctuations around no change, with
the median result being improved throughput. The dramatic improvement is
from reintroducing the improved no-semaphore boosting, which avoids
accidentally preventing scheduling of ready workloads due to busy
spinners (i.e. avoids wasting cycles when there is work to be done).

We expect to see no change in single client workloads such as games,
though running multiple applications on a desktop should have reduced
jitter, i.e. smoother input-output latency.

This scheduler is based on MuQSS by Dr Con Kolivas.

v2: More commentary, especially around where we reset the deadlines.

Testcase: igt/gem_exec_fair
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/Kconfig.profile          |  62 +++
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 -
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |   1 +
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   4 +-
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  14 -
 drivers/gpu/drm/i915/gt/intel_engine_user.c   |   1 +
 .../drm/i915/gt/intel_execlists_submission.c  | 233 ++++----
 drivers/gpu/drm/i915/gt/selftest_execlists.c  |  30 +-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |   5 +-
 drivers/gpu/drm/i915/gt/selftest_lrc.c        |   1 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   4 -
 drivers/gpu/drm/i915/i915_priolist_types.h    |   7 +-
 drivers/gpu/drm/i915/i915_request.c           |  19 +-
 drivers/gpu/drm/i915/i915_scheduler.c         | 518 +++++++++++++-----
 drivers/gpu/drm/i915/i915_scheduler.h         |  18 +-
 drivers/gpu/drm/i915/i915_scheduler_types.h   |  39 ++
 drivers/gpu/drm/i915/selftests/i915_request.c |   1 +
 .../gpu/drm/i915/selftests/i915_scheduler.c   | 136 +++++
 include/uapi/drm/i915_drm.h                   |   1 +
 19 files changed, 810 insertions(+), 286 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 35bbe2b80596..f1d009906f71 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -1,3 +1,65 @@
+choice
+	prompt "Preferred scheduler"
+	default DRM_I915_SCHED_VIRTUAL_DEADLINE
+	help
+	  Select the preferred method to decide the order of execution.
+
+	  The scheduler is used for two purposes. First to defer unready
+	  jobs to not block execution of independent ready clients, so
+	  preventing GPU stalls while work waits for other tasks. The second
+	  purpose is to decide which task to run next, as well as decide
+	  if that task should preempt the currently running task, or if
+	  the current task has exceeded its allotment of GPU time and should
+	  be replaced.
+
+	config DRM_I915_SCHED_FIFO
+	bool "FIFO"
+	help
+	  No task reordering, tasks are executed in order of readiness.
+	  First in, first out.
+
+	  Unready tasks do not block execution of other, independent clients.
+	  A client will not be scheduled for execution until all of its
+	  prerequisite work has completed.
+
+	  This disables the scheduler and puts it into a pass-through mode.
+
+	config DRM_I915_SCHED_PRIORITY
+	bool "Priority"
+	help
+	  Strict priority ordering, equal priority tasks are executed
+	  in order of readiness. Clients are liable to starve other clients,
+	  causing uneven execution and excess task latency. High priority
+	  clients will preempt lower priority clients and will run
+	  uninterrupted.
+
+	  Note that interactive desktops will implicitly perform priority
+	  boosting to minimise frame jitter.
+
+	config DRM_I915_SCHED_VIRTUAL_DEADLINE
+	bool "Virtual Deadline"
+	help
+	  A fair scheduler based on MuQSS with priority-hinting.
+
+	  When a task is ready for execution, it is given a quota (from the
+	  engine's timeslice) and a virtual deadline. The virtual deadline is
+	  derived from the current time and the timeslice scaled by the
+	  task's priority. Higher priority tasks are given an earlier
+	  deadline and receive a large portion of the execution bandwidth.
+
+	  Requests are then executed in order of deadline completion.
+	  Requests with earlier deadlines and higher priority than currently
+	  executing on the engine will preempt the active task.
+
+endchoice
+
+config DRM_I915_SCHED
+	int
+	default 2 if DRM_I915_SCHED_VIRTUAL_DEADLINE
+	default 1 if DRM_I915_SCHED_PRIORITY
+	default 0 if DRM_I915_SCHED_FIFO
+	default -1
+
 config DRM_I915_FENCE_TIMEOUT
 	int "Timeout for unsignaled foreign fences (ms, jiffy granularity)"
 	default 10000 # milliseconds
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index da2447f18daa..7d34bf03670b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -579,8 +579,6 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine)
 	memset(execlists->pending, 0, sizeof(execlists->pending));
 	execlists->active =
 		memset(execlists->inflight, 0, sizeof(execlists->inflight));
-
-	execlists->queue_priority_hint = INT_MIN;
 }
 
 static void cleanup_status_page(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 5ed263f36f93..1d0e7daa6285 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -313,6 +313,7 @@ static int __intel_engine_pulse(struct intel_engine_cs *engine)
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
+	rq->sched.deadline = 0;
 	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
 
 	heartbeat_commit(rq, &attr);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 27d9d17b35cb..ef5064ea54e5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -211,6 +211,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	i915_request_add_active_barriers(rq);
 
 	/* Install ourselves as a preemption barrier */
+	rq->sched.deadline = 0;
 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 	if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
 		/*
@@ -271,9 +272,6 @@ static int __engine_park(struct intel_wakeref *wf)
 	intel_engine_park_heartbeat(engine);
 	intel_breadcrumbs_park(engine->breadcrumbs);
 
-	/* Must be reset upon idling, or we may miss the busy wakeup. */
-	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
-
 	if (engine->park)
 		engine->park(engine);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 08bddc5263aa..d1024e8717e1 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -223,20 +223,6 @@ struct intel_engine_execlists {
 	 */
 	unsigned int port_mask;
 
-	/**
-	 * @queue_priority_hint: Highest pending priority.
-	 *
-	 * When we add requests into the queue, or adjust the priority of
-	 * executing requests, we compute the maximum priority of those
-	 * pending requests. We can then use this value to determine if
-	 * we need to preempt the executing requests to service the queue.
-	 * However, since the we may have recorded the priority of an inflight
-	 * request we wanted to preempt but since completed, at the time of
-	 * dequeuing the priority hint may no longer may match the highest
-	 * available request priority.
-	 */
-	int queue_priority_hint;
-
 	struct rb_root_cached virtual;
 
 	/**
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 3fab439ba22b..92632afa52ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -102,6 +102,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915)
 #define MAP(x, y) { I915_SCHED_##x, ilog2(I915_SCHEDULER_CAP_##y) }
 		MAP(ACTIVE_BIT, ENABLED),
 		MAP(PRIORITY_BIT, PRIORITY),
+		MAP(DEADLINE_BIT, FAIR),
 		MAP(TIMESLICE_BIT, TIMESLICING),
 #undef MAP
 	};
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 4a0258347c10..e249b1423309 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -180,7 +180,7 @@ struct virtual_engine {
 	 */
 	struct ve_node {
 		struct rb_node rb;
-		int prio;
+		u64 deadline;
 	} nodes[I915_NUM_ENGINES];
 
 	/*
@@ -256,25 +256,12 @@ static void ring_set_paused(const struct intel_engine_cs *engine, int state)
 
 static int rq_prio(const struct i915_request *rq)
 {
-	return READ_ONCE(rq->sched.attr.priority);
+	return rq->sched.attr.priority;
 }
 
-static int effective_prio(const struct i915_request *rq)
+static u64 rq_deadline(const struct i915_request *rq)
 {
-	int prio = rq_prio(rq);
-
-	/*
-	 * If this request is special and must not be interrupted at any
-	 * cost, so be it. Note we are only checking the most recent request
-	 * in the context and so may be masking an earlier vip request. It
-	 * is hoped that under the conditions where nopreempt is used, this
-	 * will not matter (i.e. all requests to that context will be
-	 * nopreempt for as long as desired).
-	 */
-	if (i915_request_has_nopreempt(rq))
-		prio = I915_PRIORITY_UNPREEMPTABLE;
-
-	return prio;
+	return rq->sched.deadline;
 }
 
 static struct i915_request *first_request(const struct i915_sched *se)
@@ -289,62 +276,62 @@ static struct i915_request *first_request(const struct i915_sched *se)
 					sched.link);
 }
 
-static int queue_prio(const struct i915_sched *se)
+static struct i915_request *first_virtual(const struct intel_engine_cs *engine)
 {
-	struct i915_request *rq;
+	struct rb_node *rb;
 
-	rq = first_request(se);
-	if (!rq)
-		return INT_MIN;
+	rb = rb_first_cached(&engine->execlists.virtual);
+	if (!rb)
+		return NULL;
 
-	return rq_prio(rq);
+	return READ_ONCE(rb_entry(rb,
+				  struct virtual_engine,
+				  nodes[engine->id].rb)->request);
 }
 
-static int virtual_prio(const struct intel_engine_execlists *el)
+static const struct i915_request *
+next_elsp_request(const struct i915_sched *se, const struct i915_request *rq)
 {
-	struct rb_node *rb = rb_first_cached(&el->virtual);
+	if (i915_sched_is_last_request(se, rq))
+		return NULL;
 
-	return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
+	return list_next_entry(rq, sched.link);
 }
 
-static bool need_preempt(struct intel_engine_cs *engine,
+static bool
+dl_before(const struct i915_request *next, const struct i915_request *prev)
+{
+	return !prev || (next && rq_deadline(next) < rq_deadline(prev));
+}
+
+static bool need_preempt(const struct intel_engine_cs *engine,
 			 const struct i915_request *rq)
 {
 	const struct i915_sched *se = &engine->sched;
-	int last_prio;
+	const struct i915_request *first = NULL;
+	const struct i915_request *next;
 
 	if (!i915_sched_use_busywait(se))
 		return false;
 
 	/*
-	 * Check if the current priority hint merits a preemption attempt.
-	 *
-	 * We record the highest value priority we saw during rescheduling
-	 * prior to this dequeue, therefore we know that if it is strictly
-	 * less than the current tail of ESLP[0], we do not need to force
-	 * a preempt-to-idle cycle.
-	 *
-	 * However, the priority hint is a mere hint that we may need to
-	 * preempt. If that hint is stale or we may be trying to preempt
-	 * ourselves, ignore the request.
-	 *
-	 * More naturally we would write
-	 *      prio >= max(0, last);
-	 * except that we wish to prevent triggering preemption at the same
-	 * priority level: the task that is running should remain running
-	 * to preserve FIFO ordering of dependencies.
+	 * If this request is special and must not be interrupted at any
+	 * cost, so be it. Note we are only checking the most recent request
+	 * in the context and so may be masking an earlier vip request. It
+	 * is hoped that under the conditions where nopreempt is used, this
+	 * will not matter (i.e. all requests to that context will be
+	 * nopreempt for as long as desired).
 	 */
-	last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
-	if (engine->execlists.queue_priority_hint <= last_prio)
+	if (i915_request_has_nopreempt(rq))
 		return false;
 
 	/*
 	 * Check against the first request in ELSP[1], it will, thanks to the
 	 * power of PI, be the highest priority of that context.
 	 */
-	if (!list_is_last(&rq->sched.link, &se->requests) &&
-	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
-		return true;
+	next = next_elsp_request(se, rq);
+	if (dl_before(next, first))
+		first = next;
 
 	/*
 	 * If the inflight context did not trigger the preemption, then maybe
@@ -356,8 +343,31 @@ static bool need_preempt(struct intel_engine_cs *engine,
 	 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
 	 * context, it's priority would not exceed ELSP[0] aka last_prio.
 	 */
-	return max(virtual_prio(&engine->execlists),
-		   queue_prio(se)) > last_prio;
+	next = first_request(se);
+	if (dl_before(next, first))
+		first = next;
+
+	next = first_virtual(engine);
+	if (dl_before(next, first))
+		first = next;
+
+	if (!dl_before(first, rq))
+		return false;
+
+	/*
+	 * While a request may have been queued that has an earlier deadline
+	 * than the one currently running, we only allow it to perform an urgent
+	 * preemption if it also has higher priority. The cost of frequently
+	 * switching between contexts is noticeable, so we try to keep
+	 * the deadline shuffling only to timeslice boundaries.
+	 */
+	ENGINE_TRACE(engine,
+		     "preempt for first=%llx:%llu, dl=%llu, prio=%d?\n",
+		     first->fence.context,
+		     first->fence.seqno,
+		     rq_deadline(first),
+		     rq_prio(first));
+	return rq_prio(first) > max(rq_prio(rq), I915_PRIORITY_NORMAL - 1);
 }
 
 __maybe_unused static bool
@@ -374,7 +384,15 @@ assert_priority_queue(const struct i915_request *prev,
 	if (i915_request_is_active(prev))
 		return true;
 
-	return rq_prio(prev) >= rq_prio(next);
+	if (rq_deadline(prev) <= rq_deadline(next))
+		return true;
+
+	ENGINE_TRACE(prev->engine,
+		     "next %llx:%lld dl %lld is before prev %llx:%lld dl %lld\n",
+		     next->fence.context, next->fence.seqno, rq_deadline(next),
+		     prev->fence.context, prev->fence.seqno, rq_deadline(prev));
+
+	return false;
 }
 
 static void
@@ -555,10 +573,25 @@ static void __execlists_schedule_out(struct i915_request * const rq,
 	/*
 	 * If we have just completed this context, the engine may now be
 	 * idle and we want to re-enter powersaving.
+	 *
+	 * If the context is still active, update the deadline on the next
+	 * request as we submitted it much earlier with an estimation based
+	 * on this request and all those before it consuming their whole budget.
+	 * Since the next request is ready but may have a deadline set far in
+	 * the future, we will prefer any new client before executing this
+	 * context again. If the other clients are submitting synchronous
+	 * workloads, each submission appears as a fresh piece of work, ready
+	 * to run; each time they will receive a deadline that is likely earlier
+	 * than the accumulated deadline of this context. So we re-evaluate this
+	 * context's deadline and put it on an equal footing with the
+	 * synchronous clients.
 	 */
-	if (intel_timeline_is_last(ce->timeline, rq) &&
-	    __i915_request_is_complete(rq))
-		intel_engine_add_retire(engine, ce->timeline);
+	if (__i915_request_is_complete(rq)) {
+		if (!intel_timeline_is_last(ce->timeline, rq))
+			i915_request_update_deadline(list_next_entry(rq, link));
+		else
+			intel_engine_add_retire(engine, ce->timeline);
+	}
 
 	ccid = ce->lrc.ccid;
 	ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
@@ -668,14 +701,14 @@ dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
 	if (!rq)
 		return "";
 
-	snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
+	snprintf(buf, buflen, "%sccid:%x %llx:%lld%s dl:%llu",
 		 prefix,
 		 rq->context->lrc.ccid,
 		 rq->fence.context, rq->fence.seqno,
 		 __i915_request_is_complete(rq) ? "!" :
 		 __i915_request_has_started(rq) ? "*" :
 		 "",
-		 rq_prio(rq));
+		 rq_deadline(rq));
 
 	return buf;
 }
@@ -1197,11 +1230,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	if (last) {
 		if (need_preempt(engine, last)) {
 			ENGINE_TRACE(engine,
-				     "preempting last=%llx:%lld, prio=%d, hint=%d\n",
+				     "preempting last=%llx:%llu, dl=%llu, prio=%d\n",
 				     last->fence.context,
 				     last->fence.seqno,
-				     last->sched.attr.priority,
-				     execlists->queue_priority_hint);
+				     rq_deadline(last),
+				     rq_prio(last));
 			record_preemption(execlists);
 
 			/*
@@ -1223,11 +1256,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			last = NULL;
 		} else if (timeslice_expired(engine, last)) {
 			ENGINE_TRACE(engine,
-				     "expired:%s last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
+				     "expired:%s last=%llx:%llu, deadline=%llu, now=%llu, yield?=%s\n",
 				     yesno(timer_expired(&execlists->timer)),
 				     last->fence.context, last->fence.seqno,
-				     rq_prio(last),
-				     execlists->queue_priority_hint,
+				     rq_deadline(last),
+				     i915_sched_to_ticks(ktime_get()),
 				     yesno(timeslice_yield(execlists, last)));
 
 			/*
@@ -1298,7 +1331,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		GEM_BUG_ON(rq->engine != &ve->base);
 		GEM_BUG_ON(rq->context != &ve->context);
 
-		if (unlikely(rq_prio(rq) < queue_prio(se))) {
+		if (!dl_before(rq, first_request(se))) {
 			spin_unlock(&ve->base.sched.lock);
 			break;
 		}
@@ -1310,16 +1343,15 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		}
 
 		ENGINE_TRACE(engine,
-			     "virtual rq=%llx:%lld%s, new engine? %s\n",
+			     "virtual rq=%llx:%lld%s, dl %llx, new engine? %s\n",
 			     rq->fence.context,
 			     rq->fence.seqno,
 			     __i915_request_is_complete(rq) ? "!" :
 			     __i915_request_has_started(rq) ? "*" :
 			     "",
+			     rq_deadline(rq),
 			     yesno(engine != ve->siblings[0]));
-
 		WRITE_ONCE(ve->request, NULL);
-		WRITE_ONCE(ve->base.execlists.queue_priority_hint, INT_MIN);
 
 		rb = &ve->nodes[engine->id].rb;
 		rb_erase_cached(rb, &execlists->virtual);
@@ -1407,6 +1439,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			if (rq->execution_mask != engine->mask)
 				goto done;
 
+			if (unlikely(dl_before(first_virtual(engine), rq)))
+				goto done;
+
 			/*
 			 * If GVT overrides us we only ever submit
 			 * port[0], leaving port[1] empty. Note that we
@@ -1440,24 +1475,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	}
 done:
 	*port++ = i915_request_get(last);
-
-	/*
-	 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
-	 *
-	 * We choose the priority hint such that if we add a request of greater
-	 * priority than this, we kick the submission tasklet to decide on
-	 * the right order of submitting the requests to hardware. We must
-	 * also be prepared to reorder requests as they are in-flight on the
-	 * HW. We derive the priority hint then as the first "hole" in
-	 * the HW submission ports and if there are no available slots,
-	 * the priority of the lowest executing request, i.e. last.
-	 *
-	 * When we do receive a higher priority request ready to run from the
-	 * user, see queue_request(), the priority hint is bumped to that
-	 * request triggering preemption on the next dequeue (or subsequent
-	 * interrupt for secondary ports).
-	 */
-	execlists->queue_priority_hint = pl->priority;
 	spin_unlock(&se->lock);
 
 	/*
@@ -2653,15 +2670,6 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
 	rcu_read_unlock();
 }
 
-static void nop_submission_tasklet(struct tasklet_struct *t)
-{
-	struct intel_engine_cs * const engine =
-		from_tasklet(engine, t, sched.tasklet);
-
-	/* The driver is wedged; don't process any more events. */
-	WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
-}
-
 static void execlists_reset_cancel(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -2710,17 +2718,10 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
 				i915_request_put(rq);
 			}
 			i915_request_put(rq);
-
-			ve->base.execlists.queue_priority_hint = INT_MIN;
 		}
 		spin_unlock(&ve->base.sched.lock);
 	}
 
-	execlists->queue_priority_hint = INT_MIN;
-
-	GEM_BUG_ON(__tasklet_is_enabled(&se->tasklet));
-	se->tasklet.callback = nop_submission_tasklet;
-
 	spin_unlock_irqrestore(&se->lock, flags);
 	rcu_read_unlock();
 
@@ -2831,7 +2832,6 @@ static bool can_preempt(struct intel_engine_cs *engine)
 static void execlists_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->sched.submit_request = i915_request_enqueue;
-	engine->sched.tasklet.callback = execlists_submission_tasklet;
 }
 
 static void execlists_shutdown(struct intel_engine_cs *engine)
@@ -2957,7 +2957,7 @@ static void init_execlists(struct intel_engine_cs *engine)
 	engine->sched.show = execlists_show;
 	tasklet_setup(&engine->sched.tasklet, execlists_submission_tasklet);
 
-	i915_sched_select_mode(&engine->sched, I915_SCHED_MODE_PRIORITY);
+	i915_sched_select_mode(&engine->sched, I915_SCHED_MODE_DEADLINE);
 
 	if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION) &&
 	    intel_engine_has_preemption(engine))
@@ -3193,7 +3193,8 @@ static const struct intel_context_ops virtual_context_ops = {
 	.destroy = virtual_context_destroy,
 };
 
-static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
+static intel_engine_mask_t
+virtual_submission_mask(struct virtual_engine *ve, u64 *deadline)
 {
 	struct i915_request *rq;
 	intel_engine_mask_t mask;
@@ -3210,9 +3211,11 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
 		mask = ve->siblings[0]->mask;
 	}
 
-	ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
+	*deadline = rq_deadline(rq);
+
+	ENGINE_TRACE(&ve->base, "rq=%llx:%llu, mask=%x, dl=%llu\n",
 		     rq->fence.context, rq->fence.seqno,
-		     mask, ve->base.execlists.queue_priority_hint);
+		     mask, *deadline);
 
 	return mask;
 }
@@ -3221,12 +3224,12 @@ static void virtual_submission_tasklet(struct tasklet_struct *t)
 {
 	struct virtual_engine * const ve =
 		from_tasklet(ve, t, base.sched.tasklet);
-	const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
 	intel_engine_mask_t mask;
 	unsigned int n;
+	u64 deadline;
 
 	rcu_read_lock();
-	mask = virtual_submission_mask(ve);
+	mask = virtual_submission_mask(ve, &deadline);
 	rcu_read_unlock();
 	if (unlikely(!mask))
 		return;
@@ -3260,7 +3263,8 @@ static void virtual_submission_tasklet(struct tasklet_struct *t)
 			 */
 			first = rb_first_cached(&sibling->execlists.virtual) ==
 				&node->rb;
-			if (prio == node->prio || (prio > node->prio && first))
+			if (deadline == node->deadline ||
+			    (deadline < node->deadline && first))
 				goto submit_engine;
 
 			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
@@ -3274,7 +3278,7 @@ static void virtual_submission_tasklet(struct tasklet_struct *t)
 
 			rb = *parent;
 			other = rb_entry(rb, typeof(*other), rb);
-			if (prio > other->prio) {
+			if (deadline < other->deadline) {
 				parent = &rb->rb_left;
 			} else {
 				parent = &rb->rb_right;
@@ -3289,8 +3293,8 @@ static void virtual_submission_tasklet(struct tasklet_struct *t)
 
 submit_engine:
 		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
-		node->prio = prio;
-		if (first && prio > sibling->execlists.queue_priority_hint)
+		node->deadline = deadline;
+		if (first)
 			i915_sched_kick(se);
 
 unlock_engine:
@@ -3327,7 +3331,9 @@ static void virtual_submit_request(struct i915_request *rq)
 		i915_request_put(ve->request);
 	}
 
-	ve->base.execlists.queue_priority_hint = rq_prio(rq);
+	rq->sched.deadline =
+		min(rq->sched.deadline,
+		    i915_scheduler_next_virtual_deadline(rq_prio(rq)));
 	ve->request = i915_request_get(rq);
 
 	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
@@ -3429,7 +3435,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 	ve->base.bond_execute = virtual_bond_execute;
 
 	INIT_LIST_HEAD(virtual_queue(ve));
-	ve->base.execlists.queue_priority_hint = INT_MIN;
 
 	intel_context_init(&ve->context, &ve->base);
 
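As a plain-C restatement of the need_preempt() rework earlier in this file (the struct, field names and EXAMPLE_PRIO_NORMAL below are stand-ins invented for illustration, not the driver's types): a waiter only forces an immediate preempt-to-idle cycle when it has both an earlier virtual deadline and strictly higher priority than the running request; otherwise the reorder is left to the next timeslice boundary.

/* Illustrative stand-ins only; not the i915 structures. */
#include <stdbool.h>
#include <stdio.h>

struct example_rq {
	unsigned long long deadline;	/* virtual deadline, smaller = sooner */
	int prio;			/* priority, larger = more important */
};

#define EXAMPLE_PRIO_NORMAL 0

/* Same shape as dl_before(): a missing candidate never wins. */
static bool example_dl_before(const struct example_rq *next,
			      const struct example_rq *prev)
{
	return !prev || (next && next->deadline < prev->deadline);
}

/*
 * Same shape as the tail of need_preempt(): an earlier deadline alone is
 * not enough; the waiter must also beat both the running priority and
 * the NORMAL floor before we preempt immediately.
 */
static bool example_need_preempt(const struct example_rq *running,
				 const struct example_rq *waiter)
{
	int floor = running->prio;

	if (floor < EXAMPLE_PRIO_NORMAL - 1)
		floor = EXAMPLE_PRIO_NORMAL - 1;

	if (!example_dl_before(waiter, running))
		return false;

	return waiter->prio > floor;
}

int main(void)
{
	struct example_rq running = { .deadline = 100, .prio = 0 };
	struct example_rq early_low = { .deadline = 50, .prio = -10 };
	struct example_rq early_high = { .deadline = 50, .prio = 10 };

	/* Earlier deadline, lower priority: wait for the timeslice. */
	printf("early+low  -> %d\n", example_need_preempt(&running, &early_low));
	/* Earlier deadline and higher priority: preempt now. */
	printf("early+high -> %d\n", example_need_preempt(&running, &early_high));
	return 0;
}
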
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index be99fbd7cfab..112a09aa0d8d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -868,7 +868,7 @@ semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
 static int
 release_queue(struct intel_engine_cs *engine,
 	      struct i915_vma *vma,
-	      int idx, int prio)
+	      int idx, u64 deadline)
 {
 	struct i915_request *rq;
 	u32 *cs;
@@ -893,10 +893,7 @@ release_queue(struct intel_engine_cs *engine,
 	i915_request_get(rq);
 	i915_request_add(rq);
 
-	local_bh_disable();
-	i915_request_set_priority(rq, prio);
-	local_bh_enable(); /* kick tasklet */
-
+	i915_request_set_deadline(rq, deadline);
 	i915_request_put(rq);
 
 	return 0;
@@ -910,6 +907,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
 	struct intel_engine_cs *engine;
 	struct i915_request *head;
 	enum intel_engine_id id;
+	long timeout;
 	int err, i, n = 0;
 
 	head = semaphore_queue(outer, vma, n++);
@@ -933,12 +931,16 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
 		}
 	}
 
-	err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
+	err = release_queue(outer, vma, n, 0);
 	if (err)
 		goto out;
 
-	if (i915_request_wait(head, 0,
-			      2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
+	/* Expected number of pessimal slices required */
+	timeout = outer->gt->info.num_engines * (count + 2) * (count + 3);
+	timeout *= 4; /* safety factor, including bucketing */
+	timeout += HZ / 2; /* and include the request completion */
+
+	if (i915_request_wait(head, 0, timeout) < 0) {
 		pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
 		       outer->name, count, n);
 		GEM_TRACE_DUMP();
@@ -1043,6 +1045,8 @@ create_rewinder(struct intel_context *ce,
 		err = i915_request_await_dma_fence(rq, &wait->fence);
 		if (err)
 			goto err;
+
+		i915_request_set_deadline(rq, rq_deadline(wait));
 	}
 
 	cs = intel_ring_begin(rq, 14);
@@ -1319,6 +1323,7 @@ static int live_timeslice_queue(void *arg)
 			goto err_heartbeat;
 		}
 		i915_request_set_priority(rq, I915_PRIORITY_MAX);
+		i915_request_set_deadline(rq, 0);
 		err = wait_for_submit(engine, rq, HZ / 2);
 		if (err) {
 			pr_err("%s: Timed out trying to submit semaphores\n",
@@ -1341,10 +1346,9 @@ static int live_timeslice_queue(void *arg)
 		}
 
 		GEM_BUG_ON(i915_request_completed(rq));
-		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
 
 		/* Queue: semaphore signal, matching priority as semaphore */
-		err = release_queue(engine, vma, 1, effective_prio(rq));
+		err = release_queue(engine, vma, 1, rq_deadline(rq));
 		if (err)
 			goto err_rq;
 
@@ -1455,6 +1459,7 @@ static int live_timeslice_nopreempt(void *arg)
 			goto out_spin;
 		}
 
+		rq->sched.deadline = 0;
 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 		i915_request_get(rq);
 		i915_request_add(rq);
@@ -1818,6 +1823,7 @@ static int live_late_preempt(void *arg)
 
 	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
 	ctx_lo->sched.priority = 1;
+	ctx_hi->sched.priority = I915_PRIORITY_MIN;
 
 	for_each_engine(engine, gt, id) {
 		struct igt_live_test t;
@@ -2985,6 +2991,9 @@ static int live_preempt_gang(void *arg)
 		while (rq) { /* wait for each rq from highest to lowest prio */
 			struct i915_request *n = list_next_entry(rq, mock.link);
 
+			/* With deadlines, no strict priority ordering */
+			i915_request_set_deadline(rq, 0);
+
 			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
 				struct drm_printer p =
 					drm_info_printer(engine->i915->drm.dev);
@@ -3207,6 +3216,7 @@ static int preempt_user(struct intel_engine_cs *engine,
 	i915_request_add(rq);
 
 	i915_request_set_priority(rq, I915_PRIORITY_MAX);
+	i915_request_set_deadline(rq, 0);
 
 	if (i915_request_wait(rq, 0, HZ / 2) < 0)
 		err = -ETIME;
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index cdb0ceff3be1..5323fd56efd6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -1010,7 +1010,10 @@ static int __igt_reset_engines(struct intel_gt *gt,
 					break;
 				}
 
-				if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+				/* With deadlines, no strict priority */
+				i915_request_set_deadline(rq, 0);
+
+				if (i915_request_wait(rq, 0, HZ / 2) < 0) {
 					struct drm_printer p =
 						drm_info_printer(gt->i915->drm.dev);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 6d73add47109..b7dd5646c882 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1257,6 +1257,7 @@ poison_registers(struct intel_context *ce,
 
 	intel_ring_advance(rq, cs);
 
+	rq->sched.deadline = 0;
 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 err_rq:
 	i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index c16393df42a0..79205e9a84ba 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -216,7 +216,6 @@ static void __guc_dequeue(struct intel_engine_cs *engine)
 		last = rq;
 	}
 done:
-	execlists->queue_priority_hint = pl->priority;
 	if (submit) {
 		*port = schedule_in(last, port - execlists->inflight);
 		*++port = NULL;
@@ -322,7 +321,6 @@ static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
 
 static void guc_reset_cancel(struct intel_engine_cs *engine)
 {
-	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
 	unsigned long flags;
 
@@ -346,8 +344,6 @@ static void guc_reset_cancel(struct intel_engine_cs *engine)
 
 	__i915_sched_cancel_queue(se);
 
-	execlists->queue_priority_hint = INT_MIN;
-
 	spin_unlock_irqrestore(&se->lock, flags);
 	intel_engine_signal_breadcrumbs(engine);
 }
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index ee7482b9c813..542b47078104 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -22,6 +22,8 @@ enum {
 
 	/* Interactive workload, scheduled for immediate pageflipping */
 	I915_PRIORITY_DISPLAY,
+
+	__I915_PRIORITY_KERNEL__
 };
 
 /* Smallest priority value that cannot be bumped. */
@@ -35,8 +37,7 @@ enum {
  * i.e. nothing can have higher priority and force us to usurp the
  * active request.
  */
-#define I915_PRIORITY_UNPREEMPTABLE INT_MAX
-#define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1)
+#define I915_PRIORITY_BARRIER INT_MAX
 
 /*
  * The slab returns power-of-two chunks of memory, so fill out the
@@ -82,7 +83,7 @@ enum {
  */
 struct i915_priolist {
 	struct list_head requests;
-	int priority;
+	u64 deadline;
 
 	int level;
 	struct i915_priolist *next[I915_PRIOLIST_HEIGHT];
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e7b4c4bc41a6..ce828dc73402 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -467,7 +467,7 @@ bool __i915_request_submit(struct i915_request *request)
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
 	bool result = false;
 
-	RQ_TRACE(request, "\n");
+	RQ_TRACE(request, "dl %llu\n", request->sched.deadline);
 
 	lockdep_assert_held(&se->lock);
 
@@ -650,6 +650,12 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 
 	switch (state) {
 	case FENCE_COMPLETE:
+		/*
+		 * The request is now ready to run; re-evaluate its deadline
+		 * to remove the semaphore deprioritisation and to assign
+		 * a deadline relative to its point-of-readiness [now].
+		 */
+		i915_request_update_deadline(rq);
 		break;
 
 	case FENCE_FREE:
@@ -1810,14 +1816,15 @@ long i915_request_wait(struct i915_request *rq,
 	return timeout;
 }
 
-static int print_sched_attr(const struct i915_sched_attr *attr,
-			    char *buf, int x, int len)
+static int print_sched(const struct i915_sched_node *node,
+		       char *buf, int x, int len)
 {
-	if (attr->priority == I915_PRIORITY_INVALID)
+	if (node->attr.priority == I915_PRIORITY_INVALID)
 		return x;
 
 	x += snprintf(buf + x, len - x,
-		      " prio=%d", attr->priority);
+		      " prio=%d, dl=%llu",
+		      node->attr.priority, node->deadline);
 
 	return x;
 }
@@ -1903,7 +1910,7 @@ void i915_request_show(struct drm_printer *m,
 	 *    - the request has been temporarily suspended from execution
 	 */
 
-	x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf));
+	x = print_sched(&rq->sched, buf, x, sizeof(buf));
 
 	drm_printf(m, "%s%.*s%c %llx:%lld%s%s %s @ %dms: %s\n",
 		   prefix, indent, "                ",
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 518eac67959e..1d77ece46241 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -54,6 +54,11 @@ static void node_put(struct i915_sched_node *node)
 	i915_request_put(container_of(node, struct i915_request, sched));
 }
 
+static inline u64 rq_deadline(const struct i915_request *rq)
+{
+	return READ_ONCE(rq->sched.deadline);
+}
+
 static inline int rq_prio(const struct i915_request *rq)
 {
 	return READ_ONCE(rq->sched.attr.priority);
@@ -67,6 +72,14 @@ static int ipi_get_prio(struct i915_request *rq)
 	return xchg(&rq->sched.ipi_priority, I915_PRIORITY_INVALID);
 }
 
+static u64 ipi_get_deadline(struct i915_request *rq)
+{
+	if (READ_ONCE(rq->sched.ipi_deadline) == I915_DEADLINE_NEVER)
+		return I915_DEADLINE_NEVER;
+
+	return xchg64(&rq->sched.ipi_deadline, I915_DEADLINE_NEVER);
+}
+
 static void ipi_schedule(struct work_struct *wrk)
 {
 	struct i915_sched_ipi *ipi = container_of(wrk, typeof(*ipi), work);
@@ -74,9 +87,11 @@ static void ipi_schedule(struct work_struct *wrk)
 
 	do {
 		struct i915_request *rn = xchg(&rq->sched.ipi_link, NULL);
+		u64 deadline;
 		int prio;
 
 		prio = ipi_get_prio(rq);
+		deadline = ipi_get_deadline(rq);
 
 		/*
 		 * For cross-engine scheduling to work we rely on one of two
@@ -101,6 +116,7 @@ static void ipi_schedule(struct work_struct *wrk)
 		 */
 		local_bh_disable();
 		i915_request_set_priority(rq, prio);
+		i915_request_set_deadline(rq, deadline);
 		local_bh_enable();
 
 		i915_request_put(rq);
@@ -158,7 +174,10 @@ i915_sched_default_revoke_context(struct intel_context *ce,
 
 void i915_sched_select_mode(struct i915_sched *se, enum i915_sched_mode mode)
 {
-	switch (mode) {
+	switch (min_t(int, mode, CONFIG_DRM_I915_SCHED)) {
+	case I915_SCHED_MODE_DEADLINE:
+		__set_bit(I915_SCHED_DEADLINE_BIT, &se->flags);
+		fallthrough;
 	case I915_SCHED_MODE_PRIORITY:
 		__set_bit(I915_SCHED_PRIORITY_BIT, &se->flags);
 		fallthrough;
@@ -175,8 +194,8 @@ static void init_priolist(struct i915_priolist_root *const root)
 	struct i915_priolist *pl = &root->sentinel;
 
 	memset_p((void **)pl->next, pl, ARRAY_SIZE(pl->next));
+	pl->deadline = I915_DEADLINE_NEVER;
 	pl->requests.prev = NULL;
-	pl->priority = INT_MIN;
 	pl->level = -1;
 }
 
@@ -339,19 +358,20 @@ static inline unsigned int random_level(struct i915_priolist_root *root)
 }
 
 static struct list_head *
-lookup_priolist(struct i915_sched *se, int prio)
+lookup_priolist(struct i915_sched * const se, u64 deadline)
 {
 	struct i915_priolist *update[I915_PRIOLIST_HEIGHT];
 	struct i915_priolist_root *const root = &se->queue;
 	struct i915_priolist *pl, *tmp;
 	int lvl;
 
+	GEM_BUG_ON(deadline == I915_DEADLINE_NEVER);
 	lockdep_assert_held(&se->lock);
 	if (unlikely(se->no_priolist))
-		prio = I915_PRIORITY_NORMAL;
+		deadline = 0;
 
 	for_each_priolist(pl, root) { /* recycle any empty elements before us */
-		if (pl->priority <= prio || !list_empty(&pl->requests))
+		if (pl->deadline >= deadline || !list_empty(&pl->requests))
 			break;
 
 		__i915_sched_dequeue_next(se);
@@ -361,14 +381,14 @@ lookup_priolist(struct i915_sched *se, int prio)
 	pl = &root->sentinel;
 	lvl = pl->level;
 	while (lvl >= 0) {
-		while (tmp = pl->next[lvl], tmp->priority >= prio)
+		while (tmp = pl->next[lvl], tmp->deadline <= deadline)
 			pl = tmp;
-		if (pl->priority == prio)
+		if (pl->deadline == deadline)
 			goto out;
 		update[lvl--] = pl;
 	}
 
-	if (prio == I915_PRIORITY_NORMAL) {
+	if (!deadline) {
 		pl = &se->default_priolist;
 	} else if (!pl_empty(&root->sentinel.requests)) {
 		pl = pl_pop(&root->sentinel.requests);
@@ -376,7 +396,7 @@ lookup_priolist(struct i915_sched *se, int prio)
 		pl = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
 		/* Convert an allocation failure to a priority bump */
 		if (unlikely(!pl)) {
-			prio = I915_PRIORITY_NORMAL; /* recurses just once */
+			deadline = 0; /* recurses just once */
 
 			/*
 			 * To maintain ordering with all rendering, after an
@@ -392,7 +412,7 @@ lookup_priolist(struct i915_sched *se, int prio)
 		}
 	}
 
-	pl->priority = prio;
+	pl->deadline = deadline;
 	INIT_LIST_HEAD(&pl->requests);
 
 	lvl = random_level(root);
@@ -420,7 +440,7 @@ lookup_priolist(struct i915_sched *se, int prio)
 		chk = &root->sentinel;
 		lvl = chk->level;
 		do {
-			while (tmp = chk->next[lvl], tmp->priority >= prio)
+			while (tmp = chk->next[lvl], tmp->deadline <= deadline)
 				chk = tmp;
 		} while (--lvl >= 0);
 
@@ -438,7 +458,7 @@ static void __remove_priolist(struct i915_sched *se, struct list_head *plist)
 	struct i915_priolist *pl, *tmp;
 	struct i915_priolist *old =
 		container_of(plist, struct i915_priolist, requests);
-	int prio = old->priority;
+	u64 deadline = old->deadline;
 	int lvl;
 
 	lockdep_assert_held(&se->lock);
@@ -448,11 +468,11 @@ static void __remove_priolist(struct i915_sched *se, struct list_head *plist)
 	lvl = pl->level;
 	GEM_BUG_ON(lvl < 0);
 
-	if (prio != I915_PRIORITY_NORMAL)
+	if (deadline)
 		pl_push(old, &pl->requests);
 
 	do {
-		while (tmp = pl->next[lvl], tmp->priority > prio)
+		while (tmp = pl->next[lvl], tmp->deadline < deadline)
 			pl = tmp;
 		if (lvl <= old->level) {
 			pl->next[lvl] = old->next[lvl];
@@ -495,7 +515,7 @@ struct i915_priolist *__i915_sched_dequeue_next(struct i915_sched *se)
 	GEM_BUG_ON(pl == s);
 
 	/* Keep pl->next[0] valid for for_each_priolist iteration */
-	if (pl->priority != I915_PRIORITY_NORMAL)
+	if (pl->deadline)
 		pl_push(pl, &s->requests);
 
 	lvl = pl->level;
@@ -531,52 +551,267 @@ stack_pop(struct i915_request *rq,
 	return rq;
 }
 
-static inline bool need_preempt(int prio, int active)
+static void ipi_deadline(struct i915_request *rq, u64 deadline)
 {
-	/*
-	 * Allow preemption of low -> normal -> high, but we do
-	 * not allow low priority tasks to preempt other low priority
-	 * tasks under the impression that latency for low priority
-	 * tasks does not matter (as much as background throughput),
-	 * so kiss.
-	 */
-	return prio >= max(I915_PRIORITY_NORMAL, active);
+	u64 old = READ_ONCE(rq->sched.ipi_deadline);
+
+	do {
+		if (deadline >= old)
+			return;
+	} while (!try_cmpxchg64(&rq->sched.ipi_deadline, &old, deadline));
+
+	__ipi_add(rq);
 }
 
-static void kick_submission(struct intel_engine_cs *engine,
-			    const struct i915_request *rq,
-			    int prio)
+static bool is_first_priolist(const struct i915_sched *se,
+			      const struct list_head *requests)
 {
-	const struct i915_request *inflight;
+	return requests == &se->queue.sentinel.next[0]->requests;
+}
+
+static bool
+__i915_request_set_deadline(struct i915_sched * const se,
+			    struct i915_request *rq,
+			    u64 deadline)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	struct list_head *pos = &rq->sched.signalers_list;
+	struct list_head *plist;
+
+	if (unlikely(!i915_request_in_priority_queue(rq))) {
+		rq->sched.deadline = deadline;
+		return false;
+	}
+
+	/* Fifo and depth-first replacement ensure our deps execute first */
+	plist = lookup_priolist(se, deadline);
+
+	rq->sched.dfs.prev = NULL;
+	do {
+		if (i915_sched_has_deadlines(se)) {
+			list_for_each_continue(pos, &rq->sched.signalers_list) {
+				struct i915_dependency *p =
+					list_entry(pos, typeof(*p), signal_link);
+				struct i915_request *s =
+					container_of(p->signaler, typeof(*s), sched);
+
+				if (rq_deadline(s) <= deadline)
+					continue;
+
+				if (__i915_request_is_complete(s))
+					continue;
+
+				if (s->engine != engine) {
+					ipi_deadline(s, deadline);
+					continue;
+				}
+
+				/* Remember our position along this branch */
+				rq = stack_push(s, rq, pos);
+				pos = &rq->sched.signalers_list;
+			}
+		}
+
+		RQ_TRACE(rq, "set-deadline:%llu\n", deadline);
+		WRITE_ONCE(rq->sched.deadline, deadline);
+
+		/*
+		 * Once the request is ready, it will be placed into the
+		 * priority lists and then onto the HW runlist. Before the
+		 * request is ready, it does not contribute to our preemption
+		 * decisions and we can safely ignore it, as it will, and
+		 * any preemption required, be dealt with upon submission.
+		 * See engine->submit_request()
+		 */
+		GEM_BUG_ON(i915_request_get_scheduler(rq) != se);
+		if (i915_request_in_priority_queue(rq))
+			remove_from_priolist(se, rq, plist, true);
+	} while ((rq = stack_pop(rq, &pos)));
+
+	return is_first_priolist(se, plist);
+}
+
+void i915_request_set_deadline(struct i915_request *rq, u64 deadline)
+{
+	struct intel_engine_cs *engine;
+	unsigned long flags;
+
+	if (deadline >= rq_deadline(rq))
+		return;
+
+	engine = lock_engine_irqsave(rq, flags);
+	if (!i915_sched_has_deadlines(&engine->sched))
+		goto unlock;
+
+	if (deadline >= rq_deadline(rq))
+		goto unlock;
+
+	if (__i915_request_is_complete(rq))
+		goto unlock;
+
+	rcu_read_lock();
+	if (__i915_request_set_deadline(&engine->sched, rq, deadline))
+		i915_sched_kick(&engine->sched);
+	rcu_read_unlock();
+	GEM_BUG_ON(rq_deadline(rq) != deadline);
+
+unlock:
+	spin_unlock_irqrestore(&engine->sched.lock, flags);
+}
+
+static u64 prio_slice(int prio)
+{
+	u64 slice;
+	int sf;
 
 	/*
-	 * We only need to kick the tasklet once for the high priority
-	 * new context we add into the queue.
+	 * This is the central heuristic behind the virtual deadlines. By
+	 * imposing that each task takes an equal amount of time, we
+	 * let each client have an equal slice of the GPU time. By
+	 * bringing the virtual deadline forward, that client will then
+	 * have more GPU time, and vice versa a lower priority client will
+	 * have a later deadline and receive less GPU time.
+	 *
+	 * In BFS/MuQSS, the prio_ratios[] are based on the task nice range of
+	 * [-20, 20], with each lower priority having a ~10% longer deadline,
+	 * with the note that the proportion of CPU time between two clients
+	 * of different priority will be the square of the relative prio_slice.
+	 *
+	 * This property that the budget of each client is proportional to
+	 * the relative priority, and that the scheduler fairly distributes
+	 * work according to that budget, opens up a very powerful tool
+	 * for managing clients.
+	 *
+	 * In contrast, this prio_slice() curve was chosen because it gave good
+	 * results with igt/gem_exec_schedule. It may not be the best choice!
+	 *
+	 * With a 1ms scheduling quantum:
+	 *
+	 *   MAX USER:  ~32us deadline
+	 *   0:         ~16ms deadline
+	 *   MIN_USER: 1000ms deadline
 	 */
-	if (prio <= engine->execlists.queue_priority_hint)
-		return;
 
-	/* Nothing currently active? We're overdue for a submission! */
-	inflight = execlists_active(&engine->execlists);
-	if (!inflight)
-		return;
+	if (prio >= __I915_PRIORITY_KERNEL__)
+		return INT_MAX - prio;
+
+	slice = __I915_PRIORITY_KERNEL__ - prio;
+	if (prio >= 0)
+		sf = 20 - 6;
+	else
+		sf = 20 - 1;
+
+	return slice << sf;
+}
+
+static u64 virtual_deadline(u64 kt, int priority)
+{
+	return i915_sched_to_ticks(kt + prio_slice(priority));
+}
+
+u64 i915_scheduler_next_virtual_deadline(int priority)
+{
+	return virtual_deadline(ktime_get_mono_fast_ns(), priority);
+}
+
+static u64 signal_deadline(const struct i915_request *rq)
+{
+	u64 last = ktime_get_mono_fast_ns();
+	const struct i915_dependency *p;
 
 	/*
-	 * If we are already the currently executing context, don't
-	 * bother evaluating if we should preempt ourselves.
+	 * Find the earliest point at which we will become 'ready',
+	 * which we infer from the deadline of all active signalers.
+	 * We will position ourselves at the end of that chain of work.
 	 */
-	if (inflight->context == rq->context)
-		return;
 
-	SCHED_TRACE(&engine->sched,
-		    "bumping queue-priority-hint:%d for rq:" RQ_FMT ", inflight:" RQ_FMT " prio %d\n",
-		    prio,
-		    RQ_ARG(rq), RQ_ARG(inflight),
-		    inflight->sched.attr.priority);
+	rcu_read_lock();
+	for_each_signaler(p, rq) {
+		const struct i915_request *s =
+			container_of(p->signaler, typeof(*s), sched);
+		u64 deadline;
+		int prio;
 
-	engine->execlists.queue_priority_hint = prio;
-	if (need_preempt(prio, rq_prio(inflight)))
-		intel_engine_kick_scheduler(engine);
+		if (__i915_request_is_complete(s))
+			continue;
+
+		if (s->timeline == rq->timeline &&
+		    __i915_request_has_started(s))
+			continue;
+
+		prio = rq_prio(s);
+		if (prio < rq_prio(rq))
+			continue;
+
+		deadline = rq_deadline(s);
+		if (deadline == I915_DEADLINE_NEVER) /* retired & reused */
+			continue;
+
+		if (s->context == rq->context) /* break ties in favour of hot */
+			deadline--;
+
+		deadline = i915_sched_to_ns(deadline);
+		if (p->flags & I915_DEPENDENCY_WEAK)
+			deadline -= prio_slice(prio);
+
+		last = max(last, deadline);
+	}
+	rcu_read_unlock();
+
+	return last;
+}
+
+static int adj_prio(const struct i915_request *rq)
+{
+	int prio = rq_prio(rq);
+
+	/*
+	 * Deprioritize semaphore waiters. We only want to run these if there
+	 * is nothing ready to run first.
+	 *
+	 * Note by giving a more distant deadline (due to a lower priority)
+	 * we do not prevent them from having a slice of the GPU, and if there
+	 * is still contention at that point, we expect to immediately yield
+	 * on the semaphore.
+	 *
+	 * When all semaphores are signaled, we will update the request
+	 * to remove the semaphore penalty.
+	 */
+	if (!i915_sw_fence_signaled(&rq->semaphore))
+		prio -= __I915_PRIORITY_KERNEL__;
+
+	return prio;
+}
+
+static u64
+earliest_deadline(const struct i915_sched *se, const struct i915_request *rq)
+{
+	/*
+	 * At its heart, the scheduler is simply a topological sort into
+	 * a linear sequence of requests. As we use a single ascending index,
+	 * we can repurpose the sort to achieve different goals, or to disable
+	 * the sort entirely and funnel all requests onto a single list for
+	 * immediate extraction.
+	 */
+	if (i915_sched_has_deadlines(se))
+		return virtual_deadline(signal_deadline(rq), rq_prio(rq));
+	else if (i915_sched_has_priorities(se))
+		return INT_MAX - rq_prio(rq);
+	else
+		return 0;
+}
+
+static bool
+set_earliest_deadline(struct i915_sched *se, struct i915_request *rq, u64 old)
+{
+	u64 dl;
+
+	/* Recompute our deadlines and promote after a priority change */
+	dl = min(earliest_deadline(se, rq), rq_deadline(rq));
+	if (dl >= old)
+		return false;
+
+	return __i915_request_set_deadline(se, rq, dl);
 }
 
 static void ipi_priority(struct i915_request *rq, int prio)
@@ -591,17 +826,16 @@ static void ipi_priority(struct i915_request *rq, int prio)
 	__ipi_add(rq);
 }
 
-static void __i915_request_set_priority(struct i915_request *rq, int prio)
+static bool
+__i915_request_set_priority(struct i915_sched * const se,
+			    struct i915_request *rq,
+			    int prio)
 {
 	struct intel_engine_cs *engine = rq->engine;
-	struct i915_sched *se = intel_engine_get_scheduler(engine);
 	struct list_head *pos = &rq->sched.signalers_list;
-	struct list_head *plist;
+	bool kick = false;
 
-	SCHED_TRACE(&engine->sched, "PI for " RQ_FMT ", prio:%d\n",
-		    RQ_ARG(rq), prio);
-
-	plist = lookup_priolist(se, prio);
+	SCHED_TRACE(se, "PI for " RQ_FMT ", prio:%d\n", RQ_ARG(rq), prio);
 
 	/*
 	 * Recursively bump all dependent priorities to match the new request.
@@ -623,31 +857,37 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio)
 	 */
 	rq->sched.dfs.prev = NULL;
 	do {
-		list_for_each_continue(pos, &rq->sched.signalers_list) {
-			struct i915_dependency *p =
-				list_entry(pos, typeof(*p), signal_link);
-			struct i915_request *s =
-				container_of(p->signaler, typeof(*s), sched);
+		struct i915_request *next;
 
-			if (rq_prio(s) >= prio)
-				continue;
+		if (i915_sched_has_priorities(i915_request_get_scheduler(rq))) {
+			list_for_each_continue(pos, &rq->sched.signalers_list) {
+				struct i915_dependency *p =
+					list_entry(pos, typeof(*p), signal_link);
+				struct i915_request *s =
+					container_of(p->signaler, typeof(*s), sched);
 
-			if (__i915_request_is_complete(s))
-				continue;
+				if (rq_prio(s) >= prio)
+					continue;
 
-			if (s->engine != engine) {
-				ipi_priority(s, prio);
-				continue;
+				if (__i915_request_is_complete(s))
+					continue;
+
+				if (s->engine != engine) {
+					ipi_priority(s, prio);
+					continue;
+				}
+
+				/* Remember our position along this branch */
+				rq = stack_push(s, rq, pos);
+				pos = &rq->sched.signalers_list;
 			}
-
-			/* Remember our position along this branch */
-			rq = stack_push(s, rq, pos);
-			pos = &rq->sched.signalers_list;
 		}
 
 		RQ_TRACE(rq, "set-priority:%d\n", prio);
 		WRITE_ONCE(rq->sched.attr.priority, prio);
 
+		next = stack_pop(rq, &pos);
+
 		/*
 		 * Once the request is ready, it will be placed into the
 		 * priority lists and then onto the HW runlist. Before the
@@ -656,16 +896,15 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio)
 		 * any preemption required, be dealt with upon submission.
 		 * See engine->submit_request()
 		 */
-		if (!i915_request_is_ready(rq))
-			continue;
-
 		GEM_BUG_ON(rq->engine != engine);
-		if (i915_request_in_priority_queue(rq))
-			remove_from_priolist(se, rq, plist, true);
+		if (i915_request_is_ready(rq) &&
+		    set_earliest_deadline(se, rq, rq_deadline(rq)))
+			kick = true;
 
-		/* Defer (tasklet) submission until after all updates. */
-		kick_submission(engine, rq, prio);
-	} while ((rq = stack_pop(rq, &pos)));
+		rq = next;
+	} while (rq);
+
+	return kick;
 }
 
 #define all_signalers_checked(p, rq) \
@@ -718,13 +957,9 @@ void i915_request_set_priority(struct i915_request *rq, int prio)
 	if (__i915_request_is_complete(rq))
 		goto unlock;
 
-	if (!i915_sched_has_priorities(&engine->sched)) {
-		rq->sched.attr.priority = prio;
-		goto unlock;
-	}
-
 	rcu_read_lock();
-	__i915_request_set_priority(rq, prio);
+	if (__i915_request_set_priority(&engine->sched, rq, prio))
+		i915_sched_kick(&engine->sched);
 	rcu_read_unlock();
 	GEM_BUG_ON(rq_prio(rq) != prio);
 
@@ -737,7 +972,7 @@ void __i915_sched_defer_request(struct intel_engine_cs *engine,
 {
 	struct list_head *pos = &rq->sched.waiters_list;
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	const int prio = rq_prio(rq);
+	u64 deadline = rq_deadline(rq);
 	struct i915_request *rn;
 	LIST_HEAD(dfs);
 
@@ -746,6 +981,9 @@ void __i915_sched_defer_request(struct intel_engine_cs *engine,
 	lockdep_assert_held(&se->lock);
 	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags));
 
+	if (i915_sched_has_deadlines(se))
+		deadline = max(deadline, i915_scheduler_next_virtual_deadline(adj_prio(rq)));
+
 	/*
 	 * When we defer a request, we must maintain its order with respect
 	 * to those that are waiting upon it. So we traverse its chain of
@@ -771,62 +1009,51 @@ void __i915_sched_defer_request(struct intel_engine_cs *engine,
 				   __i915_request_has_started(w) &&
 				   !__i915_request_is_complete(rq));
 
+			/* An unready waiter imposes no deadline */
 			if (!i915_request_in_priority_queue(w))
 				continue;
 
 			/*
-			 * We also need to reorder within the same priority.
+			 * We also need to reorder within the same deadline.
 			 *
 			 * This is unlike priority-inheritance, where if the
 			 * signaler already has a higher priority [earlier
 			 * deadline] than us, we can ignore as it will be
 			 * scheduled first. If a waiter already has the
-			 * same priority, we still have to push it to the end
+			 * same deadline, we still have to push it to the end
 			 * of the list. This unfortunately means we cannot
 			 * use the rq_deadline() itself as a 'visited' bit.
 			 */
-			if (rq_prio(w) < prio)
+			if (rq_deadline(w) > deadline)
 				continue;
 
-			GEM_BUG_ON(rq_prio(w) != prio);
-
 			/* Remember our position along this branch */
 			rq = stack_push(w, rq, pos);
 			pos = &rq->sched.waiters_list;
 		}
 
+		RQ_TRACE(rq, "set-deadline:%llu\n", deadline);
+		WRITE_ONCE(rq->sched.deadline, deadline);
+
 		/* Note list is reversed for waiters wrt signal hierarchy */
-		GEM_BUG_ON(rq->engine != engine);
+		GEM_BUG_ON(i915_request_get_scheduler(rq) != se);
 		remove_from_priolist(se, rq, &dfs, false);
 
 		/* Track our visit, and prevent duplicate processing */
 		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
 	} while ((rq = stack_pop(rq, &pos)));
 
-	pos = lookup_priolist(se, prio);
+	pos = lookup_priolist(se, deadline);
 	list_for_each_entry_safe(rq, rn, &dfs, sched.link) {
 		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
 		list_add_tail(&rq->sched.link, pos);
 	}
 }
 
-static void queue_request(struct i915_sched *se, struct i915_request *rq)
+static bool queue_request(struct i915_sched *se, struct i915_request *rq)
 {
-	GEM_BUG_ON(!list_empty(&rq->sched.link));
-	list_add_tail(&rq->sched.link, lookup_priolist(se, rq_prio(rq)));
 	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-}
-
-static bool submit_queue(struct intel_engine_cs *engine,
-			 const struct i915_request *rq)
-{
-	struct intel_engine_execlists *execlists = &engine->execlists;
-
-	if (rq_prio(rq) <= execlists->queue_priority_hint)
-		return false;
-
-	execlists->queue_priority_hint = rq_prio(rq);
-	return true;
+	return set_earliest_deadline(se, rq, I915_DEADLINE_NEVER);
 }
 
 static bool hold_request(const struct i915_request *rq)
@@ -864,8 +1091,8 @@ static bool ancestor_on_hold(const struct i915_sched *se,
 
 void i915_request_enqueue(struct i915_request *rq)
 {
-	struct intel_engine_cs *engine = rq->engine;
-	struct i915_sched *se = intel_engine_get_scheduler(engine);
+	struct i915_sched *se = i915_request_get_scheduler(rq);
+	u64 dl = earliest_deadline(se, rq);
 	unsigned long flags;
 	bool kick = false;
 
@@ -880,11 +1107,11 @@ void i915_request_enqueue(struct i915_request *rq)
 		list_add_tail(&rq->sched.link, &se->hold);
 		i915_request_set_hold(rq);
 	} else {
-		queue_request(se, rq);
-
+		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+		kick = __i915_request_set_deadline(se, rq,
+						   min(dl, rq_deadline(rq)));
+		GEM_BUG_ON(rq_deadline(rq) == I915_DEADLINE_NEVER);
 		GEM_BUG_ON(i915_sched_is_idle(se));
-
-		kick = submit_queue(engine, rq);
 	}
 
 	GEM_BUG_ON(list_empty(&rq->sched.link));
@@ -898,8 +1125,8 @@ __i915_sched_rewind_requests(struct intel_engine_cs *engine)
 {
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
 	struct i915_request *rq, *rn, *active = NULL;
+	u64 deadline = I915_DEADLINE_NEVER;
 	struct list_head *pl;
-	int prio = I915_PRIORITY_INVALID;
 
 	lockdep_assert_held(&se->lock);
 
@@ -911,13 +1138,21 @@ __i915_sched_rewind_requests(struct intel_engine_cs *engine)
 
 		__i915_request_unsubmit(rq);
 
-		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-		if (rq_prio(rq) != prio) {
-			prio = rq_prio(rq);
-			pl = lookup_priolist(se, prio);
+		if (__i915_request_has_started(rq) &&
+		    i915_sched_has_deadlines(se)) {
+			u64 deadline =
+				i915_scheduler_next_virtual_deadline(rq_prio(rq));
+			rq->sched.deadline = min(rq_deadline(rq), deadline);
+		}
+		GEM_BUG_ON(rq_deadline(rq) == I915_DEADLINE_NEVER);
+
+		if (rq_deadline(rq) != deadline) {
+			deadline = rq_deadline(rq);
+			pl = lookup_priolist(se, deadline);
 		}
 		GEM_BUG_ON(i915_sched_is_idle(se));
 
+		GEM_BUG_ON(i915_request_in_priority_queue(rq));
 		list_move(&rq->sched.link, pl);
 		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
 
@@ -1023,14 +1258,10 @@ void __i915_sched_resume_request(struct intel_engine_cs *engine,
 {
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
 	LIST_HEAD(list);
+	bool submit = false;
 
 	lockdep_assert_held(&se->lock);
 
-	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
-		engine->execlists.queue_priority_hint = rq_prio(rq);
-		i915_sched_kick(se);
-	}
-
 	if (!i915_request_on_hold(rq))
 		return;
 
@@ -1051,7 +1282,7 @@ void __i915_sched_resume_request(struct intel_engine_cs *engine,
 		i915_request_clear_hold(rq);
 		list_del_init(&rq->sched.link);
 
-		queue_request(se, rq);
+		submit |= queue_request(se, rq);
 
 		/* Also release any children on this engine that are ready */
 		for_each_waiter(p, rq) {
@@ -1081,6 +1312,24 @@ void __i915_sched_resume_request(struct intel_engine_cs *engine,
 
 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
 	} while (rq);
+
+	if (submit)
+		i915_sched_kick(se);
+}
+
+static u64
+update_deadline(const struct i915_request *rq)
+{
+	return earliest_deadline(i915_request_get_scheduler(rq), rq);
+}
+
+void i915_request_update_deadline(struct i915_request *rq)
+{
+	if (!i915_request_in_priority_queue(rq))
+		return;
+
+	/* Recompute our deadlines and promote after a priority change */
+	i915_request_set_deadline(rq, update_deadline(rq));
 }
 
 void i915_sched_resume_request(struct intel_engine_cs *engine,
@@ -1134,10 +1383,12 @@ void i915_sched_node_init(struct i915_sched_node *node)
 void i915_sched_node_reinit(struct i915_sched_node *node)
 {
 	node->attr.priority = I915_PRIORITY_INVALID;
+	node->deadline = I915_DEADLINE_NEVER;
 	node->semaphores = 0;
 	node->flags = 0;
 
 	GEM_BUG_ON(node->ipi_link);
+	node->ipi_deadline = I915_DEADLINE_NEVER;
 	node->ipi_priority = I915_PRIORITY_INVALID;
 
 	GEM_BUG_ON(!list_empty(&node->signalers_list));
@@ -1378,6 +1629,20 @@ print_request_ring(struct drm_printer *m, const struct i915_request *rq)
 	}
 }
 
+static const char *repr_mode(const struct i915_sched *se)
+{
+	if (i915_sched_has_deadlines(se))
+		return "Deadline";
+
+	if (i915_sched_has_priorities(se))
+		return "Priority";
+
+	if (i915_sched_is_active(se))
+		return "FIFO";
+
+	return "None";
+}
+
 void i915_sched_show(struct drm_printer *m,
 		     struct i915_sched *se,
 		     void (*show_request)(struct drm_printer *m,
@@ -1419,6 +1684,9 @@ void i915_sched_show(struct drm_printer *m,
 		}
 	}
 
+	drm_printf(m, "Scheduler: %s (%s)\n", repr_mode(se),
+		   enableddisabled(test_bit(I915_SCHED_ENABLE_BIT,
+					    &se->flags)));
 	drm_printf(m, "Tasklet queued? %s (%s)\n",
 		   yesno(test_bit(TASKLET_STATE_SCHED, &se->tasklet.state)),
 		   enableddisabled(!atomic_read(&se->tasklet.count)));
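To get a feel for the numbers coming out of prio_slice() and virtual_deadline() above, here is a standalone userspace sketch of the same arithmetic. The priority range and the __I915_PRIORITY_KERNEL__ stand-in below are assumptions chosen for illustration (the real values come from the i915 headers), as is the fake "now" timestamp.

/* Standalone sketch of the virtual-deadline arithmetic; constants assumed. */
#include <stdio.h>
#include <stdint.h>
#include <limits.h>

#define PRIO_MIN	-1023		/* assumed user priority range */
#define PRIO_MAX	1023
#define PRIO_KERNEL	(PRIO_MAX + 2)	/* stand-in for __I915_PRIORITY_KERNEL__ */
#define DEADLINE_SHIFT	19		/* ~500us buckets, as in the patch */

static uint64_t prio_slice(int prio)
{
	uint64_t slice;
	int sf;

	if (prio >= PRIO_KERNEL)
		return INT_MAX - prio;

	slice = PRIO_KERNEL - prio;
	sf = prio >= 0 ? 20 - 6 : 20 - 1;

	return slice << sf;	/* nanoseconds */
}

static uint64_t virtual_deadline(uint64_t now_ns, int prio)
{
	return (now_ns + prio_slice(prio)) >> DEADLINE_SHIFT;
}

int main(void)
{
	const uint64_t now_ns = 1000000000ull;	/* pretend "now" is 1s */
	int prios[] = { PRIO_MIN, 0, PRIO_MAX };

	for (unsigned int i = 0; i < sizeof(prios) / sizeof(prios[0]); i++)
		printf("prio %5d: slice %9lluus, deadline tick %llu\n",
		       prios[i],
		       (unsigned long long)prio_slice(prios[i]) / 1000,
		       (unsigned long long)virtual_deadline(now_ns, prios[i]));

	return 0;
}

With these assumed constants it reports slices of roughly 1s, 17ms and 33us for minimum, default and maximum user priority, in the same ballpark as the 1000ms / ~16ms / ~32us figures quoted in the prio_slice() comment.
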
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 872d221f6ba7..14714e56ad80 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -47,7 +47,14 @@ void i915_sched_select_mode(struct i915_sched *se, enum i915_sched_mode mode);
 void i915_sched_park(struct i915_sched *se);
 void i915_sched_fini(struct i915_sched *se);
 
+void i915_sched_select_mode(struct i915_sched *se, enum i915_sched_mode mode);
+
 void i915_request_set_priority(struct i915_request *request, int prio);
+void i915_request_set_deadline(struct i915_request *request, u64 deadline);
+
+void i915_request_update_deadline(struct i915_request *request);
+
+u64 i915_scheduler_next_virtual_deadline(int priority);
 
 void i915_request_enqueue(struct i915_request *request);
 
@@ -85,11 +92,14 @@ static inline void i915_sched_disable(struct i915_sched *se)
 	clear_bit(I915_SCHED_ENABLE_BIT, &se->flags);
 }
 
-void __i915_priolist_free(struct i915_priolist *p);
-static inline void i915_priolist_free(struct i915_priolist *p)
+static inline u64 i915_sched_to_ticks(ktime_t kt)
 {
-	if (p->priority != I915_PRIORITY_NORMAL)
-		__i915_priolist_free(p);
+	return ktime_to_ns(kt) >> I915_SCHED_DEADLINE_SHIFT;
+}
+
+static inline u64 i915_sched_to_ns(u64 deadline)
+{
+	return deadline << I915_SCHED_DEADLINE_SHIFT;
 }
 
 static inline bool i915_sched_is_idle(const struct i915_sched *se)
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index bc668f375097..89cccda35ecd 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -22,6 +22,7 @@ enum {
 	I915_SCHED_ENABLE_BIT = 0,
 	I915_SCHED_ACTIVE_BIT, /* can reorder the request flow */
 	I915_SCHED_PRIORITY_BIT, /* priority sorting of queue */
+	I915_SCHED_DEADLINE_BIT, /* sorting by virtual deadline */
 	I915_SCHED_TIMESLICE_BIT, /* multitasking for long workloads */
 	I915_SCHED_PREEMPT_RESET_BIT, /* reset if preemption times out */
 	I915_SCHED_BUSYWAIT_BIT, /* preempt-to-busy */
@@ -51,6 +52,7 @@ enum i915_sched_mode {
 	I915_SCHED_MODE_NONE = -1, /* inactive, no bubble prevention */
 	I915_SCHED_MODE_FIFO, /* pass-through of ready, first in first out */
 	I915_SCHED_MODE_PRIORITY, /* reorder strictly by priority */
+	I915_SCHED_MODE_DEADLINE, /* reorder to meet soft deadlines; fair */
 };
 
 /**
@@ -207,8 +209,31 @@ struct i915_sched_node {
 #define I915_SCHED_HAS_EXTERNAL_CHAIN	BIT(0)
 	unsigned long semaphores;
 
+	/**
+	 * @deadline: [virtual] deadline
+	 *
+	 * When the request is ready for execution, it is given a quota
+	 * (the engine's timeslice) and a virtual deadline. The virtual
+	 * deadline is derived from the current time:
+	 *     ktime_get() + (prio_ratio * timeslice)
+	 *
+	 * Requests are then executed in order of deadline completion.
+	 * Requests with earlier deadlines than currently executing on
+	 * the engine will preempt the active requests.
+	 *
+	 * By treating it as a virtual deadline, we use it as a hint for
+	 * when it is appropriate for a request to start with respect to
+	 * all other requests in the system. It is not a hard deadline, as
+	 * we allow requests to miss them, and we do not account for the
+	 * request runtime.
+	 */
+	u64 deadline;
+#define I915_SCHED_DEADLINE_SHIFT 19 /* i.e. roughly 500us buckets */
+#define I915_DEADLINE_NEVER U64_MAX
+
 	/* handle being scheduled for PI from outside of our active.lock */
 	struct i915_request *ipi_link;
+	u64 ipi_deadline;
 	int ipi_priority;
 };
 
@@ -236,14 +261,28 @@ struct i915_dependency {
 
 static inline bool i915_sched_is_active(const struct i915_sched *se)
 {
+	if (CONFIG_DRM_I915_SCHED < 0)
+		return false;
+
 	return test_bit(I915_SCHED_ACTIVE_BIT, &se->flags);
 }
 
 static inline bool i915_sched_has_priorities(const struct i915_sched *se)
 {
+	if (CONFIG_DRM_I915_SCHED < 1)
+		return false;
+
 	return test_bit(I915_SCHED_PRIORITY_BIT, &se->flags);
 }
 
+static inline bool i915_sched_has_deadlines(const struct i915_sched *se)
+{
+	if (CONFIG_DRM_I915_SCHED < 2)
+		return false;
+
+	return test_bit(I915_SCHED_DEADLINE_BIT, &se->flags);
+}
+
 static inline bool i915_sched_has_timeslices(const struct i915_sched *se)
 {
 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
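A quick numeric check of the bucketing implied by I915_SCHED_DEADLINE_SHIFT above: one tick is 2^19 ns, so two deadlines that fall in the same ~524us-aligned window compare equal and keep FIFO order between them, and the i915_sched_to_ticks()/i915_sched_to_ns() round trip is only exact to that granularity. The shift value below is copied from the patch; everything else is illustrative.

/* Quick numeric check of the deadline bucketing implied by the shift. */
#include <stdio.h>
#include <stdint.h>

#define SHIFT 19	/* I915_SCHED_DEADLINE_SHIFT in the patch */

int main(void)
{
	uint64_t tick_ns = 1ull << SHIFT;
	uint64_t a_ns = 600000, b_ns = 900000;	/* 600us and 900us */

	/* One tick is 524288ns, i.e. the "roughly 500us buckets" above. */
	printf("1 tick = %lluns\n", (unsigned long long)tick_ns);

	/* Both land in bucket 1, so they compare equal and retain FIFO order. */
	printf("600us -> tick %llu, 900us -> tick %llu\n",
	       (unsigned long long)(a_ns >> SHIFT),
	       (unsigned long long)(b_ns >> SHIFT));

	/* The ns round trip is only exact to bucket granularity. */
	printf("to_ns(to_ticks(900us)) = %lluns\n",
	       (unsigned long long)((b_ns >> SHIFT) << SHIFT));
	return 0;
}
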
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 8035ea7565ed..c5d7427bd429 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -2129,6 +2129,7 @@ static int measure_preemption(struct intel_context *ce)
 
 		intel_ring_advance(rq, cs);
 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+		rq->sched.deadline = 0;
 
 		elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
 		i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
index 2bb2d3d07d06..59df7f834dad 100644
--- a/drivers/gpu/drm/i915/selftests/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
@@ -12,6 +12,40 @@
 #include "selftests/igt_spinner.h"
 #include "selftests/i915_random.h"
 
+static int mock_scheduler_slices(void *dummy)
+{
+	u64 min, max, normal, kernel;
+
+	min = prio_slice(I915_PRIORITY_MIN);
+	pr_info("%8s slice: %lluus\n", "min", min >> 10);
+
+	normal = prio_slice(0);
+	pr_info("%8s slice: %lluus\n", "normal", normal >> 10);
+
+	max = prio_slice(I915_PRIORITY_MAX);
+	pr_info("%8s slice: %lluus\n", "max", max >> 10);
+
+	kernel = prio_slice(I915_PRIORITY_BARRIER);
+	pr_info("%8s slice: %lluus\n", "kernel", kernel >> 10);
+
+	if (kernel != 0) {
+		pr_err("kernel prio slice should be 0\n");
+		return -EINVAL;
+	}
+
+	if (max >= normal) {
+		pr_err("maximum prio slice should be shorter than normal\n");
+		return -EINVAL;
+	}
+
+	if (min <= normal) {
+		pr_err("minimum prio slice should be longer than normal\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int mock_skiplist_levels(void *dummy)
 {
 	struct i915_priolist_root root = {};
@@ -54,6 +88,7 @@ static int mock_skiplist_levels(void *dummy)
 int i915_scheduler_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
+		SUBTEST(mock_scheduler_slices),
 		SUBTEST(mock_skiplist_levels),
 	};
 
@@ -556,6 +591,53 @@ static int igt_priority_chains(void *arg)
 	return igt_schedule_chains(arg, igt_priority);
 }
 
+static bool igt_deadline(struct i915_request *rq,
+			 unsigned long v, unsigned long e)
+{
+	i915_request_set_deadline(rq, 0);
+	GEM_BUG_ON(rq_deadline(rq) != 0);
+	return true;
+}
+
+static int igt_deadline_chains(void *arg)
+{
+	return igt_schedule_chains(arg, igt_deadline);
+}
+
+static bool igt_defer(struct i915_request *rq, unsigned long v, unsigned long e)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	struct i915_sched *se = intel_engine_get_scheduler(engine);
+
+	/* XXX No generic means to unwind incomplete requests yet */
+	if (!i915_request_in_priority_queue(rq))
+		return false;
+
+	if (!intel_engine_has_preemption(engine))
+		return false;
+
+	spin_lock_irq(&se->lock);
+
+	/* Push all the requests to the same deadline */
+	__i915_request_set_deadline(se, rq, 0);
+	GEM_BUG_ON(rq_deadline(rq) != 0);
+
+	/* Then the very first request must be the one everyone depends on */
+	rq = list_first_entry(lookup_priolist(se, 0), typeof(*rq), sched.link);
+	GEM_BUG_ON(rq->engine != engine);
+
+	/* Deferring the first request will then have to defer all requests */
+	__i915_sched_defer_request(engine, rq);
+
+	spin_unlock_irq(&se->lock);
+	return true;
+}
+
+static int igt_deadline_defer(void *arg)
+{
+	return igt_schedule_chains(arg, igt_defer);
+}
+
 static struct i915_request *
 __write_timestamp(struct intel_engine_cs *engine,
 		  struct drm_i915_gem_object *obj,
@@ -767,13 +849,22 @@ static int igt_priority_cycle(void *arg)
 	return __igt_schedule_cycle(arg, igt_priority);
 }
 
+static int igt_deadline_cycle(void *arg)
+{
+	return __igt_schedule_cycle(arg, igt_deadline);
+}
+
 int i915_scheduler_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_deadline_chains),
 		SUBTEST(igt_priority_chains),
 
 		SUBTEST(igt_schedule_cycle),
+		SUBTEST(igt_deadline_cycle),
 		SUBTEST(igt_priority_cycle),
+
+		SUBTEST(igt_deadline_defer),
 	};
 
 	return i915_subtests(tests, i915);
@@ -909,9 +1000,54 @@ static int sparse_priority(void *arg)
 	return sparse(arg, set_priority);
 }
 
+static u64 __set_deadline(struct i915_request *rq, u64 deadline)
+{
+	u64 dt;
+
+	preempt_disable();
+	dt = ktime_get_raw_fast_ns();
+	i915_request_set_deadline(rq, deadline);
+	dt = ktime_get_raw_fast_ns() - dt;
+	preempt_enable();
+
+	return dt;
+}
+
+static bool set_deadline(struct i915_request *rq,
+			 unsigned long v, unsigned long e)
+{
+	report("set-deadline", v, e, __set_deadline(rq, 0));
+	return true;
+}
+
+static int single_deadline(void *arg)
+{
+	return single(arg, set_deadline);
+}
+
+static int wide_deadline(void *arg)
+{
+	return wide(arg, set_deadline);
+}
+
+static int inv_deadline(void *arg)
+{
+	return inv(arg, set_deadline);
+}
+
+static int sparse_deadline(void *arg)
+{
+	return sparse(arg, set_deadline);
+}
+
 int i915_scheduler_perf_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
+		SUBTEST(single_deadline),
+		SUBTEST(wide_deadline),
+		SUBTEST(inv_deadline),
+		SUBTEST(sparse_deadline),
+
 		SUBTEST(single_priority),
 		SUBTEST(wide_priority),
 		SUBTEST(inv_priority),
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index cda0f391d965..4efc5801173c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -525,6 +525,7 @@ typedef struct drm_i915_irq_wait {
 #define   I915_SCHEDULER_CAP_SEMAPHORES	(1ul << 3)
 #define   I915_SCHEDULER_CAP_ENGINE_BUSY_STATS	(1ul << 4)
 #define   I915_SCHEDULER_CAP_TIMESLICING	(1ul << 5)
+#define   I915_SCHEDULER_CAP_FAIR	(1ul << 6)
 
 #define I915_PARAM_HUC_STATUS		 42
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [Intel-gfx] [PATCH 11/31] drm/i915/gt: Specify a deadline for the heartbeat
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (8 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 10/31] drm/i915: Fair low-latency scheduling Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 12/31] drm/i915: Extend the priority boosting for the display with a deadline Chris Wilson
                   ` (23 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Since we know when we expect the heartbeat to be checked for completion,
pass that expectation along as the heartbeat request's deadline. We still
do not complain if the deadline is missed, at least until we have tried a
few times, but it allows for quicker hang detection on systems where
deadlines are adhered to.
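
As a rough illustration of the mapping, and a sketch only (the helper
actually added is in the diff below), the interval is converted with a
shift that approximates a millisecond as 2^20 nanoseconds:

/*
 * Illustrative only: convert the heartbeat interval into an absolute
 * deadline. 1ms is approximated as 2^20ns to keep the conversion cheap.
 */
static inline ktime_t heartbeat_deadline(ktime_t now, unsigned long interval_ms)
{
	return now + ((u64)interval_ms << 20); /* now + ~interval, in ns */
}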

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 1d0e7daa6285..47e1cd25a20e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -134,6 +134,16 @@ static void heartbeat_commit(struct i915_request *rq,
 	__i915_request_queue(rq, attr);
 }
 
+static void set_heartbeat_deadline(struct intel_engine_cs *engine,
+				   struct i915_request *rq)
+{
+	unsigned long interval;
+
+	interval = READ_ONCE(engine->props.heartbeat_interval_ms);
+	if (interval)
+		i915_request_set_deadline(rq, ktime_get() + (interval << 20));
+}
+
 static void show_heartbeat(const struct i915_request *rq,
 			   struct intel_engine_cs *engine)
 {
@@ -224,6 +234,8 @@ static void heartbeat(struct work_struct *wrk)
 				     RQ_ARG(rq), attr.priority);
 
 			local_bh_disable();
+			if (attr.priority == I915_PRIORITY_BARRIER)
+				i915_request_set_deadline(rq, 0);
 			i915_request_set_priority(rq, attr.priority);
 			local_bh_enable();
 		} else {
@@ -267,6 +279,7 @@ static void heartbeat(struct work_struct *wrk)
 	if (IS_ERR(rq))
 		goto unlock;
 
+	set_heartbeat_deadline(engine, rq);
 	heartbeat_commit(rq, &attr);
 
 unlock:
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [Intel-gfx] [PATCH 12/31] drm/i915: Extend the priority boosting for the display with a deadline
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (9 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 11/31] drm/i915/gt: Specify a deadline for the heartbeat Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 13/31] drm/i915/gt: Support virtual engine queues Chris Wilson
                   ` (22 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

For a modeset/pageflip, there is a very precise deadline by which the
frame must be completed in order to hit the vblank and be shown. While
we don't pass along that exact information, we can at least inform the
scheduler that this request-chain needs to be completed asap.
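
For illustration, the deadline a caller passes could be derived from the
next vblank rather than from the current time; the vblank estimate below
is hypothetical, only the i915_gem_fence_wait_priority() signature comes
from this patch:

/* Sketch: boost a flip's fence chain towards an (estimated) vblank deadline */
static void boost_flip_fence(struct dma_fence *fence, ktime_t next_vblank)
{
	/* Ask the scheduler to complete this chain before the vblank */
	i915_gem_fence_wait_priority(fence, I915_PRIORITY_DISPLAY, next_vblank);
}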

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/display/intel_display.c |  7 +++++--
 drivers/gpu/drm/i915/gem/i915_gem_object.h   |  5 +++--
 drivers/gpu/drm/i915/gem/i915_gem_wait.c     | 21 ++++++++++++--------
 3 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index beed08c00b6c..7462dd9a7116 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -11656,7 +11656,8 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
 
 	if (new_plane_state->uapi.fence) { /* explicit fencing */
 		i915_gem_fence_wait_priority(new_plane_state->uapi.fence,
-					     I915_PRIORITY_DISPLAY);
+					     I915_PRIORITY_DISPLAY,
+					     ktime_get() /* next vblank? */);
 		ret = i915_sw_fence_await_dma_fence(&state->commit_ready,
 						    new_plane_state->uapi.fence,
 						    i915_fence_timeout(dev_priv),
@@ -11678,7 +11679,9 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
 	if (ret)
 		return ret;
 
-	i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY);
+	i915_gem_object_wait_priority(obj, 0,
+				      I915_PRIORITY_DISPLAY,
+				      ktime_get() /* next vblank? */);
 	i915_gem_object_flush_frontbuffer(obj, ORIGIN_DIRTYFB);
 
 	if (!new_plane_state->uapi.fence) { /* implicit fencing */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 366d23afbb1a..322a5ab3720b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -549,14 +549,15 @@ static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
 		obj->cache_dirty = true;
 }
 
-void i915_gem_fence_wait_priority(struct dma_fence *fence, int prio);
+void i915_gem_fence_wait_priority(struct dma_fence *fence,
+				  int prio, ktime_t deadline);
 
 int i915_gem_object_wait(struct drm_i915_gem_object *obj,
 			 unsigned int flags,
 			 long timeout);
 int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 				  unsigned int flags,
-				  int prio);
+				  int prio, ktime_t deadline);
 
 void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
 					 enum fb_op_origin origin);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
index 4d1897c347b9..162f9737965f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -92,11 +92,14 @@ i915_gem_object_wait_reservation(struct dma_resv *resv,
 	return timeout;
 }
 
-static void fence_set_priority(struct dma_fence *fence, int prio)
+static void
+fence_set_priority(struct dma_fence *fence, int prio, ktime_t deadline)
 {
 	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
 		return;
 
+	i915_request_set_deadline(to_request(fence),
+				  i915_sched_to_ticks(deadline));
 	i915_request_set_priority(to_request(fence), prio);
 }
 
@@ -105,7 +108,8 @@ static inline bool __dma_fence_is_chain(const struct dma_fence *fence)
 	return fence->ops == &dma_fence_chain_ops;
 }
 
-void i915_gem_fence_wait_priority(struct dma_fence *fence, int prio)
+void i915_gem_fence_wait_priority(struct dma_fence *fence,
+				  int prio, ktime_t deadline)
 {
 	if (dma_fence_is_signaled(fence))
 		return;
@@ -118,19 +122,19 @@ void i915_gem_fence_wait_priority(struct dma_fence *fence, int prio)
 		int i;
 
 		for (i = 0; i < array->num_fences; i++)
-			fence_set_priority(array->fences[i], prio);
+			fence_set_priority(array->fences[i], prio, deadline);
 	} else if (__dma_fence_is_chain(fence)) {
 		struct dma_fence *iter;
 
 		/* The chain is ordered; if we boost the last, we boost all */
 		dma_fence_chain_for_each(iter, fence) {
 			fence_set_priority(to_dma_fence_chain(iter)->fence,
-					   prio);
+					   prio, deadline);
 			break;
 		}
 		dma_fence_put(iter);
 	} else {
-		fence_set_priority(fence, prio);
+		fence_set_priority(fence, prio, deadline);
 	}
 
 	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
@@ -139,7 +143,8 @@ void i915_gem_fence_wait_priority(struct dma_fence *fence, int prio)
 int
 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 			      unsigned int flags,
-			      int prio)
+			      int prio,
+			      ktime_t deadline)
 {
 	struct dma_fence *excl;
 
@@ -154,7 +159,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 			return ret;
 
 		for (i = 0; i < count; i++) {
-			i915_gem_fence_wait_priority(shared[i], prio);
+			i915_gem_fence_wait_priority(shared[i], prio, deadline);
 			dma_fence_put(shared[i]);
 		}
 
@@ -164,7 +169,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 	}
 
 	if (excl) {
-		i915_gem_fence_wait_priority(excl, prio);
+		i915_gem_fence_wait_priority(excl, prio, deadline);
 		dma_fence_put(excl);
 	}
 	return 0;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [Intel-gfx] [PATCH 13/31] drm/i915/gt: Support virtual engine queues
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (10 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 12/31] drm/i915: Extend the priority boosting for the display with a deadline Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 14/31] drm/i915: Move saturated workload detection back to the context Chris Wilson
                   ` (21 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Allow multiple requests to be queued onto a virtual engine, whereas
before we only allowed a single request to be queued at a time. The
advantage of keeping just one request in the queue was to ensure that we
always decided late which engine to use. However, with the introduction
of the virtual deadline we throttle submission and still only drip one
request into a sibling at a time (unless it is truly empty, but then a
second request will have an earlier deadline than the queued virtual
engine and force itself in front). This also takes advantage of the fact
that a virtual engine remains bound while it is active, i.e. we cannot
switch to a second engine until the context is completed -- so we cannot
be as lazy as lazy can be anyway.

By allowing a full queue, we avoid having to synchronize via the
breadcrumb interrupt every time, letting the virtual engine reach the
full throughput of the siblings.
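
The throttle reduces to a deadline comparison against whatever the
sibling already has queued; a minimal sketch, assuming the
first_request() and rq_deadline() helpers used in the diff below:

/*
 * Sketch only: a virtual request is moved onto a sibling only if it is
 * due before the earliest request already queued there.
 */
static bool virtual_request_is_due(const struct i915_request *vrq,
				   const struct i915_sched *sibling)
{
	const struct i915_request *first = first_request(sibling);

	return !first || rq_deadline(vrq) < rq_deadline(first);
}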

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../drm/i915/gt/intel_execlists_submission.c  | 430 ++++++++----------
 drivers/gpu/drm/i915/gt/selftest_execlists.c  | 146 ------
 drivers/gpu/drm/i915/i915_request.c           |  12 +-
 drivers/gpu/drm/i915/i915_scheduler.c         |  79 +++-
 drivers/gpu/drm/i915/i915_scheduler.h         |   4 +-
 5 files changed, 276 insertions(+), 395 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index e249b1423309..f91a126e0d94 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -162,17 +162,6 @@ struct virtual_engine {
 	struct intel_context context;
 	struct rcu_work rcu;
 
-	/*
-	 * We allow only a single request through the virtual engine at a time
-	 * (each request in the timeline waits for the completion fence of
-	 * the previous before being submitted). By restricting ourselves to
-	 * only submitting a single request, each request is placed on to a
-	 * physical to maximise load spreading (by virtue of the late greedy
-	 * scheduling -- each real engine takes the next available request
-	 * upon idling).
-	 */
-	struct i915_request *request;
-
 	/*
 	 * We keep a rbtree of available virtual engines inside each physical
 	 * engine, sorted by priority. Here we preallocate the nodes we need
@@ -276,17 +265,24 @@ static struct i915_request *first_request(const struct i915_sched *se)
 					sched.link);
 }
 
-static struct i915_request *first_virtual(const struct intel_engine_cs *engine)
+static struct virtual_engine *
+first_virtual_engine(const struct intel_engine_cs *engine)
 {
-	struct rb_node *rb;
+	return rb_entry_safe(rb_first_cached(&engine->execlists.virtual),
+			     struct virtual_engine,
+			     nodes[engine->id].rb);
+}
 
-	rb = rb_first_cached(&engine->execlists.virtual);
-	if (!rb)
+static const struct i915_request *
+first_virtual(const struct intel_engine_cs *engine)
+{
+	struct virtual_engine *ve;
+
+	ve = first_virtual_engine(engine);
+	if (!ve)
 		return NULL;
 
-	return READ_ONCE(rb_entry(rb,
-				  struct virtual_engine,
-				  nodes[engine->id].rb)->request);
+	return first_request(&ve->base.sched);
 }
 
 static const struct i915_request *
@@ -502,7 +498,7 @@ static void execlists_schedule_in(struct i915_request *rq, int idx)
 	trace_i915_request_in(rq, idx);
 
 	old = ce->inflight;
-	if (!old)
+	if (!__intel_context_inflight_count(old))
 		old = __execlists_schedule_in(rq);
 	WRITE_ONCE(ce->inflight, ptr_inc(old));
 
@@ -512,31 +508,43 @@ static void execlists_schedule_in(struct i915_request *rq, int idx)
 static void
 resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve)
 {
-	struct i915_sched *se = i915_request_get_scheduler(rq);
+	struct i915_sched *se = intel_engine_get_scheduler(&ve->base);
+	struct i915_sched *pv = i915_request_get_scheduler(rq);
+	struct i915_request *pos = rq;
+	struct intel_timeline *tl;
 
-	spin_lock_irq(&se->lock);
+	spin_lock_irq(&pv->lock);
 
-	clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-	WRITE_ONCE(rq->engine, &ve->base);
-	ve->base.sched.submit_request(rq);
+	if (__i915_request_is_complete(rq))
+		goto unlock;
 
-	spin_unlock_irq(&se->lock);
+	tl = i915_request_active_timeline(rq);
+
+	/* Rewind back to the start of this virtual engine queue */
+	list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
+		if (!i915_request_in_priority_queue(rq))
+			break;
+
+		pos = rq;
+	}
+
+	/* Resubmit the queue in execution order */
+	spin_lock(&se->lock);
+	list_for_each_entry_from(pos, &tl->requests, link) {
+		if (pos->engine == &ve->base)
+			break;
+
+		__i915_request_requeue(pos, &ve->base);
+	}
+	spin_unlock(&se->lock);
+
+unlock:
+	spin_unlock_irq(&pv->lock);
 }
 
 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 {
 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
-	struct intel_engine_cs *engine = rq->engine;
-
-	/*
-	 * After this point, the rq may be transferred to a new sibling, so
-	 * before we clear ce->inflight make sure that the context has been
-	 * removed from the b->signalers and furthermore we need to make sure
-	 * that the concurrent iterator in signal_irq_work is no longer
-	 * following ce->signal_link.
-	 */
-	if (!list_empty(&ce->signals))
-		intel_context_remove_breadcrumbs(ce, engine->breadcrumbs);
 
 	/*
 	 * This engine is now too busy to run this virtual request, so
@@ -545,11 +553,11 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 	 * same as other native request.
 	 */
 	if (i915_request_in_priority_queue(rq) &&
-	    rq->execution_mask != engine->mask)
+	    rq->execution_mask != rq->engine->mask)
 		resubmit_virtual_request(rq, ve);
 
-	if (READ_ONCE(ve->request))
-		intel_engine_kick_scheduler(&ve->base);
+	if (!i915_sched_is_idle(&ve->base.sched))
+		i915_sched_kick(&ve->base.sched);
 }
 
 static void __execlists_schedule_out(struct i915_request * const rq,
@@ -898,10 +906,16 @@ static bool ctx_single_port_submission(const struct intel_context *ce)
 		intel_context_force_single_submission(ce));
 }
 
+static bool __can_merge_ctx(const struct intel_context *prev,
+			    const struct intel_context *next)
+{
+	return prev == next;
+}
+
 static bool can_merge_ctx(const struct intel_context *prev,
 			  const struct intel_context *next)
 {
-	if (prev != next)
+	if (!__can_merge_ctx(prev, next))
 		return false;
 
 	if (ctx_single_port_submission(prev))
@@ -972,31 +986,6 @@ static bool virtual_matches(const struct virtual_engine *ve,
 	return true;
 }
 
-static struct virtual_engine *
-first_virtual_engine(struct intel_engine_cs *engine)
-{
-	struct intel_engine_execlists *el = &engine->execlists;
-	struct rb_node *rb = rb_first_cached(&el->virtual);
-
-	while (rb) {
-		struct virtual_engine *ve =
-			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
-		struct i915_request *rq = READ_ONCE(ve->request);
-
-		/* lazily cleanup after another engine handled rq */
-		if (!rq || !virtual_matches(ve, rq, engine)) {
-			rb_erase_cached(rb, &el->virtual);
-			RB_CLEAR_NODE(rb);
-			rb = rb_first_cached(&el->virtual);
-			continue;
-		}
-
-		return ve;
-	}
-
-	return NULL;
-}
-
 static void virtual_xfer_context(struct virtual_engine *ve,
 				 struct intel_engine_cs *engine)
 {
@@ -1005,6 +994,10 @@ static void virtual_xfer_context(struct virtual_engine *ve,
 	if (likely(engine == ve->siblings[0]))
 		return;
 
+	if (!list_empty(&ve->context.signals))
+		intel_context_remove_breadcrumbs(&ve->context,
+						 ve->siblings[0]->breadcrumbs);
+
 	GEM_BUG_ON(READ_ONCE(ve->context.inflight));
 	if (!intel_engine_has_relative_mmio(engine))
 		lrc_update_offsets(&ve->context, engine);
@@ -1177,6 +1170,118 @@ static bool completed(const struct i915_request *rq)
 	return __i915_request_is_complete(rq);
 }
 
+static void __virtual_dequeue(struct virtual_engine *ve,
+			      struct intel_engine_cs *sibling)
+{
+	struct ve_node * const node = &ve->nodes[sibling->id];
+	struct rb_node **parent, *rb;
+	struct i915_request *rq;
+	u64 deadline;
+	bool first;
+
+	rb_erase_cached(&node->rb, &sibling->execlists.virtual);
+	RB_CLEAR_NODE(&node->rb);
+
+	rq = first_request(&ve->base.sched);
+	if (!virtual_matches(ve, rq, sibling))
+		return;
+
+	rb = NULL;
+	first = true;
+	parent = &sibling->execlists.virtual.rb_root.rb_node;
+	deadline = rq_deadline(rq);
+	while (*parent) {
+		struct ve_node *other;
+
+		rb = *parent;
+		other = rb_entry(rb, typeof(*other), rb);
+		if (deadline <= other->deadline) {
+			parent = &rb->rb_left;
+		} else {
+			parent = &rb->rb_right;
+			first = false;
+		}
+	}
+
+	rb_link_node(&node->rb, rb, parent);
+	rb_insert_color_cached(&node->rb, &sibling->execlists.virtual, first);
+}
+
+static void virtual_requeue(struct intel_engine_cs *engine,
+			    struct i915_request *last)
+{
+	const struct i915_request * const first =
+		first_request(intel_engine_get_scheduler(engine));
+	struct virtual_engine *ve;
+
+	while ((ve = first_virtual_engine(engine))) {
+		struct i915_sched *se = intel_engine_get_scheduler(&ve->base);
+		struct i915_request *rq;
+
+		spin_lock(&se->lock);
+
+		rq = first_request(se);
+		if (unlikely(!virtual_matches(ve, rq, engine)))
+			/* lost the race to a sibling */
+			goto unlock;
+
+		GEM_BUG_ON(rq->engine != &ve->base);
+		GEM_BUG_ON(rq->context != &ve->context);
+
+		if (last && !__can_merge_ctx(last->context, rq->context)) {
+			spin_unlock(&se->lock);
+			return; /* leave this for another sibling? */
+		}
+
+		if (!dl_before(rq, first)) {
+			spin_unlock(&se->lock);
+			return;
+		}
+
+		ENGINE_TRACE(engine,
+			     "virtual rq=%llx:%lld%s, dl %lld, new engine? %s\n",
+			     rq->fence.context,
+			     rq->fence.seqno,
+			     __i915_request_is_complete(rq) ? "!" :
+			     __i915_request_has_started(rq) ? "*" :
+			     "",
+			     rq_deadline(rq),
+			     yesno(engine != ve->siblings[0]));
+
+		GEM_BUG_ON(!(rq->execution_mask & engine->mask));
+		if (__i915_request_requeue(rq, engine)) {
+			/*
+			 * Only after we confirm that we will submit
+			 * this request (i.e. it has not already
+			 * completed), do we want to update the context.
+			 *
+			 * This serves two purposes. It avoids
+			 * unnecessary work if we are resubmitting an
+			 * already completed request after timeslicing.
+			 * But more importantly, it prevents us altering
+			 * ve->siblings[] on an idle context, where
+			 * we may be using ve->siblings[] in
+			 * virtual_context_enter / virtual_context_exit.
+			 */
+			virtual_xfer_context(ve, engine);
+
+			/* Bind this ve before we release the lock */
+			if (!ve->context.inflight)
+				WRITE_ONCE(ve->context.inflight, engine);
+
+			GEM_BUG_ON(rq->engine != engine);
+			GEM_BUG_ON(ve->siblings[0] != engine);
+			GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
+
+			last = rq;
+		}
+
+unlock:
+		__virtual_dequeue(ve, engine);
+		spin_unlock(&se->lock);
+	}
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1185,9 +1290,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	struct i915_request ** const last_port = port + execlists->port_mask;
 	struct i915_request *last, * const *active;
 	struct i915_request *rq, *rn;
-	struct virtual_engine *ve;
 	struct i915_priolist *pl;
-	struct rb_node *rb;
 	bool submit = false;
 
 	/*
@@ -1318,87 +1421,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		}
 	}
 
-	/* XXX virtual is always taking precedence */
-	while ((ve = first_virtual_engine(engine))) {
-		struct i915_request *rq;
-
-		spin_lock(&ve->base.sched.lock);
-
-		rq = ve->request;
-		if (unlikely(!virtual_matches(ve, rq, engine)))
-			goto unlock; /* lost the race to a sibling */
-
-		GEM_BUG_ON(rq->engine != &ve->base);
-		GEM_BUG_ON(rq->context != &ve->context);
-
-		if (!dl_before(rq, first_request(se))) {
-			spin_unlock(&ve->base.sched.lock);
-			break;
-		}
-
-		if (last && !can_merge_rq(last, rq)) {
-			spin_unlock(&ve->base.sched.lock);
-			spin_unlock(&se->lock);
-			return; /* leave this for another sibling */
-		}
-
-		ENGINE_TRACE(engine,
-			     "virtual rq=%llx:%lld%s, dl %llx, new engine? %s\n",
-			     rq->fence.context,
-			     rq->fence.seqno,
-			     __i915_request_is_complete(rq) ? "!" :
-			     __i915_request_has_started(rq) ? "*" :
-			     "",
-			     rq_deadline(rq),
-			     yesno(engine != ve->siblings[0]));
-		WRITE_ONCE(ve->request, NULL);
-
-		rb = &ve->nodes[engine->id].rb;
-		rb_erase_cached(rb, &execlists->virtual);
-		RB_CLEAR_NODE(rb);
-
-		GEM_BUG_ON(!(rq->execution_mask & engine->mask));
-		WRITE_ONCE(rq->engine, engine);
-
-		if (__i915_request_submit(rq)) {
-			/*
-			 * Only after we confirm that we will submit
-			 * this request (i.e. it has not already
-			 * completed), do we want to update the context.
-			 *
-			 * This serves two purposes. It avoids
-			 * unnecessary work if we are resubmitting an
-			 * already completed request after timeslicing.
-			 * But more importantly, it prevents us altering
-			 * ve->siblings[] on an idle context, where
-			 * we may be using ve->siblings[] in
-			 * virtual_context_enter / virtual_context_exit.
-			 */
-			virtual_xfer_context(ve, engine);
-			GEM_BUG_ON(ve->siblings[0] != engine);
-
-			submit = true;
-			last = rq;
-		}
-
-		i915_request_put(rq);
-unlock:
-		spin_unlock(&ve->base.sched.lock);
-
-		/*
-		 * Hmm, we have a bunch of virtual engine requests,
-		 * but the first one was already completed (thanks
-		 * preempt-to-busy!). Keep looking at the veng queue
-		 * until we have no more relevant requests (i.e.
-		 * the normal submit queue has higher priority).
-		 */
-		if (submit)
-			break;
-	}
+	if (!RB_EMPTY_ROOT(&execlists->virtual.rb_root))
+		virtual_requeue(engine, last);
 
 	i915_sched_dequeue(se, pl, rq, rn) {
 		bool merge = true;
 
+		GEM_BUG_ON(i915_request_get_scheduler(rq) != se);
+
 		/*
 		 * Can we combine this request with the current port?
 		 * It has to be the same context/ringbuffer and not
@@ -2674,8 +2704,8 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct rb_node *rb;
 	unsigned long flags;
+	struct rb_node *rb;
 
 	ENGINE_TRACE(engine, "\n");
 
@@ -2704,21 +2734,12 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
 	while ((rb = rb_first_cached(&execlists->virtual))) {
 		struct virtual_engine *ve =
 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
-		struct i915_request *rq;
 
 		rb_erase_cached(rb, &execlists->virtual);
 		RB_CLEAR_NODE(rb);
 
 		spin_lock(&ve->base.sched.lock);
-		rq = fetch_and_zero(&ve->request);
-		if (rq) {
-			if (i915_request_mark_eio(rq)) {
-				rq->engine = engine;
-				__i915_request_submit(rq);
-				i915_request_put(rq);
-			}
-			i915_request_put(rq);
-		}
+		__i915_sched_cancel_queue(&ve->base.sched);
 		spin_unlock(&ve->base.sched.lock);
 	}
 
@@ -3018,11 +3039,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 	return 0;
 }
 
-static struct list_head *virtual_queue(struct virtual_engine *ve)
-{
-	return &ve->base.sched.default_priolist.requests;
-}
-
 static void rcu_virtual_context_destroy(struct work_struct *wrk)
 {
 	struct virtual_engine *ve =
@@ -3033,19 +3049,12 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk)
 	GEM_BUG_ON(ve->context.inflight);
 
 	/* Preempt-to-busy may leave a stale request behind. */
-	if (unlikely(ve->request)) {
-		struct i915_request *old;
-
+	if (unlikely(!i915_sched_is_idle(se))) {
 		spin_lock_irq(&se->lock);
-
-		old = fetch_and_zero(&ve->request);
-		if (old) {
-			GEM_BUG_ON(!__i915_request_is_complete(old));
-			__i915_request_submit(old);
-			i915_request_put(old);
-		}
-
+		__i915_sched_cancel_queue(se);
 		spin_unlock_irq(&se->lock);
+
+		GEM_BUG_ON(!i915_sched_is_idle(se));
 	}
 
 	/*
@@ -3074,7 +3083,6 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk)
 		spin_unlock_irq(&sibling->sched.lock);
 	}
 	GEM_BUG_ON(__tasklet_is_scheduled(&se->tasklet));
-	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
 
 	lrc_fini(&ve->context);
 	intel_context_fini(&ve->context);
@@ -3193,46 +3201,43 @@ static const struct intel_context_ops virtual_context_ops = {
 	.destroy = virtual_context_destroy,
 };
 
-static intel_engine_mask_t
+static struct i915_request *
 virtual_submission_mask(struct virtual_engine *ve, u64 *deadline)
 {
 	struct i915_request *rq;
-	intel_engine_mask_t mask;
 
-	rq = READ_ONCE(ve->request);
+	rq = first_request(&ve->base.sched);
 	if (!rq)
-		return 0;
+		return NULL;
 
 	/* The rq is ready for submission; rq->execution_mask is now stable. */
-	mask = rq->execution_mask;
-	if (unlikely(!mask)) {
+	if (unlikely(!rq->execution_mask)) {
 		/* Invalid selection, submit to a random engine in error */
 		i915_request_set_error_once(rq, -ENODEV);
-		mask = ve->siblings[0]->mask;
+		WRITE_ONCE(rq->execution_mask, ALL_ENGINES);
 	}
 
 	*deadline = rq_deadline(rq);
 
 	ENGINE_TRACE(&ve->base, "rq=%llx:%llu, mask=%x, dl=%llu\n",
 		     rq->fence.context, rq->fence.seqno,
-		     mask, *deadline);
+		     rq->execution_mask, *deadline);
 
-	return mask;
+	return rq;
 }
 
 static void virtual_submission_tasklet(struct tasklet_struct *t)
 {
 	struct virtual_engine * const ve =
 		from_tasklet(ve, t, base.sched.tasklet);
-	intel_engine_mask_t mask;
+	struct i915_request *rq;
 	unsigned int n;
 	u64 deadline;
 
 	rcu_read_lock();
-	mask = virtual_submission_mask(ve, &deadline);
-	rcu_read_unlock();
-	if (unlikely(!mask))
-		return;
+	rq = virtual_submission_mask(ve, &deadline);
+	if (unlikely(!rq))
+		goto out;
 
 	for (n = 0; n < ve->num_siblings; n++) {
 		struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
@@ -3241,12 +3246,9 @@ static void virtual_submission_tasklet(struct tasklet_struct *t)
 		struct rb_node **parent, *rb;
 		bool first;
 
-		if (!READ_ONCE(ve->request))
-			break; /* already handled by a sibling's tasklet */
-
 		spin_lock_irq(&se->lock);
 
-		if (unlikely(!(mask & sibling->mask))) {
+		if (unlikely(!virtual_matches(ve, rq, sibling))) {
 			if (!RB_EMPTY_NODE(&node->rb)) {
 				rb_erase_cached(&node->rb,
 						&sibling->execlists.virtual);
@@ -3303,46 +3305,9 @@ static void virtual_submission_tasklet(struct tasklet_struct *t)
 		if (intel_context_inflight(&ve->context))
 			break;
 	}
-}
 
-static void virtual_submit_request(struct i915_request *rq)
-{
-	struct virtual_engine *ve = to_virtual_engine(rq->engine);
-	struct i915_sched *se = intel_engine_get_scheduler(&ve->base);
-	unsigned long flags;
-
-	ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
-		     rq->fence.context,
-		     rq->fence.seqno);
-
-	GEM_BUG_ON(ve->base.sched.submit_request != virtual_submit_request);
-
-	spin_lock_irqsave(&se->lock, flags);
-
-	/* By the time we resubmit a request, it may be completed */
-	if (__i915_request_is_complete(rq)) {
-		__i915_request_submit(rq);
-		goto unlock;
-	}
-
-	if (ve->request) { /* background completion from preempt-to-busy */
-		GEM_BUG_ON(!__i915_request_is_complete(ve->request));
-		__i915_request_submit(ve->request);
-		i915_request_put(ve->request);
-	}
-
-	rq->sched.deadline =
-		min(rq->sched.deadline,
-		    i915_scheduler_next_virtual_deadline(rq_prio(rq)));
-	ve->request = i915_request_get(rq);
-
-	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
-	list_move_tail(&rq->sched.link, virtual_queue(ve));
-
-	intel_engine_kick_scheduler(&ve->base);
-
-unlock:
-	spin_unlock_irqrestore(&se->lock, flags);
+out:
+	rcu_read_unlock();
 }
 
 static struct ve_bond *
@@ -3434,8 +3399,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 
 	ve->base.bond_execute = virtual_bond_execute;
 
-	INIT_LIST_HEAD(virtual_queue(ve));
-
 	intel_context_init(&ve->context, &ve->base);
 
 	ve->base.breadcrumbs = intel_breadcrumbs_create(NULL);
@@ -3518,7 +3481,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 			ENGINE_VIRTUAL);
 	ve->base.sched.flags = sched;
 
-	ve->base.sched.submit_request = virtual_submit_request;
+	ve->base.sched.submit_request = i915_request_enqueue;
 	ve->base.sched.revoke_context = execlists_revoke_context;
 	tasklet_setup(&ve->base.sched.tasklet, virtual_submission_tasklet);
 
@@ -3655,8 +3618,9 @@ static void execlists_show(struct drm_printer *m,
 	for (rb = rb_first_cached(&el->virtual); rb; rb = rb_next(rb)) {
 		struct virtual_engine *ve =
 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
-		struct i915_request *rq = READ_ONCE(ve->request);
+		struct i915_request *rq;
 
+		rq = first_request(&ve->base.sched);
 		if (rq) {
 			if (count++ < max - 1)
 				show_request(m, rq, "\t", 0);
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 112a09aa0d8d..080142c86339 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -4535,151 +4535,6 @@ static int live_virtual_bond(void *arg)
 	return 0;
 }
 
-static int reset_virtual_engine(struct intel_gt *gt,
-				struct intel_engine_cs **siblings,
-				unsigned int nsibling)
-{
-	struct intel_engine_cs *engine;
-	struct intel_context *ve;
-	struct igt_spinner spin;
-	struct i915_request *rq;
-	struct i915_sched *se;
-	unsigned int n;
-	int err = 0;
-
-	/*
-	 * In order to support offline error capture for fast preempt reset,
-	 * we need to decouple the guilty request and ensure that it and its
-	 * descendents are not executed while the capture is in progress.
-	 */
-
-	if (igt_spinner_init(&spin, gt))
-		return -ENOMEM;
-
-	ve = intel_execlists_create_virtual(siblings, nsibling);
-	if (IS_ERR(ve)) {
-		err = PTR_ERR(ve);
-		goto out_spin;
-	}
-
-	for (n = 0; n < nsibling; n++)
-		st_engine_heartbeat_disable(siblings[n]);
-
-	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
-	if (IS_ERR(rq)) {
-		err = PTR_ERR(rq);
-		goto out_heartbeat;
-	}
-	i915_request_add(rq);
-
-	if (!igt_wait_for_spinner(&spin, rq)) {
-		intel_gt_set_wedged(gt);
-		err = -ETIME;
-		goto out_heartbeat;
-	}
-
-	engine = rq->engine;
-	GEM_BUG_ON(engine == ve->engine);
-	se = intel_engine_get_scheduler(engine);
-
-	/* Take ownership of the reset and tasklet */
-	local_bh_disable();
-	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
-			     &gt->reset.flags)) {
-		local_bh_enable();
-		intel_gt_set_wedged(gt);
-		err = -EBUSY;
-		goto out_heartbeat;
-	}
-	tasklet_disable(&se->tasklet);
-
-	se->tasklet.callback(&se->tasklet);
-	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
-
-	/* Fake a preemption event; failed of course */
-	spin_lock_irq(&se->lock);
-	__i915_sched_rewind_requests(engine);
-	spin_unlock_irq(&se->lock);
-	GEM_BUG_ON(rq->engine != engine);
-
-	/* Reset the engine while keeping our active request on hold */
-	i915_sched_suspend_request(engine, rq);
-	GEM_BUG_ON(!i915_request_on_hold(rq));
-
-	__intel_engine_reset_bh(engine, NULL);
-	GEM_BUG_ON(rq->fence.error != -EIO);
-
-	/* Release our grasp on the engine, letting CS flow again */
-	tasklet_enable(&se->tasklet);
-	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
-	local_bh_enable();
-
-	/* Check that we do not resubmit the held request */
-	i915_request_get(rq);
-	if (!i915_request_wait(rq, 0, HZ / 5)) {
-		pr_err("%s: on hold request completed!\n",
-		       engine->name);
-		intel_gt_set_wedged(gt);
-		err = -EIO;
-		goto out_rq;
-	}
-	GEM_BUG_ON(!i915_request_on_hold(rq));
-
-	/* But is resubmitted on release */
-	i915_sched_resume_request(engine, rq);
-	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
-		pr_err("%s: held request did not complete!\n",
-		       engine->name);
-		intel_gt_set_wedged(gt);
-		err = -ETIME;
-	}
-
-out_rq:
-	i915_request_put(rq);
-out_heartbeat:
-	for (n = 0; n < nsibling; n++)
-		st_engine_heartbeat_enable(siblings[n]);
-
-	intel_context_put(ve);
-out_spin:
-	igt_spinner_fini(&spin);
-	return err;
-}
-
-static int live_virtual_reset(void *arg)
-{
-	struct intel_gt *gt = arg;
-	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
-	unsigned int class;
-
-	/*
-	 * Check that we handle a reset event within a virtual engine.
-	 * Only the physical engine is reset, but we have to check the flow
-	 * of the virtual requests around the reset, and make sure it is not
-	 * forgotten.
-	 */
-
-	if (intel_uc_uses_guc_submission(&gt->uc))
-		return 0;
-
-	if (!intel_has_reset_engine(gt))
-		return 0;
-
-	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
-		int nsibling, err;
-
-		nsibling = select_siblings(gt, class, siblings);
-		if (nsibling < 2)
-			continue;
-
-		err = reset_virtual_engine(gt, siblings, nsibling);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -4711,7 +4566,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_virtual_preserved),
 		SUBTEST(live_virtual_slice),
 		SUBTEST(live_virtual_bond),
-		SUBTEST(live_virtual_reset),
 	};
 
 	if (i915->gt.submission_method != INTEL_SUBMISSION_ELSP)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index ce828dc73402..aa12289ea14b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1290,6 +1290,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 
 	GEM_BUG_ON(to == from);
 	GEM_BUG_ON(to->timeline == from->timeline);
+	GEM_BUG_ON(to->context == from->context);
 
 	if (i915_request_completed(from)) {
 		i915_sw_fence_set_error_once(&to->submit, from->fence.error);
@@ -1436,6 +1437,15 @@ i915_request_await_object(struct i915_request *to,
 	return ret;
 }
 
+static bool in_order_submission(const struct i915_request *prev,
+				const struct i915_request *rq)
+{
+	if (likely(prev->context == rq->context))
+		return true;
+
+	return is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask);
+}
+
 static struct i915_request *
 __i915_request_add_to_timeline(struct i915_request *rq)
 {
@@ -1475,7 +1485,7 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 			   i915_seqno_passed(prev->fence.seqno,
 					     rq->fence.seqno));
 
-		if (is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask))
+		if (in_order_submission(prev, rq))
 			i915_sw_fence_await_sw_fence(&rq->submit,
 						     &prev->submit,
 						     &rq->submitq);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 1d77ece46241..7483aeb66ea9 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -495,7 +495,9 @@ static void remove_from_priolist(struct i915_sched *se,
 	GEM_BUG_ON(!i915_request_in_priority_queue(rq));
 
 	__list_del_entry(&rq->sched.link);
-	if (tail)
+	if (!list)
+		INIT_LIST_HEAD(&rq->sched.link);
+	else if (tail)
 		list_add_tail(&rq->sched.link, list);
 	else
 		list_add(&rq->sched.link, list);
@@ -709,7 +711,7 @@ static u64 virtual_deadline(u64 kt, int priority)
 	return i915_sched_to_ticks(kt + prio_slice(priority));
 }
 
-u64 i915_scheduler_next_virtual_deadline(int priority)
+static u64 next_virtual_deadline(int priority)
 {
 	return virtual_deadline(ktime_get_mono_fast_ns(), priority);
 }
@@ -967,12 +969,11 @@ void i915_request_set_priority(struct i915_request *rq, int prio)
 	spin_unlock_irqrestore(&engine->sched.lock, flags);
 }
 
-void __i915_sched_defer_request(struct intel_engine_cs *engine,
-				struct i915_request *rq)
+static void __defer_request(struct i915_sched * const se,
+			    struct i915_request *rq,
+			    const u64 deadline)
 {
 	struct list_head *pos = &rq->sched.waiters_list;
-	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	u64 deadline = rq_deadline(rq);
 	struct i915_request *rn;
 	LIST_HEAD(dfs);
 
@@ -981,9 +982,6 @@ void __i915_sched_defer_request(struct intel_engine_cs *engine,
 	lockdep_assert_held(&se->lock);
 	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags));
 
-	if (i915_sched_has_deadlines(se))
-		deadline = max(deadline, i915_scheduler_next_virtual_deadline(adj_prio(rq)));
-
 	/*
 	 * When we defer a request, we must maintain its order with respect
 	 * to those that are waiting upon it. So we traverse its chain of
@@ -1001,7 +999,7 @@ void __i915_sched_defer_request(struct intel_engine_cs *engine,
 				continue;
 
 			/* Leave semaphores spinning on the other engines */
-			if (w->engine != engine)
+			if (w->engine != rq->engine)
 				continue;
 
 			/* No waiter should start before its signaler */
@@ -1050,6 +1048,19 @@ void __i915_sched_defer_request(struct intel_engine_cs *engine,
 	}
 }
 
+void __i915_sched_defer_request(struct intel_engine_cs *engine,
+				struct i915_request *rq)
+{
+	struct i915_sched *se = intel_engine_get_scheduler(engine);
+	u64 deadline;
+
+	deadline = rq_deadline(rq);
+	if (i915_sched_has_deadlines(se))
+		deadline = max(deadline, next_virtual_deadline(adj_prio(rq)));
+
+	__defer_request(se, rq, deadline);
+}
+
 static bool queue_request(struct i915_sched *se, struct i915_request *rq)
 {
 	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
@@ -1089,6 +1100,48 @@ static bool ancestor_on_hold(const struct i915_sched *se,
 	return unlikely(!list_empty(&se->hold)) && hold_request(rq);
 }
 
+bool __i915_request_requeue(struct i915_request *rq,
+			    struct intel_engine_cs *engine)
+{
+	struct i915_sched *se = intel_engine_get_scheduler(engine);
+
+	RQ_TRACE(rq, "transfer from %s to %s\n",
+		 rq->engine->name, engine->name);
+
+	lockdep_assert_held(&se->lock);
+	lockdep_assert_held(&i915_request_get_scheduler(rq)->lock);
+	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags));
+	GEM_BUG_ON(rq->engine == engine);
+
+	remove_from_priolist(i915_request_get_scheduler(rq), rq, NULL, false);
+	WRITE_ONCE(rq->engine, engine);
+
+	if (__i915_request_is_complete(rq)) {
+		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+		set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
+		return false;
+	}
+
+	if (unlikely(ancestor_on_hold(se, rq))) {
+		RQ_TRACE(rq, "ancestor on hold\n");
+		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+		list_add_tail(&rq->sched.link, &se->hold);
+		i915_request_set_hold(rq);
+	} else {
+		u64 deadline = min(earliest_deadline(se, rq), rq_deadline(rq));
+
+		/* Maintain request ordering wrt to existing on target */
+		__i915_request_set_deadline(se, rq, deadline);
+		if (!list_empty(&rq->sched.waiters_list))
+			__defer_request(se, rq, deadline);
+
+		GEM_BUG_ON(rq_deadline(rq) == I915_DEADLINE_NEVER);
+	}
+
+	GEM_BUG_ON(list_empty(&rq->sched.link));
+	return true;
+}
+
 void i915_request_enqueue(struct i915_request *rq)
 {
 	struct i915_sched *se = i915_request_get_scheduler(rq);
@@ -1140,9 +1193,9 @@ __i915_sched_rewind_requests(struct intel_engine_cs *engine)
 
 		if (__i915_request_has_started(rq) &&
 		    i915_sched_has_deadlines(se)) {
-			u64 deadline =
-				i915_scheduler_next_virtual_deadline(rq_prio(rq));
-			rq->sched.deadline = min(rq_deadline(rq), deadline);
+			rq->sched.deadline =
+				min(rq_deadline(rq),
+				    next_virtual_deadline(rq_prio(rq)));
 		}
 		GEM_BUG_ON(rq_deadline(rq) == I915_DEADLINE_NEVER);
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 14714e56ad80..522dfc5eb249 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -54,9 +54,9 @@ void i915_request_set_deadline(struct i915_request *request, u64 deadline);
 
 void i915_request_update_deadline(struct i915_request *request);
 
-u64 i915_scheduler_next_virtual_deadline(int priority);
-
 void i915_request_enqueue(struct i915_request *request);
+bool __i915_request_requeue(struct i915_request *rq,
+			    struct intel_engine_cs *engine);
 
 struct i915_request *
 __i915_sched_rewind_requests(struct intel_engine_cs *engine);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [Intel-gfx] [PATCH 14/31] drm/i915: Move saturated workload detection back to the context
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (11 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 13/31] drm/i915/gt: Support virtual engine queues Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 15/31] drm/i915: Bump default timeslicing quantum to 5ms Chris Wilson
                   ` (20 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

When we introduced the saturated workload detection to tell us to back
off from semaphore usage [semaphores have a noticeable impact on
contended bus cycles with the CPU for some heavy workloads], we first
introduced it as a per-context tracker. This allows individual contexts
to try and optimise their own usage, but we found that with the local
tracking and the no-semaphore boosting, the first context to disable
semaphores got a massive priority boost and so would starve the rest and
all new contexts (as they started with semaphores enabled and lower
priority). Hence we moved the saturated workload detection to the
engine, and as a consequence had to disable semaphores on virtual
engines.

Now that we do not have semaphore priority boosting, and try to schedule
fairly irrespective of semaphore usage, we can move the tracking back to
the context, and virtual engines can now utilise the faster inter-engine
synchronisation. If we see that any context fails to use the semaphore,
because the system is oversubscribed and was busy doing something else
instead of spinning on the semaphore, we disable further usage of
semaphores with that context until it idles again. This should restrict
the semaphores to lightly utilised systems where the latency between
requests is more noticeable, and curtail the bus contention from checking
for signaled semaphores.
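
Condensed to its essence, the tracking is a per-context engine mask set
when a semaphore turns out to have already been signaled by the time we
submit, and consulted before emitting the next semaphore wait. A sketch
distilled from the hunks below, not a drop-in replacement:

/* Mark the context as saturated for the engines covered by this wait */
static void mark_saturated(struct i915_request *rq)
{
	if (rq->sched.semaphores && i915_sw_fence_signaled(&rq->semaphore))
		rq->context->saturated |= rq->sched.semaphores;
}

/* Skip the busywait if this context was recently saturated */
static bool may_busywait(const struct i915_request *rq)
{
	return !(rq->sched.semaphores | READ_ONCE(rq->context->saturated));
}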

References: 44d89409a12e ("drm/i915: Make the semaphore saturation mask global")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_context.c           |  3 +++
 drivers/gpu/drm/i915/gt/intel_context_types.h     |  2 ++
 drivers/gpu/drm/i915/gt/intel_engine_pm.c         |  2 --
 drivers/gpu/drm/i915/gt/intel_engine_types.h      |  2 --
 .../gpu/drm/i915/gt/intel_execlists_submission.c  | 15 ---------------
 drivers/gpu/drm/i915/i915_request.c               |  6 +++---
 6 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index daf537d1e415..57b6bde2b736 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -344,6 +344,9 @@ static int __intel_context_active(struct i915_active *active)
 {
 	struct intel_context *ce = container_of(active, typeof(*ce), active);
 
+	CE_TRACE(ce, "active\n");
+	ce->saturated = 0;
+
 	intel_context_get(ce);
 
 	/* everything should already be activated by intel_context_pre_pin() */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 0ea18c9e2aca..d1a35c3055a7 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -109,6 +109,8 @@ struct intel_context {
 	} lrc;
 	u32 tag; /* cookie passed to HW to track this context on submission */
 
+	intel_engine_mask_t saturated; /* submitting semaphores too late? */
+
 	/** stats: Context GPU engine busyness tracking. */
 	struct intel_context_stats {
 		u64 active;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index ef5064ea54e5..44948abe4bf8 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -253,8 +253,6 @@ static int __engine_park(struct intel_wakeref *wf)
 	struct intel_engine_cs *engine =
 		container_of(wf, typeof(*engine), wakeref);
 
-	engine->saturated = 0;
-
 	/*
 	 * If one and only one request is completed between pm events,
 	 * we know that we are inside the kernel context and it is
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index d1024e8717e1..416bb07c4ab7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -303,8 +303,6 @@ struct intel_engine_cs {
 
 	struct intel_context *kernel_context; /* pinned */
 
-	intel_engine_mask_t saturated; /* submitting semaphores too late? */
-
 	struct {
 		struct delayed_work work;
 		struct i915_request *systole;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index f91a126e0d94..083204baedf9 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3375,21 +3375,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
 	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
 
-	/*
-	 * The decision on whether to submit a request using semaphores
-	 * depends on the saturated state of the engine. We only compute
-	 * this during HW submission of the request, and we need for this
-	 * state to be globally applied to all requests being submitted
-	 * to this engine. Virtual engines encompass more than one physical
-	 * engine and so we cannot accurately tell in advance if one of those
-	 * engines is already saturated and so cannot afford to use a semaphore
-	 * and be pessimized in priority for doing so -- if we are the only
-	 * context using semaphores after all other clients have stopped, we
-	 * will be starved on the saturated system. Such a global switch for
-	 * semaphores is less than ideal, but alas is the current compromise.
-	 */
-	ve->base.saturated = ALL_ENGINES;
-
 	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
 
 	intel_engine_init_execlists(&ve->base);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index aa12289ea14b..352083889b97 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -516,7 +516,7 @@ bool __i915_request_submit(struct i915_request *request)
 	 */
 	if (request->sched.semaphores &&
 	    i915_sw_fence_signaled(&request->semaphore))
-		engine->saturated |= request->sched.semaphores;
+		request->context->saturated |= request->sched.semaphores;
 
 	engine->emit_fini_breadcrumb(request,
 				     request->ring->vaddr + request->postfix);
@@ -977,7 +977,7 @@ already_busywaiting(struct i915_request *rq)
 	 *
 	 * See the are-we-too-late? check in __i915_request_submit().
 	 */
-	return rq->sched.semaphores | READ_ONCE(rq->engine->saturated);
+	return rq->sched.semaphores | READ_ONCE(rq->context->saturated);
 }
 
 static int
@@ -1071,7 +1071,7 @@ emit_semaphore_wait(struct i915_request *to,
 	if (__emit_semaphore_wait(to, from, from->fence.seqno))
 		goto await_fence;
 
-	to->sched.semaphores |= mask;
+	to->sched.semaphores |= mask & ~to->engine->mask;
 	wait = &to->semaphore;
 
 await_fence:
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [Intel-gfx] [PATCH 15/31] drm/i915: Bump default timeslicing quantum to 5ms
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (12 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 14/31] drm/i915: Move saturated workload detection back to the context Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 16/31] drm/i915/gt: Delay taking irqoff for execlists submission Chris Wilson
                   ` (19 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Primarily to smooth over differences with the GuC backend, which
struggles with smaller quanta, bump the default timeslice from 1ms to
5ms.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Kconfig.profile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index f1d009906f71..72ed001d238d 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -152,7 +152,7 @@ config DRM_I915_STOP_TIMEOUT
 
 config DRM_I915_TIMESLICE_DURATION
 	int "Scheduling quantum for userspace batches (ms, jiffy granularity)"
-	default 1 # milliseconds
+	default 5 # milliseconds
 	help
 	  When two user batches of equal priority are executing, we will
 	  alternate execution of each batch to ensure forward progress of
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [Intel-gfx] [PATCH 16/31] drm/i915/gt: Delay taking irqoff for execlists submission
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (13 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 15/31] drm/i915: Bump default timeslicing quantum to 5ms Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 17/31] drm/i915/gt: Convert the legacy ring submission to use the scheduling interface Chris Wilson
                   ` (18 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Before we take the irqsafe spinlock to dequeue requests and submit them
to HW, first check whether we need to take any action at all (i.e.
whether the HW is ready for more work, or whether we need to preempt the
currently executing context) without holding the lock. In the common
case we can then skip taking the spinlock entirely, and so reduce
contention.
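
The shape of the change is the familiar unlocked-peek pattern: decide
whether there is anything to do before committing to the irqsafe lock.
A sketch with hypothetical predicate names (the real checks live in
execlists_dequeue() below):

static void dequeue_if_needed(struct intel_engine_cs *engine)
{
	/* Unlocked peek: is the HW ready, or must we preempt? (hypothetical) */
	if (!hw_can_take_more_work(engine) && !need_preempt_check(engine))
		return;

	local_irq_disable(); /* irq stays off until after the ELSP write */
	spin_lock(&engine->sched.lock);
	/* ... dequeue requests and write them to the ELSP ... */
	spin_unlock(&engine->sched.lock);
	local_irq_enable();
}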

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../drm/i915/gt/intel_execlists_submission.c  | 88 ++++++++-----------
 1 file changed, 39 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 083204baedf9..b7d28d09c9c1 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1016,24 +1016,6 @@ static void virtual_xfer_context(struct virtual_engine *ve,
 	}
 }
 
-static void defer_active(struct intel_engine_cs *engine)
-{
-	struct i915_request *rq;
-
-	rq = __i915_sched_rewind_requests(engine);
-	if (!rq)
-		return;
-
-	/*
-	 * We want to move the interrupted request to the back of
-	 * the round-robin list (i.e. its priority level), but
-	 * in doing so, we must then move all requests that were in
-	 * flight and were waiting for the interrupted request to
-	 * be run after it again.
-	 */
-	__i915_sched_defer_request(engine, rq);
-}
-
 static bool
 timeslice_yield(const struct intel_engine_execlists *el,
 		const struct i915_request *rq)
@@ -1315,8 +1297,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
-	spin_lock(&se->lock);
-
 	/*
 	 * If the queue is higher priority than the last
 	 * request in the currently active context, submit afresh.
@@ -1339,24 +1319,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				     rq_deadline(last),
 				     rq_prio(last));
 			record_preemption(execlists);
-
-			/*
-			 * Don't let the RING_HEAD advance past the breadcrumb
-			 * as we unwind (and until we resubmit) so that we do
-			 * not accidentally tell it to go backwards.
-			 */
-			ring_set_paused(engine, 1);
-
-			/*
-			 * Note that we have not stopped the GPU at this point,
-			 * so we are unwinding the incomplete requests as they
-			 * remain inflight and so by the time we do complete
-			 * the preemption, some of the unwound requests may
-			 * complete!
-			 */
-			__i915_sched_rewind_requests(engine);
-
-			last = NULL;
+			last = (void *)1;
 		} else if (timeslice_expired(engine, last)) {
 			ENGINE_TRACE(engine,
 				     "expired:%s last=%llx:%llu, deadline=%llu, now=%llu, yield?=%s\n",
@@ -1383,8 +1346,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * same context again, grant it a full timeslice.
 			 */
 			cancel_timer(&execlists->timer);
-			ring_set_paused(engine, 1);
-			defer_active(engine);
 
 			/*
 			 * Unlike for preemption, if we rewind and continue
@@ -1399,7 +1360,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * normal save/restore will preserve state and allow
 			 * us to later continue executing the same request.
 			 */
-			last = NULL;
+			last = (void *)3;
 		} else {
 			/*
 			 * Otherwise if we already have a request pending
@@ -1415,12 +1376,46 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * Even if ELSP[1] is occupied and not worthy
 				 * of timeslices, our queue might be.
 				 */
-				spin_unlock(&se->lock);
 				return;
 			}
 		}
 	}
 
+	local_irq_disable(); /* irq remains off until after ELSP write */
+	spin_lock(&se->lock);
+
+	if ((unsigned long)last & 1) {
+		bool defer = (unsigned long)last & 2;
+
+		/*
+		 * Don't let the RING_HEAD advance past the breadcrumb
+		 * as we unwind (and until we resubmit) so that we do
+		 * not accidentally tell it to go backwards.
+		 */
+		ring_set_paused(engine, (unsigned long)last);
+
+		/*
+		 * Note that we have not stopped the GPU at this point,
+		 * so we are unwinding the incomplete requests as they
+		 * remain inflight and so by the time we do complete
+		 * the preemption, some of the unwound requests may
+		 * complete!
+		 */
+		last = __i915_sched_rewind_requests(engine);
+
+		/*
+		 * We want to move the interrupted request to the back of
+		 * the round-robin list (i.e. its priority level), but
+		 * in doing so, we must then move all requests that were in
+		 * flight and were waiting for the interrupted request to
+		 * be run after it again.
+		 */
+		if (last && defer)
+			__i915_sched_defer_request(engine, last);
+
+		last = NULL;
+	}
+
 	if (!RB_EMPTY_ROOT(&execlists->virtual.rb_root))
 		virtual_requeue(engine, last);
 
@@ -1529,13 +1524,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			i915_request_put(*port);
 		*execlists->pending = NULL;
 	}
-}
 
-static void execlists_dequeue_irq(struct intel_engine_cs *engine)
-{
-	local_irq_disable(); /* Suspend interrupts across request submission */
-	execlists_dequeue(engine);
-	local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
+	local_irq_enable();
 }
 
 static void clear_ports(struct i915_request **ports, int count)
@@ -2187,7 +2177,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
 		execlists_reset(engine);
 
 	if (!engine->execlists.pending[0]) {
-		execlists_dequeue_irq(engine);
+		execlists_dequeue(engine);
 		start_timeslice(engine);
 	}
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [Intel-gfx] [PATCH 17/31] drm/i915/gt: Convert the legacy ring submission to use the scheduling interface
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (14 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 16/31] drm/i915/gt: Delay taking irqoff for execlists submission Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 18/31] drm/i915/gt: Wrap intel_timeline.has_initial_breadcrumb Chris Wilson
                   ` (17 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Adapt the legacy ring submission to use a passthrough tasklet so that
we can plug it into the scheduler.
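
Concretely, requests are queued via i915_request_enqueue() onto the
scheduler's priority lists, and the new tasklet drains those lists
straight onto the ring in order, writing RING_TAIL once for the final
request. In shorthand (condensed from the diff below):

	engine->sched.submit_request = i915_request_enqueue;

	/* passthrough tasklet: no reordering, just drain onto the ring */
	i915_sched_dequeue(se, pl, rq, rn) {
		__i915_request_submit(rq);
		last = rq;
	}
	if (last)
		write_tail(engine, last); /* single tail update per pass */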

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   1 +
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 167 +++++++++++-------
 2 files changed, 107 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 416bb07c4ab7..9f14631b8132 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -431,6 +431,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_IS_VIRTUAL       BIT(4)
 #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(5)
 #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(6)
+#define I915_ENGINE_NEEDS_WA_TAIL_WRITE BIT(7)
 	unsigned int flags;
 
 	/*
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 282089d64789..47f05e7a4e8c 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -308,6 +308,8 @@ static void reset_prepare(struct intel_engine_cs *engine)
 	 * FIXME: Wa for more modern gens needs to be validated
 	 */
 	ENGINE_TRACE(engine, "\n");
+
+	i915_sched_disable_tasklet(intel_engine_get_scheduler(engine));
 	intel_engine_stop_cs(engine);
 
 	/* G45 ring initialization often fails to reset head to zero */
@@ -394,6 +396,7 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 
 static void reset_finish(struct intel_engine_cs *engine)
 {
+	i915_sched_enable_tasklet(intel_engine_get_scheduler(engine));
 }
 
 static void reset_cancel(struct intel_engine_cs *engine)
@@ -402,22 +405,12 @@ static void reset_cancel(struct intel_engine_cs *engine)
 	unsigned long flags;
 
 	spin_lock_irqsave(&se->lock, flags);
-
 	__i915_sched_cancel_queue(se);
-
 	spin_unlock_irqrestore(&se->lock, flags);
+
 	intel_engine_signal_breadcrumbs(engine);
 }
 
-static void i9xx_submit_request(struct i915_request *request)
-{
-	i915_request_submit(request);
-	wmb(); /* paranoid flush writes out of the WCB before mmio */
-
-	ENGINE_WRITE(request->engine, RING_TAIL,
-		     intel_ring_set_tail(request->ring, request->tail));
-}
-
 static void __ring_context_fini(struct intel_context *ce)
 {
 	i915_vma_put(ce->state);
@@ -929,52 +922,9 @@ static int ring_request_alloc(struct i915_request *request)
 	return 0;
 }
 
-static void gen6_bsd_submit_request(struct i915_request *request)
+static void set_default_submission(struct intel_engine_cs *engine)
 {
-	struct intel_uncore *uncore = request->engine->uncore;
-
-	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
-
-       /* Every tail move must follow the sequence below */
-
-	/* Disable notification that the ring is IDLE. The GT
-	 * will then assume that it is busy and bring it out of rc6.
-	 */
-	intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
-			      _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
-
-	/* Clear the context id. Here be magic! */
-	intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);
-
-	/* Wait for the ring not to be idle, i.e. for it to wake up. */
-	if (__intel_wait_for_register_fw(uncore,
-					 GEN6_BSD_SLEEP_PSMI_CONTROL,
-					 GEN6_BSD_SLEEP_INDICATOR,
-					 0,
-					 1000, 0, NULL))
-		drm_err(&uncore->i915->drm,
-			"timed out waiting for the BSD ring to wake up\n");
-
-	/* Now that the ring is fully powered up, update the tail */
-	i9xx_submit_request(request);
-
-	/* Let the ring send IDLE messages to the GT again,
-	 * and so let it sleep to conserve power when idle.
-	 */
-	intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
-			      _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
-
-	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
-}
-
-static void i9xx_set_default_submission(struct intel_engine_cs *engine)
-{
-	engine->sched.submit_request = i9xx_submit_request;
-}
-
-static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
-{
-	engine->sched.submit_request = gen6_bsd_submit_request;
+	engine->sched.submit_request = i915_request_enqueue;
 }
 
 static void ring_release(struct intel_engine_cs *engine)
@@ -1053,8 +1003,6 @@ static void setup_common(struct intel_engine_cs *engine)
 	if (IS_GEN(i915, 5))
 		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
 
-	engine->set_default_submission = i9xx_set_default_submission;
-
 	if (INTEL_GEN(i915) >= 6)
 		engine->emit_bb_start = gen6_emit_bb_start;
 	else if (INTEL_GEN(i915) >= 4)
@@ -1063,6 +1011,8 @@ static void setup_common(struct intel_engine_cs *engine)
 		engine->emit_bb_start = i830_emit_bb_start;
 	else
 		engine->emit_bb_start = gen3_emit_bb_start;
+
+	engine->set_default_submission = set_default_submission;
 }
 
 static void setup_rcs(struct intel_engine_cs *engine)
@@ -1099,9 +1049,8 @@ static void setup_vcs(struct intel_engine_cs *engine)
 	struct drm_i915_private *i915 = engine->i915;
 
 	if (INTEL_GEN(i915) >= 6) {
-		/* gen6 bsd needs a special wa for tail updates */
-		if (IS_GEN(i915, 6))
-			engine->set_default_submission = gen6_bsd_set_default_submission;
+		if (IS_GEN(engine->i915, 6))
+			engine->flags |= I915_ENGINE_NEEDS_WA_TAIL_WRITE;
 		engine->emit_flush = gen6_emit_flush_vcs;
 		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
 
@@ -1203,6 +1152,98 @@ static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine)
 	return err;
 }
 
+static void __write_tail(struct intel_engine_cs *engine,
+			 struct i915_request *rq)
+{
+	ENGINE_WRITE(engine, RING_TAIL,
+		     intel_ring_set_tail(rq->ring, rq->tail));
+}
+
+static void wa_write_tail(struct intel_engine_cs *engine,
+			  struct i915_request *rq)
+{
+	struct intel_uncore *uncore = engine->uncore;
+
+	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+	/* Every tail move must follow the sequence below */
+
+	/* Disable notification that the ring is IDLE. The GT
+	 * will then assume that it is busy and bring it out of rc6.
+	 */
+	intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
+			      _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
+
+	/* Clear the context id. Here be magic! */
+	intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);
+
+	/* Wait for the ring not to be idle, i.e. for it to wake up. */
+	if (__intel_wait_for_register_fw(uncore,
+					 GEN6_BSD_SLEEP_PSMI_CONTROL,
+					 GEN6_BSD_SLEEP_INDICATOR,
+					 0,
+					 1000, 0, NULL))
+		drm_err(&uncore->i915->drm,
+			"timed out waiting for the BSD ring to wake up\n");
+
+	/* Now that the ring is fully powered up, update the tail */
+	__write_tail(engine, rq);
+
+	/* Let the ring send IDLE messages to the GT again,
+	 * and so let it sleep to conserve power when idle.
+	 */
+	intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
+			      _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
+
+	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+}
+
+static void write_tail(struct intel_engine_cs *engine,
+		       struct i915_request *rq)
+{
+	wmb(); /* paranoid flush writes out of the WCB before mmio */
+
+	if (engine->flags & I915_ENGINE_NEEDS_WA_TAIL_WRITE)
+		wa_write_tail(engine, rq);
+	else
+		__write_tail(engine, rq);
+}
+
+static void passthrough_tasklet(struct tasklet_struct *t)
+{
+	struct i915_sched *se = from_tasklet(se, t, tasklet);
+	struct intel_engine_cs *engine =
+		container_of(se, typeof(*engine), sched);
+	struct i915_request *last = NULL;
+	struct i915_request *rq, *rn;
+	struct i915_priolist *pl;
+
+	if (i915_sched_is_idle(se))
+		return;
+
+	local_irq_disable();
+
+	spin_lock(&se->lock);
+	i915_sched_dequeue(se, pl, rq, rn) {
+		__i915_request_submit(rq);
+		last = rq;
+	}
+	spin_unlock(&se->lock);
+
+	if (last)
+		write_tail(engine, last);
+
+	local_irq_enable();
+}
+
+static int init_sched(struct intel_engine_cs *engine)
+{
+	tasklet_setup(&engine->sched.tasklet, passthrough_tasklet);
+	i915_sched_select_mode(&engine->sched, I915_SCHED_MODE_NONE);
+
+	return 0;
+}
+
 int intel_ring_submission_setup(struct intel_engine_cs *engine)
 {
 	struct intel_timeline *timeline;
@@ -1229,6 +1270,10 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
 		return -ENODEV;
 	}
 
+	err = init_sched(engine);
+	if (err)
+		goto err;
+
 	timeline = intel_timeline_create_from_engine(engine,
 						     I915_GEM_HWS_SEQNO_ADDR);
 	if (IS_ERR(timeline)) {
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [Intel-gfx] [PATCH 18/31] drm/i915/gt: Wrap intel_timeline.has_initial_breadcrumb
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (15 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 17/31] drm/i915/gt: Convert the legacy ring submission to use the scheduling interface Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 19/31] drm/i915/gt: Track timeline GGTT offset separately from subpage offset Chris Wilson
                   ` (16 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

In preparation for removing the has_initial_breadcrumb field, add a
helper function for the existing callers.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c        | 2 +-
 drivers/gpu/drm/i915/gt/intel_ring_submission.c | 4 ++--
 drivers/gpu/drm/i915/gt/intel_timeline.c        | 6 +++---
 drivers/gpu/drm/i915/gt/intel_timeline.h        | 6 ++++++
 drivers/gpu/drm/i915/gt/selftest_timeline.c     | 5 +++--
 5 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 8791e03ebe61..d8763146e054 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -354,7 +354,7 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq)
 	u32 *cs;
 
 	GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
-	if (!i915_request_timeline(rq)->has_initial_breadcrumb)
+	if (!intel_timeline_has_initial_breadcrumb(i915_request_timeline(rq)))
 		return 0;
 
 	cs = intel_ring_begin(rq, 6);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 47f05e7a4e8c..00e85a3b228a 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -900,7 +900,7 @@ static int ring_request_alloc(struct i915_request *request)
 	int ret;
 
 	GEM_BUG_ON(!intel_context_is_pinned(request->context));
-	GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);
+	GEM_BUG_ON(intel_timeline_has_initial_breadcrumb(i915_request_timeline(request)));
 
 	/*
 	 * Flush enough space to reduce the likelihood of waiting after
@@ -1280,7 +1280,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
 		err = PTR_ERR(timeline);
 		goto err;
 	}
-	GEM_BUG_ON(timeline->has_initial_breadcrumb);
+	GEM_BUG_ON(intel_timeline_has_initial_breadcrumb(timeline));
 
 	err = intel_timeline_pin(timeline, NULL);
 	if (err)
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 491b8df174c2..1505dffbaba9 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -444,14 +444,14 @@ void intel_timeline_exit(struct intel_timeline *tl)
 static u32 timeline_advance(struct intel_timeline *tl)
 {
 	GEM_BUG_ON(!atomic_read(&tl->pin_count));
-	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
+	GEM_BUG_ON(tl->seqno & intel_timeline_has_initial_breadcrumb(tl));
 
-	return tl->seqno += 1 + tl->has_initial_breadcrumb;
+	return tl->seqno += 1 + intel_timeline_has_initial_breadcrumb(tl);
 }
 
 static void timeline_rollback(struct intel_timeline *tl)
 {
-	tl->seqno -= 1 + tl->has_initial_breadcrumb;
+	tl->seqno -= 1 + intel_timeline_has_initial_breadcrumb(tl);
 }
 
 static noinline int
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index b1f81d947f8d..7d6218b55df6 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -42,6 +42,12 @@ static inline void intel_timeline_put(struct intel_timeline *timeline)
 	kref_put(&timeline->kref, __intel_timeline_free);
 }
 
+static inline bool
+intel_timeline_has_initial_breadcrumb(const struct intel_timeline *tl)
+{
+	return tl->has_initial_breadcrumb;
+}
+
 static inline int __intel_timeline_sync_set(struct intel_timeline *tl,
 					    u64 context, u32 seqno)
 {
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index d283dce5b4ac..562a450d2832 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -665,7 +665,7 @@ static int live_hwsp_wrap(void *arg)
 	if (IS_ERR(tl))
 		return PTR_ERR(tl);
 
-	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
+	if (!intel_timeline_has_initial_breadcrumb(tl) || !tl->hwsp_cacheline)
 		goto out_free;
 
 	err = intel_timeline_pin(tl, NULL);
@@ -1234,7 +1234,8 @@ static int live_hwsp_rollover_user(void *arg)
 			goto out;
 
 		tl = ce->timeline;
-		if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
+		if (!intel_timeline_has_initial_breadcrumb(tl) ||
+		    !tl->hwsp_cacheline)
 			goto out;
 
 		timeline_rollback(tl);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [Intel-gfx] [PATCH 19/31] drm/i915/gt: Track timeline GGTT offset separately from subpage offset
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (16 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 18/31] drm/i915/gt: Wrap intel_timeline.has_initial_breadcrumb Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 20/31] drm/i915/gt: Add timeline "mode" Chris Wilson
                   ` (15 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Currently we know that the timeline status page is at most a page in
size, and so we can preserve the lower 12 bits of the offset when
relocating the status page in the GGTT. If we want to use a larger
object, such as the context state, the slot may not lie within the first
page of that object, and so we need to track more than 12 bits of
sub-object offset.
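
As a rough worked example (values invented for illustration): if the
backing object is mapped at GGTT address 0x00100000 and the seqno slot
lives at byte 0x1030 within that object, preserving only
offset_in_page() folds the slot back to 0x030 and loses the page index,
whereas tracking the GGTT address in a separate field keeps the full
sub-object offset:

	/* illustrative values only */
	u32 ggtt_base   = 0x00100000;	/* i915_ggtt_offset(tl->hwsp_ggtt) */
	u32 hwsp_offset = 0x1030;	/* slot within the backing object */

	/* old: only the low 12 bits survive the relocation */
	u32 old = ggtt_base + offset_in_page(hwsp_offset);  /* 0x00100030 */

	/* new: tl->ggtt_offset carries the full sub-object offset */
	u32 ggtt_offset = ggtt_base + hwsp_offset;	    /* 0x00101030 */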

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/gt/gen6_engine_cs.c        |  4 ++--
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c        |  2 +-
 .../drm/i915/gt/intel_execlists_submission.c    |  2 +-
 drivers/gpu/drm/i915/gt/intel_timeline.c        | 17 +++++++----------
 drivers/gpu/drm/i915/gt/intel_timeline_types.h  |  1 +
 drivers/gpu/drm/i915/gt/selftest_engine_cs.c    |  2 +-
 drivers/gpu/drm/i915/gt/selftest_rc6.c          |  2 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c     | 16 ++++++++--------
 drivers/gpu/drm/i915/i915_scheduler.c           |  2 +-
 9 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen6_engine_cs.c b/drivers/gpu/drm/i915/gt/gen6_engine_cs.c
index ce38d1bcaba3..2f59dd3bdc18 100644
--- a/drivers/gpu/drm/i915/gt/gen6_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen6_engine_cs.c
@@ -161,7 +161,7 @@ u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 		 PIPE_CONTROL_DC_FLUSH_ENABLE |
 		 PIPE_CONTROL_QW_WRITE |
 		 PIPE_CONTROL_CS_STALL);
-	*cs++ = i915_request_active_timeline(rq)->hwsp_offset |
+	*cs++ = i915_request_active_timeline(rq)->ggtt_offset |
 		PIPE_CONTROL_GLOBAL_GTT;
 	*cs++ = rq->fence.seqno;
 
@@ -359,7 +359,7 @@ u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 		 PIPE_CONTROL_QW_WRITE |
 		 PIPE_CONTROL_GLOBAL_GTT_IVB |
 		 PIPE_CONTROL_CS_STALL);
-	*cs++ = i915_request_active_timeline(rq)->hwsp_offset;
+	*cs++ = i915_request_active_timeline(rq)->ggtt_offset;
 	*cs++ = rq->fence.seqno;
 
 	*cs++ = MI_USER_INTERRUPT;
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index d8763146e054..187f1dad1054 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -346,7 +346,7 @@ static u32 hwsp_offset(const struct i915_request *rq)
 	if (cl)
 		return cl->ggtt_offset;
 
-	return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
+	return rcu_dereference_protected(rq->timeline, 1)->ggtt_offset;
 }
 
 int gen8_emit_init_breadcrumb(struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index b7d28d09c9c1..86093d0b56ed 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3554,7 +3554,7 @@ static int print_ring(char *buf, int sz, struct i915_request *rq)
 		len = scnprintf(buf, sz,
 				"ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
 				i915_ggtt_offset(rq->ring->vma),
-				tl ? tl->hwsp_offset : 0,
+				tl ? tl->ggtt_offset : 0,
 				hwsp_seqno(rq),
 				DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
 						      1000 * 1000));
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 1505dffbaba9..b684322c879c 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -354,13 +354,11 @@ int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
 	if (err)
 		return err;
 
-	tl->hwsp_offset =
-		i915_ggtt_offset(tl->hwsp_ggtt) +
-		offset_in_page(tl->hwsp_offset);
+	tl->ggtt_offset = i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset;
 	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
-		 tl->fence_context, tl->hwsp_offset);
+		 tl->fence_context, tl->ggtt_offset);
 
-	cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset);
+	cacheline_acquire(tl->hwsp_cacheline, tl->ggtt_offset);
 	if (atomic_fetch_inc(&tl->pin_count)) {
 		cacheline_release(tl->hwsp_cacheline);
 		__i915_vma_unpin(tl->hwsp_ggtt);
@@ -528,14 +526,13 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
 
 	vaddr = page_mask_bits(cl->vaddr);
 	tl->hwsp_offset = cacheline * CACHELINE_BYTES;
-	tl->hwsp_seqno =
-		memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);
+	tl->hwsp_seqno = memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);
 
-	tl->hwsp_offset += i915_ggtt_offset(vma);
+	tl->ggtt_offset = i915_ggtt_offset(vma) + tl->hwsp_offset;
 	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
-		 tl->fence_context, tl->hwsp_offset);
+		 tl->fence_context, tl->ggtt_offset);
 
-	cacheline_acquire(cl, tl->hwsp_offset);
+	cacheline_acquire(cl, tl->ggtt_offset);
 	tl->hwsp_cacheline = cl;
 
 	*seqno = timeline_advance(tl);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 9f677c9b7d06..c5995cc290a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -47,6 +47,7 @@ struct intel_timeline {
 	const u32 *hwsp_seqno;
 	struct i915_vma *hwsp_ggtt;
 	u32 hwsp_offset;
+	u32 ggtt_offset;
 
 	struct intel_timeline_cacheline *hwsp_cacheline;
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index 84d883de30ee..e33ec4e3b35d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -53,7 +53,7 @@ static int write_timestamp(struct i915_request *rq, int slot)
 		cmd++;
 	*cs++ = cmd;
 	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
-	*cs++ = i915_request_timeline(rq)->hwsp_offset + slot * sizeof(u32);
+	*cs++ = i915_request_timeline(rq)->ggtt_offset + slot * sizeof(u32);
 	*cs++ = 0;
 
 	intel_ring_advance(rq, cs);
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
index f097e420ac45..285cead849dd 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rc6.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -137,7 +137,7 @@ static const u32 *__live_rc6_ctx(struct intel_context *ce)
 
 	*cs++ = cmd;
 	*cs++ = i915_mmio_reg_offset(GEN8_RC6_CTX_INFO);
-	*cs++ = ce->timeline->hwsp_offset + 8;
+	*cs++ = ce->timeline->ggtt_offset + 8;
 	*cs++ = 0;
 	intel_ring_advance(rq, cs);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 562a450d2832..6b412228a6fd 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -468,7 +468,7 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
 
 	i915_request_get(rq);
 
-	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
+	err = emit_ggtt_store_dw(rq, tl->ggtt_offset, value);
 	i915_request_add(rq);
 	if (err) {
 		i915_request_put(rq);
@@ -564,7 +564,7 @@ static int live_hwsp_engine(void *arg)
 
 		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
 			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
-				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
+				      n, tl->fence_context, tl->ggtt_offset, *tl->hwsp_seqno);
 			GEM_TRACE_DUMP();
 			err = -EINVAL;
 		}
@@ -636,7 +636,7 @@ static int live_hwsp_alternate(void *arg)
 
 		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
 			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
-				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
+				      n, tl->fence_context, tl->ggtt_offset, *tl->hwsp_seqno);
 			GEM_TRACE_DUMP();
 			err = -EINVAL;
 		}
@@ -696,9 +696,9 @@ static int live_hwsp_wrap(void *arg)
 			goto out;
 		}
 		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
-			 seqno[0], tl->hwsp_offset);
+			 seqno[0], tl->ggtt_offset);
 
-		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
+		err = emit_ggtt_store_dw(rq, tl->ggtt_offset, seqno[0]);
 		if (err) {
 			i915_request_add(rq);
 			goto out;
@@ -713,9 +713,9 @@ static int live_hwsp_wrap(void *arg)
 			goto out;
 		}
 		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
-			 seqno[1], tl->hwsp_offset);
+			 seqno[1], tl->ggtt_offset);
 
-		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
+		err = emit_ggtt_store_dw(rq, tl->ggtt_offset, seqno[1]);
 		if (err) {
 			i915_request_add(rq);
 			goto out;
@@ -1343,7 +1343,7 @@ static int live_hwsp_recycle(void *arg)
 			if (READ_ONCE(*tl->hwsp_seqno) != count) {
 				GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n",
 					      count, tl->fence_context,
-					      tl->hwsp_offset, *tl->hwsp_seqno);
+					      tl->ggtt_offset, *tl->hwsp_seqno);
 				GEM_TRACE_DUMP();
 				err = -EINVAL;
 			}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 7483aeb66ea9..ad4e15c9b8f9 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -1727,7 +1727,7 @@ void i915_sched_show(struct drm_printer *m,
 		drm_printf(m, "\tring->space:  0x%08x\n",
 			   rq->ring->space);
 		drm_printf(m, "\tring->hwsp:   0x%08x\n",
-			   i915_request_active_timeline(rq)->hwsp_offset);
+			   i915_request_active_timeline(rq)->ggtt_offset);
 
 		print_request_ring(m, rq);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [Intel-gfx] [PATCH 20/31] drm/i915/gt: Add timeline "mode"
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (17 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 19/31] drm/i915/gt: Track timeline GGTT offset separately from subpage offset Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 21/31] drm/i915/gt: Use indices for writing into relative timelines Chris Wilson
                   ` (14 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Explicitly differentiate between absolute and relative timelines, and
between offsets relative to the global HWSP and to the per-process HWSP
(ppHWSP). When using a timeline that is relative to a known status page,
we can replace the absolute addressing in the commands with indexed
variants.
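
The mode is chosen by the creator of the timeline: the offset passed to
__intel_timeline_create() carries the INTEL_TIMELINE_RELATIVE_CONTEXT
bit when the timeline lives inside the context image, while a plain
offset into an engine status page selects INTEL_TIMELINE_RELATIVE_ENGINE.
For example, the ppHWSP patch later in the series creates a
context-relative timeline essentially as:

	/* seqno slot inside the per-context (ppHWSP) status page */
	tl = __intel_timeline_create(ce->engine->gt, ce->state,
				     I915_GEM_HWS_SEQNO_ADDR |
				     INTEL_TIMELINE_RELATIVE_CONTEXT);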

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_timeline.c      | 21 ++++++++++++++++---
 drivers/gpu/drm/i915/gt/intel_timeline.h      |  2 +-
 .../gpu/drm/i915/gt/intel_timeline_types.h    | 10 +++++++--
 3 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index b684322c879c..69052495c64a 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -226,7 +226,6 @@ static int intel_timeline_init(struct intel_timeline *timeline,
 
 	timeline->gt = gt;
 
-	timeline->has_initial_breadcrumb = !hwsp;
 	timeline->hwsp_cacheline = NULL;
 
 	if (!hwsp) {
@@ -243,13 +242,29 @@ static int intel_timeline_init(struct intel_timeline *timeline,
 			return PTR_ERR(cl);
 		}
 
+		timeline->mode = INTEL_TIMELINE_ABSOLUTE;
 		timeline->hwsp_cacheline = cl;
 		timeline->hwsp_offset = cacheline * CACHELINE_BYTES;
 
 		vaddr = page_mask_bits(cl->vaddr);
 	} else {
-		timeline->hwsp_offset = offset;
-		vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
+		int preferred;
+
+		if (offset & INTEL_TIMELINE_RELATIVE_CONTEXT) {
+			timeline->mode = INTEL_TIMELINE_RELATIVE_CONTEXT;
+			timeline->hwsp_offset =
+				offset & ~INTEL_TIMELINE_RELATIVE_CONTEXT;
+			preferred = i915_coherent_map_type(gt->i915);
+		} else {
+			timeline->mode = INTEL_TIMELINE_RELATIVE_ENGINE;
+			timeline->hwsp_offset = offset;
+			preferred = I915_MAP_WB;
+		}
+
+		vaddr = i915_gem_object_pin_map(hwsp->obj,
+						preferred | I915_MAP_OVERRIDE);
+		if (IS_ERR(vaddr))
+			vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WC);
 		if (IS_ERR(vaddr))
 			return PTR_ERR(vaddr);
 	}
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index 7d6218b55df6..e1d522329757 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -45,7 +45,7 @@ static inline void intel_timeline_put(struct intel_timeline *timeline)
 static inline bool
 intel_timeline_has_initial_breadcrumb(const struct intel_timeline *tl)
 {
-	return tl->has_initial_breadcrumb;
+	return tl->mode == INTEL_TIMELINE_ABSOLUTE;
 }
 
 static inline int __intel_timeline_sync_set(struct intel_timeline *tl,
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index c5995cc290a0..61938d103a13 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -19,6 +19,12 @@ struct i915_syncmap;
 struct intel_gt;
 struct intel_timeline_hwsp;
 
+enum intel_timeline_mode {
+	INTEL_TIMELINE_ABSOLUTE = 0,
+	INTEL_TIMELINE_RELATIVE_CONTEXT = BIT(0),
+	INTEL_TIMELINE_RELATIVE_ENGINE  = BIT(1),
+};
+
 struct intel_timeline {
 	u64 fence_context;
 	u32 seqno;
@@ -44,6 +50,8 @@ struct intel_timeline {
 	atomic_t pin_count;
 	atomic_t active_count;
 
+	enum intel_timeline_mode mode;
+
 	const u32 *hwsp_seqno;
 	struct i915_vma *hwsp_ggtt;
 	u32 hwsp_offset;
@@ -51,8 +59,6 @@ struct intel_timeline {
 
 	struct intel_timeline_cacheline *hwsp_cacheline;
 
-	bool has_initial_breadcrumb;
-
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
 	 * outstanding.
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [Intel-gfx] [PATCH 21/31] drm/i915/gt: Use indices for writing into relative timelines
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (18 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 20/31] drm/i915/gt: Add timeline "mode" Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 22/31] drm/i915/selftests: Exercise relative timeline modes Chris Wilson
                   ` (13 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Relative timelines are relative to either the global or per-process
HWSP, and so we can replace the absolute addressing with store-index
variants for position invariance.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 98 +++++++++++++++++-------
 drivers/gpu/drm/i915/gt/intel_timeline.h | 12 +++
 2 files changed, 82 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 187f1dad1054..7fd843369b41 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -518,7 +518,19 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
 
 static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
 {
-	return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
+	struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
+	unsigned int flags = MI_FLUSH_DW_OP_STOREDW;
+	u32 offset = hwsp_offset(rq);
+
+	if (intel_timeline_is_relative(tl)) {
+		offset = offset_in_page(offset);
+		flags |= MI_FLUSH_DW_STORE_INDEX;
+	}
+	GEM_BUG_ON(offset & 7);
+	if (!intel_timeline_in_context(tl))
+		offset |= MI_FLUSH_DW_USE_GTT;
+
+	return __gen8_emit_flush_dw(cs, rq->fence.seqno, offset, flags);
 }
 
 u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
@@ -528,6 +540,18 @@ u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
 
 u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 {
+	struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
+	unsigned int flags = PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL;
+	u32 offset = hwsp_offset(rq);
+
+	if (intel_timeline_is_relative(tl)) {
+		offset = offset_in_page(offset);
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+	}
+	GEM_BUG_ON(offset & 7);
+	if (!intel_timeline_in_context(tl))
+		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
 	cs = gen8_emit_pipe_control(cs,
 				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
 				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
@@ -535,26 +559,33 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 				    0);
 
 	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
-	cs = gen8_emit_ggtt_write_rcs(cs,
-				      rq->fence.seqno,
-				      hwsp_offset(rq),
-				      PIPE_CONTROL_FLUSH_ENABLE |
-				      PIPE_CONTROL_CS_STALL);
+	cs = __gen8_emit_write_rcs(cs, rq->fence.seqno, offset, 0, flags);
 
 	return gen8_emit_fini_breadcrumb_tail(rq, cs);
 }
 
 u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 {
-	cs = gen8_emit_ggtt_write_rcs(cs,
-				      rq->fence.seqno,
-				      hwsp_offset(rq),
-				      PIPE_CONTROL_CS_STALL |
-				      PIPE_CONTROL_TILE_CACHE_FLUSH |
-				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
-				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-				      PIPE_CONTROL_DC_FLUSH_ENABLE |
-				      PIPE_CONTROL_FLUSH_ENABLE);
+	struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
+	u32 offset = hwsp_offset(rq);
+	unsigned int flags;
+
+	flags = (PIPE_CONTROL_CS_STALL |
+		 PIPE_CONTROL_TILE_CACHE_FLUSH |
+		 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		 PIPE_CONTROL_DC_FLUSH_ENABLE |
+		 PIPE_CONTROL_FLUSH_ENABLE);
+
+	if (intel_timeline_is_relative(tl)) {
+		offset = offset_in_page(offset);
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+	}
+	GEM_BUG_ON(offset & 7);
+	if (!intel_timeline_in_context(tl))
+		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+	cs = __gen8_emit_write_rcs(cs, rq->fence.seqno, offset, 0, flags);
 
 	return gen8_emit_fini_breadcrumb_tail(rq, cs);
 }
@@ -617,19 +648,30 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
 
 u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 {
-	cs = gen12_emit_ggtt_write_rcs(cs,
-				       rq->fence.seqno,
-				       hwsp_offset(rq),
-				       PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
-				       PIPE_CONTROL_CS_STALL |
-				       PIPE_CONTROL_TILE_CACHE_FLUSH |
-				       PIPE_CONTROL_FLUSH_L3 |
-				       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
-				       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-				       /* Wa_1409600907:tgl */
-				       PIPE_CONTROL_DEPTH_STALL |
-				       PIPE_CONTROL_DC_FLUSH_ENABLE |
-				       PIPE_CONTROL_FLUSH_ENABLE);
+	struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
+	u32 offset = hwsp_offset(rq);
+	unsigned int flags;
+
+	flags = (PIPE_CONTROL_CS_STALL |
+		 PIPE_CONTROL_TILE_CACHE_FLUSH |
+		 PIPE_CONTROL_FLUSH_L3 |
+		 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		 /* Wa_1409600907:tgl */
+		 PIPE_CONTROL_DEPTH_STALL |
+		 PIPE_CONTROL_DC_FLUSH_ENABLE |
+		 PIPE_CONTROL_FLUSH_ENABLE);
+
+	if (intel_timeline_is_relative(tl)) {
+		offset = offset_in_page(offset);
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+	}
+	GEM_BUG_ON(offset & 7);
+	if (!intel_timeline_in_context(tl))
+		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+	cs = __gen8_emit_write_rcs(cs, rq->fence.seqno, offset,
+				   PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags);
 
 	return gen12_emit_fini_breadcrumb_tail(rq, cs);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index e1d522329757..9859a77a6f54 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -48,6 +48,18 @@ intel_timeline_has_initial_breadcrumb(const struct intel_timeline *tl)
 	return tl->mode == INTEL_TIMELINE_ABSOLUTE;
 }
 
+static inline bool
+intel_timeline_is_relative(const struct intel_timeline *tl)
+{
+	return tl->mode != INTEL_TIMELINE_ABSOLUTE;
+}
+
+static inline bool
+intel_timeline_in_context(const struct intel_timeline *tl)
+{
+	return tl->mode == INTEL_TIMELINE_RELATIVE_CONTEXT;
+}
+
 static inline int __intel_timeline_sync_set(struct intel_timeline *tl,
 					    u64 context, u32 seqno)
 {
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [Intel-gfx] [PATCH 22/31] drm/i915/selftests: Exercise relative timeline modes
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (19 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 21/31] drm/i915/gt: Use indices for writing into relative timelines Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 23/31] drm/i915/gt: Use ppHWSP for unshared non-semaphore related timelines Chris Wilson
                   ` (12 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

A quick test to verify that the backend accepts each type of timeline
and can use it to track and control request emission.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/selftest_timeline.c | 105 ++++++++++++++++++++
 1 file changed, 105 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 6b412228a6fd..dcc03522b277 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -1364,9 +1364,114 @@ static int live_hwsp_recycle(void *arg)
 	return err;
 }
 
+static int live_hwsp_relative(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/*
+	 * Check backend support for different timeline modes.
+	 */
+
+	for_each_engine(engine, gt, id) {
+		enum intel_timeline_mode mode;
+
+		if (!intel_engine_has_scheduler(engine))
+			continue;
+
+		for (mode = INTEL_TIMELINE_ABSOLUTE;
+		     mode <= INTEL_TIMELINE_RELATIVE_ENGINE;
+		     mode++) {
+			struct intel_timeline *tl;
+			struct i915_request *rq;
+			struct intel_context *ce;
+			const char *msg;
+			int err;
+
+			if (mode == INTEL_TIMELINE_RELATIVE_CONTEXT &&
+			    !HAS_EXECLISTS(gt->i915))
+				continue;
+
+			ce = intel_context_create(engine);
+			if (IS_ERR(ce))
+				return PTR_ERR(ce);
+
+			err = intel_context_alloc_state(ce);
+			if (err) {
+				intel_context_put(ce);
+				return err;
+			}
+
+			switch (mode) {
+			case INTEL_TIMELINE_ABSOLUTE:
+				tl = intel_timeline_create(gt);
+				msg = "local";
+				break;
+
+			case INTEL_TIMELINE_RELATIVE_CONTEXT:
+				tl = __intel_timeline_create(gt,
+							     ce->state,
+							     INTEL_TIMELINE_RELATIVE_CONTEXT |
+							     0x400);
+				msg = "ppHWSP";
+				break;
+
+			case INTEL_TIMELINE_RELATIVE_ENGINE:
+				tl = __intel_timeline_create(gt,
+							     engine->status_page.vma,
+							     0x400);
+				msg = "HWSP";
+				break;
+			default:
+				continue;
+			}
+			if (IS_ERR(tl)) {
+				intel_context_put(ce);
+				return PTR_ERR(tl);
+			}
+
+			pr_info("Testing %s timeline on %s\n",
+				msg, engine->name);
+
+			intel_timeline_put(ce->timeline);
+			ce->timeline = tl;
+
+			err = intel_timeline_pin(tl, NULL);
+			if (err) {
+				intel_context_put(ce);
+				return err;
+			}
+			tl->seqno = 0xc0000000;
+			WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
+			intel_timeline_unpin(tl);
+
+			rq = intel_context_create_request(ce);
+			intel_context_put(ce);
+			if (IS_ERR(rq))
+				return PTR_ERR(rq);
+
+			GEM_BUG_ON(rcu_access_pointer(rq->timeline) != tl);
+
+			i915_request_get(rq);
+			i915_request_add(rq);
+
+			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+				i915_request_put(rq);
+				return -EIO;
+			}
+
+			i915_request_put(rq);
+		}
+	}
+
+	return 0;
+}
+
 int intel_timeline_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
+		SUBTEST(live_hwsp_relative),
 		SUBTEST(live_hwsp_recycle),
 		SUBTEST(live_hwsp_engine),
 		SUBTEST(live_hwsp_alternate),
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [Intel-gfx] [PATCH 23/31] drm/i915/gt: Use ppHWSP for unshared non-semaphore related timelines
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (20 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 22/31] drm/i915/selftests: Exercise relative timeline modes Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 24/31] Restore "drm/i915: drop engine_pin/unpin_breadcrumbs_irq" Chris Wilson
                   ` (11 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

When we are not using semaphores with a context/engine, we can simply
reuse the same seqno location across wraps, but we still require each
timeline to have its own address. For LRC submission, each context is
prefixed by a per-process HWSP, which provides us with a unique location
for each context-local timeline. A shared timeline that is common to
multiple contexts will continue to use a separate page.

This enables us to create position invariant contexts should we feel the
need to relocate them.

Initially, ppHWSP timelines are used automatically on Broadwell/Braswell
as those platforms do not require independent timelines.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 8508b8d701c1..f9acd9e63066 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -833,6 +833,14 @@ pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
 	return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
 }
 
+static struct intel_timeline *
+pphwsp_timeline(struct intel_context *ce, struct i915_vma *state)
+{
+	return __intel_timeline_create(ce->engine->gt, state,
+				       I915_GEM_HWS_SEQNO_ADDR |
+				       INTEL_TIMELINE_RELATIVE_CONTEXT);
+}
+
 int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
 {
 	struct intel_ring *ring;
@@ -860,8 +868,10 @@ int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
 		 */
 		if (unlikely(ce->timeline))
 			tl = pinned_timeline(ce, engine);
-		else
+		else if (intel_engine_has_semaphores(engine))
 			tl = intel_timeline_create(engine->gt);
+		else
+			tl = pphwsp_timeline(ce, vma);
 		if (IS_ERR(tl)) {
 			err = PTR_ERR(tl);
 			goto err_ring;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [Intel-gfx] [PATCH 24/31] Restore "drm/i915: drop engine_pin/unpin_breadcrumbs_irq"
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (21 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 23/31] drm/i915/gt: Use ppHWSP for unshared non-semaphore related timelines Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 25/31] drm/i915/gt: Support creation of 'internal' rings Chris Wilson
                   ` (10 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

This was removed in commit 478ffad6d690 ("drm/i915: drop
engine_pin/unpin_breadcrumbs_irq") as the last user had been removed,
but now there is a promise of a new user in the next patch.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 24 +++++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.h |  3 +++
 2 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 38cc42783dfb..9e67810c7767 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -310,6 +310,30 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
 	spin_unlock_irqrestore(&b->irq_lock, flags);
 }
 
+void intel_breadcrumbs_pin_irq(struct intel_breadcrumbs *b)
+{
+	if (GEM_DEBUG_WARN_ON(!b->irq_engine))
+		return;
+
+	spin_lock_irq(&b->irq_lock);
+	if (!b->irq_enabled++)
+		irq_enable(b->irq_engine);
+	GEM_BUG_ON(!b->irq_enabled); /* no overflow! */
+	spin_unlock_irq(&b->irq_lock);
+}
+
+void intel_breadcrumbs_unpin_irq(struct intel_breadcrumbs *b)
+{
+	if (GEM_DEBUG_WARN_ON(!b->irq_engine))
+		return;
+
+	spin_lock_irq(&b->irq_lock);
+	GEM_BUG_ON(!b->irq_enabled); /* no underflow! */
+	if (!--b->irq_enabled)
+		irq_disable(b->irq_engine);
+	spin_unlock_irq(&b->irq_lock);
+}
+
 void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
 {
 	if (!READ_ONCE(b->irq_armed))
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h
index 3ce5ce270b04..c2bb3a79ca9f 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h
@@ -19,6 +19,9 @@ struct intel_breadcrumbs *
 intel_breadcrumbs_create(struct intel_engine_cs *irq_engine);
 void intel_breadcrumbs_free(struct intel_breadcrumbs *b);
 
+void intel_breadcrumbs_pin_irq(struct intel_breadcrumbs *b);
+void intel_breadcrumbs_unpin_irq(struct intel_breadcrumbs *b);
+
 void intel_breadcrumbs_reset(struct intel_breadcrumbs *b);
 void __intel_breadcrumbs_park(struct intel_breadcrumbs *b);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [Intel-gfx] [PATCH 25/31] drm/i915/gt: Support creation of 'internal' rings
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (22 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 24/31] Restore "drm/i915: drop engine_pin/unpin_breadcrumbs_irq" Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 26/31] drm/i915/gt: Use client timeline address for seqno writes Chris Wilson
                   ` (9 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

To support legacy ring buffer scheduling, we want a virtual ringbuffer
for each client. These rings are purely for holding the requests as they
are being constructed on the CPU and never accessed by the GPU, so they
should not be bound into the GGTT, and we can use plain old WB mapped
pages.

As they are not bound into the GGTT, we need to relax a few assumptions
that a rq->ring is always in the GGTT.
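
The internal flag is folded into the low bits of the (power-of-two) size
argument, so creating one of these unbound, CPU-only rings looks roughly
like the following (illustrative only):

	struct intel_ring *ring;

	/* a 4KiB ring kept in ordinary WB pages, never bound into the GGTT */
	ring = intel_engine_create_ring(engine,
					SZ_4K | INTEL_RING_CREATE_INTERNAL);
	if (IS_ERR(ring))
		return PTR_ERR(ring);

	GEM_BUG_ON(!intel_ring_is_internal(ring));
	GEM_BUG_ON(intel_ring_address(ring) != (u32)-1); /* no GGTT address */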

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_context.c       |  2 +-
 .../drm/i915/gt/intel_execlists_submission.c  |  2 +-
 drivers/gpu/drm/i915/gt/intel_ring.c          | 69 ++++++++++++-------
 drivers/gpu/drm/i915/gt/intel_ring.h          | 17 ++++-
 drivers/gpu/drm/i915/gt/intel_ring_types.h    |  2 +
 drivers/gpu/drm/i915/i915_scheduler.c         |  2 +-
 6 files changed, 65 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 57b6bde2b736..c7ab4ed92da4 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -258,7 +258,7 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
 		}
 
 		CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
-			 i915_ggtt_offset(ce->ring->vma),
+			 intel_ring_address(ce->ring),
 			 ce->ring->head, ce->ring->tail);
 
 		handoff = true;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 86093d0b56ed..46cab93cb45e 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3553,7 +3553,7 @@ static int print_ring(char *buf, int sz, struct i915_request *rq)
 
 		len = scnprintf(buf, sz,
 				"ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
-				i915_ggtt_offset(rq->ring->vma),
+				intel_ring_address(rq->ring),
 				tl ? tl->ggtt_offset : 0,
 				hwsp_seqno(rq),
 				DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index aee0a77c77e0..521972c297a9 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -32,33 +32,42 @@ void __intel_ring_pin(struct intel_ring *ring)
 int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww)
 {
 	struct i915_vma *vma = ring->vma;
-	unsigned int flags;
 	void *addr;
 	int ret;
 
 	if (atomic_fetch_inc(&ring->pin_count))
 		return 0;
 
-	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
-	flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+	if (!intel_ring_is_internal(ring)) {
+		int type = i915_coherent_map_type(vma->vm->i915);
+		unsigned int pin;
 
-	if (i915_gem_object_is_stolen(vma->obj))
-		flags |= PIN_MAPPABLE;
-	else
-		flags |= PIN_HIGH;
+		/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
+		pin = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
 
-	ret = i915_ggtt_pin(vma, ww, 0, flags);
-	if (unlikely(ret))
-		goto err_unpin;
+		if (i915_gem_object_is_stolen(vma->obj))
+			pin |= PIN_MAPPABLE;
+		else
+			pin |= PIN_HIGH;
 
-	if (i915_vma_is_map_and_fenceable(vma))
-		addr = (void __force *)i915_vma_pin_iomap(vma);
-	else
-		addr = i915_gem_object_pin_map(vma->obj,
-					       i915_coherent_map_type(vma->vm->i915));
-	if (IS_ERR(addr)) {
-		ret = PTR_ERR(addr);
-		goto err_ring;
+		ret = i915_ggtt_pin(vma, ww, 0, pin);
+		if (unlikely(ret))
+			goto err_unpin;
+
+		if (i915_vma_is_map_and_fenceable(vma))
+			addr = (void __force *)i915_vma_pin_iomap(vma);
+		else
+			addr = i915_gem_object_pin_map(vma->obj, type);
+		if (IS_ERR(addr)) {
+			ret = PTR_ERR(addr);
+			goto err_ring;
+		}
+	} else {
+		addr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+		if (IS_ERR(addr)) {
+			ret = PTR_ERR(addr);
+			goto err_ring;
+		}
 	}
 
 	i915_vma_make_unshrinkable(vma);
@@ -99,19 +108,24 @@ void intel_ring_unpin(struct intel_ring *ring)
 		i915_gem_object_unpin_map(vma->obj);
 
 	i915_vma_make_purgeable(vma);
-	i915_vma_unpin(vma);
+	if (!intel_ring_is_internal(ring))
+		i915_vma_unpin(vma);
 }
 
-static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
+static struct i915_vma *
+create_ring_vma(struct i915_ggtt *ggtt, int size, unsigned int flags)
 {
 	struct i915_address_space *vm = &ggtt->vm;
 	struct drm_i915_private *i915 = vm->i915;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 
-	obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE);
-	if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt))
-		obj = i915_gem_object_create_stolen(i915, size);
+	obj = ERR_PTR(-ENODEV);
+	if (!(flags & INTEL_RING_CREATE_INTERNAL)) {
+		obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE);
+		if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt))
+			obj = i915_gem_object_create_stolen(i915, size);
+	}
 	if (IS_ERR(obj))
 		obj = i915_gem_object_create_internal(i915, size);
 	if (IS_ERR(obj))
@@ -136,12 +150,14 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
 }
 
 struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine, int size)
+intel_engine_create_ring(struct intel_engine_cs *engine, unsigned int size)
 {
 	struct drm_i915_private *i915 = engine->i915;
+	unsigned int flags = size & GENMASK(11, 0);
 	struct intel_ring *ring;
 	struct i915_vma *vma;
 
+	size ^= flags;
 	GEM_BUG_ON(!is_power_of_2(size));
 	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
 
@@ -150,8 +166,10 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 		return ERR_PTR(-ENOMEM);
 
 	kref_init(&ring->ref);
+
 	ring->size = size;
 	ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(size);
+	ring->flags = flags;
 
 	/*
 	 * Workaround an erratum on the i830 which causes a hang if
@@ -164,11 +182,12 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 
 	intel_ring_update_space(ring);
 
-	vma = create_ring_vma(engine->gt->ggtt, size);
+	vma = create_ring_vma(engine->gt->ggtt, size, flags);
 	if (IS_ERR(vma)) {
 		kfree(ring);
 		return ERR_CAST(vma);
 	}
+
 	ring->vma = vma;
 
 	return ring;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
index dbf5f14a136f..89d79c22fe9e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -8,12 +8,14 @@
 
 #include "i915_gem.h" /* GEM_BUG_ON */
 #include "i915_request.h"
+#include "i915_vma.h"
 #include "intel_ring_types.h"
 
 struct intel_engine_cs;
 
 struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine, int size);
+intel_engine_create_ring(struct intel_engine_cs *engine, unsigned int size);
+#define INTEL_RING_CREATE_INTERNAL BIT(0)
 
 u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords);
 int intel_ring_cacheline_align(struct i915_request *rq);
@@ -138,4 +140,17 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
 	return (head - tail - CACHELINE_BYTES) & (size - 1);
 }
 
+static inline u32 intel_ring_address(const struct intel_ring *ring)
+{
+	if (ring->flags & INTEL_RING_CREATE_INTERNAL)
+		return -1;
+
+	return i915_ggtt_offset(ring->vma);
+}
+
+static inline bool intel_ring_is_internal(const struct intel_ring *ring)
+{
+	return ring->flags & INTEL_RING_CREATE_INTERNAL;
+}
+
 #endif /* INTEL_RING_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h
index 49ccb76dda3b..3d091c699110 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h
@@ -46,6 +46,8 @@ struct intel_ring {
 	u32 size;
 	u32 wrap;
 	u32 effective_size;
+
+	unsigned long flags;
 };
 
 #endif /* INTEL_RING_TYPES_H */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index ad4e15c9b8f9..556fd47ad697 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -1717,7 +1717,7 @@ void i915_sched_show(struct drm_printer *m,
 		i915_request_show(m, rq, "Active ", 0);
 
 		drm_printf(m, "\tring->start:  0x%08x\n",
-			   i915_ggtt_offset(rq->ring->vma));
+			   intel_ring_address(rq->ring));
 		drm_printf(m, "\tring->head:   0x%08x\n",
 			   rq->ring->head);
 		drm_printf(m, "\tring->tail:   0x%08x\n",
-- 
2.20.1

* [Intel-gfx] [PATCH 26/31] drm/i915/gt: Use client timeline address for seqno writes
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (23 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 25/31] drm/i915/gt: Support creation of 'internal' rings Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 27/31] drm/i915/gt: Infrastructure for ring scheduling Chris Wilson
                   ` (8 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

If we allow for per-client timelines, even with legacy ring submission,
we open the door to a world full of possibilities [scheduling and
semaphores].

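For illustration only (a condensed sketch, not part of the diff below):
once the seqno target is the client timeline rather than the global HWS,
a breadcrumb emitter resolves the address with the helpers this patch
adds. The real emitters additionally handle per-gen workarounds and the
closing MI_USER_INTERRUPT/tail update.

static u32 *sketch_emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
{
	struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
	u32 offset = __i915_request_hwsp_offset(rq); /* client timeline address */
	u32 cmd = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;

	if (intel_timeline_is_relative(tl)) {
		/* store relative to the context's own HWSP page */
		offset = offset_in_page(offset);
		cmd |= MI_FLUSH_DW_STORE_INDEX;
	}
	if (!intel_timeline_in_context(tl))
		offset |= MI_FLUSH_DW_USE_GTT; /* absolute GGTT address */

	*cs++ = cmd;
	*cs++ = offset;
	*cs++ = rq->fence.seqno;
	return cs;
}
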
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/gen2_engine_cs.c      | 72 ++++++++++++++-
 drivers/gpu/drm/i915/gt/gen2_engine_cs.h      |  5 +-
 drivers/gpu/drm/i915/gt/gen6_engine_cs.c      | 89 +++++++++++++------
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c      | 23 ++---
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 26 +++---
 drivers/gpu/drm/i915/i915_request.h           | 13 +++
 6 files changed, 167 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
index b491a64919c8..b3fff7a955f2 100644
--- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
@@ -172,9 +172,77 @@ u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	return __gen2_emit_breadcrumb(rq, cs, 16, 8);
 }
 
-u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+static u32 *__gen4_emit_breadcrumb(struct i915_request *rq, u32 *cs,
+				   int flush, int post)
 {
-	return __gen2_emit_breadcrumb(rq, cs, 8, 8);
+	struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
+	u32 offset = __i915_request_hwsp_offset(rq);
+
+	GEM_BUG_ON(tl->mode == INTEL_TIMELINE_RELATIVE_CONTEXT);
+
+	*cs++ = MI_FLUSH;
+
+	while (flush--) {
+		*cs++ = MI_STORE_DWORD_INDEX;
+		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
+		*cs++ = rq->fence.seqno;
+	}
+
+	if (intel_timeline_is_relative(tl)) {
+		offset = offset_in_page(offset);
+		while (post--) {
+			*cs++ = MI_STORE_DWORD_INDEX;
+			*cs++ = offset;
+			*cs++ = rq->fence.seqno;
+			*cs++ = MI_NOOP;
+		}
+	} else {
+		while (post--) {
+			*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+			*cs++ = 0;
+			*cs++ = offset;
+			*cs++ = rq->fence.seqno;
+		}
+	}
+
+	*cs++ = MI_USER_INTERRUPT;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+
+	return cs;
+}
+
+u32 *gen4_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
+{
+	return __gen4_emit_breadcrumb(rq, cs, 8, 8);
+}
+
+int gen4_emit_init_breadcrumb_xcs(struct i915_request *rq)
+{
+	struct intel_timeline *tl = i915_request_timeline(rq);
+	u32 *cs;
+
+	GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
+	if (!intel_timeline_has_initial_breadcrumb(tl))
+		return 0;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	*cs++ = 0;
+	*cs++ = __i915_request_hwsp_offset(rq);
+	*cs++ = rq->fence.seqno - 1;
+
+	intel_ring_advance(rq, cs);
+
+	/* Record the updated position of the request's payload */
+	rq->infix = intel_ring_offset(rq, cs);
+
+	__set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
+	return 0;
 }
 
 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.h b/drivers/gpu/drm/i915/gt/gen2_engine_cs.h
index a5cd64a65c9e..ba7567b15229 100644
--- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.h
+++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.h
@@ -16,7 +16,10 @@ int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode);
 int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode);
 
 u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs);
-u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs);
+u32 *gen4_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
+
+u32 *gen4_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
+int gen4_emit_init_breadcrumb_xcs(struct i915_request *rq);
 
 int i830_emit_bb_start(struct i915_request *rq,
 		       u64 offset, u32 len,
diff --git a/drivers/gpu/drm/i915/gt/gen6_engine_cs.c b/drivers/gpu/drm/i915/gt/gen6_engine_cs.c
index 2f59dd3bdc18..14cab4c726ce 100644
--- a/drivers/gpu/drm/i915/gt/gen6_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen6_engine_cs.c
@@ -141,6 +141,12 @@ int gen6_emit_flush_rcs(struct i915_request *rq, u32 mode)
 
 u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 {
+	struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
+	u32 offset = __i915_request_hwsp_offset(rq);
+	unsigned int flags;
+
+	GEM_BUG_ON(tl->mode == INTEL_TIMELINE_RELATIVE_CONTEXT);
+
 	/* First we do the gen6_emit_post_sync_nonzero_flush w/a */
 	*cs++ = GFX_OP_PIPE_CONTROL(4);
 	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
@@ -154,15 +160,22 @@ u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 		PIPE_CONTROL_GLOBAL_GTT;
 	*cs++ = 0;
 
-	/* Finally we can flush and with it emit the breadcrumb */
-	*cs++ = GFX_OP_PIPE_CONTROL(4);
-	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+	flags = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
 		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
 		 PIPE_CONTROL_DC_FLUSH_ENABLE |
 		 PIPE_CONTROL_QW_WRITE |
 		 PIPE_CONTROL_CS_STALL);
-	*cs++ = i915_request_active_timeline(rq)->ggtt_offset |
-		PIPE_CONTROL_GLOBAL_GTT;
+	if (intel_timeline_is_relative(tl)) {
+		offset = offset_in_page(offset);
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+	}
+	if (!intel_timeline_in_context(tl))
+		offset |= PIPE_CONTROL_GLOBAL_GTT;
+
+	/* Finally we can flush and with it emit the breadcrumb */
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = flags;
+	*cs++ = offset;
 	*cs++ = rq->fence.seqno;
 
 	*cs++ = MI_USER_INTERRUPT;
@@ -351,15 +364,28 @@ int gen7_emit_flush_rcs(struct i915_request *rq, u32 mode)
 
 u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 {
-	*cs++ = GFX_OP_PIPE_CONTROL(4);
-	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+	struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
+	u32 offset = __i915_request_hwsp_offset(rq);
+	unsigned int flags;
+
+	GEM_BUG_ON(tl->mode == INTEL_TIMELINE_RELATIVE_CONTEXT);
+
+	flags = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
 		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
 		 PIPE_CONTROL_DC_FLUSH_ENABLE |
 		 PIPE_CONTROL_FLUSH_ENABLE |
 		 PIPE_CONTROL_QW_WRITE |
-		 PIPE_CONTROL_GLOBAL_GTT_IVB |
 		 PIPE_CONTROL_CS_STALL);
-	*cs++ = i915_request_active_timeline(rq)->ggtt_offset;
+	if (intel_timeline_is_relative(tl)) {
+		offset = offset_in_page(offset);
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+	}
+	if (!intel_timeline_in_context(tl))
+		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = flags;
+	*cs++ = offset;
 	*cs++ = rq->fence.seqno;
 
 	*cs++ = MI_USER_INTERRUPT;
@@ -373,11 +399,21 @@ u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 
 u32 *gen6_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
 {
-	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
-	GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+	struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
+	u32 offset = __i915_request_hwsp_offset(rq);
+	unsigned int flags = 0;
 
-	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
-	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
+	GEM_BUG_ON(tl->mode == INTEL_TIMELINE_RELATIVE_CONTEXT);
+
+	if (intel_timeline_is_relative(tl)) {
+		offset = offset_in_page(offset);
+		flags |= MI_FLUSH_DW_STORE_INDEX;
+	}
+	if (!intel_timeline_in_context(tl))
+		offset |= MI_FLUSH_DW_USE_GTT;
+
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | flags;
+	*cs++ = offset;
 	*cs++ = rq->fence.seqno;
 
 	*cs++ = MI_USER_INTERRUPT;
@@ -391,28 +427,31 @@ u32 *gen6_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
 #define GEN7_XCS_WA 32
 u32 *gen7_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
 {
+	struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
+	u32 offset = __i915_request_hwsp_offset(rq);
+	u32 cmd = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
 	int i;
 
-	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
-	GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+	GEM_BUG_ON(tl->mode == INTEL_TIMELINE_RELATIVE_CONTEXT);
 
-	*cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB |
-		MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
-	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
+	if (intel_timeline_is_relative(tl)) {
+		offset = offset_in_page(offset);
+		cmd |= MI_FLUSH_DW_STORE_INDEX;
+	}
+	if (!intel_timeline_in_context(tl))
+		offset |= MI_FLUSH_DW_USE_GTT;
+
+	*cs++ = cmd;
+	*cs++ = offset;
 	*cs++ = rq->fence.seqno;
 
 	for (i = 0; i < GEN7_XCS_WA; i++) {
-		*cs++ = MI_STORE_DWORD_INDEX;
-		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
+		*cs++ = cmd;
+		*cs++ = offset;
 		*cs++ = rq->fence.seqno;
 	}
 
-	*cs++ = MI_FLUSH_DW;
-	*cs++ = 0;
-	*cs++ = 0;
-
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 7fd843369b41..4a0d32584ef0 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -336,19 +336,6 @@ static u32 preempt_address(struct intel_engine_cs *engine)
 		I915_GEM_HWS_PREEMPT_ADDR);
 }
 
-static u32 hwsp_offset(const struct i915_request *rq)
-{
-	const struct intel_timeline_cacheline *cl;
-
-	/* Before the request is executed, the timeline/cachline is fixed */
-
-	cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
-	if (cl)
-		return cl->ggtt_offset;
-
-	return rcu_dereference_protected(rq->timeline, 1)->ggtt_offset;
-}
-
 int gen8_emit_init_breadcrumb(struct i915_request *rq)
 {
 	u32 *cs;
@@ -362,7 +349,7 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq)
 		return PTR_ERR(cs);
 
 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
-	*cs++ = hwsp_offset(rq);
+	*cs++ = __i915_request_hwsp_offset(rq);
 	*cs++ = 0;
 	*cs++ = rq->fence.seqno - 1;
 
@@ -520,7 +507,7 @@ static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
 {
 	struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
 	unsigned int flags = MI_FLUSH_DW_OP_STOREDW;
-	u32 offset = hwsp_offset(rq);
+	u32 offset = __i915_request_hwsp_offset(rq);
 
 	if (intel_timeline_is_relative(tl)) {
 		offset = offset_in_page(offset);
@@ -542,7 +529,7 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 {
 	struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
 	unsigned int flags = PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL;
-	u32 offset = hwsp_offset(rq);
+	u32 offset = __i915_request_hwsp_offset(rq);
 
 	if (intel_timeline_is_relative(tl)) {
 		offset = offset_in_page(offset);
@@ -567,7 +554,7 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 {
 	struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
-	u32 offset = hwsp_offset(rq);
+	u32 offset = __i915_request_hwsp_offset(rq);
 	unsigned int flags;
 
 	flags = (PIPE_CONTROL_CS_STALL |
@@ -649,7 +636,7 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
 u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 {
 	struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
-	u32 offset = hwsp_offset(rq);
+	u32 offset = __i915_request_hwsp_offset(rq);
 	unsigned int flags;
 
 	flags = (PIPE_CONTROL_CS_STALL |
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 00e85a3b228a..4bdde28c2c09 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -999,9 +999,6 @@ static void setup_common(struct intel_engine_cs *engine)
 	 * equivalent to our next initial bread so we can elide
 	 * engine->emit_init_breadcrumb().
 	 */
-	engine->emit_fini_breadcrumb = gen3_emit_breadcrumb;
-	if (IS_GEN(i915, 5))
-		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
 
 	if (INTEL_GEN(i915) >= 6)
 		engine->emit_bb_start = gen6_emit_bb_start;
@@ -1012,6 +1009,15 @@ static void setup_common(struct intel_engine_cs *engine)
 	else
 		engine->emit_bb_start = gen3_emit_bb_start;
 
+	if (INTEL_GEN(i915) >= 7)
+		engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
+	else if (INTEL_GEN(i915) >= 6)
+		engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
+	else if (INTEL_GEN(i915) >= 4)
+		engine->emit_fini_breadcrumb = gen4_emit_breadcrumb_xcs;
+	else
+		engine->emit_fini_breadcrumb = gen3_emit_breadcrumb;
+
 	engine->set_default_submission = set_default_submission;
 }
 
@@ -1053,11 +1059,6 @@ static void setup_vcs(struct intel_engine_cs *engine)
 			engine->flags |= I915_ENGINE_NEEDS_WA_TAIL_WRITE;
 		engine->emit_flush = gen6_emit_flush_vcs;
 		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
-
-		if (IS_GEN(i915, 6))
-			engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
-		else
-			engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
 	} else {
 		engine->emit_flush = gen4_emit_flush_vcs;
 		if (IS_GEN(i915, 5))
@@ -1071,13 +1072,10 @@ static void setup_bcs(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
 
+	GEM_BUG_ON(INTEL_GEN(i915) < 6);
+
 	engine->emit_flush = gen6_emit_flush_xcs;
 	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
-
-	if (IS_GEN(i915, 6))
-		engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
-	else
-		engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
 }
 
 static void setup_vecs(struct intel_engine_cs *engine)
@@ -1090,8 +1088,6 @@ static void setup_vecs(struct intel_engine_cs *engine)
 	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
 	engine->irq_enable = hsw_irq_enable_vecs;
 	engine->irq_disable = hsw_irq_disable_vecs;
-
-	engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
 }
 
 static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine,
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 843e6a873148..02d923e615fc 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -625,6 +625,19 @@ i915_request_active_timeline(const struct i915_request *rq)
 					 lockdep_is_held(&i915_request_get_scheduler(rq)->lock));
 }
 
+static inline u32 __i915_request_hwsp_offset(const struct i915_request *rq)
+{
+	const struct intel_timeline_cacheline *cl;
+
+	/* Before the request is executed, the timeline/cacheline is fixed */
+
+	cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
+	if (cl)
+		return cl->ggtt_offset;
+
+	return rcu_dereference_protected(rq->timeline, 1)->ggtt_offset;
+}
+
 static inline bool i915_request_use_scheduler(const struct i915_request *rq)
 {
 	return i915_sched_is_active(i915_request_get_scheduler(rq));
-- 
2.20.1

* [Intel-gfx] [PATCH 27/31] drm/i915/gt: Infrastructure for ring scheduling
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (24 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 26/31] drm/i915/gt: Use client timeline address for seqno writes Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 28/31] drm/i915/gt: Implement ring scheduler for gen4-7 Chris Wilson
                   ` (7 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Build a bare-bones scheduler to sit on top of the global legacy ringbuffer
submission. This virtual execlists scheme should be applicable to all
older platforms.

A key problem we have with the legacy ring buffer submission is that it
only allows for FIFO queuing. All clients share the global request queue
and must contend for its lock when submitting. As any client may need to
wait for external events, all clients must then wait. However, if we
stage each client into their own virtual ringbuffer with their own
timelines, we can copy the client requests into the global ringbuffer
only when they are ready, reordering the submission around stalls.
Furthermore, the ability to reorder gives us rudimentary priority
sorting -- although without preemption support, once something is on the
GPU it stays on the GPU, and so it is still possible for a hog to delay
a high priority request (such as updating the display). However, it does
mean that by keeping the submission queue short, the high priority
request will be next. This design resembles the old guc submission
scheduler, which reordered requests onto a global workqueue.

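As a rough sketch (condensed from ring_submit() in the patch; the
engine->legacy.context bookkeeping is omitted), the copy step is: each
client builds its commands in its own internal ring, and only when the
scheduler picks the request are those dwords copied into the single
global ring consumed by the HW.

static struct i915_request *sketch_ring_submit(struct i915_request *rq)
{
	struct intel_ring *global = rq->engine->legacy.ring;

	__i915_request_submit(rq);

	/* emit a context switch if the previous request used another context */
	if (rq->engine->legacy.context != rq->context)
		switch_context(global, rq);

	/* copy the client's commands [head, tail) into the global ring */
	ring_copy(global, rq->ring, rq->head, rq->tail);
	return rq;
}
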
The implementation uses MI_USER_INTERRUPT at the end of every request to
track completion, so it is more interrupt happy than execlists [which has
an interrupt for each context event, albeit two]. Our interrupts on these
systems are relatively heavy, and in the past we have been able to
completely starve Sandybridge with the interrupt traffic. Our interrupt
handlers have become much better (in part by offloading the work to
bottom halves, leaving the interrupt itself only dealing with acking the
registers), but we can still see the impact of starvation in the uneven
submission latency on a saturated system.

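A sketch of the completion tracking (condensed from process_csb() in the
patch, without the runtime accounting): the interrupt handler merely kicks
the submission tasklet, which then retires from the head of the in-flight
ports by checking the breadcrumb each request wrote.

static struct i915_request **
sketch_process_completed(struct intel_engine_execlists *el,
			 struct i915_request **inactive)
{
	struct i915_request *rq;

	while ((rq = *el->active)) {
		if (!__i915_request_is_complete(rq))
			break;

		*inactive++ = rq; /* completed; handed back for schedule_out() */
		el->active++;
	}

	return inactive;
}
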
Overall though, the short submission queues and extra interrupts do not
appear to affect throughput (+-10%; some tasks even improve due to the
reduced request overheads) and latency improves. [Which is a massive
improvement since the introduction of Sandybridge!]

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gt/intel_engine.h        |   1 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   1 +
 .../gpu/drm/i915/gt/intel_ring_scheduler.c    | 750 ++++++++++++++++++
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  17 +-
 .../gpu/drm/i915/gt/intel_ring_submission.h   |  17 +
 6 files changed, 779 insertions(+), 8 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_ring_scheduler.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_ring_submission.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 235679637d1c..82d5e4d2bd0a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -115,6 +115,7 @@ gt-y += \
 	gt/intel_renderstate.o \
 	gt/intel_reset.o \
 	gt/intel_ring.o \
+	gt/intel_ring_scheduler.o \
 	gt/intel_ring_submission.o \
 	gt/intel_rps.o \
 	gt/intel_sseu.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index e4f390bba009..915511714ac5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -193,6 +193,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 int intel_engine_resume(struct intel_engine_cs *engine);
 
 int intel_ring_submission_setup(struct intel_engine_cs *engine);
+int intel_ring_scheduler_setup(struct intel_engine_cs *engine);
 
 int intel_engine_stop_cs(struct intel_engine_cs *engine);
 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 9f14631b8132..c48078f14dfd 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -319,6 +319,7 @@ struct intel_engine_cs {
 	struct {
 		struct intel_ring *ring;
 		struct intel_timeline *timeline;
+		struct intel_context *context;
 	} legacy;
 
 	/*
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c b/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c
new file mode 100644
index 000000000000..d7d6db9e6735
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c
@@ -0,0 +1,750 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/log2.h>
+
+#include <drm/i915_drm.h>
+
+#include "i915_drv.h"
+#include "intel_breadcrumbs.h"
+#include "intel_context.h"
+#include "intel_engine_pm.h"
+#include "intel_engine_stats.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_reset.h"
+#include "intel_ring.h"
+#include "intel_ring_submission.h"
+#include "shmem_utils.h"
+
+/*
+ * Rough estimate of the typical request size, performing a flush,
+ * set-context and then emitting the batch.
+ */
+#define LEGACY_REQUEST_SIZE 200
+
+static void
+set_current_context(struct intel_context **ptr, struct intel_context *ce)
+{
+	if (ce)
+		intel_context_get(ce);
+
+	ce = xchg(ptr, ce);
+
+	if (ce)
+		intel_context_put(ce);
+}
+
+static inline void runtime_start(struct intel_context *ce)
+{
+	struct intel_context_stats *stats = &ce->stats;
+
+	if (intel_context_is_barrier(ce))
+		return;
+
+	if (stats->active)
+		return;
+
+	WRITE_ONCE(stats->active, intel_context_clock());
+}
+
+static inline void runtime_stop(struct intel_context *ce)
+{
+	struct intel_context_stats *stats = &ce->stats;
+	ktime_t dt;
+
+	if (!stats->active)
+		return;
+
+	dt = ktime_sub(intel_context_clock(), stats->active);
+	ewma_runtime_add(&stats->runtime.avg, dt);
+	stats->runtime.total += dt;
+
+	WRITE_ONCE(stats->active, 0);
+}
+
+static struct intel_engine_cs *__schedule_in(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->context;
+	struct intel_engine_cs *engine = rq->engine;
+
+	intel_context_get(ce);
+
+	__intel_gt_pm_get(engine->gt);
+	if (engine->fw_domain && !engine->fw_active++)
+		intel_uncore_forcewake_get(engine->uncore, engine->fw_domain);
+
+	intel_engine_context_in(engine);
+
+	CE_TRACE(ce, "schedule-in\n");
+
+	return engine;
+}
+
+static void schedule_in(struct i915_request *rq)
+{
+	struct intel_context * const ce = rq->context;
+	struct intel_engine_cs *old;
+
+	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
+
+	old = ce->inflight;
+	if (!old)
+		old = __schedule_in(rq);
+	WRITE_ONCE(ce->inflight, ptr_inc(old));
+
+	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
+	GEM_BUG_ON(!intel_context_inflight_count(ce));
+}
+
+static void __schedule_out(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->context;
+	struct intel_engine_cs *engine = rq->engine;
+
+	CE_TRACE(ce, "schedule-out\n");
+
+	if (intel_timeline_is_last(ce->timeline, rq))
+		intel_engine_add_retire(engine, ce->timeline);
+	else
+		i915_request_update_deadline(list_next_entry(rq, link));
+
+	intel_engine_context_out(engine);
+
+	if (engine->fw_domain && !--engine->fw_active)
+		intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
+	intel_gt_pm_put_async(engine->gt);
+}
+
+static void schedule_out(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->context;
+
+	GEM_BUG_ON(!ce->inflight);
+	ce->inflight = ptr_dec(ce->inflight);
+	if (!intel_context_inflight_count(ce)) {
+		GEM_BUG_ON(ce->inflight != rq->engine);
+		__schedule_out(rq);
+		WRITE_ONCE(ce->inflight, NULL);
+		intel_context_put(ce);
+	}
+
+	i915_request_put(rq);
+}
+
+static u32 *ring_map(struct intel_ring *ring, u32 len)
+{
+	u32 *va;
+
+	if (unlikely(ring->tail + len > ring->effective_size)) {
+		memset(ring->vaddr + ring->tail, 0, ring->size - ring->tail);
+		ring->tail = 0;
+	}
+
+	va = ring->vaddr + ring->tail;
+	ring->tail = intel_ring_wrap(ring, ring->tail + len);
+
+	return va;
+}
+
+static inline u32 *ring_map_dw(struct intel_ring *ring, u32 len)
+{
+	return ring_map(ring, len * sizeof(u32));
+}
+
+static inline void ring_advance(struct intel_ring *ring, void *map)
+{
+	GEM_BUG_ON(intel_ring_wrap(ring, map - ring->vaddr) != ring->tail);
+}
+
+static void ring_copy(struct intel_ring *dst,
+		      const struct intel_ring *src,
+		      u32 start, u32 end)
+{
+	unsigned int len;
+	void *out;
+
+	len = end - start;
+	if (end < start)
+		len += src->size;
+	out = ring_map(dst, len);
+
+	if (end < start) {
+		len = src->size - start;
+		memcpy(out, src->vaddr + start, len);
+		out += len;
+		start = 0;
+	}
+
+	memcpy(out, src->vaddr + start, end - start);
+}
+
+static void switch_context(struct intel_ring *ring, struct i915_request *rq)
+{
+}
+
+static struct i915_request *ring_submit(struct i915_request *rq)
+{
+	struct intel_ring *ring = rq->engine->legacy.ring;
+
+	__i915_request_submit(rq);
+
+	if (rq->engine->legacy.context != rq->context) {
+		switch_context(ring, rq);
+		set_current_context(&rq->engine->legacy.context, rq->context);
+	}
+
+	ring_copy(ring, rq->ring, rq->head, rq->tail);
+	return rq;
+}
+
+static struct i915_request **
+copy_active(struct i915_request **port, struct i915_request * const *active)
+{
+	while (*active)
+		*port++ = *active++;
+
+	return port;
+}
+
+static inline void
+copy_ports(struct i915_request **dst, struct i915_request **src, int count)
+{
+	/* A memcpy_p() would be very useful here! */
+	while (count--)
+		WRITE_ONCE(*dst++, *src++); /* avoid write tearing */
+}
+
+static inline void write_tail(const struct intel_engine_cs *engine)
+{
+	wmb(); /* paranoid flush of WCB before RING_TAIL write */
+	ENGINE_WRITE(engine, RING_TAIL, engine->legacy.ring->tail);
+}
+
+static void dequeue(struct i915_sched *se, struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const el = &engine->execlists;
+	struct i915_request ** const last_port = el->pending + el->port_mask;
+	struct i915_request **port, **first, *last;
+	struct i915_request *rq, *rn;
+	struct i915_priolist *pl;
+
+	first = copy_active(el->pending, el->active);
+	if (first > last_port)
+		return;
+
+	local_irq_disable();
+
+	last = NULL;
+	port = first;
+	spin_lock(&se->lock);
+	i915_sched_dequeue(se, pl, rq, rn) {
+		if (last && rq->context != last->context) {
+			if (port == last_port)
+				goto done;
+
+			*port++ = i915_request_get(last);
+		}
+
+		last = ring_submit(rq);
+	}
+done:
+	spin_unlock(&se->lock);
+
+	if (last) {
+		*port++ = i915_request_get(last);
+		*port = NULL;
+
+		if (!*el->active)
+			runtime_start((*el->pending)->context);
+		WRITE_ONCE(el->active, el->pending);
+
+		copy_ports(el->inflight, el->pending, port - el->pending + 1);
+		while (port-- != first)
+			schedule_in(*port);
+
+		write_tail(engine);
+
+		WRITE_ONCE(el->active, el->inflight);
+		GEM_BUG_ON(!*el->active);
+	}
+
+	WRITE_ONCE(el->pending[0], NULL);
+
+	local_irq_enable(); /* flush irq_work *after* RING_TAIL write */
+}
+
+static void post_process_csb(struct i915_request **port,
+			     struct i915_request **last)
+{
+	while (port != last)
+		schedule_out(*port++);
+}
+
+static struct i915_request **
+process_csb(struct intel_engine_execlists *el, struct i915_request **inactive)
+{
+	struct i915_request *rq;
+
+	while ((rq = *el->active)) {
+		if (!__i915_request_is_complete(rq)) {
+			runtime_start(rq->context);
+			break;
+		}
+
+		*inactive++ = rq;
+		el->active++;
+
+		runtime_stop(rq->context);
+	}
+
+	return inactive;
+}
+
+static void submission_tasklet(struct tasklet_struct *t)
+{
+	struct i915_sched *se = from_tasklet(se, t, tasklet);
+	struct intel_engine_cs * const engine =
+		container_of(se, typeof(*engine), sched);
+	struct i915_request *post[2 * EXECLIST_MAX_PORTS];
+	struct i915_request **inactive;
+
+	rcu_read_lock();
+	inactive = process_csb(&engine->execlists, post);
+	GEM_BUG_ON(inactive - post > ARRAY_SIZE(post));
+
+	if (!i915_sched_is_idle(se))
+		dequeue(se, engine);
+
+	post_process_csb(post, inactive);
+	rcu_read_unlock();
+}
+
+static void irq_handler(struct intel_engine_cs *engine, u16 iir)
+{
+	intel_engine_kick_scheduler(engine);
+	intel_engine_signal_breadcrumbs(engine);
+}
+
+static inline void clear_ports(struct i915_request **ports, int count)
+{
+	memset_p((void **)ports, NULL, count);
+}
+
+static struct i915_request **
+cancel_port_requests(struct intel_engine_execlists * const el,
+		     struct i915_request **inactive)
+{
+	struct i915_request * const *port;
+
+	clear_ports(el->pending, ARRAY_SIZE(el->pending));
+
+	/* Mark the end of active before we overwrite *active */
+	for (port = xchg(&el->active, el->pending); *port; port++)
+		*inactive++ = *port;
+	clear_ports(el->inflight, ARRAY_SIZE(el->inflight));
+
+	smp_wmb(); /* complete the seqlock for execlists_active() */
+	WRITE_ONCE(el->active, el->inflight);
+
+	return inactive;
+}
+
+static void __ring_rewind(struct intel_engine_cs *engine, bool stalled)
+{
+	struct i915_sched *se = intel_engine_get_scheduler(engine);
+	struct i915_request *rq;
+	unsigned long flags;
+
+	rcu_read_lock();
+	spin_lock_irqsave(&se->lock, flags);
+	rq = __i915_sched_rewind_requests(engine);
+	spin_unlock_irqrestore(&se->lock, flags);
+	if (rq && __i915_request_has_started(rq))
+		__i915_request_reset(rq, stalled);
+	rcu_read_unlock();
+}
+
+static void ring_reset_csb(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const el = &engine->execlists;
+	struct i915_request *post[2 * EXECLIST_MAX_PORTS];
+	struct i915_request **inactive;
+
+	rcu_read_lock();
+	inactive = cancel_port_requests(el, post);
+
+	/* Clear the global submission state, we will submit from scratch */
+	intel_ring_reset(engine->legacy.ring, 0);
+	set_current_context(&engine->legacy.context, NULL);
+
+	post_process_csb(post, inactive);
+	rcu_read_unlock();
+}
+
+static void ring_reset_rewind(struct intel_engine_cs *engine, bool stalled)
+{
+	ring_reset_csb(engine);
+	__ring_rewind(engine, stalled);
+}
+
+static void ring_reset_cancel(struct intel_engine_cs *engine)
+{
+	struct i915_sched *se = intel_engine_get_scheduler(engine);
+	unsigned long flags;
+
+	ring_reset_csb(engine);
+
+	spin_lock_irqsave(&se->lock, flags);
+	__i915_sched_cancel_queue(se);
+	spin_unlock_irqrestore(&se->lock, flags);
+
+	intel_engine_signal_breadcrumbs(engine);
+}
+
+static void submission_park(struct intel_engine_cs *engine)
+{
+	/* drain the submit queue */
+	intel_breadcrumbs_unpin_irq(engine->breadcrumbs);
+	intel_engine_kick_scheduler(engine);
+}
+
+static void submission_unpark(struct intel_engine_cs *engine)
+{
+	intel_breadcrumbs_pin_irq(engine->breadcrumbs);
+}
+
+static void ring_context_destroy(struct kref *ref)
+{
+	struct intel_context *ce = container_of(ref, typeof(*ce), ref);
+
+	GEM_BUG_ON(intel_context_is_pinned(ce));
+
+	if (ce->state)
+		i915_vma_put(ce->state);
+	if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
+		intel_ring_put(ce->ring);
+
+	intel_context_fini(ce);
+	intel_context_free(ce);
+}
+
+static int alloc_context_vma(struct intel_context *ce)
+
+{
+	struct intel_engine_cs *engine = ce->engine;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int err;
+
+	obj = i915_gem_object_create_shmem(engine->i915, engine->context_size);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	/*
+	 * Try to make the context utilize L3 as well as LLC.
+	 *
+	 * On VLV we don't have L3 controls in the PTEs so we
+	 * shouldn't touch the cache level, especially as that
+	 * would make the object snooped which might have a
+	 * negative performance impact.
+	 *
+	 * Snooping is required on non-llc platforms in execlist
+	 * mode, but since all GGTT accesses use PAT entry 0 we
+	 * get snooping anyway regardless of cache_level.
+	 *
+	 * This is only applicable for Ivy Bridge devices since
+	 * later platforms don't have L3 control bits in the PTE.
+	 */
+	if (IS_IVYBRIDGE(engine->i915))
+		i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);
+
+	if (engine->default_state) {
+		void *vaddr;
+
+		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+		if (IS_ERR(vaddr)) {
+			err = PTR_ERR(vaddr);
+			goto err_obj;
+		}
+
+		shmem_read(engine->default_state, 0,
+			   vaddr, engine->context_size);
+		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
+
+		i915_gem_object_flush_map(obj);
+		i915_gem_object_unpin_map(obj);
+	}
+
+	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_obj;
+	}
+
+	ce->state = vma;
+	return 0;
+
+err_obj:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+static struct intel_timeline *pinned_timeline(struct intel_context *ce)
+{
+	struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
+
+	return intel_timeline_create_from_engine(ce->engine,
+						 page_unmask_bits(tl));
+}
+
+static int alloc_timeline(struct intel_context *ce)
+{
+	struct intel_engine_cs *engine = ce->engine;
+	struct intel_timeline *tl;
+
+	if (unlikely(ce->timeline))
+		tl = pinned_timeline(ce);
+	else
+		tl = intel_timeline_create(engine->gt);
+	if (IS_ERR(tl))
+		return PTR_ERR(tl);
+
+	ce->timeline = tl;
+	return 0;
+}
+
+static int ring_context_alloc(struct intel_context *ce)
+{
+	struct intel_engine_cs *engine = ce->engine;
+	struct intel_ring *ring;
+	int err;
+
+	GEM_BUG_ON(ce->state);
+	if (engine->context_size) {
+		err = alloc_context_vma(ce);
+		if (err)
+			return err;
+	}
+
+	if (!page_mask_bits(ce->timeline)) {
+		err = alloc_timeline(ce);
+		if (err)
+			goto err_vma;
+	}
+
+	ring = intel_engine_create_ring(engine,
+					(unsigned long)ce->ring |
+					INTEL_RING_CREATE_INTERNAL);
+	if (IS_ERR(ring)) {
+		err = PTR_ERR(ring);
+		goto err_timeline;
+	}
+	ce->ring = ring;
+
+	return 0;
+
+err_timeline:
+	intel_timeline_put(ce->timeline);
+err_vma:
+	if (ce->state) {
+		i915_vma_put(ce->state);
+		ce->state = NULL;
+	}
+	return err;
+}
+
+static int ring_context_pre_pin(struct intel_context *ce,
+				struct i915_gem_ww_ctx *ww,
+				void **unused)
+{
+	return 0;
+}
+
+static int ring_context_pin(struct intel_context *ce, void *unused)
+{
+	return 0;
+}
+
+static void ring_context_unpin(struct intel_context *ce)
+{
+}
+
+static void ring_context_post_unpin(struct intel_context *ce)
+{
+}
+
+static void ring_context_reset(struct intel_context *ce)
+{
+	intel_ring_reset(ce->ring, 0);
+	clear_bit(CONTEXT_VALID_BIT, &ce->flags);
+}
+
+static const struct intel_context_ops ring_context_ops = {
+	.flags = COPS_HAS_INFLIGHT,
+
+	.alloc = ring_context_alloc,
+
+	.pre_pin = ring_context_pre_pin,
+	.pin = ring_context_pin,
+	.unpin = ring_context_unpin,
+	.post_unpin = ring_context_post_unpin,
+
+	.enter = intel_context_enter_engine,
+	.exit = intel_context_exit_engine,
+
+	.reset = ring_context_reset,
+	.destroy = ring_context_destroy,
+};
+
+static int ring_request_alloc(struct i915_request *rq)
+{
+	int ret;
+
+	GEM_BUG_ON(!intel_context_is_pinned(rq->context));
+
+	/*
+	 * Flush enough space to reduce the likelihood of waiting after
+	 * we start building the request - in which case we will just
+	 * have to repeat work.
+	 */
+	rq->reserved_space += LEGACY_REQUEST_SIZE;
+
+	/* Unconditionally invalidate GPU caches and TLBs. */
+	ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
+
+	rq->reserved_space -= LEGACY_REQUEST_SIZE;
+	return 0;
+}
+
+static void set_default_submission(struct intel_engine_cs *engine)
+{
+	engine->sched.submit_request = i915_request_enqueue;
+}
+
+static void ring_release(struct intel_engine_cs *engine)
+{
+	intel_engine_cleanup_common(engine);
+
+	set_current_context(&engine->legacy.context, NULL);
+
+	intel_ring_unpin(engine->legacy.ring);
+	intel_ring_put(engine->legacy.ring);
+}
+
+static void setup_irq(struct intel_engine_cs *engine)
+{
+	engine->irq_handler = irq_handler;
+}
+
+static void setup_common(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	/* gen8+ are only supported with execlists */
+	GEM_BUG_ON(INTEL_GEN(i915) >= 8);
+	GEM_BUG_ON(INTEL_GEN(i915) < 8);
+
+	setup_irq(engine);
+
+	engine->park = submission_park;
+	engine->unpark = submission_unpark;
+
+	engine->resume = intel_ring_submission_resume;
+	engine->sanitize = intel_ring_submission_sanitize;
+
+	engine->reset.prepare = intel_ring_submission_reset_prepare;
+	engine->reset.rewind = ring_reset_rewind;
+	engine->reset.cancel = ring_reset_cancel;
+	engine->reset.finish = intel_ring_submission_reset_finish;
+
+	engine->cops = &ring_context_ops;
+	engine->request_alloc = ring_request_alloc;
+
+	engine->set_default_submission = set_default_submission;
+}
+
+static void setup_rcs(struct intel_engine_cs *engine)
+{
+}
+
+static void setup_vcs(struct intel_engine_cs *engine)
+{
+}
+
+static void setup_bcs(struct intel_engine_cs *engine)
+{
+}
+
+static void setup_vecs(struct intel_engine_cs *engine)
+{
+	GEM_BUG_ON(!IS_HASWELL(engine->i915));
+}
+
+static unsigned int global_ring_size(void)
+{
+	/* Enough space to hold 2 clients and the context switch */
+	return roundup_pow_of_two(EXECLIST_MAX_PORTS * SZ_16K + SZ_4K);
+}
+
+int intel_ring_scheduler_setup(struct intel_engine_cs *engine)
+{
+	struct intel_ring *ring;
+	int err;
+
+	GEM_BUG_ON(HAS_EXECLISTS(engine->i915));
+
+	tasklet_setup(&engine->sched.tasklet, submission_tasklet);
+
+	i915_sched_select_mode(&engine->sched, I915_SCHED_MODE_DEADLINE);
+
+	setup_common(engine);
+
+	switch (engine->class) {
+	case RENDER_CLASS:
+		setup_rcs(engine);
+		break;
+	case VIDEO_DECODE_CLASS:
+		setup_vcs(engine);
+		break;
+	case COPY_ENGINE_CLASS:
+		setup_bcs(engine);
+		break;
+	case VIDEO_ENHANCEMENT_CLASS:
+		setup_vecs(engine);
+		break;
+	default:
+		MISSING_CASE(engine->class);
+		return -ENODEV;
+	}
+
+	ring = intel_engine_create_ring(engine, global_ring_size());
+	if (IS_ERR(ring)) {
+		err = PTR_ERR(ring);
+		goto err;
+	}
+
+	err = intel_ring_pin(ring, NULL);
+	if (err)
+		goto err_ring;
+
+	GEM_BUG_ON(engine->legacy.ring);
+	engine->legacy.ring = ring;
+
+	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
+
+	/* Finally, take ownership and responsibility for cleanup! */
+	engine->release = ring_release;
+	return 0;
+
+err_ring:
+	intel_ring_put(ring);
+err:
+	intel_engine_cleanup_common(engine);
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 4bdde28c2c09..be1fe7915af9 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -15,6 +15,7 @@
 #include "intel_gt_irq.h"
 #include "intel_reset.h"
 #include "intel_ring.h"
+#include "intel_ring_submission.h"
 #include "shmem_utils.h"
 
 /* Rough estimate of the typical request size, performing a flush,
@@ -177,7 +178,7 @@ static bool stop_ring(struct intel_engine_cs *engine)
 	return (ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) == 0;
 }
 
-static int xcs_resume(struct intel_engine_cs *engine)
+int intel_ring_submission_resume(struct intel_engine_cs *engine)
 {
 	struct intel_ring *ring = engine->legacy.ring;
 
@@ -265,7 +266,7 @@ static void sanitize_hwsp(struct intel_engine_cs *engine)
 		intel_timeline_reset_seqno(tl);
 }
 
-static void xcs_sanitize(struct intel_engine_cs *engine)
+void intel_ring_submission_sanitize(struct intel_engine_cs *engine)
 {
 	/*
 	 * Poison residual state on resume, in case the suspend didn't!
@@ -290,7 +291,7 @@ static void xcs_sanitize(struct intel_engine_cs *engine)
 	clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
 }
 
-static void reset_prepare(struct intel_engine_cs *engine)
+void intel_ring_submission_reset_prepare(struct intel_engine_cs *engine)
 {
 	/*
 	 * We stop engines, otherwise we might get failed reset and a
@@ -394,7 +395,7 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 	spin_unlock_irqrestore(&se->lock, flags);
 }
 
-static void reset_finish(struct intel_engine_cs *engine)
+void intel_ring_submission_reset_finish(struct intel_engine_cs *engine)
 {
 	i915_sched_enable_tasklet(intel_engine_get_scheduler(engine));
 }
@@ -983,13 +984,13 @@ static void setup_common(struct intel_engine_cs *engine)
 
 	setup_irq(engine);
 
-	engine->resume = xcs_resume;
-	engine->sanitize = xcs_sanitize;
+	engine->resume = intel_ring_submission_resume;
+	engine->sanitize = intel_ring_submission_sanitize;
 
-	engine->reset.prepare = reset_prepare;
+	engine->reset.prepare = intel_ring_submission_reset_prepare;
 	engine->reset.rewind = reset_rewind;
 	engine->reset.cancel = reset_cancel;
-	engine->reset.finish = reset_finish;
+	engine->reset.finish = intel_ring_submission_reset_finish;
 
 	engine->cops = &ring_context_ops;
 	engine->request_alloc = ring_request_alloc;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.h b/drivers/gpu/drm/i915/gt/intel_ring_submission.h
new file mode 100644
index 000000000000..59a43c221748
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __INTEL_RING_SUBMISSION_H__
+#define __INTEL_RING_SUBMISSION_H__
+
+struct intel_engine_cs;
+
+void intel_ring_submission_reset_prepare(struct intel_engine_cs *engine);
+void intel_ring_submission_reset_finish(struct intel_engine_cs *engine);
+
+int intel_ring_submission_resume(struct intel_engine_cs *engine);
+void intel_ring_submission_sanitize(struct intel_engine_cs *engine);
+
+#endif /* __INTEL_RING_SUBMISSION_H__ */
-- 
2.20.1

* [Intel-gfx] [PATCH 28/31] drm/i915/gt: Implement ring scheduler for gen4-7
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (25 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 27/31] drm/i915/gt: Infrastructure for ring scheduling Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 29/31] drm/i915/gt: Enable ring scheduling for gen5-7 Chris Wilson
                   ` (6 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

A key problem with legacy ring buffer submission is that it is inherently
a FIFO queue across all clients; if one blocks, they all block. A
scheduler allows us to avoid that limitation, and ensures that all
clients can submit in parallel, removing the resource contention of the
global ringbuffer.

Having built the ring scheduler infrastructure on top of the global
ringbuffer submission, we now need to provide the HW knowledge required
to build command packets and implement context switching.

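For orientation only, a simplified sketch of the per-request context
switch emitted into the global ring (condensed from switch_context()
below; the real code also selects MI_SET_CONTEXT flags based on context
state validity and handles the Ivybridge residuals workaround):

static void sketch_switch_context(struct intel_ring *ring,
				  struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct intel_context *ce = rq->context;
	struct i915_address_space *vm = vm_alias(ce->vm);

	/* point the ring's PP_DIR at the client's page directory */
	if (vm != current_vm(engine))
		load_pd_dir(ring, engine, i915_vm_to_ppgtt(vm));

	/* only engines with a HW context image (RCS) need MI_SET_CONTEXT */
	if (ce->state)
		mi_set_context(ring, engine, ce,
			       MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT);

	/* restore any L3 remapping the client requested */
	remap_l3(ring, engine, ce);
}
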
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_ring_scheduler.c    | 460 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_reg.h               |  10 +
 2 files changed, 467 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c b/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c
index d7d6db9e6735..6d1ad81b1a5a 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c
@@ -7,7 +7,12 @@
 
 #include <drm/i915_drm.h>
 
+#include "gen2_engine_cs.h"
+#include "gen6_engine_cs.h"
+#include "gen6_ppgtt.h"
+#include "gen7_renderclear.h"
 #include "i915_drv.h"
+#include "i915_mitigations.h"
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
@@ -182,8 +187,270 @@ static void ring_copy(struct intel_ring *dst,
 	memcpy(out, src->vaddr + start, end - start);
 }
 
+static void mi_set_context(struct intel_ring *ring,
+			   struct intel_engine_cs *engine,
+			   struct intel_context *ce,
+			   u32 flags)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	enum intel_engine_id id;
+	const int num_engines =
+		IS_HASWELL(i915) ? engine->gt->info.num_engines - 1 : 0;
+	int len;
+	u32 *cs;
+
+	len = 4;
+	if (IS_GEN(i915, 7))
+		len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
+	else if (IS_GEN(i915, 5))
+		len += 2;
+
+	cs = ring_map_dw(ring, len);
+
+	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
+	if (IS_GEN(i915, 7)) {
+		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+		if (num_engines) {
+			struct intel_engine_cs *signaller;
+
+			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+			for_each_engine(signaller, engine->gt, id) {
+				if (signaller == engine)
+					continue;
+
+				*cs++ = i915_mmio_reg_offset(
+					   RING_PSMI_CTL(signaller->mmio_base));
+				*cs++ = _MASKED_BIT_ENABLE(
+						GEN6_PSMI_SLEEP_MSG_DISABLE);
+			}
+		}
+	} else if (IS_GEN(i915, 5)) {
+		/*
+		 * This w/a is only listed for pre-production ilk a/b steppings,
+		 * but is also mentioned for programming the powerctx. To be
+		 * safe, just apply the workaround; we do not use SyncFlush so
+		 * this should never take effect and so be a no-op!
+		 */
+		*cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
+	}
+
+	*cs++ = MI_NOOP;
+	*cs++ = MI_SET_CONTEXT;
+	*cs++ = i915_ggtt_offset(ce->state) | flags;
+	/*
+	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
+	 * WaMiSetContext_Hang:snb,ivb,vlv
+	 */
+	*cs++ = MI_NOOP;
+
+	if (IS_GEN(i915, 7)) {
+		if (num_engines) {
+			struct intel_engine_cs *signaller;
+			i915_reg_t last_reg = {}; /* keep gcc quiet */
+
+			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+			for_each_engine(signaller, engine->gt, id) {
+				if (signaller == engine)
+					continue;
+
+				last_reg = RING_PSMI_CTL(signaller->mmio_base);
+				*cs++ = i915_mmio_reg_offset(last_reg);
+				*cs++ = _MASKED_BIT_DISABLE(
+						GEN6_PSMI_SLEEP_MSG_DISABLE);
+			}
+
+			/* Insert a delay before the next switch! */
+			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+			*cs++ = i915_mmio_reg_offset(last_reg);
+			*cs++ = intel_gt_scratch_offset(engine->gt,
+							INTEL_GT_SCRATCH_FIELD_DEFAULT);
+			*cs++ = MI_NOOP;
+		}
+		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	} else if (IS_GEN(i915, 5)) {
+		*cs++ = MI_SUSPEND_FLUSH;
+	}
+
+	ring_advance(ring, cs);
+}
+
+static struct i915_address_space *vm_alias(struct i915_address_space *vm)
+{
+	if (i915_is_ggtt(vm))
+		vm = &i915_vm_to_ggtt(vm)->alias->vm;
+
+	return vm;
+}
+
+static u32 pp_dir(const struct i915_ppgtt *ppgtt)
+{
+	return container_of(ppgtt, const struct gen6_ppgtt, base)->pp_dir;
+}
+
+static void load_pd_dir(struct intel_ring *ring,
+			struct intel_engine_cs *engine,
+			const struct i915_ppgtt *ppgtt)
+{
+	u32 *cs = ring_map_dw(ring, 10);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
+	*cs++ = PP_DIR_DCLV_2G;
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
+	*cs++ = pp_dir(ppgtt);
+
+	/* Stall until the page table load is complete? */
+	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
+	*cs++ = intel_gt_scratch_offset(engine->gt,
+					INTEL_GT_SCRATCH_FIELD_DEFAULT);
+	*cs++ = MI_NOOP;
+
+	ring_advance(ring, cs);
+}
+
+static struct i915_address_space *current_vm(struct intel_engine_cs *engine)
+{
+	struct intel_context *old = engine->legacy.context;
+
+	return old ? vm_alias(old->vm) : NULL;
+}
+
+static void gen4_emit_invalidate_rcs(struct intel_ring *ring,
+				     struct intel_engine_cs *engine)
+{
+	u32 addr, flags;
+	u32 *cs;
+
+	addr = intel_gt_scratch_offset(engine->gt,
+				       INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
+
+	flags = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
+	flags |= PIPE_CONTROL_TLB_INVALIDATE;
+
+	if (INTEL_GEN(engine->i915) >= 7)
+		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+	else
+		addr |= PIPE_CONTROL_GLOBAL_GTT;
+
+	cs = ring_map_dw(ring, 4);
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = flags;
+	*cs++ = addr;
+	*cs++ = 0;
+	ring_advance(ring, cs);
+}
+
+static struct i915_address_space *
+clear_residuals(struct intel_ring *ring, struct intel_engine_cs *engine)
+{
+	struct intel_context *ce = engine->kernel_context;
+	struct i915_address_space *vm = vm_alias(engine->gt->vm);
+	u32 flags;
+
+	if (vm != current_vm(engine))
+		load_pd_dir(ring, engine, i915_vm_to_ppgtt(vm));
+
+	if (ce->state)
+		mi_set_context(ring, engine, ce,
+			       MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT);
+
+	if (IS_HASWELL(engine->i915))
+		flags = MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW;
+	else
+		flags = MI_BATCH_NON_SECURE_I965;
+
+	__gen6_emit_bb_start(ring_map_dw(ring, 2),
+			     engine->wa_ctx.vma->node.start, flags);
+
+	return vm;
+}
+
+static void remap_l3_slice(struct intel_ring *ring,
+			   struct intel_engine_cs *engine,
+			   int slice)
+{
+	u32 *cs, *remap_info = engine->i915->l3_parity.remap_info[slice];
+	int i;
+
+	if (!remap_info)
+		return;
+
+	/*
+	 * Note: We do not worry about the concurrent register cacheline hang
+	 * here because no other code should access these registers other than
+	 * at initialization time.
+	 */
+	cs = ring_map_dw(ring, GEN7_L3LOG_SIZE / 4 * 2 + 2);
+	*cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE / 4);
+	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
+		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
+		*cs++ = remap_info[i];
+	}
+	*cs++ = MI_NOOP;
+	ring_advance(ring, cs);
+}
+
+static void remap_l3(struct intel_ring *ring,
+		     struct intel_engine_cs *engine,
+		     struct intel_context *ce)
+{
+	struct i915_gem_context *ctx =
+		rcu_dereference_protected(ce->gem_context, true);
+	int bit, idx = -1;
+
+	if (!ctx || !ctx->remap_slice)
+		return;
+
+	do {
+		bit = ffs(ctx->remap_slice);
+		remap_l3_slice(ring, engine, idx += bit);
+	} while (ctx->remap_slice >>= bit);
+}
+
 static void switch_context(struct intel_ring *ring, struct i915_request *rq)
 {
+	struct intel_engine_cs *engine = rq->engine;
+	struct i915_address_space *cvm = current_vm(engine);
+	struct intel_context *ce = rq->context;
+	struct i915_address_space *vm;
+
+	if (engine->wa_ctx.vma && ce != engine->kernel_context) {
+		if (engine->wa_ctx.vma->private != ce &&
+		    i915_mitigate_clear_residuals()) {
+			cvm = clear_residuals(ring, engine);
+			intel_context_put(engine->wa_ctx.vma->private);
+			engine->wa_ctx.vma->private = intel_context_get(ce);
+		}
+	}
+
+	vm = vm_alias(ce->vm);
+	if (vm != cvm)
+		load_pd_dir(ring, engine, i915_vm_to_ppgtt(vm));
+
+	if (ce->state) {
+		u32 flags;
+
+		GEM_BUG_ON(engine->id != RCS0);
+
+		/* For resource streamer on HSW+ and power context elsewhere */
+		BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
+		BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN);
+
+		flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT;
+		if (test_bit(CONTEXT_VALID_BIT, &ce->flags)) {
+			gen4_emit_invalidate_rcs(ring, engine);
+			flags |= MI_RESTORE_EXT_STATE_EN;
+		} else {
+			flags |= MI_RESTORE_INHIBIT;
+		}
+
+		mi_set_context(ring, engine, ce, flags);
+	}
+
+	remap_l3(ring, engine, ce);
 }
 
 static struct i915_request *ring_submit(struct i915_request *rq)
@@ -218,10 +485,48 @@ copy_ports(struct i915_request **dst, struct i915_request **src, int count)
 		WRITE_ONCE(*dst++, *src++); /* avoid write tearing */
 }
 
+static inline void __write_tail(const struct intel_engine_cs *engine)
+{
+	ENGINE_WRITE(engine, RING_TAIL, engine->legacy.ring->tail);
+}
+
+static void wa_write_tail(const struct intel_engine_cs *engine)
+{
+	const i915_reg_t psmi = RING_PSMI_CTL(engine->mmio_base);
+	struct intel_uncore *uncore = engine->uncore;
+
+	intel_uncore_write_fw(uncore, psmi,
+			      _MASKED_BIT_ENABLE(PSMI_SLEEP_MSG_DISABLE));
+
+	/* Clear the context id. Here be magic! */
+	intel_uncore_write64_fw(uncore, RING_RNCID(engine->mmio_base), 0x0);
+
+	/* Wait for the ring not to be idle, i.e. for it to wake up. */
+	if (__intel_wait_for_register_fw(uncore, psmi,
+					 PSMI_SLEEP_INDICATOR, 0,
+					 1000, 0, NULL))
+		drm_err(&uncore->i915->drm,
+			"timed out waiting for %s to wake up\n",
+			engine->name);
+
+	/* Now that the ring is fully powered up, update the tail */
+	__write_tail(engine);
+
+	/*
+	 * Let the ring send IDLE messages to the GT again,
+	 * and so let it sleep to conserve power when idle.
+	 */
+	intel_uncore_write_fw(uncore, psmi,
+			      _MASKED_BIT_DISABLE(PSMI_SLEEP_MSG_DISABLE));
+}
+
 static inline void write_tail(const struct intel_engine_cs *engine)
 {
 	wmb(); /* paranoid flush of WCB before RING_TAIL write */
-	ENGINE_WRITE(engine, RING_TAIL, engine->legacy.ring->tail);
+	if (!engine->fw_active)
+		__write_tail(engine);
+	else
+		wa_write_tail(engine);
 }
 
 static void dequeue(struct i915_sched *se, struct intel_engine_cs *engine)
@@ -561,7 +866,14 @@ static int ring_context_pre_pin(struct intel_context *ce,
 				struct i915_gem_ww_ctx *ww,
 				void **unused)
 {
-	return 0;
+	struct i915_address_space *vm;
+	int err = 0;
+
+	vm = vm_alias(ce->vm);
+	if (vm)
+		err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)), ww);
+
+	return err;
 }
 
 static int ring_context_pin(struct intel_context *ce, void *unused)
@@ -569,12 +881,22 @@ static int ring_context_pin(struct intel_context *ce, void *unused)
 	return 0;
 }
 
+static void __context_unpin_ppgtt(struct intel_context *ce)
+{
+	struct i915_address_space *vm;
+
+	vm = vm_alias(ce->vm);
+	if (vm)
+		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
+}
+
 static void ring_context_unpin(struct intel_context *ce)
 {
 }
 
 static void ring_context_post_unpin(struct intel_context *ce)
 {
+	__context_unpin_ppgtt(ce);
 }
 
 static void ring_context_reset(struct intel_context *ce)
@@ -633,6 +955,11 @@ static void ring_release(struct intel_engine_cs *engine)
 
 	set_current_context(&engine->legacy.context, NULL);
 
+	if (engine->wa_ctx.vma) {
+		intel_context_put(engine->wa_ctx.vma->private);
+		i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
+	}
+
 	intel_ring_unpin(engine->legacy.ring);
 	intel_ring_put(engine->legacy.ring);
 }
@@ -640,6 +967,17 @@ static void ring_release(struct intel_engine_cs *engine)
 static void setup_irq(struct intel_engine_cs *engine)
 {
 	engine->irq_handler = irq_handler;
+
+	if (INTEL_GEN(engine->i915) >= 6) {
+		engine->irq_enable = gen6_irq_enable;
+		engine->irq_disable = gen6_irq_disable;
+	} else if (INTEL_GEN(engine->i915) >= 5) {
+		engine->irq_enable = gen5_irq_enable;
+		engine->irq_disable = gen5_irq_disable;
+	} else {
+		engine->irq_enable = gen3_irq_enable;
+		engine->irq_disable = gen3_irq_disable;
+	}
 }
 
 static void setup_common(struct intel_engine_cs *engine)
@@ -648,7 +986,7 @@ static void setup_common(struct intel_engine_cs *engine)
 
 	/* gen8+ are only supported with execlists */
 	GEM_BUG_ON(INTEL_GEN(i915) >= 8);
-	GEM_BUG_ON(INTEL_GEN(i915) < 8);
+	GEM_BUG_ON(INTEL_GEN(i915) < 4);
 
 	setup_irq(engine);
 
@@ -666,24 +1004,80 @@ static void setup_common(struct intel_engine_cs *engine)
 	engine->cops = &ring_context_ops;
 	engine->request_alloc = ring_request_alloc;
 
+	engine->emit_init_breadcrumb = gen4_emit_init_breadcrumb_xcs;
+
+	if (INTEL_GEN(i915) >= 6)
+		engine->emit_bb_start = gen6_emit_bb_start;
+	else
+		engine->emit_bb_start = gen4_emit_bb_start;
+
+	if (INTEL_GEN(i915) >= 7)
+		engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
+	else if (INTEL_GEN(i915) >= 6)
+		engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
+	else
+		engine->emit_fini_breadcrumb = gen4_emit_breadcrumb_xcs;
+
 	engine->set_default_submission = set_default_submission;
 }
 
 static void setup_rcs(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *i915 = engine->i915;
+
+	if (HAS_L3_DPF(i915))
+		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
+
+	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
+
+	if (INTEL_GEN(i915) >= 7) {
+		engine->emit_flush = gen7_emit_flush_rcs;
+		engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_rcs;
+		if (IS_HASWELL(i915))
+			engine->emit_bb_start = hsw_emit_bb_start;
+	} else if (INTEL_GEN(i915) >= 6) {
+		engine->emit_flush = gen6_emit_flush_rcs;
+		engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_rcs;
+	} else if (INTEL_GEN(i915) >= 5) {
+		engine->emit_flush = gen4_emit_flush_rcs;
+	} else {
+		engine->emit_flush = gen4_emit_flush_rcs;
+		engine->irq_enable_mask = I915_USER_INTERRUPT;
+	}
 }
 
 static void setup_vcs(struct intel_engine_cs *engine)
 {
+	if (INTEL_GEN(engine->i915) >= 6) {
+		if (IS_GEN(engine->i915, 6))
+			engine->fw_domain = FORCEWAKE_ALL;
+		engine->emit_flush = gen6_emit_flush_vcs;
+		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
+	} else if (INTEL_GEN(engine->i915) >= 5) {
+		engine->emit_flush = gen4_emit_flush_vcs;
+		engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
+	} else {
+		engine->emit_flush = gen4_emit_flush_vcs;
+		engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
+	}
 }
 
 static void setup_bcs(struct intel_engine_cs *engine)
 {
+	GEM_BUG_ON(INTEL_GEN(engine->i915) < 6);
+
+	engine->emit_flush = gen6_emit_flush_xcs;
+	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
 }
 
 static void setup_vecs(struct intel_engine_cs *engine)
 {
 	GEM_BUG_ON(!IS_HASWELL(engine->i915));
+
+	engine->emit_flush = gen6_emit_flush_xcs;
+	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
+	engine->irq_enable = hsw_irq_enable_vecs;
+	engine->irq_disable = hsw_irq_disable_vecs;
 }
 
 static unsigned int global_ring_size(void)
@@ -692,6 +1086,58 @@ static unsigned int global_ring_size(void)
 	return roundup_pow_of_two(EXECLIST_MAX_PORTS * SZ_16K + SZ_4K);
 }
 
+static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int size;
+	int err;
+
+	size = gen7_setup_clear_gpr_bb(engine, NULL /* probe size */);
+	if (size <= 0)
+		return size;
+
+	size = ALIGN(size, PAGE_SIZE);
+	obj = i915_gem_object_create_internal(engine->i915, size);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vma = i915_vma_instance(obj, engine->gt->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_obj;
+	}
+
+	vma->private = intel_context_create(engine); /* dummy residuals */
+	if (IS_ERR(vma->private)) {
+		err = PTR_ERR(vma->private);
+		goto err_obj;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+	if (err)
+		goto err_private;
+
+	err = i915_vma_sync(vma);
+	if (err)
+		goto err_unpin;
+
+	err = gen7_setup_clear_gpr_bb(engine, vma);
+	if (err)
+		goto err_unpin;
+
+	engine->wa_ctx.vma = vma;
+	return 0;
+
+err_unpin:
+	i915_vma_unpin(vma);
+err_private:
+	intel_context_put(vma->private);
+err_obj:
+	i915_gem_object_put(obj);
+	return err;
+}
+
 int intel_ring_scheduler_setup(struct intel_engine_cs *engine)
 {
 	struct intel_ring *ring;
@@ -736,12 +1182,20 @@ int intel_ring_scheduler_setup(struct intel_engine_cs *engine)
 	GEM_BUG_ON(engine->legacy.ring);
 	engine->legacy.ring = ring;
 
+	if (IS_GEN(engine->i915, 7) && engine->class == RENDER_CLASS) {
+		err = gen7_ctx_switch_bb_init(engine);
+		if (err)
+			goto err_ring_unpin;
+	}
+
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 
 	/* Finally, take ownership and responsibility for cleanup! */
 	engine->release = ring_release;
 	return 0;
 
+err_ring_unpin:
+	intel_ring_unpin(ring);
 err_ring:
 	intel_ring_put(ring);
 err:
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 224ad897af34..2f4584202e5d 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2532,7 +2532,16 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define GEN6_VERSYNC	(RING_SYNC_1(VEBOX_RING_BASE))
 #define GEN6_VEVSYNC	(RING_SYNC_2(VEBOX_RING_BASE))
 #define GEN6_NOSYNC	INVALID_MMIO_REG
+
 #define RING_PSMI_CTL(base)	_MMIO((base) + 0x50)
+#define   PSMI_SLEEP_MSG_DISABLE		REG_BIT(0)
+#define   PSMI_SLEEP_FLUSH_DISABLE		REG_BIT(2)
+#define   PSMI_SLEEP_INDICATOR			REG_BIT(3)
+#define   PSMI_GO_INDICATOR			REG_BIT(4)
+#define   GEN12_PSMI_WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7)
+#define   GEN8_PSMI_FF_DOP_CLOCK_GATE_DISABLE	REG_BIT(10)
+#define   GEN8_PSMI_RC_SEMA_IDLE_MSG_DISABLE	REG_BIT(12)
+
 #define RING_MAX_IDLE(base)	_MMIO((base) + 0x54)
 #define RING_HWS_PGA(base)	_MMIO((base) + 0x80)
 #define RING_ID(base)		_MMIO((base) + 0x8c)
@@ -2542,6 +2551,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   RESET_CTL_READY_TO_RESET REG_BIT(1)
 #define   RESET_CTL_REQUEST_RESET  REG_BIT(0)
 
+#define RING_RNCID(base)	_MMIO((base) + 0x198)
 #define RING_SEMA_WAIT_POLL(base) _MMIO((base) + 0x24c)
 
 #define HSW_GTT_CACHE_EN	_MMIO(0x4024)
-- 
2.20.1

* [Intel-gfx] [PATCH 29/31] drm/i915/gt: Enable ring scheduling for gen5-7
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (26 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 28/31] drm/i915/gt: Implement ring scheduler for gen4-7 Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 30/31] drm/i915: Support secure dispatch on gen6/gen7 Chris Wilson
                   ` (5 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Switch over from FIFO global submission to the priority-sorted
topological scheduler. At the cost of more busy work on the CPU to
keep the GPU supplied with the next packet of requests, this allows us
to reorder requests around submission stalls and so allow low latency
under load while maintaining fairness between clients.

The downside is that we enable interrupts on all requests (unlike with
execlists, where we have an interrupt per context switch). This means
that instead of receiving an interrupt only when we are waiting for
completion, we are processing them all the time, with a noticeable
overhead of CPU time absorbed by the interrupt handler. The effect is
most pronounced on CPU-throughput limited renderers like uxa, where
performance can be degraded by 20% in the worst case. Nevertheless,
that is a pathological example of an obsolete userspace driver. (There
are also cases where uxa performs better by 20%, which is an
interesting quirk...) The glxgears-not-a-benchmark (CPU throughput
bound) is one such example of a performance hit, only affecting uxa.

The expectation is that allowing request reordering will provide a much
smoother UX that greatly compensates for the reduced throughput under
high submission load (but low GPU load).

This also enables the timer-based RPS for better powersaving, with the
exception of Valleyview, whose PCU doesn't take kindly to our
interference.

References: 0f46832fab77 ("drm/i915: Mask USER interrupts on gen6 (until required)")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 2 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c             | 5 ++++-
 drivers/gpu/drm/i915/gt/intel_gt_types.h              | 1 +
 drivers/gpu/drm/i915/gt/intel_rps.c                   | 6 ++----
 4 files changed, 8 insertions(+), 6 deletions(-)
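
As a quick check from userspace, the existing I915_PARAM_HAS_SCHEDULER
query should now report the scheduler capabilities on these platforms
as well. A minimal, illustrative sketch (the device node path is an
assumption, error handling is elided):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

int main(void)
{
	int caps = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HAS_SCHEDULER,
		.value = &caps,
	};
	int fd = open("/dev/dri/card0", O_RDWR); /* assumed device node */

	if (fd < 0 || ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) < 0)
		return 1;

	/* Each bit is an I915_SCHEDULER_CAP_* flag reported by the kernel */
	printf("enabled:%d priority:%d preemption:%d\n",
	       !!(caps & I915_SCHEDULER_CAP_ENABLED),
	       !!(caps & I915_SCHEDULER_CAP_PRIORITY),
	       !!(caps & I915_SCHEDULER_CAP_PREEMPTION));
	return 0;
}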

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index df949320f2b5..1ecd362b131a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -94,7 +94,7 @@ static int live_nop_switch(void *arg)
 			rq = i915_request_get(this);
 			i915_request_add(this);
 		}
-		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+		if (i915_request_wait(rq, 0, HZ) < 0) {
 			pr_err("Failed to populated %d contexts\n", nctx);
 			intel_gt_set_wedged(&i915->gt);
 			i915_request_put(rq);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 7d34bf03670b..9c731338837d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -882,8 +882,11 @@ int intel_engines_init(struct intel_gt *gt)
 	} else if (HAS_EXECLISTS(gt->i915)) {
 		gt->submission_method = INTEL_SUBMISSION_ELSP;
 		setup = intel_execlists_submission_setup;
-	} else {
+	} else if (INTEL_GEN(gt->i915) >= 5) {
 		gt->submission_method = INTEL_SUBMISSION_RING;
+		setup = intel_ring_scheduler_setup;
+	} else {
+		gt->submission_method = INTEL_SUBMISSION_LEGACY;
 		setup = intel_ring_submission_setup;
 	}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 626af37c7790..125b40f62644 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -30,6 +30,7 @@ struct intel_engine_cs;
 struct intel_uncore;
 
 enum intel_submission_method {
+	INTEL_SUBMISSION_LEGACY,
 	INTEL_SUBMISSION_RING,
 	INTEL_SUBMISSION_ELSP,
 	INTEL_SUBMISSION_GUC,
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 97cab1b99871..80044ece2b12 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1081,9 +1081,7 @@ static bool gen6_rps_enable(struct intel_rps *rps)
 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
 
-	rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
-			  GEN6_PM_RP_DOWN_THRESHOLD |
-			  GEN6_PM_RP_DOWN_TIMEOUT);
+	rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
 
 	return rps_reset(rps);
 }
@@ -1391,7 +1389,7 @@ void intel_rps_enable(struct intel_rps *rps)
 	GEM_BUG_ON(rps->efficient_freq < rps->min_freq);
 	GEM_BUG_ON(rps->efficient_freq > rps->max_freq);
 
-	if (has_busy_stats(rps))
+	if (has_busy_stats(rps) && !IS_VALLEYVIEW(i915))
 		intel_rps_set_timer(rps);
 	else if (INTEL_GEN(i915) >= 6)
 		intel_rps_set_interrupts(rps);
-- 
2.20.1

* [Intel-gfx] [PATCH 30/31] drm/i915: Support secure dispatch on gen6/gen7
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (27 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 29/31] drm/i915/gt: Enable ring scheduling for gen5-7 Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 20:55   ` Dave Airlie
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 31/31] drm/i915/gt: Limit C-states while waiting for requests Chris Wilson
                   ` (4 subsequent siblings)
  33 siblings, 1 reply; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Re-enable secure dispatch for gen6/gen7, primarily to work around the
command parser and its overly zealous command validation on Haswell.
For example, that validation prevents making accurate measurements
using a journal to store results from the GPU without CPU intervention.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
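
For reference, "secure dispatch" is what userspace requests with the
I915_EXEC_SECURE execbuffer flag, honoured only for privileged clients
(DRM master or CAP_SYS_ADMIN) on hardware where secure batches are
supported. A minimal, illustrative sketch of such a submission (device
node assumed, error handling elided):

#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

#define MI_BATCH_BUFFER_END (0xa << 23)

int main(void)
{
	uint32_t batch[2] = { MI_BATCH_BUFFER_END, 0 };
	int fd = open("/dev/dri/card0", O_RDWR); /* assumed device node */

	/* Create a 4KiB GEM object and write a trivial batch into it */
	struct drm_i915_gem_create create = { .size = 4096 };
	ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);

	struct drm_i915_gem_pwrite pwrite = {
		.handle = create.handle,
		.size = sizeof(batch),
		.data_ptr = (uintptr_t)batch,
	};
	ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);

	struct drm_i915_gem_exec_object2 obj = { .handle = create.handle };
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = (uintptr_t)&obj,
		.buffer_count = 1,
		.batch_len = sizeof(batch),
		.flags = I915_EXEC_RENDER | I915_EXEC_SECURE,
	};

	/* Rejected with -EPERM unless the client is sufficiently privileged */
	return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}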

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cee180ca7f5a..bb2660e4236a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1630,7 +1630,7 @@ tgl_stepping_get(struct drm_i915_private *dev_priv)
 #define HAS_LLC(dev_priv)	(INTEL_INFO(dev_priv)->has_llc)
 #define HAS_SNOOP(dev_priv)	(INTEL_INFO(dev_priv)->has_snoop)
 #define HAS_EDRAM(dev_priv)	((dev_priv)->edram_size_mb)
-#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6)
+#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 8)
 #define HAS_WT(dev_priv)	HAS_EDRAM(dev_priv)
 
 #define HWS_NEEDS_PHYSICAL(dev_priv)	(INTEL_INFO(dev_priv)->hws_needs_physical)
-- 
2.20.1

* [Intel-gfx] [PATCH 31/31] drm/i915/gt: Limit C-states while waiting for requests
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (28 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 30/31] drm/i915: Support secure dispatch on gen6/gen7 Chris Wilson
@ 2021-02-08 10:52 ` Chris Wilson
  2021-02-08 15:43 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/31] drm/i915/gt: Ratelimit heartbeat completion probing Patchwork
                   ` (3 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 10:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Eero Tamminen, Chris Wilson

Allow the sysadmin to specify whether we should prevent the CPU from
entering higher C-states while waiting for the GPU, in order to reduce
the latency of request completions and so speed up client continuations.

The target DMA latency can be adjusted per-engine using:

	/sys/class/drm/card?/engine/*/dma_latency_ns

(For waiting on a virtual engine, the underlying physical engine is used
for the wait once the request is active, so set all the physical engines
in the virtual set to the same target DMA latency.)

Note that in most cases, the ratelimiting step does not appear to be
the interrupt latency per se, but rather the secondary effect of
avoiding additional memory latencies while active.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Eero Tamminen <eero.t.tamminen@intel.com>
Cc: Francisco Jerez <currojerez@riseup.net>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
---
 drivers/gpu/drm/i915/Kconfig.profile          | 14 ++++++
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   | 45 +++++++++++++++++++
 .../gpu/drm/i915/gt/intel_breadcrumbs_types.h |  7 +++
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  2 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  2 +
 drivers/gpu/drm/i915/gt/sysfs_engines.c       | 43 ++++++++++++++++++
 6 files changed, 113 insertions(+)
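
For illustration, a sketch of how an admin tool might program the new
per-engine attribute from userspace (the card index and engine name,
rcs0, are assumptions for the example):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int set_dma_latency(const char *engine, int ns)
{
	char path[256], buf[32];
	int fd, len, ret;

	snprintf(path, sizeof(path),
		 "/sys/class/drm/card0/engine/%s/dma_latency_ns", engine);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -errno;

	len = snprintf(buf, sizeof(buf), "%d", ns);
	ret = write(fd, buf, len) == len ? 0 : -errno;
	close(fd);

	return ret;
}

int main(void)
{
	/* 0 forbids C-states while waiting; -1 restores the default */
	return set_dma_latency("rcs0", 0);
}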

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 72ed001d238d..545a9f3c71fe 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -86,6 +86,20 @@ config DRM_I915_USERFAULT_AUTOSUSPEND
 	  May be 0 to disable the extra delay and solely use the device level
 	  runtime pm autosuspend delay tunable.
 
+config DRM_I915_DMA_LATENCY
+	int "Target CPU-DMA latency while waiting on active requests (ns)"
+	default -1 # nanoseconds
+	help
+	  Specify a target latency for DMA wakeup, see /dev/cpu_dma_latency,
+	  used while the CPU is waiting for GPU results.
+
+	  This is adjustable via
+	  /sys/class/drm/card?/engine/*/dma_latency_ns
+
+	  May be -1 to prevent specifying a target wakeup and let the CPU
+	  enter powersaving while waiting. Conversely, 0 may be used to
+	  prevent the CPU from entering any C-states while waiting.
+
 config DRM_I915_HEARTBEAT_INTERVAL
 	int "Interval between heartbeat pulses (ms)"
 	default 2500 # milliseconds
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 9e67810c7767..2527182b642e 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -15,6 +15,40 @@
 #include "intel_gt_pm.h"
 #include "intel_gt_requests.h"
 
+static void __dma_qos_update(struct work_struct *work)
+{
+	struct intel_breadcrumbs_dma_qos *qos =
+		container_of(work, typeof(*qos), update);
+
+	if (cpu_latency_qos_request_active(&qos->req)) {
+		if (qos->latency < 0)
+			cpu_latency_qos_remove_request(&qos->req);
+		else
+			cpu_latency_qos_update_request(&qos->req, qos->latency);
+	} else {
+		if (qos->latency != -1)
+			cpu_latency_qos_add_request(&qos->req, qos->latency);
+	}
+}
+
+static void dma_qos_add(struct intel_breadcrumbs *b, s32 latency)
+{
+	if (latency < 0)
+		return;
+
+	b->qos.latency = latency;
+	queue_work(system_highpri_wq, &b->qos.update);
+}
+
+static void dma_qos_del(struct intel_breadcrumbs *b)
+{
+	if (b->qos.latency < 0)
+		return;
+
+	b->qos.latency = -1;
+	queue_work(system_highpri_wq, &b->qos.update);
+}
+
 static bool irq_enable(struct intel_engine_cs *engine)
 {
 	if (!engine->irq_enable)
@@ -55,6 +89,7 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 	 * the irq.
 	 */
 	WRITE_ONCE(b->irq_armed, true);
+	dma_qos_add(b, b->irq_engine->props.dma_latency_ns);
 
 	/* Requests may have completed before we could enable the interrupt. */
 	if (!b->irq_enabled++ && irq_enable(b->irq_engine))
@@ -78,7 +113,9 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
 	if (!--b->irq_enabled)
 		irq_disable(b->irq_engine);
 
+	dma_qos_del(b);
 	WRITE_ONCE(b->irq_armed, false);
+
 	intel_gt_pm_put_async(b->irq_engine->gt);
 }
 
@@ -287,6 +324,9 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
 	INIT_LIST_HEAD(&b->signalers);
 	init_llist_head(&b->signaled_requests);
 
+	b->qos.latency = -1;
+	INIT_WORK(&b->qos.update, __dma_qos_update);
+
 	spin_lock_init(&b->irq_lock);
 	init_irq_work(&b->irq_work, signal_irq_work);
 
@@ -354,6 +394,11 @@ void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
 	irq_work_sync(&b->irq_work);
 	GEM_BUG_ON(!list_empty(&b->signalers));
 	GEM_BUG_ON(b->irq_armed);
+
+	GEM_BUG_ON(b->qos.latency != -1);
+	flush_work(&b->qos.update);
+	GEM_BUG_ON(cpu_latency_qos_request_active(&b->qos.req));
+
 	kfree(b);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
index 3a084ce8ff5e..d5ad47f36ba0 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
@@ -8,6 +8,7 @@
 
 #include <linux/irq_work.h>
 #include <linux/list.h>
+#include <linux/pm_qos.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
@@ -43,6 +44,12 @@ struct intel_breadcrumbs {
 
 	/* Not all breadcrumbs are attached to physical HW */
 	struct intel_engine_cs *irq_engine;
+
+	struct intel_breadcrumbs_dma_qos {
+		struct pm_qos_request req;
+		struct work_struct update;
+		s32 latency;
+	} qos;
 };
 
 #endif /* __INTEL_BREADCRUMBS_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 9c731338837d..1846230dd8bd 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -303,6 +303,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	engine->instance = info->instance;
 	__sprint_engine_name(engine);
 
+	engine->props.dma_latency_ns =
+		CONFIG_DRM_I915_DMA_LATENCY;
 	engine->props.heartbeat_interval_ms =
 		CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
 	engine->props.max_busywait_duration_ns =
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index c48078f14dfd..0122a576baf8 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -492,6 +492,8 @@ struct intel_engine_cs {
 		unsigned long preempt_timeout_ms;
 		unsigned long stop_timeout_ms;
 		unsigned long timeslice_duration_ms;
+
+		s32 dma_latency_ns;
 	} props, defaults;
 
 	I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c
index 70506f43d6be..30316e7ce4f5 100644
--- a/drivers/gpu/drm/i915/gt/sysfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -299,6 +299,47 @@ stop_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
 static struct kobj_attribute stop_timeout_def =
 __ATTR(stop_timeout_ms, 0444, stop_default, NULL);
 
+static ssize_t
+dma_latency_store(struct kobject *kobj, struct kobj_attribute *attr,
+		  const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	long long latency;
+	int err;
+
+	err = kstrtoll(buf, 0, &latency);
+	if (err)
+		return err;
+
+	if (latency > S32_MAX)
+		return -EINVAL;
+
+	WRITE_ONCE(engine->props.dma_latency_ns, latency);
+	return count;
+}
+
+static ssize_t
+dma_latency_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%d\n", engine->props.dma_latency_ns);
+}
+
+static struct kobj_attribute dma_latency_attr =
+__ATTR(dma_latency_ns, 0644, dma_latency_show, dma_latency_store);
+
+static ssize_t
+dma_latency_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%d\n", engine->defaults.dma_latency_ns);
+}
+
+static struct kobj_attribute dma_latency_def =
+__ATTR(dma_latency_ns, 0444, dma_latency_default, NULL);
+
 static ssize_t
 preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
 		      const char *buf, size_t count)
@@ -456,6 +497,7 @@ static void add_defaults(struct kobj_engine *parent)
 	static const struct attribute *files[] = {
 		&max_spin_def.attr,
 		&stop_timeout_def.attr,
+		&dma_latency_def.attr,
 #if CONFIG_DRM_I915_HEARTBEAT_INTERVAL
 		&heartbeat_interval_def.attr,
 #endif
@@ -498,6 +540,7 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
 		&all_caps_attr.attr,
 		&max_spin_attr.attr,
 		&stop_timeout_attr.attr,
+		&dma_latency_attr.attr,
 #if CONFIG_DRM_I915_HEARTBEAT_INTERVAL
 		&heartbeat_interval_attr.attr,
 #endif
-- 
2.20.1

* Re: [Intel-gfx] [PATCH 02/31] drm/i915: Move context revocation to scheduler
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 02/31] drm/i915: Move context revocation to scheduler Chris Wilson
@ 2021-02-08 11:18   ` Tvrtko Ursulin
  0 siblings, 0 replies; 54+ messages in thread
From: Tvrtko Ursulin @ 2021-02-08 11:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 08/02/2021 10:52, Chris Wilson wrote:
> Centralise the means by which to remove a context from execution to the
> scheduler, allowing the backends to specialise as necessary. Note that
> without backend support, we can simplify the procedure to forcibly reset
> the HW to remove the context.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_context.c   | 117 +-----------------
>   .../drm/i915/gt/intel_execlists_submission.c  |  47 +++++++
>   drivers/gpu/drm/i915/i915_scheduler.c         |  20 +++
>   drivers/gpu/drm/i915/i915_scheduler_types.h   |   5 +
>   4 files changed, 75 insertions(+), 114 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index ca37d93ef5e7..be75f861db67 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -382,104 +382,9 @@ __context_engines_static(const struct i915_gem_context *ctx)
>   	return rcu_dereference_protected(ctx->engines, true);
>   }
>   
> -static void __reset_context(struct i915_gem_context *ctx,
> -			    struct intel_engine_cs *engine)
> -{
> -	intel_gt_handle_error(engine->gt, engine->mask, 0,
> -			      "context closure in %s", ctx->name);
> -}
> -
> -static bool __cancel_engine(struct intel_engine_cs *engine)
> -{
> -	/*
> -	 * Send a "high priority pulse" down the engine to cause the
> -	 * current request to be momentarily preempted. (If it fails to
> -	 * be preempted, it will be reset). As we have marked our context
> -	 * as banned, any incomplete request, including any running, will
> -	 * be skipped following the preemption.
> -	 *
> -	 * If there is no hangchecking (one of the reasons why we try to
> -	 * cancel the context) and no forced preemption, there may be no
> -	 * means by which we reset the GPU and evict the persistent hog.
> -	 * Ergo if we are unable to inject a preemptive pulse that can
> -	 * kill the banned context, we fallback to doing a local reset
> -	 * instead.
> -	 */
> -	return intel_engine_pulse(engine) == 0;
> -}
> -
> -static bool
> -__active_engine(struct i915_request *rq, struct intel_engine_cs **active)
> -{
> -	struct intel_engine_cs *engine, *locked;
> -	bool ret = false;
> -
> -	/*
> -	 * Serialise with __i915_request_submit() so that it sees
> -	 * is-banned?, or we know the request is already inflight.
> -	 *
> -	 * Note that rq->engine is unstable, and so we double
> -	 * check that we have acquired the lock on the final engine.
> -	 */
> -	locked = READ_ONCE(rq->engine);
> -	spin_lock_irq(&locked->sched.lock);
> -	while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
> -		spin_unlock(&locked->sched.lock);
> -		locked = engine;
> -		spin_lock(&locked->sched.lock);
> -	}
> -
> -	if (i915_request_is_active(rq)) {
> -		if (!__i915_request_is_complete(rq))
> -			*active = locked;
> -		ret = true;
> -	}
> -
> -	spin_unlock_irq(&locked->sched.lock);
> -
> -	return ret;
> -}
> -
> -static struct intel_engine_cs *active_engine(struct intel_context *ce)
> -{
> -	struct intel_engine_cs *engine = NULL;
> -	struct i915_request *rq;
> -
> -	if (intel_context_has_inflight(ce))
> -		return intel_context_inflight(ce);
> -
> -	if (!ce->timeline)
> -		return NULL;
> -
> -	/*
> -	 * rq->link is only SLAB_TYPESAFE_BY_RCU, we need to hold a reference
> -	 * to the request to prevent it being transferred to a new timeline
> -	 * (and onto a new timeline->requests list).
> -	 */
> -	rcu_read_lock();
> -	list_for_each_entry_reverse(rq, &ce->timeline->requests, link) {
> -		bool found;
> -
> -		/* timeline is already completed upto this point? */
> -		if (!i915_request_get_rcu(rq))
> -			break;
> -
> -		/* Check with the backend if the request is inflight */
> -		found = true;
> -		if (likely(rcu_access_pointer(rq->timeline) == ce->timeline))
> -			found = __active_engine(rq, &engine);
> -
> -		i915_request_put(rq);
> -		if (found)
> -			break;
> -	}
> -	rcu_read_unlock();
> -
> -	return engine;
> -}
> -
>   static void kill_engines(struct i915_gem_engines *engines, bool ban)
>   {
> +	const int error = ban ? -EIO : -EAGAIN;
>   	struct i915_gem_engines_iter it;
>   	struct intel_context *ce;
>   
> @@ -491,28 +396,12 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>   	 * engines on which there are incomplete requests.
>   	 */
>   	for_each_gem_engine(ce, engines, it) {
> -		struct intel_engine_cs *engine;
> +		struct i915_sched *se = intel_engine_get_scheduler(ce->engine);
>   
>   		if (ban && intel_context_set_banned(ce))
>   			continue;
>   
> -		/*
> -		 * Check the current active state of this context; if we
> -		 * are currently executing on the GPU we need to evict
> -		 * ourselves. On the other hand, if we haven't yet been
> -		 * submitted to the GPU or if everything is complete,
> -		 * we have nothing to do.
> -		 */
> -		engine = active_engine(ce);
> -
> -		/* First attempt to gracefully cancel the context */
> -		if (engine && !__cancel_engine(engine) && ban)
> -			/*
> -			 * If we are unable to send a preemptive pulse to bump
> -			 * the context from the GPU, we have to resort to a full
> -			 * reset. We hope the collateral damage is worth it.
> -			 */
> -			__reset_context(engines->ctx, engine);
> +		se->revoke_context(ce, ban ? engines->ctx->name : NULL, error);
>   	}
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index 85ff5fe861b4..e51112302fb8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -114,6 +114,7 @@
>   #include "gen8_engine_cs.h"
>   #include "intel_breadcrumbs.h"
>   #include "intel_context.h"
> +#include "intel_engine_heartbeat.h"
>   #include "intel_engine_pm.h"
>   #include "intel_engine_stats.h"
>   #include "intel_execlists_submission.h"
> @@ -2774,6 +2775,50 @@ execlists_active_request(const struct i915_sched *se)
>   	return rq;
>   }
>   
> +static bool __cancel_engine(struct intel_engine_cs *engine)
> +{
> +	/*
> +	 * Send a "high priority pulse" down the engine to cause the
> +	 * current request to be momentarily preempted. (If it fails to
> +	 * be preempted, it will be reset). As we have marked our context
> +	 * as banned, any incomplete request, including any running, will
> +	 * be skipped following the preemption.
> +	 *
> +	 * If there is no hangchecking (one of the reasons why we try to
> +	 * cancel the context) and no forced preemption, there may be no
> +	 * means by which we reset the GPU and evict the persistent hog.
> +	 * Ergo if we are unable to inject a preemptive pulse that can
> +	 * kill the banned context, we fallback to doing a local reset
> +	 * instead.
> +	 */
> +	return intel_engine_pulse(engine) == 0;
> +}
> +
> +static void
> +execlists_revoke_context(struct intel_context *ce, const char *force, int error)
> +{
> +	struct intel_engine_cs *engine;
> +
> +	/*
> +	 * Check the current active state of this context; if we
> +	 * are currently executing on the GPU we need to evict
> +	 * ourselves. On the other hand, if we haven't yet been
> +	 * submitted to the GPU or if everything is complete,
> +	 * we have nothing to do.
> +	 */
> +	engine = intel_context_inflight(ce);
> +
> +	/* First attempt to gracefully cancel the context */
> +	if (engine && !__cancel_engine(engine) && force)
> +		/*
> +		 * If we are unable to send a preemptive pulse to bump
> +		 * the context from the GPU, we have to resort to a full
> +		 * reset. We hope the collateral damage is worth it.
> +		 */
> +		intel_gt_handle_error(engine->gt, engine->mask, 0,
> +				      "context revoked from %s", force);
> +}
> +
>   static bool can_preempt(struct intel_engine_cs *engine)
>   {
>   	if (INTEL_GEN(engine->i915) > 8)
> @@ -2911,6 +2956,7 @@ static void init_execlists(struct intel_engine_cs *engine)
>   	u32 base = engine->mmio_base;
>   
>   	engine->sched.active_request = execlists_active_request;
> +	engine->sched.revoke_context = execlists_revoke_context;
>   	engine->sched.show = execlists_show;
>   	tasklet_setup(&engine->sched.tasklet, execlists_submission_tasklet);
>   
> @@ -3454,6 +3500,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
>   			ENGINE_VIRTUAL);
>   
>   	ve->base.sched.submit_request = virtual_submit_request;
> +	ve->base.sched.revoke_context = execlists_revoke_context;
>   	tasklet_setup(&ve->base.sched.tasklet, virtual_submission_tasklet);
>   
>   	virtual_engine_initial_hint(ve);
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index a8fb787278e6..7855601a4958 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -135,6 +135,25 @@ i915_sched_default_active_request(const struct i915_sched *se)
>   	return active;
>   }
>   
> +static bool context_active(struct intel_context *ce)
> +{
> +	return i915_active_fence_isset(&ce->timeline->last_request);
> +}
> +
> +static void
> +i915_sched_default_revoke_context(struct intel_context *ce,
> +				  const char *force,
> +				  int error)
> +{
> +	/*
> +	 * Without backend support, we cannot remove the context from the
> +	 * HW gracefully. All we can do is force a reset, as a last resort.
> +	 */
> +	if (force && context_active(ce))
> +		intel_gt_handle_error(ce->engine->gt, ce->engine->mask, 0,
> +				      "context revoked from %s", force);
> +}
> +
>   void i915_sched_init(struct i915_sched *se,
>   		     struct device *dev,
>   		     const char *name,
> @@ -158,6 +177,7 @@ void i915_sched_init(struct i915_sched *se,
>   
>   	se->submit_request = i915_request_enqueue;
>   	se->active_request = i915_sched_default_active_request;
> +	se->revoke_context = i915_sched_default_revoke_context;
>   }
>   
>   void i915_sched_park(struct i915_sched *se)
> diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
> index a8502c94d7c5..84232a07163f 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler_types.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
> @@ -15,6 +15,7 @@
>   
>   struct drm_printer;
>   struct i915_request;
> +struct intel_context;
>   
>   /**
>    * struct i915_sched - funnels requests towards hardware
> @@ -40,6 +41,10 @@ struct i915_sched {
>   
>   	struct i915_request *(*active_request)(const struct i915_sched *se);
>   
> +	void (*revoke_context)(struct intel_context *ce,
> +			       const char *whom,
> +			       int error);
> +
>   	void (*show)(struct drm_printer *m,
>   		     struct i915_sched *se,
>   		     void (*show_request)(struct drm_printer *m,
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [Intel-gfx] [PATCH 04/31] drm/i915: Move timeslicing flag to scheduler
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 04/31] drm/i915: Move timeslicing flag to scheduler Chris Wilson
@ 2021-02-08 11:43   ` Tvrtko Ursulin
  0 siblings, 0 replies; 54+ messages in thread
From: Tvrtko Ursulin @ 2021-02-08 11:43 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 08/02/2021 10:52, Chris Wilson wrote:
> Whether a scheduler chooses to implement timeslicing is up to it, and
> not an underlying property of the HW engine. The scheduler does depend
> on the HW supporting preemption.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine.h           |  6 ++++++
>   drivers/gpu/drm/i915/gt/intel_engine_types.h     | 16 +++-------------
>   .../gpu/drm/i915/gt/intel_execlists_submission.c |  8 +++++---
>   drivers/gpu/drm/i915/gt/selftest_execlists.c     |  2 +-
>   drivers/gpu/drm/i915/i915_scheduler_types.h      | 10 ++++++++++
>   5 files changed, 25 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index 875fde52bcb6..5d3bcbfe8f6e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -280,4 +280,10 @@ intel_engine_flush_scheduler(struct intel_engine_cs *engine)
>   	i915_sched_flush(intel_engine_get_scheduler(engine));
>   }
>   
> +static inline bool
> +intel_engine_has_timeslices(struct intel_engine_cs *engine)
> +{
> +	return i915_sched_has_timeslices(intel_engine_get_scheduler(engine));
> +}
> +
>   #endif /* _INTEL_RINGBUFFER_H_ */
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index ce5732099815..08bddc5263aa 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -444,10 +444,9 @@ struct intel_engine_cs {
>   #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
>   #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
>   #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
> -#define I915_ENGINE_HAS_TIMESLICES   BIT(4)
> -#define I915_ENGINE_IS_VIRTUAL       BIT(5)
> -#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
> -#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
> +#define I915_ENGINE_IS_VIRTUAL       BIT(4)
> +#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(5)
> +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(6)
>   	unsigned int flags;
>   
>   	/*
> @@ -542,15 +541,6 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine)
>   	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
>   }
>   
> -static inline bool
> -intel_engine_has_timeslices(const struct intel_engine_cs *engine)
> -{
> -	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
> -		return false;
> -
> -	return engine->flags & I915_ENGINE_HAS_TIMESLICES;
> -}
> -
>   static inline bool
>   intel_engine_is_virtual(const struct intel_engine_cs *engine)
>   {
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index 0f2c3c62cac9..aa1816d28def 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -1025,7 +1025,7 @@ static bool needs_timeslice(const struct intel_engine_cs *engine,
>   {
>   	const struct i915_sched *se = &engine->sched;
>   
> -	if (!intel_engine_has_timeslices(engine))
> +	if (!i915_sched_has_timeslices(se))
>   		return false;
>   
>   	/* If not currently active, or about to switch, wait for next event */
> @@ -2896,8 +2896,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>   		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
>   		if (can_preempt(engine)) {
>   			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
> -			if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
> -				engine->flags |= I915_ENGINE_HAS_TIMESLICES;
>   		}
>   	}
>   
> @@ -2961,6 +2959,10 @@ static void init_execlists(struct intel_engine_cs *engine)
>   
>   	i915_sched_select_mode(&engine->sched, I915_SCHED_MODE_PRIORITY);
>   
> +	if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION) &&
> +	    intel_engine_has_preemption(engine))
> +		__set_bit(I915_SCHED_TIMESLICE_BIT, &engine->sched.flags);
> +
>   	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
>   	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> index 04ded3a2d491..be99fbd7cfab 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> @@ -3809,7 +3809,7 @@ static unsigned int
>   __select_siblings(struct intel_gt *gt,
>   		  unsigned int class,
>   		  struct intel_engine_cs **siblings,
> -		  bool (*filter)(const struct intel_engine_cs *))
> +		  bool (*filter)(struct intel_engine_cs *))
>   {
>   	unsigned int n = 0;
>   	unsigned int inst;
> diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
> index 2cb46b2e1ac8..3c94378def52 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler_types.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
> @@ -12,6 +12,7 @@
>   #include <linux/workqueue.h>
>   
>   #include "i915_priolist_types.h"
> +#include "i915_utils.h"
>   
>   struct drm_printer;
>   struct i915_request;
> @@ -21,6 +22,7 @@ enum {
>   	I915_SCHED_ENABLE_BIT = 0,
>   	I915_SCHED_ACTIVE_BIT, /* can reorder the request flow */
>   	I915_SCHED_PRIORITY_BIT, /* priority sorting of queue */
> +	I915_SCHED_TIMESLICE_BIT, /* multitasking for long workloads */
>   };
>   
>   /**
> @@ -240,4 +242,12 @@ static inline bool i915_sched_has_priorities(const struct i915_sched *se)
>   	return test_bit(I915_SCHED_PRIORITY_BIT, &se->flags);
>   }
>   
> +static inline bool i915_sched_has_timeslices(const struct i915_sched *se)
> +{
> +	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
> +		return false;
> +
> +	return test_bit(I915_SCHED_TIMESLICE_BIT, &se->flags);
> +}
> +
>   #endif /* _I915_SCHEDULER_TYPES_H_ */
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [Intel-gfx] [PATCH 05/31] drm/i915/gt: Declare when we enabled timeslicing
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 05/31] drm/i915/gt: Declare when we enabled timeslicing Chris Wilson
@ 2021-02-08 11:44   ` Tvrtko Ursulin
  0 siblings, 0 replies; 54+ messages in thread
From: Tvrtko Ursulin @ 2021-02-08 11:44 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 08/02/2021 10:52, Chris Wilson wrote:
> Let userspace know if they can trust timeslicing by including it as part
> of the I915_PARAM_HAS_SCHEDULER::I915_SCHEDULER_CAP_TIMESLICING.
> 
> v2: Only declare timeslicing if we can safely preempt userspace.
> 
> Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_user.c | 1 +
>   include/uapi/drm/i915_drm.h                 | 1 +
>   2 files changed, 2 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> index 3d3cdc080c32..3fab439ba22b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> @@ -102,6 +102,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915)
>   #define MAP(x, y) { I915_SCHED_##x, ilog2(I915_SCHEDULER_CAP_##y) }
>   		MAP(ACTIVE_BIT, ENABLED),
>   		MAP(PRIORITY_BIT, PRIORITY),
> +		MAP(TIMESLICE_BIT, TIMESLICING),
>   #undef MAP
>   	};
>   	struct intel_engine_cs *engine;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 1987e2ea79a3..cda0f391d965 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -524,6 +524,7 @@ typedef struct drm_i915_irq_wait {
>   #define   I915_SCHEDULER_CAP_PREEMPTION	(1ul << 2)
>   #define   I915_SCHEDULER_CAP_SEMAPHORES	(1ul << 3)
>   #define   I915_SCHEDULER_CAP_ENGINE_BUSY_STATS	(1ul << 4)
> +#define   I915_SCHEDULER_CAP_TIMESLICING	(1ul << 5)
>   
>   #define I915_PARAM_HUC_STATUS		 42
>   
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [Intel-gfx] [PATCH 09/31] drm/i915: Replace priolist rbtree with a skiplist
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 09/31] drm/i915: Replace priolist rbtree with a skiplist Chris Wilson
@ 2021-02-08 12:29   ` Tvrtko Ursulin
  2021-02-08 12:46     ` Chris Wilson
  2021-02-08 15:23   ` Tvrtko Ursulin
  1 sibling, 1 reply; 54+ messages in thread
From: Tvrtko Ursulin @ 2021-02-08 12:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 08/02/2021 10:52, Chris Wilson wrote:
> Replace the priolist rbtree with a skiplist. The crucial difference is
> that walking and removing the first element of a skiplist is O(1), but
> O(lgN) for an rbtree, as we need to rebalance on remove. This is a
> hindrance for submission latency as it occurs between picking a request
> from the priolist and submitting it to hardware, as well as effectively
> tripling the number of O(lgN) operations required under the irqoff lock.
> This is critical to reducing the latency jitter with multiple clients.
> 
> The downsides to skiplists are that lookup/insertion is only
> probabilistically O(lgN), and there is a significant memory penalty,
> as each skip node is larger than the rbtree equivalent. Furthermore, we
> don't use dynamic arrays for the skiplist, so the allocation is fixed
> and imposes an upper bound on the scalability wrt the number of
> inflight requests.
> 
> In the following patches, we introduce a new sort key to the scheduler,
> a virtual deadline. This imposes a different structure to the tree.
> Using a priority sort, we have very few priority levels active at any
> time, most likely just the default priority, and so the rbtree degenerates
> to a single element containing the list of all ready requests. The
> deadlines in contrast are very sparse, and typically each request has a
> unique deadline. Instead of being able to simply walk the list during
> dequeue, with the deadline scheduler we have to iterate through the bst
> on the critical submission path. Skiplists are vastly superior in this
> instance due to the O(1) iteration during dequeue, with very similar
> characteristics [on average] to the rbtree for insertion.
> 
> This means that by using skiplists we can introduce a sparse sort key
> without degrading latency on the critical submission path.
> 
> As an example, one simple case where we try to do lots of
> semi-independent work without any priority management (gem_exec_parallel),
> the lock hold times were:
> [worst]        [total]    [avg]
>   973.05     6301584.84     0.35 # plain rbtree
>   559.82     5424915.25     0.33 # best rbtree with pruning
>   208.21     3898784.09     0.24 # skiplist
>    34.05     5784106.01     0.32 # rbtree without deadlines
>    23.35     4152999.80     0.24 # skiplist without deadlines
> 
> Based on the skiplist implementation by Dr Con Kolivas for MuQSS.
> 
> References: https://en.wikipedia.org/wiki/Skip_list
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   .../drm/i915/gt/intel_execlists_submission.c  | 168 +++++-----
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  41 +--
>   drivers/gpu/drm/i915/i915_priolist_types.h    |  64 +++-
>   drivers/gpu/drm/i915/i915_scheduler.c         | 304 +++++++++++++-----
>   drivers/gpu/drm/i915/i915_scheduler.h         |  16 +-
>   drivers/gpu/drm/i915/i915_scheduler_types.h   |   2 +-
>   .../drm/i915/selftests/i915_mock_selftests.h  |   1 +
>   .../gpu/drm/i915/selftests/i915_scheduler.c   |  53 ++-
>   8 files changed, 454 insertions(+), 195 deletions(-)
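
As an aside for readers unfamiliar with the structure: the patch below
introduces a classic skiplist. A minimal, self-contained sketch of the
idea (a sentinel with per-level next[] pointers, probabilistic
insertion, and O(1) removal of the first node) follows. It is only an
illustration, not the i915 implementation; the height and key type here
are arbitrary.

#include <stdio.h>
#include <stdlib.h>

#define HEIGHT 8

struct node {
	int key;
	struct node *next[HEIGHT];
};

static struct node head; /* sentinel, next[] initially all NULL */

static int random_level(void)
{
	int level = 0;

	/* each additional level is kept with probability 1/2 */
	while (level < HEIGHT - 1 && (rand() & 1))
		level++;

	return level;
}

static void insert(struct node *n)
{
	struct node *prev = &head;
	int lvl = random_level();
	int i;

	for (i = HEIGHT - 1; i >= 0; i--) {
		while (prev->next[i] && prev->next[i]->key < n->key)
			prev = prev->next[i];
		if (i <= lvl) { /* splice n into this level */
			n->next[i] = prev->next[i];
			prev->next[i] = n;
		}
	}
}

static struct node *pop_first(void)
{
	struct node *n = head.next[0];
	int i;

	if (!n)
		return NULL;

	/* O(1): the first node is head.next[i] on every level it occupies */
	for (i = 0; i < HEIGHT; i++)
		if (head.next[i] == n)
			head.next[i] = n->next[i];

	return n;
}

int main(void)
{
	int keys[] = { 3, 1, 2 };
	struct node *n;
	int i;

	for (i = 0; i < 3; i++) {
		n = calloc(1, sizeof(*n));
		n->key = keys[i];
		insert(n);
	}

	while ((n = pop_first())) { /* pops 1, 2, 3 in order */
		printf("%d\n", n->key);
		free(n);
	}

	return 0;
}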
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index 78fda9b4f626..4a0258347c10 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -254,11 +254,6 @@ static void ring_set_paused(const struct intel_engine_cs *engine, int state)
>   		wmb();
>   }
>   
> -static struct i915_priolist *to_priolist(struct rb_node *rb)
> -{
> -	return rb_entry(rb, struct i915_priolist, node);
> -}
> -
>   static int rq_prio(const struct i915_request *rq)
>   {
>   	return READ_ONCE(rq->sched.attr.priority);
> @@ -282,15 +277,27 @@ static int effective_prio(const struct i915_request *rq)
>   	return prio;
>   }
>   
> +static struct i915_request *first_request(const struct i915_sched *se)
> +{
> +	struct i915_priolist *pl = se->queue.sentinel.next[0];
> +
> +	if (pl == &se->queue.sentinel)
> +		return NULL;
> +
> +	return list_first_entry_or_null(&pl->requests,
> +					struct i915_request,
> +					sched.link);
> +}
> +
>   static int queue_prio(const struct i915_sched *se)
>   {
> -	struct rb_node *rb;
> +	struct i915_request *rq;
>   
> -	rb = rb_first_cached(&se->queue);
> -	if (!rb)
> +	rq = first_request(se);
> +	if (!rq)
>   		return INT_MIN;
>   
> -	return to_priolist(rb)->priority;
> +	return rq_prio(rq);
>   }
>   
>   static int virtual_prio(const struct intel_engine_execlists *el)
> @@ -300,7 +307,7 @@ static int virtual_prio(const struct intel_engine_execlists *el)
>   	return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
>   }
>   
> -static bool need_preempt(const struct intel_engine_cs *engine,
> +static bool need_preempt(struct intel_engine_cs *engine,
>   			 const struct i915_request *rq)
>   {
>   	const struct i915_sched *se = &engine->sched;
> @@ -1144,7 +1151,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	struct i915_request **port = execlists->pending;
>   	struct i915_request ** const last_port = port + execlists->port_mask;
>   	struct i915_request *last, * const *active;
> +	struct i915_request *rq, *rn;
>   	struct virtual_engine *ve;
> +	struct i915_priolist *pl;
>   	struct rb_node *rb;
>   	bool submit = false;
>   
> @@ -1355,87 +1364,79 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   			break;
>   	}
>   
> -	while ((rb = rb_first_cached(&se->queue))) {
> -		struct i915_priolist *p = to_priolist(rb);
> -		struct i915_request *rq, *rn;
> +	i915_sched_dequeue(se, pl, rq, rn) {
> +		bool merge = true;
>   
> -		priolist_for_each_request_consume(rq, rn, p) {
> -			bool merge = true;
> +		/*
> +		 * Can we combine this request with the current port?
> +		 * It has to be the same context/ringbuffer and not
> +		 * have any exceptions (e.g. GVT saying never to
> +		 * combine contexts).
> +		 *
> +		 * If we can combine the requests, we can execute both
> +		 * by updating the RING_TAIL to point to the end of the
> +		 * second request, and so we never need to tell the
> +		 * hardware about the first.
> +		 */
> +		if (last && !can_merge_rq(last, rq)) {
> +			/*
> +			 * If we are on the second port and cannot
> +			 * combine this request with the last, then we
> +			 * are done.
> +			 */
> +			if (port == last_port)
> +				goto done;
>   
>   			/*
> -			 * Can we combine this request with the current port?
> -			 * It has to be the same context/ringbuffer and not
> -			 * have any exceptions (e.g. GVT saying never to
> -			 * combine contexts).
> -			 *
> -			 * If we can combine the requests, we can execute both
> -			 * by updating the RING_TAIL to point to the end of the
> -			 * second request, and so we never need to tell the
> -			 * hardware about the first.
> +			 * We must not populate both ELSP[] with the
> +			 * same LRCA, i.e. we must submit 2 different
> +			 * contexts if we submit 2 ELSP.
>   			 */
> -			if (last && !can_merge_rq(last, rq)) {
> -				/*
> -				 * If we are on the second port and cannot
> -				 * combine this request with the last, then we
> -				 * are done.
> -				 */
> -				if (port == last_port)
> -					goto done;
> +			if (last->context == rq->context)
> +				goto done;
>   
> -				/*
> -				 * We must not populate both ELSP[] with the
> -				 * same LRCA, i.e. we must submit 2 different
> -				 * contexts if we submit 2 ELSP.
> -				 */
> -				if (last->context == rq->context)
> -					goto done;
> +			if (i915_request_has_sentinel(last))
> +				goto done;
>   
> -				if (i915_request_has_sentinel(last))
> -					goto done;
> +			/*
> +			 * We avoid submitting virtual requests into
> +			 * the secondary ports so that we can migrate
> +			 * the request immediately to another engine
> +			 * rather than wait for the primary request.
> +			 */
> +			if (rq->execution_mask != engine->mask)
> +				goto done;
>   
> -				/*
> -				 * We avoid submitting virtual requests into
> -				 * the secondary ports so that we can migrate
> -				 * the request immediately to another engine
> -				 * rather than wait for the primary request.
> -				 */
> -				if (rq->execution_mask != engine->mask)
> -					goto done;
> +			/*
> +			 * If GVT overrides us we only ever submit
> +			 * port[0], leaving port[1] empty. Note that we
> +			 * also have to be careful that we don't queue
> +			 * the same context (even though a different
> +			 * request) to the second port.
> +			 */
> +			if (ctx_single_port_submission(last->context) ||
> +			    ctx_single_port_submission(rq->context))
> +				goto done;
>   
> -				/*
> -				 * If GVT overrides us we only ever submit
> -				 * port[0], leaving port[1] empty. Note that we
> -				 * also have to be careful that we don't queue
> -				 * the same context (even though a different
> -				 * request) to the second port.
> -				 */
> -				if (ctx_single_port_submission(last->context) ||
> -				    ctx_single_port_submission(rq->context))
> -					goto done;
> -
> -				merge = false;
> -			}
> -
> -			if (__i915_request_submit(rq)) {
> -				if (!merge) {
> -					*port++ = i915_request_get(last);
> -					last = NULL;
> -				}
> -
> -				GEM_BUG_ON(last &&
> -					   !can_merge_ctx(last->context,
> -							  rq->context));
> -				GEM_BUG_ON(last &&
> -					   i915_seqno_passed(last->fence.seqno,
> -							     rq->fence.seqno));
> -
> -				submit = true;
> -				last = rq;
> -			}
> +			merge = false;
>   		}
>   
> -		rb_erase_cached(&p->node, &se->queue);
> -		i915_priolist_free(p);
> +		if (__i915_request_submit(rq)) {
> +			if (!merge) {
> +				*port++ = i915_request_get(last);
> +				last = NULL;
> +			}
> +
> +			GEM_BUG_ON(last &&
> +				   !can_merge_ctx(last->context,
> +						  rq->context));
> +			GEM_BUG_ON(last &&
> +				   i915_seqno_passed(last->fence.seqno,
> +						     rq->fence.seqno));
> +
> +			submit = true;
> +			last = rq;
> +		}
>   	}
>   done:
>   	*port++ = i915_request_get(last);
> @@ -1456,7 +1457,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	 * request triggering preemption on the next dequeue (or subsequent
>   	 * interrupt for secondary ports).
>   	 */
> -	execlists->queue_priority_hint = queue_prio(se);
> +	execlists->queue_priority_hint = pl->priority;
>   	spin_unlock(&se->lock);
>   
>   	/*
> @@ -2716,7 +2717,6 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
>   	}
>   
>   	execlists->queue_priority_hint = INT_MIN;
> -	se->queue = RB_ROOT_CACHED;
>   
>   	GEM_BUG_ON(__tasklet_is_enabled(&se->tasklet));
>   	se->tasklet.callback = nop_submission_tasklet;
> @@ -3173,6 +3173,8 @@ static void virtual_context_exit(struct intel_context *ce)
>   
>   	for (n = 0; n < ve->num_siblings; n++)
>   		intel_engine_pm_put(ve->siblings[n]);
> +
> +	i915_sched_park(intel_engine_get_scheduler(&ve->base));
>   }
>   
>   static const struct intel_context_ops virtual_context_ops = {
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index d14b9db77df8..c16393df42a0 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -60,11 +60,6 @@
>   
>   #define GUC_REQUEST_SIZE 64 /* bytes */
>   
> -static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> -{
> -	return rb_entry(rb, struct i915_priolist, node);
> -}
> -
>   static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id)
>   {
>   	struct guc_stage_desc *base = guc->stage_desc_pool_vaddr;
> @@ -186,9 +181,10 @@ static void __guc_dequeue(struct intel_engine_cs *engine)
>   	struct i915_request **first = execlists->inflight;
>   	struct i915_request ** const last_port = first + execlists->port_mask;
>   	struct i915_request *last = first[0];
> +	struct i915_request *rq, *rn;
>   	struct i915_request **port;
> +	struct i915_priolist *pl;
>   	bool submit = false;
> -	struct rb_node *rb;
>   
>   	lockdep_assert_held(&se->lock);
>   
> @@ -205,32 +201,22 @@ static void __guc_dequeue(struct intel_engine_cs *engine)
>   	 * event.
>   	 */
>   	port = first;
> -	while ((rb = rb_first_cached(&se->queue))) {
> -		struct i915_priolist *p = to_priolist(rb);
> -		struct i915_request *rq, *rn;
> +	i915_sched_dequeue(se, pl, rq, rn) {
> +		if (last && rq->context != last->context) {
> +			if (port == last_port)
> +				goto done;
>   
> -		priolist_for_each_request_consume(rq, rn, p) {
> -			if (last && rq->context != last->context) {
> -				if (port == last_port)
> -					goto done;
> -
> -				*port = schedule_in(last,
> -						    port - execlists->inflight);
> -				port++;
> -			}
> -
> -			list_del_init(&rq->sched.link);
> -			__i915_request_submit(rq);
> -			submit = true;
> -			last = rq;
> +			*port = schedule_in(last, port - execlists->inflight);
> +			port++;
>   		}
>   
> -		rb_erase_cached(&p->node, &se->queue);
> -		i915_priolist_free(p);
> +		list_del_init(&rq->sched.link);
> +		__i915_request_submit(rq);
> +		submit = true;
> +		last = rq;
>   	}
>   done:
> -	execlists->queue_priority_hint =
> -		rb ? to_priolist(rb)->priority : INT_MIN;
> +	execlists->queue_priority_hint = pl->priority;
>   	if (submit) {
>   		*port = schedule_in(last, port - execlists->inflight);
>   		*++port = NULL;
> @@ -361,7 +347,6 @@ static void guc_reset_cancel(struct intel_engine_cs *engine)
>   	__i915_sched_cancel_queue(se);
>   
>   	execlists->queue_priority_hint = INT_MIN;
> -	se->queue = RB_ROOT_CACHED;
>   
>   	spin_unlock_irqrestore(&se->lock, flags);
>   	intel_engine_signal_breadcrumbs(engine);
> diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
> index bc2fa84f98a8..ee7482b9c813 100644
> --- a/drivers/gpu/drm/i915/i915_priolist_types.h
> +++ b/drivers/gpu/drm/i915/i915_priolist_types.h
> @@ -38,10 +38,72 @@ enum {
>   #define I915_PRIORITY_UNPREEMPTABLE INT_MAX
>   #define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1)
>   
> +/*
> + * The slab returns power-of-two chunks of memory, so fill out the
> + * node to the next cacheline.
> + *
> + * We can estimate how many requests the skiplist will scale to based
> + * on its height:
> + *   11 =>  4 million requests
> + *   12 => 16 million requests
> + */
> +#ifdef CONFIG_64BIT
> +#define I915_PRIOLIST_HEIGHT 12
> +#else
> +#define I915_PRIOLIST_HEIGHT 11
> +#endif
> +
> +/*
> + * i915_priolist forms a skiplist. The skiplist is built in layers,
> + * starting at the base [0] is a singly linked list of all i915_priolist.
> + * Each higher layer contains a fraction of the i915_priolist from the
> + * previous layer:
> + *
> + * S[0] 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF S
> + * E[1] >1>3>5>7>9>B>D>F>1>3>5>7>9>B>D>F>1>3>5>7>9>B>D>F>1>3>5>7>9>B>D>F E
> + * N[2] -->3-->7-->B-->F-->3-->7-->B-->F-->3-->7-->B-->F-->3-->7-->B-->F N
> + * T[3] ------>7------>F-------7------>F------>7------>F------>7------>F T
> + * I[4] -------------->F-------------->F-------------->F-------------->F I
> + * N[5] ------------------------------>F------------------------------>F N
> + * E[6] ------------------------------>F-------------------------------> E
> + * L[7] ---------------------------------------------------------------> L
> + *
> + * To iterate through all active i915_priolist, we only need to follow
> + * the chain in i915_priolist.next[0] (see for_each_priolist()).
> + *
> + * To quickly find a specific key (or insert point), we can perform a binary
> + * search by starting at the highest level and following the linked list
> + * at that level until we either find the node, or have gone passed the key.
> + * Then we descend a level, and start walking the list again starting from
> + * the current position, until eventually we find our key, or we run out of
> + * levels.
> + *
> + * https://en.wikipedia.org/wiki/Skip_list
> + */
>   struct i915_priolist {
>   	struct list_head requests;
> -	struct rb_node node;
>   	int priority;
> +
> +	int level;
> +	struct i915_priolist *next[I915_PRIOLIST_HEIGHT];
>   };
>   
> +struct i915_priolist_root {
> +	struct i915_priolist sentinel;
> +	u32 prng;
> +};
> +
> +#define i915_priolist_is_empty(root) ((root)->sentinel.level < 0)
> +
> +#define for_each_priolist(p, root) \
> +	for ((p) = (root)->sentinel.next[0]; \
> +	     (p) != &(root)->sentinel; \
> +	     (p) = (p)->next[0])
> +
> +#define priolist_for_each_request(it, plist) \
> +	list_for_each_entry(it, &(plist)->requests, sched.link)
> +
> +#define priolist_for_each_request_safe(it, n, plist) \
> +	list_for_each_entry_safe(it, n, &(plist)->requests, sched.link)
> +
>   #endif /* _I915_PRIOLIST_TYPES_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 312e1538d001..518eac67959e 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -4,7 +4,9 @@
>    * Copyright © 2018 Intel Corporation
>    */
>   
> +#include <linux/bitops.h>
>   #include <linux/mutex.h>
> +#include <linux/prandom.h>
>   
>   #include "gt/intel_ring.h"
>   #include "gt/intel_lrc_reg.h"
> @@ -168,6 +170,16 @@ void i915_sched_select_mode(struct i915_sched *se, enum i915_sched_mode mode)
>   	}
>   }
>   
> +static void init_priolist(struct i915_priolist_root *const root)
> +{
> +	struct i915_priolist *pl = &root->sentinel;
> +
> +	memset_p((void **)pl->next, pl, ARRAY_SIZE(pl->next));
> +	pl->requests.prev = NULL;
> +	pl->priority = INT_MIN;
> +	pl->level = -1;
> +}
> +
>   void i915_sched_init(struct i915_sched *se,
>   		     struct device *dev,
>   		     const char *name,
> @@ -183,9 +195,9 @@ void i915_sched_init(struct i915_sched *se,
>   
>   	se->mask = mask;
>   
> +	init_priolist(&se->queue);
>   	INIT_LIST_HEAD(&se->requests);
>   	INIT_LIST_HEAD(&se->hold);
> -	se->queue = RB_ROOT_CACHED;
>   
>   	init_ipi(&se->ipi);
>   
> @@ -194,8 +206,60 @@ void i915_sched_init(struct i915_sched *se,
>   	se->revoke_context = i915_sched_default_revoke_context;
>   }
>   
> +__maybe_unused static bool priolist_idle(struct i915_priolist_root *root)
> +{
> +	struct i915_priolist *pl = &root->sentinel;
> +	int lvl;
> +
> +	for (lvl = 0; lvl < ARRAY_SIZE(pl->next); lvl++) {
> +		if (pl->next[lvl] != pl) {
> +			GEM_TRACE_ERR("root[%d] is not empty\n", lvl);
> +			return false;
> +		}
> +	}
> +
> +	if (pl->level != -1) {
> +		GEM_TRACE_ERR("root is not clear: %d\n", pl->level);
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +static bool pl_empty(struct list_head *st)
> +{
> +	return !st->prev;
> +}
> +
> +static void pl_push(struct i915_priolist *pl, struct list_head *st)
> +{
> +	/* Keep list_empty(&pl->requests) valid for concurrent readers */
> +	pl->requests.prev = st->prev;
> +	st->prev = &pl->requests;
> +	GEM_BUG_ON(pl_empty(st));
> +}
> +
> +static struct i915_priolist *pl_pop(struct list_head *st)
> +{
> +	struct i915_priolist *pl;
> +
> +	GEM_BUG_ON(pl_empty(st));
> +	pl = container_of(st->prev, typeof(*pl), requests);
> +	st->prev = pl->requests.prev;
> +
> +	return pl;
> +}
> +
>   void i915_sched_park(struct i915_sched *se)
>   {
> +	struct i915_priolist_root *root = &se->queue;
> +	struct list_head *list = &root->sentinel.requests;
> +
> +	GEM_BUG_ON(!priolist_idle(root));
> +
> +	while (!pl_empty(list))
> +		kmem_cache_free(global.slab_priorities, pl_pop(list));
> +
>   	GEM_BUG_ON(!i915_sched_is_idle(se));
>   	se->no_priolist = false;
>   }
> @@ -251,70 +315,71 @@ static inline bool node_signaled(const struct i915_sched_node *node)
>   	return i915_request_completed(node_to_request(node));
>   }
>   
> -static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +static inline unsigned int random_level(struct i915_priolist_root *root)
>   {
> -	return rb_entry(rb, struct i915_priolist, node);
> -}
> -
> -static void assert_priolists(struct i915_sched * const se)
> -{
> -	struct rb_node *rb;
> -	long last_prio;
> -
> -	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> -		return;
> -
> -	GEM_BUG_ON(rb_first_cached(&se->queue) !=
> -		   rb_first(&se->queue.rb_root));
> -
> -	last_prio = INT_MAX;
> -	for (rb = rb_first_cached(&se->queue); rb; rb = rb_next(rb)) {
> -		const struct i915_priolist *p = to_priolist(rb);
> -
> -		GEM_BUG_ON(p->priority > last_prio);
> -		last_prio = p->priority;
> -	}
> +	/*
> +	 * Given a uniform distribution of random numbers over the u32, then
> +	 * the probability each bit being unset is P=0.5. The probability of a
> +	 * successive sequence of bits being unset is P(n) = 0.5^n [n > 0].
> +	 *   P(level:1) = 0.5
> +	 *   P(level:2) = 0.25
> +	 *   P(level:3) = 0.125
> +	 *   P(level:4) = 0.0625
> +	 *   ...
> +	 * So we can use ffs() on a good random number generator to pick our
> +	 * level. We divide by two to reduce the probability of choosing a
> +	 * level to .25, as the cost of descending a level is the same as
> +	 * following an extra link in the chain at that level (so we can
> +	 * pack more nodes into fewer levels without incurring extra cost,
> +	 * and allow scaling to higher volumes of requests without expanding
> +	 * the height of the skiplist).
> +	 */
> +	root->prng = next_pseudo_random32(root->prng);
> +	return  __ffs(root->prng) / 2;
>   }
>   
>   static struct list_head *
>   lookup_priolist(struct i915_sched *se, int prio)
>   {
> -	struct i915_priolist *p;
> -	struct rb_node **parent, *rb;
> -	bool first = true;
> +	struct i915_priolist *update[I915_PRIOLIST_HEIGHT];
> +	struct i915_priolist_root *const root = &se->queue;
> +	struct i915_priolist *pl, *tmp;
> +	int lvl;
>   
>   	lockdep_assert_held(&se->lock);
> -	assert_priolists(se);
> -
>   	if (unlikely(se->no_priolist))
>   		prio = I915_PRIORITY_NORMAL;
>   
> +	for_each_priolist(pl, root) { /* recycle any empty elements before us */
> +		if (pl->priority <= prio || !list_empty(&pl->requests))
> +			break;
> +
> +		__i915_sched_dequeue_next(se);
> +	}
> +
>   find_priolist:
> -	/* most positive priority is scheduled first, equal priorities fifo */
> -	rb = NULL;
> -	parent = &se->queue.rb_root.rb_node;
> -	while (*parent) {
> -		rb = *parent;
> -		p = to_priolist(rb);
> -		if (prio > p->priority) {
> -			parent = &rb->rb_left;
> -		} else if (prio < p->priority) {
> -			parent = &rb->rb_right;
> -			first = false;
> -		} else {
> -			return &p->requests;
> -		}
> +	pl = &root->sentinel;
> +	lvl = pl->level;
> +	while (lvl >= 0) {
> +		while (tmp = pl->next[lvl], tmp->priority >= prio)
> +			pl = tmp;
> +		if (pl->priority == prio)
> +			goto out;
> +		update[lvl--] = pl;
>   	}
>   
>   	if (prio == I915_PRIORITY_NORMAL) {
> -		p = &se->default_priolist;
> +		pl = &se->default_priolist;
> +	} else if (!pl_empty(&root->sentinel.requests)) {
> +		pl = pl_pop(&root->sentinel.requests);
>   	} else {
> -		p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
> +		pl = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
>   		/* Convert an allocation failure to a priority bump */
> -		if (unlikely(!p)) {
> +		if (unlikely(!pl)) {
>   			prio = I915_PRIORITY_NORMAL; /* recurses just once */
>   
> -			/* To maintain ordering with all rendering, after an
> +			/*
> +			 * To maintain ordering with all rendering, after an
>   			 * allocation failure we have to disable all scheduling.
>   			 * Requests will then be executed in fifo, and schedule
>   			 * will ensure that dependencies are emitted in fifo.
> @@ -327,18 +392,123 @@ lookup_priolist(struct i915_sched *se, int prio)
>   		}
>   	}
>   
> -	p->priority = prio;
> -	INIT_LIST_HEAD(&p->requests);
> +	pl->priority = prio;
> +	INIT_LIST_HEAD(&pl->requests);
>   
> -	rb_link_node(&p->node, rb, parent);
> -	rb_insert_color_cached(&p->node, &se->queue, first);
> +	lvl = random_level(root);
> +	if (lvl > root->sentinel.level) {
> +		if (root->sentinel.level < I915_PRIOLIST_HEIGHT - 1) {
> +			lvl = ++root->sentinel.level;
> +			update[lvl] = &root->sentinel;
> +		} else {
> +			lvl = I915_PRIOLIST_HEIGHT - 1;
> +		}
> +	}
> +	GEM_BUG_ON(lvl < 0);
> +	GEM_BUG_ON(lvl >= ARRAY_SIZE(pl->next));
>   
> -	return &p->requests;
> +	pl->level = lvl;
> +	do {
> +		tmp = update[lvl];
> +		pl->next[lvl] = tmp->next[lvl];
> +		tmp->next[lvl] = pl;
> +	} while (--lvl >= 0);
> +
> +	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
> +		struct i915_priolist *chk;
> +
> +		chk = &root->sentinel;
> +		lvl = chk->level;
> +		do {
> +			while (tmp = chk->next[lvl], tmp->priority >= prio)
> +				chk = tmp;
> +		} while (--lvl >= 0);
> +
> +		GEM_BUG_ON(chk != pl);
> +	}
> +
> +out:
> +	GEM_BUG_ON(pl == &root->sentinel);
> +	return &pl->requests;
>   }
>   
> -void __i915_priolist_free(struct i915_priolist *p)
> +static void __remove_priolist(struct i915_sched *se, struct list_head *plist)
>   {
> -	kmem_cache_free(global.slab_priorities, p);
> +	struct i915_priolist_root *root = &se->queue;
> +	struct i915_priolist *pl, *tmp;
> +	struct i915_priolist *old =
> +		container_of(plist, struct i915_priolist, requests);
> +	int prio = old->priority;
> +	int lvl;
> +
> +	lockdep_assert_held(&se->lock);
> +	GEM_BUG_ON(!list_empty(plist));
> +
> +	pl = &root->sentinel;
> +	lvl = pl->level;
> +	GEM_BUG_ON(lvl < 0);
> +
> +	if (prio != I915_PRIORITY_NORMAL)
> +		pl_push(old, &pl->requests);
> +
> +	do {
> +		while (tmp = pl->next[lvl], tmp->priority > prio)
> +			pl = tmp;
> +		if (lvl <= old->level) {
> +			pl->next[lvl] = old->next[lvl];
> +			if (pl == &root->sentinel && old->next[lvl] == pl) {
> +				GEM_BUG_ON(pl->level != lvl);
> +				pl->level--;
> +			}
> +		}
> +	} while (--lvl >= 0);
> +	GEM_BUG_ON(tmp != old);
> +}
> +
> +static void remove_from_priolist(struct i915_sched *se,
> +				 struct i915_request *rq,
> +				 struct list_head *list,
> +				 bool tail)
> +{
> +	struct list_head *prev = rq->sched.link.prev;

This depends on rq being at the head of its list?

> +
> +	GEM_BUG_ON(!i915_request_in_priority_queue(rq));
> +
> +	__list_del_entry(&rq->sched.link);
> +	if (tail)
> +		list_add_tail(&rq->sched.link, list);
> +	else
> +		list_add(&rq->sched.link, list);

So it is more of a move than a remove(_from_priolist)?

> +
> +	/* If we just removed the last element in the old plist, delete it */
> +	if (list_empty(prev))
> +		__remove_priolist(se, prev);
> +}
> +
> +struct i915_priolist *__i915_sched_dequeue_next(struct i915_sched *se)
> +{
> +	struct i915_priolist * const s = &se->queue.sentinel;
> +	struct i915_priolist *pl = s->next[0];
> +	int lvl;
> +
> +	GEM_BUG_ON(!list_empty(&pl->requests));

Lost as to why pl->requests has to be empty at this point. Considering:

+#define i915_sched_dequeue(se, pl, rq, rn) \
+	for ((pl) = (se)->queue.sentinel.next[0]; \
+	     (pl) != &(se)->queue.sentinel; \
+	     (pl) = __i915_sched_dequeue_next(se)) \
+		priolist_for_each_request_safe(rq, rn, pl)
+

I also don't understand what it would dequeue. A whole priolist's worth of 
requests at a time? But it can't be empty if there is something to dequeue. 
And who puts any unconsumed requests back somewhere in this case.

Regards,

Tvrtko

> +	GEM_BUG_ON(pl == s);
> +
> +	/* Keep pl->next[0] valid for for_each_priolist iteration */
> +	if (pl->priority != I915_PRIORITY_NORMAL)
> +		pl_push(pl, &s->requests);
> +
> +	lvl = pl->level;
> +	GEM_BUG_ON(lvl < 0);
> +	do {
> +		s->next[lvl] = pl->next[lvl];
> +		if (pl->next[lvl] == s) {
> +			GEM_BUG_ON(s->level != lvl);
> +			s->level--;
> +		}
> +	} while (--lvl >= 0);
> +
> +	return pl->next[0];
>   }
>   
>   static struct i915_request *
> @@ -491,7 +661,7 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio)
>   
>   		GEM_BUG_ON(rq->engine != engine);
>   		if (i915_request_in_priority_queue(rq))
> -			list_move_tail(&rq->sched.link, plist);
> +			remove_from_priolist(se, rq, plist, true);
>   
>   		/* Defer (tasklet) submission until after all updates. */
>   		kick_submission(engine, rq, prio);
> @@ -627,8 +797,7 @@ void __i915_sched_defer_request(struct intel_engine_cs *engine,
>   
>   		/* Note list is reversed for waiters wrt signal hierarchy */
>   		GEM_BUG_ON(rq->engine != engine);
> -		GEM_BUG_ON(!i915_request_in_priority_queue(rq));
> -		list_move(&rq->sched.link, &dfs);
> +		remove_from_priolist(se, rq, &dfs, false);
>   
>   		/* Track our visit, and prevent duplicate processing */
>   		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> @@ -927,7 +1096,7 @@ void i915_sched_resume_request(struct intel_engine_cs *engine,
>   void __i915_sched_cancel_queue(struct i915_sched *se)
>   {
>   	struct i915_request *rq, *rn;
> -	struct rb_node *rb;
> +	struct i915_priolist *pl;
>   
>   	lockdep_assert_held(&se->lock);
>   
> @@ -936,16 +1105,9 @@ void __i915_sched_cancel_queue(struct i915_sched *se)
>   		i915_request_put(i915_request_mark_eio(rq));
>   
>   	/* Flush the queued requests to the timeline list (for retiring). */
> -	while ((rb = rb_first_cached(&se->queue))) {
> -		struct i915_priolist *p = to_priolist(rb);
> -
> -		priolist_for_each_request_consume(rq, rn, p) {
> -			i915_request_put(i915_request_mark_eio(rq));
> -			__i915_request_submit(rq);
> -		}
> -
> -		rb_erase_cached(&p->node, &se->queue);
> -		i915_priolist_free(p);
> +	i915_sched_dequeue(se, pl, rq, rn) {
> +		i915_request_put(i915_request_mark_eio(rq));
> +		__i915_request_submit(rq);
>   	}
>   	GEM_BUG_ON(!i915_sched_is_idle(se));
>   
> @@ -1225,9 +1387,9 @@ void i915_sched_show(struct drm_printer *m,
>   		     unsigned int max)
>   {
>   	const struct i915_request *rq, *last;
> +	struct i915_priolist *pl;
>   	unsigned long flags;
>   	unsigned int count;
> -	struct rb_node *rb;
>   
>   	rcu_read_lock();
>   	spin_lock_irqsave(&se->lock, flags);
> @@ -1282,10 +1444,8 @@ void i915_sched_show(struct drm_printer *m,
>   
>   	last = NULL;
>   	count = 0;
> -	for (rb = rb_first_cached(&se->queue); rb; rb = rb_next(rb)) {
> -		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> -
> -		priolist_for_each_request(rq, p) {
> +	for_each_priolist(pl, &se->queue) {
> +		priolist_for_each_request(rq, pl) {
>   			if (count++ < max - 1)
>   				show_request(m, rq, "\t", 0);
>   			else
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index fe392109b112..872d221f6ba7 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -24,12 +24,6 @@ struct intel_engine_cs;
>   		  ##__VA_ARGS__);					\
>   } while (0)
>   
> -#define priolist_for_each_request(it, plist) \
> -	list_for_each_entry(it, &(plist)->requests, sched.link)
> -
> -#define priolist_for_each_request_consume(it, n, plist) \
> -	list_for_each_entry_safe(it, n, &(plist)->requests, sched.link)
> -
>   void i915_sched_node_init(struct i915_sched_node *node);
>   void i915_sched_node_reinit(struct i915_sched_node *node);
>   
> @@ -100,7 +94,7 @@ static inline void i915_priolist_free(struct i915_priolist *p)
>   
>   static inline bool i915_sched_is_idle(const struct i915_sched *se)
>   {
> -	return RB_EMPTY_ROOT(&se->queue.rb_root);
> +	return i915_priolist_is_empty(&se->queue);
>   }
>   
>   static inline bool
> @@ -168,6 +162,14 @@ i915_sched_get_active_request(const struct i915_sched *se)
>   	return NULL;
>   }
>   
> +/* Walk the scheduler queue of requests (in submission order) and remove them */
> +struct i915_priolist *__i915_sched_dequeue_next(struct i915_sched *se);
> +#define i915_sched_dequeue(se, pl, rq, rn) \
> +	for ((pl) = (se)->queue.sentinel.next[0]; \
> +	     (pl) != &(se)->queue.sentinel; \
> +	     (pl) = __i915_sched_dequeue_next(se)) \
> +		priolist_for_each_request_safe(rq, rn, pl)
> +
>   void i915_request_show_with_schedule(struct drm_printer *m,
>   				     const struct i915_request *rq,
>   				     const char *prefix,
> diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
> index 5ca2dc1b4fb5..bc668f375097 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler_types.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
> @@ -115,7 +115,7 @@ struct i915_sched {
>   	 * @queue is only used to transfer requests from the scheduler
>   	 * frontend to the back.
>   	 */
> -	struct rb_root_cached queue;
> +	struct i915_priolist_root queue;
>   
>   	/**
>   	 * @tasklet: softirq tasklet for bottom half
> diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> index 3db34d3eea58..946c93441c1f 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> @@ -25,6 +25,7 @@ selftest(ring, intel_ring_mock_selftests)
>   selftest(engine, intel_engine_cs_mock_selftests)
>   selftest(timelines, intel_timeline_mock_selftests)
>   selftest(requests, i915_request_mock_selftests)
> +selftest(scheduler, i915_scheduler_mock_selftests)
>   selftest(objects, i915_gem_object_mock_selftests)
>   selftest(phys, i915_gem_phys_mock_selftests)
>   selftest(dmabuf, i915_gem_dmabuf_mock_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
> index f54bdbeaa48b..2bb2d3d07d06 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
> @@ -12,6 +12,54 @@
>   #include "selftests/igt_spinner.h"
>   #include "selftests/i915_random.h"
>   
> +static int mock_skiplist_levels(void *dummy)
> +{
> +	struct i915_priolist_root root = {};
> +	struct i915_priolist *pl = &root.sentinel;
> +	IGT_TIMEOUT(end_time);
> +	unsigned long total;
> +	int count, lvl;
> +
> +	total = 0;
> +	do {
> +		for (count = 0; count < 16384; count++) {
> +			lvl = random_level(&root);
> +			if (lvl > pl->level) {
> +				if (lvl < I915_PRIOLIST_HEIGHT - 1)
> +					lvl = ++pl->level;
> +				else
> +					lvl = I915_PRIOLIST_HEIGHT - 1;
> +			}
> +
> +			pl->next[lvl] = ptr_inc(pl->next[lvl]);
> +		}
> +		total += count;
> +	} while (!__igt_timeout(end_time, NULL));
> +
> +	pr_info("Total %9lu\n", total);
> +	for (lvl = 0; lvl <= pl->level; lvl++) {
> +		int x = ilog2((unsigned long)pl->next[lvl]);
> +		char row[80];
> +
> +		memset(row, '*', x);
> +		row[x] = '\0';
> +
> +		pr_info(" [%2d] %9lu %s\n",
> +			lvl, (unsigned long)pl->next[lvl], row);
> +	}
> +
> +	return 0;
> +}
> +
> +int i915_scheduler_mock_selftests(void)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(mock_skiplist_levels),
> +	};
> +
> +	return i915_subtests(tests, NULL);
> +}
> +
>   static void scheduling_disable(struct intel_engine_cs *engine)
>   {
>   	engine->props.preempt_timeout_ms = 0;
> @@ -80,9 +128,9 @@ static int all_engines(struct drm_i915_private *i915,
>   static bool check_context_order(struct i915_sched *se)
>   {
>   	u64 last_seqno, last_context;
> +	struct i915_priolist *p;
>   	unsigned long count;
>   	bool result = false;
> -	struct rb_node *rb;
>   	int last_prio;
>   
>   	/* We expect the execution order to follow ascending fence-context */
> @@ -92,8 +140,7 @@ static bool check_context_order(struct i915_sched *se)
>   	last_context = 0;
>   	last_seqno = 0;
>   	last_prio = 0;
> -	for (rb = rb_first_cached(&se->queue); rb; rb = rb_next(rb)) {
> -		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> +	for_each_priolist(p, &se->queue) {
>   		struct i915_request *rq;
>   
>   		priolist_for_each_request(rq, p) {
> 

* Re: [Intel-gfx] [PATCH 09/31] drm/i915: Replace priolist rbtree with a skiplist
  2021-02-08 12:29   ` Tvrtko Ursulin
@ 2021-02-08 12:46     ` Chris Wilson
  2021-02-08 15:10       ` Tvrtko Ursulin
  0 siblings, 1 reply; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 12:46 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2021-02-08 12:29:14)
> 
> On 08/02/2021 10:52, Chris Wilson wrote:
> > +static void remove_from_priolist(struct i915_sched *se,
> > +                              struct i915_request *rq,
> > +                              struct list_head *list,
> > +                              bool tail)
> > +{
> > +     struct list_head *prev = rq->sched.link.prev;
> 
> This depends on rq being at the head of its list?

Not depends, no. We are testing whether the list is singular: by removing
this request from i915_priolist.requests, that list becomes empty, and so
the i915_priolist can be removed from the skiplist.
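
Spelled out (just a sketch of the <linux/list.h> semantics it relies on,
not new code in the patch): prev is the i915_priolist.requests head iff
rq was the first request on that level, and that head only reads as
empty after the unlink if rq was also the last one, i.e. the list was
singular:

	struct list_head *prev = rq->sched.link.prev;

	__list_del_entry(&rq->sched.link);	/* unlink, no reinit/poison */
	list_add_tail(&rq->sched.link, list);	/* move onto the new list
						 * (or list_add(), if !tail) */

	if (list_empty(prev))			/* only true if rq was alone */
		__remove_priolist(se, prev);	/* drop the empty level */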

> > +
> > +     GEM_BUG_ON(!i915_request_in_priority_queue(rq));
> > +
> > +     __list_del_entry(&rq->sched.link);
> > +     if (tail)
> > +             list_add_tail(&rq->sched.link, list);
> > +     else
> > +             list_add(&rq->sched.link, list);
> 
> So it is more of a move than a remove(_from_priolist)?

Yes, we can quite happily just keep the list_move(), except we then end
up with lots of empty levels. At first I thought the walk through those
(during dequeue) would be cheaper than removing them. The max lock hold
time strongly favours removing the levels as we move requests around
(which happens in dribs and drabs) over doing a bulk remove at dequeue.

> > +     /* If we just removed the last element in the old plist, delete it */
> > +     if (list_empty(prev))
> > +             __remove_priolist(se, prev);
> > +}
> > +
> > +struct i915_priolist *__i915_sched_dequeue_next(struct i915_sched *se)
> > +{
> > +     struct i915_priolist * const s = &se->queue.sentinel;
> > +     struct i915_priolist *pl = s->next[0];
> > +     int lvl;
> > +
> > +     GEM_BUG_ON(!list_empty(&pl->requests));
> 
> Lost as to why pl->requests has to be empty at this point. Considering:
> 
> +#define i915_sched_dequeue(se, pl, rq, rn) \
> +       for ((pl) = (se)->queue.sentinel.next[0]; \
> +            (pl) != &(se)->queue.sentinel; \
> +            (pl) = __i915_sched_dequeue_next(se)) \
> +               priolist_for_each_request_safe(rq, rn, pl)
> +
> 
> I also don't understand what it would dequeue. A whole priolist's worth of 
> requests at a time? But it can't be empty if there is something to dequeue. 
> And who puts any unconsumed requests back somewhere in this case.

It's a double for-loop. I think the flattening of the logic is worth it.

During dequeue, we always move the very first request onto the next list
(i.e. i915_sched.active). Then when we have finished with all the
requests in one priority level, we move onto the next i915_priolist
(calling __i915_sched_dequeue_next).

So in __i915_sched_dequeue_next, we are always dealing with an empty
i915_priolist and want to advance the start of the skiplist to the next.
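
For reference, hand-expanding the macro, the double loop is roughly the
following (names as in the patch, the inner body is only illustrative):

	struct i915_priolist *pl;
	struct i915_request *rq, *rn;

	for (pl = se->queue.sentinel.next[0];		/* first i915_priolist (highest priority) */
	     pl != &se->queue.sentinel;			/* the sentinel terminates the walk */
	     pl = __i915_sched_dequeue_next(se)) {	/* unlink the now-empty i915_priolist */
		list_for_each_entry_safe(rq, rn, &pl->requests, sched.link) {
			/* consume rq, e.g. move it onto i915_sched.active */
		}
	}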

I was thinking that, in order to hide the double for-loop, we could
handle the non-empty i915_priolist case by having it break out of the
outer loop. Then we could get rid of the goto done.
-Chris

* Re: [Intel-gfx] [PATCH 10/31] drm/i915: Fair low-latency scheduling
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 10/31] drm/i915: Fair low-latency scheduling Chris Wilson
@ 2021-02-08 14:56   ` Tvrtko Ursulin
  2021-02-08 15:29     ` Chris Wilson
  2021-02-09  9:37   ` Tvrtko Ursulin
  1 sibling, 1 reply; 54+ messages in thread
From: Tvrtko Ursulin @ 2021-02-08 14:56 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 08/02/2021 10:52, Chris Wilson wrote:
> The first "scheduler" was a topographical sorting of requests into
> priority order. The execution order was deterministic, the earliest
> submitted, highest priority request would be executed first. Priority
> inheritance ensured that inversions were kept at bay, and allowed us to
> dynamically boost priorities (e.g. for interactive pageflips).
> 
> The minimalistic timeslicing scheme was an attempt to introduce fairness
> between long running requests, by evicting the active request at the end
> of a timeslice and moving it to the back of its priority queue (while
> ensuring that dependencies were kept in order). For short running
> requests from many clients of equal priority, the scheme is still very
> much FIFO submission ordering, and as unfair as before.
> 
> To impose fairness, we need an external metric that ensures that clients
> are interspersed, so we don't execute one long chain from client A before
> executing any of client B. This could be imposed by the clients
> themselves by using fences based on an external clock, that is they only
> submit work for a "frame" at frame-intervals, instead of submitting as
> much work as they are able to. The standard SwapBuffers approach is akin
> to double buffering, where as one frame is being executed, the next is
> being submitted, such that there is always a maximum of two frames per
> client in the pipeline and so ideally maintains consistent input-output
> latency. Even this scheme exhibits unfairness under load as a single
> client will execute two frames back to back before the next, and with
> enough clients, deadlines will be missed.
> 
> The idea introduced by BFS/MuQSS is that fairness is introduced by
> metering with an external clock. Every request, when it becomes ready to
> execute is assigned a virtual deadline, and execution order is then
> determined by earliest deadline. Priority is used as a hint, rather than
> strict ordering, where high priority requests have earlier deadlines,
> but not necessarily earlier than outstanding work. Thus work is executed
> in order of 'readiness', with timeslicing to demote long running work.
> 
> The Achilles' heel of this scheduler is its strong preference for
> low-latency and favouring of new queues. Whereas it was easy to dominate
> the old scheduler by flooding it with many requests over a short period
> of time, the new scheduler can be dominated by a 'synchronous' client
> that waits for each of its requests to complete before submitting the
> next. As such a client has no history, it is always considered
> ready-to-run and receives an earlier deadline than the long running
> requests. This is compensated for by refreshing the current execution's
> deadline and by disallowing preemption for timeslice shuffling.
> 
> In contrast, one key advantage of disconnecting the sort key from the
> priority value is that we can freely adjust the deadline to compensate
> for other factors. This is used in conjunction with submitting requests
> ahead-of-schedule that then busywait on the GPU using semaphores. Since
> we don't want to spend a timeslice busywaiting instead of doing real
> work when available, we deprioritise work by giving the semaphore waits
> a later virtual deadline. The priority deboost is applied to semaphore
> workloads after they miss a semaphore wait and a new context is pending.
> The request is then restored to its normal priority once the semaphores
> are signaled so that it is not unfairly penalised under contention by
> remaining at a far future deadline. This is a much improved and cleaner
> version of commit f9e9e9de58c7 ("drm/i915: Prioritise non-busywait
> semaphore workloads").
> 
> To check the impact on throughput (often the downfall of latency
> sensitive schedulers), we used gem_wsim to simulate various transcode
> workloads with different load balancers, and varying the number of
> competing [heterogeneous] clients. On Kabylake gt3e running at fixed
> cpu/gpu clocks,
> 
> +delta%------------------------------------------------------------------+
> |       a                                                                |
> |       a                                                                |
> |       a                                                                |
> |       a                                                                |
> |       aa                                                               |
> |      aaa                                                               |
> |      aaaa                                                              |
> |     aaaaaa                                                             |
> |     aaaaaa                                                             |
> |     aaaaaa   a                a                                        |
> | aa  aaaaaa a a      a  a   aa a       a         a       a             a|
> ||______M__A__________|                                                  |
> +------------------------------------------------------------------------+
>      N           Min           Max        Median          Avg       Stddev
>    108    -4.6326643     47.797855 -0.00069639128     2.116185   7.6764049
> 
> Each point is the relative percentage change in gem_wsim's work-per-second
> score [using the median result of 120 25s runs, the relative change
> computed as (B/A - 1) * 100]; 0 being no change.
> 
> Reviewing the same workloads on Tigerlake,
> 
> +delta%------------------------------------------------------------------+
> |       a                                                                |
> |       a                                                                |
> |       a                                                                |
> |       aa a                                                             |
> |       aaaa                                                             |
> |       aaaa                                                             |
> |    aaaaaaa                                                             |
> |    aaaaaaa                                                             |
> |    aaaaaaa      a   a   aa  a         a                         a      |
> | aaaaaaaaaa a aa a a a aaaa aa   a     a        aa               a     a|
> ||_______M____A_____________|                                            |
> +------------------------------------------------------------------------+
>      N           Min           Max        Median          Avg       Stddev
>    108     -4.258712      46.83081    0.36853159    4.1415662     9.461689
> 
> The expectation is that by deliberately increasing the number of context
> switches to improve fairness between clients, throughput will be
> diminished. What we do see are small fluctuations around no change, with
> the median result being improved throughput. The dramatic improvement is
> from reintroducing the improved no-semaphore boosting, which avoids
> accidentally preventing scheduling of ready workloads due to busy
> spinners (i.e. avoids wasting cycles when there is work to be done).
> 
> We expect to see no change in single client workloads such as games,
> though running multiple applications on a desktop should have reduced
> jitter i.e. smoother input-output latency.
> 
> This scheduler is based on MuQSS by Dr Con Kolivas.
> 
> v2: More commentary, especially around where we reset the deadlines.
> 
> Testcase: igt/gem_exec_fair
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/Kconfig.profile          |  62 +++
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 -
>   .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |   1 +
>   drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   4 +-
>   drivers/gpu/drm/i915/gt/intel_engine_types.h  |  14 -
>   drivers/gpu/drm/i915/gt/intel_engine_user.c   |   1 +
>   .../drm/i915/gt/intel_execlists_submission.c  | 233 ++++----
>   drivers/gpu/drm/i915/gt/selftest_execlists.c  |  30 +-
>   drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |   5 +-
>   drivers/gpu/drm/i915/gt/selftest_lrc.c        |   1 +
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   4 -
>   drivers/gpu/drm/i915/i915_priolist_types.h    |   7 +-
>   drivers/gpu/drm/i915/i915_request.c           |  19 +-
>   drivers/gpu/drm/i915/i915_scheduler.c         | 518 +++++++++++++-----
>   drivers/gpu/drm/i915/i915_scheduler.h         |  18 +-
>   drivers/gpu/drm/i915/i915_scheduler_types.h   |  39 ++
>   drivers/gpu/drm/i915/selftests/i915_request.c |   1 +
>   .../gpu/drm/i915/selftests/i915_scheduler.c   | 136 +++++
>   include/uapi/drm/i915_drm.h                   |   1 +
>   19 files changed, 810 insertions(+), 286 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
> index 35bbe2b80596..f1d009906f71 100644
> --- a/drivers/gpu/drm/i915/Kconfig.profile
> +++ b/drivers/gpu/drm/i915/Kconfig.profile
> @@ -1,3 +1,65 @@
> +choice
> +	prompt "Preferred scheduler"
> +	default DRM_I915_SCHED_VIRTUAL_DEADLINE
> +	help
> +	  Select the preferred method to decide the order of execution.
> +
> +	  The scheduler is used for two purposes. First to defer unready
> +	  jobs to not block execution of independent ready clients, so
> +	  preventing GPU stalls while work waits for other tasks. The second
> +	  purpose is to decide which task to run next, as well as decide
> +	  if that task should preempt the currently running task, or if
> +	  the current task has exceeded its allotment of GPU time and should
> +	  be replaced.
> +
> +	config DRM_I915_SCHED_FIFO
> +	bool "FIFO"
> +	help
> +	  No task reordering, tasks are executed in order of readiness.
> +	  First in, first out.
> +
> +	  Unready tasks do not block execution of other, independent clients.
> +	  A client will not be scheduled for execution until all of its
> +	  prerequisite work has completed.
> +
> +	  This disables the scheduler and puts it into a pass-through mode.
> +
> +	config DRM_I915_SCHED_PRIORITY
> +	bool "Priority"
> +	help
> +	  Strict priority ordering, equal priority tasks are executed
> +	  in order of readiness. Clients are liable to starve other clients,
> +	  causing uneven execution and excess task latency. High priority
> +	  clients will preempt lower priority clients and will run
> +	  uninterrupted.
> +
> +	  Note that interactive desktops will implicitly perform priority
> +	  boosting to minimise frame jitter.
> +
> +	config DRM_I915_SCHED_VIRTUAL_DEADLINE
> +	bool "Virtual Deadline"
> +	help
> +	  A fair scheduler based on MuQSS with priority-hinting.
> +
> +	  When a task is ready for execution, it is given a quota (from the
> +	  engine's timeslice) and a virtual deadline. The virtual deadline is
> +	  derived from the current time and the timeslice scaled by the
> +	  task's priority. Higher priority tasks are given an earlier
> +	  deadline and receive a large portion of the execution bandwidth.
> +
> +	  Requests are then executed in order of deadline completion.
> +	  Requests with earlier deadlines and higher priority than currently
> +	  executing on the engine will preempt the active task.
> +
> +endchoice
> +
> +config DRM_I915_SCHED
> +	int
> +	default 2 if DRM_I915_SCHED_VIRTUAL_DEADLINE
> +	default 1 if DRM_I915_SCHED_PRIORITY
> +	default 0 if DRM_I915_SCHED_FIFO
> +	default -1
> +
>   config DRM_I915_FENCE_TIMEOUT
>   	int "Timeout for unsignaled foreign fences (ms, jiffy granularity)"
>   	default 10000 # milliseconds
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index da2447f18daa..7d34bf03670b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -579,8 +579,6 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine)
>   	memset(execlists->pending, 0, sizeof(execlists->pending));
>   	execlists->active =
>   		memset(execlists->inflight, 0, sizeof(execlists->inflight));
> -
> -	execlists->queue_priority_hint = INT_MIN;
>   }
>   
>   static void cleanup_status_page(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> index 5ed263f36f93..1d0e7daa6285 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> @@ -313,6 +313,7 @@ static int __intel_engine_pulse(struct intel_engine_cs *engine)
>   	if (IS_ERR(rq))
>   		return PTR_ERR(rq);
>   
> +	rq->sched.deadline = 0;
>   	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
>   
>   	heartbeat_commit(rq, &attr);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index 27d9d17b35cb..ef5064ea54e5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -211,6 +211,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
>   	i915_request_add_active_barriers(rq);
>   
>   	/* Install ourselves as a preemption barrier */
> +	rq->sched.deadline = 0;
>   	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
>   	if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
>   		/*
> @@ -271,9 +272,6 @@ static int __engine_park(struct intel_wakeref *wf)
>   	intel_engine_park_heartbeat(engine);
>   	intel_breadcrumbs_park(engine->breadcrumbs);
>   
> -	/* Must be reset upon idling, or we may miss the busy wakeup. */
> -	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
> -
>   	if (engine->park)
>   		engine->park(engine);
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 08bddc5263aa..d1024e8717e1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -223,20 +223,6 @@ struct intel_engine_execlists {
>   	 */
>   	unsigned int port_mask;
>   
> -	/**
> -	 * @queue_priority_hint: Highest pending priority.
> -	 *
> -	 * When we add requests into the queue, or adjust the priority of
> -	 * executing requests, we compute the maximum priority of those
> -	 * pending requests. We can then use this value to determine if
> -	 * we need to preempt the executing requests to service the queue.
> -	 * However, since the we may have recorded the priority of an inflight
> -	 * request we wanted to preempt but since completed, at the time of
> -	 * dequeuing the priority hint may no longer may match the highest
> -	 * available request priority.
> -	 */
> -	int queue_priority_hint;
> -
>   	struct rb_root_cached virtual;
>   
>   	/**
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> index 3fab439ba22b..92632afa52ae 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> @@ -102,6 +102,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915)
>   #define MAP(x, y) { I915_SCHED_##x, ilog2(I915_SCHEDULER_CAP_##y) }
>   		MAP(ACTIVE_BIT, ENABLED),
>   		MAP(PRIORITY_BIT, PRIORITY),
> +		MAP(DEADLINE_BIT, FAIR),
>   		MAP(TIMESLICE_BIT, TIMESLICING),
>   #undef MAP
>   	};
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index 4a0258347c10..e249b1423309 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -180,7 +180,7 @@ struct virtual_engine {
>   	 */
>   	struct ve_node {
>   		struct rb_node rb;
> -		int prio;
> +		u64 deadline;
>   	} nodes[I915_NUM_ENGINES];
>   
>   	/*
> @@ -256,25 +256,12 @@ static void ring_set_paused(const struct intel_engine_cs *engine, int state)
>   
>   static int rq_prio(const struct i915_request *rq)
>   {
> -	return READ_ONCE(rq->sched.attr.priority);
> +	return rq->sched.attr.priority;
>   }
>   
> -static int effective_prio(const struct i915_request *rq)
> +static u64 rq_deadline(const struct i915_request *rq)
>   {
> -	int prio = rq_prio(rq);
> -
> -	/*
> -	 * If this request is special and must not be interrupted at any
> -	 * cost, so be it. Note we are only checking the most recent request
> -	 * in the context and so may be masking an earlier vip request. It
> -	 * is hoped that under the conditions where nopreempt is used, this
> -	 * will not matter (i.e. all requests to that context will be
> -	 * nopreempt for as long as desired).
> -	 */
> -	if (i915_request_has_nopreempt(rq))
> -		prio = I915_PRIORITY_UNPREEMPTABLE;
> -
> -	return prio;
> +	return rq->sched.deadline;
>   }
>   
>   static struct i915_request *first_request(const struct i915_sched *se)
> @@ -289,62 +276,62 @@ static struct i915_request *first_request(const struct i915_sched *se)
>   					sched.link);
>   }
>   
> -static int queue_prio(const struct i915_sched *se)
> +static struct i915_request *first_virtual(const struct intel_engine_cs *engine)
>   {
> -	struct i915_request *rq;
> +	struct rb_node *rb;
>   
> -	rq = first_request(se);
> -	if (!rq)
> -		return INT_MIN;
> +	rb = rb_first_cached(&engine->execlists.virtual);
> +	if (!rb)
> +		return NULL;
>   
> -	return rq_prio(rq);
> +	return READ_ONCE(rb_entry(rb,
> +				  struct virtual_engine,
> +				  nodes[engine->id].rb)->request);
>   }
>   
> -static int virtual_prio(const struct intel_engine_execlists *el)
> +static const struct i915_request *
> +next_elsp_request(const struct i915_sched *se, const struct i915_request *rq)
>   {
> -	struct rb_node *rb = rb_first_cached(&el->virtual);
> +	if (i915_sched_is_last_request(se, rq))
> +		return NULL;
>   
> -	return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
> +	return list_next_entry(rq, sched.link);
>   }
>   
> -static bool need_preempt(struct intel_engine_cs *engine,
> +static bool
> +dl_before(const struct i915_request *next, const struct i915_request *prev)
> +{
> +	return !prev || (next && rq_deadline(next) < rq_deadline(prev));
> +}
> +
> +static bool need_preempt(const struct intel_engine_cs *engine,
>   			 const struct i915_request *rq)
>   {
>   	const struct i915_sched *se = &engine->sched;
> -	int last_prio;
> +	const struct i915_request *first = NULL;
> +	const struct i915_request *next;
>   
>   	if (!i915_sched_use_busywait(se))
>   		return false;
>   
>   	/*
> -	 * Check if the current priority hint merits a preemption attempt.
> -	 *
> -	 * We record the highest value priority we saw during rescheduling
> -	 * prior to this dequeue, therefore we know that if it is strictly
> -	 * less than the current tail of ESLP[0], we do not need to force
> -	 * a preempt-to-idle cycle.
> -	 *
> -	 * However, the priority hint is a mere hint that we may need to
> -	 * preempt. If that hint is stale or we may be trying to preempt
> -	 * ourselves, ignore the request.
> -	 *
> -	 * More naturally we would write
> -	 *      prio >= max(0, last);
> -	 * except that we wish to prevent triggering preemption at the same
> -	 * priority level: the task that is running should remain running
> -	 * to preserve FIFO ordering of dependencies.
> +	 * If this request is special and must not be interrupted at any
> +	 * cost, so be it. Note we are only checking the most recent request
> +	 * in the context and so may be masking an earlier vip request. It
> +	 * is hoped that under the conditions where nopreempt is used, this
> +	 * will not matter (i.e. all requests to that context will be
> +	 * nopreempt for as long as desired).
>   	 */
> -	last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
> -	if (engine->execlists.queue_priority_hint <= last_prio)
> +	if (i915_request_has_nopreempt(rq))
>   		return false;
>   
>   	/*
>   	 * Check against the first request in ELSP[1], it will, thanks to the
>   	 * power of PI, be the highest priority of that context.
>   	 */
> -	if (!list_is_last(&rq->sched.link, &se->requests) &&
> -	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
> -		return true;
> +	next = next_elsp_request(se, rq);
> +	if (dl_before(next, first))

Here first is always NULL so dl_before always returns true, meaning it 
appears redundant to call it.

> +		first = next;
>   
>   	/*
>   	 * If the inflight context did not trigger the preemption, then maybe
> @@ -356,8 +343,31 @@ static bool need_preempt(struct intel_engine_cs *engine,
>   	 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
>   	 * context, it's priority would not exceed ELSP[0] aka last_prio.
>   	 */
> -	return max(virtual_prio(&engine->execlists),
> -		   queue_prio(se)) > last_prio;
> +	next = first_request(se);
> +	if (dl_before(next, first))
> +		first = next;
> +
> +	next = first_virtual(engine);
> +	if (dl_before(next, first))
> +		first = next;
> +
> +	if (!dl_before(first, rq))
> +		return false;

Ends up picking the earliest deadline among the candidates: elsp[1] (or maybe 
the next request in the context, depending on the coalescing criteria), the 
first in the priolist, and the first virtual.

Virtual has a separate queue so that's understandable, but can "elsp[1]" 
really have an earlier deadline than first_request() (the head of the priolist)?

> +
> +	/*
> +	 * While a request may have been queued that has an earlier deadline
> +	 * than is currently running, we only allow it to perform an urgent
> +	 * preemption if it also has higher priority. The cost of frequently
> +	 * switching between contexts is noticeable, so we try to keep
> +	 * the deadline shuffling only to timeslice boundaries.
> +	 */
> +	ENGINE_TRACE(engine,
> +		     "preempt for first=%llx:%llu, dl=%llu, prio=%d?\n",
> +		     first->fence.context,
> +		     first->fence.seqno,
> +		     rq_deadline(first),
> +		     rq_prio(first));
> +	return rq_prio(first) > max(rq_prio(rq), I915_PRIORITY_NORMAL - 1);

Okay.

>   }
>   
>   __maybe_unused static bool
> @@ -374,7 +384,15 @@ assert_priority_queue(const struct i915_request *prev,
>   	if (i915_request_is_active(prev))
>   		return true;
>   
> -	return rq_prio(prev) >= rq_prio(next);
> +	if (rq_deadline(prev) <= rq_deadline(next))
> +		return true;
> +
> +	ENGINE_TRACE(prev->engine,
> +		     "next %llx:%lld dl %lld is before prev %llx:%lld dl %lld\n",
> +		     next->fence.context, next->fence.seqno, rq_deadline(next),
> +		     prev->fence.context, prev->fence.seqno, rq_deadline(prev));
> +
> +	return false;
>   }
>   
>   static void
> @@ -555,10 +573,25 @@ static void __execlists_schedule_out(struct i915_request * const rq,
>   	/*
>   	 * If we have just completed this context, the engine may now be
>   	 * idle and we want to re-enter powersaving.
> +	 *
> +	 * If the context is still active, update the deadline on the next
> +	 * request as we submitted it much earlier with an estimation based
> +	 * on this request and all those before it consuming their whole budget.
> +	 * Since the next request is ready but may have a deadline set far in
> +	 * the future, we will prefer any new client before executing this
> +	 * context again. If the other clients are submitting synchronous
> +	 * workloads, each submission appears as a fresh piece of work and ready
> +	 * to run; each time they will receive a deadline that is likely earlier
> +	 * than the accumulated deadline of this context. So we re-evaluate this
> +	 * context's deadline and put it on an equal footing with the
> +	 * synchronous clients.
>   	 */
> -	if (intel_timeline_is_last(ce->timeline, rq) &&
> -	    __i915_request_is_complete(rq))
> -		intel_engine_add_retire(engine, ce->timeline);
> +	if (__i915_request_is_complete(rq)) {
> +		if (!intel_timeline_is_last(ce->timeline, rq))
> +			i915_request_update_deadline(list_next_entry(rq, link));
> +		else
> +			intel_engine_add_retire(engine, ce->timeline);
> +	}
>   
>   	ccid = ce->lrc.ccid;
>   	ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
> @@ -668,14 +701,14 @@ dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
>   	if (!rq)
>   		return "";
>   
> -	snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
> +	snprintf(buf, buflen, "%sccid:%x %llx:%lld%s dl:%llu",
>   		 prefix,
>   		 rq->context->lrc.ccid,
>   		 rq->fence.context, rq->fence.seqno,
>   		 __i915_request_is_complete(rq) ? "!" :
>   		 __i915_request_has_started(rq) ? "*" :
>   		 "",
> -		 rq_prio(rq));
> +		 rq_deadline(rq));
>   
>   	return buf;
>   }
> @@ -1197,11 +1230,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	if (last) {
>   		if (need_preempt(engine, last)) {
>   			ENGINE_TRACE(engine,
> -				     "preempting last=%llx:%lld, prio=%d, hint=%d\n",
> +				     "preempting last=%llx:%llu, dl=%llu, prio=%d\n",
>   				     last->fence.context,
>   				     last->fence.seqno,
> -				     last->sched.attr.priority,
> -				     execlists->queue_priority_hint);
> +				     rq_deadline(last),
> +				     rq_prio(last));
>   			record_preemption(execlists);
>   
>   			/*
> @@ -1223,11 +1256,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   			last = NULL;
>   		} else if (timeslice_expired(engine, last)) {
>   			ENGINE_TRACE(engine,
> -				     "expired:%s last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
> +				     "expired:%s last=%llx:%llu, deadline=%llu, now=%llu, yield?=%s\n",
>   				     yesno(timer_expired(&execlists->timer)),
>   				     last->fence.context, last->fence.seqno,
> -				     rq_prio(last),
> -				     execlists->queue_priority_hint,
> +				     rq_deadline(last),
> +				     i915_sched_to_ticks(ktime_get()),
>   				     yesno(timeslice_yield(execlists, last)));
>   
>   			/*
> @@ -1298,7 +1331,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		GEM_BUG_ON(rq->engine != &ve->base);
>   		GEM_BUG_ON(rq->context != &ve->context);
>   
> -		if (unlikely(rq_prio(rq) < queue_prio(se))) {
> +		if (!dl_before(rq, first_request(se))) {
>   			spin_unlock(&ve->base.sched.lock);
>   			break;
>   		}
> @@ -1310,16 +1343,15 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		}
>   
>   		ENGINE_TRACE(engine,
> -			     "virtual rq=%llx:%lld%s, new engine? %s\n",
> +			     "virtual rq=%llx:%lld%s, dl %llx, new engine? %s\n",
>   			     rq->fence.context,
>   			     rq->fence.seqno,
>   			     __i915_request_is_complete(rq) ? "!" :
>   			     __i915_request_has_started(rq) ? "*" :
>   			     "",
> +			     rq_deadline(rq),
>   			     yesno(engine != ve->siblings[0]));
> -
>   		WRITE_ONCE(ve->request, NULL);
> -		WRITE_ONCE(ve->base.execlists.queue_priority_hint, INT_MIN);
>   
>   		rb = &ve->nodes[engine->id].rb;
>   		rb_erase_cached(rb, &execlists->virtual);
> @@ -1407,6 +1439,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   			if (rq->execution_mask != engine->mask)
>   				goto done;
>   
> +			if (unlikely(dl_before(first_virtual(engine), rq)))
> +				goto done;
> +
>   			/*
>   			 * If GVT overrides us we only ever submit
>   			 * port[0], leaving port[1] empty. Note that we
> @@ -1440,24 +1475,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	}
>   done:
>   	*port++ = i915_request_get(last);
> -
> -	/*
> -	 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
> -	 *
> -	 * We choose the priority hint such that if we add a request of greater
> -	 * priority than this, we kick the submission tasklet to decide on
> -	 * the right order of submitting the requests to hardware. We must
> -	 * also be prepared to reorder requests as they are in-flight on the
> -	 * HW. We derive the priority hint then as the first "hole" in
> -	 * the HW submission ports and if there are no available slots,
> -	 * the priority of the lowest executing request, i.e. last.
> -	 *
> -	 * When we do receive a higher priority request ready to run from the
> -	 * user, see queue_request(), the priority hint is bumped to that
> -	 * request triggering preemption on the next dequeue (or subsequent
> -	 * interrupt for secondary ports).
> -	 */
> -	execlists->queue_priority_hint = pl->priority;
>   	spin_unlock(&se->lock);
>   
>   	/*
> @@ -2653,15 +2670,6 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
>   	rcu_read_unlock();
>   }
>   
> -static void nop_submission_tasklet(struct tasklet_struct *t)
> -{
> -	struct intel_engine_cs * const engine =
> -		from_tasklet(engine, t, sched.tasklet);
> -
> -	/* The driver is wedged; don't process any more events. */
> -	WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
> -}
> -
>   static void execlists_reset_cancel(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -2710,17 +2718,10 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
>   				i915_request_put(rq);
>   			}
>   			i915_request_put(rq);
> -
> -			ve->base.execlists.queue_priority_hint = INT_MIN;
>   		}
>   		spin_unlock(&ve->base.sched.lock);
>   	}
>   
> -	execlists->queue_priority_hint = INT_MIN;
> -
> -	GEM_BUG_ON(__tasklet_is_enabled(&se->tasklet));
> -	se->tasklet.callback = nop_submission_tasklet;
> -
>   	spin_unlock_irqrestore(&se->lock, flags);
>   	rcu_read_unlock();
>   
> @@ -2831,7 +2832,6 @@ static bool can_preempt(struct intel_engine_cs *engine)
>   static void execlists_set_default_submission(struct intel_engine_cs *engine)
>   {
>   	engine->sched.submit_request = i915_request_enqueue;
> -	engine->sched.tasklet.callback = execlists_submission_tasklet;
>   }
>   
>   static void execlists_shutdown(struct intel_engine_cs *engine)
> @@ -2957,7 +2957,7 @@ static void init_execlists(struct intel_engine_cs *engine)
>   	engine->sched.show = execlists_show;
>   	tasklet_setup(&engine->sched.tasklet, execlists_submission_tasklet);
>   
> -	i915_sched_select_mode(&engine->sched, I915_SCHED_MODE_PRIORITY);
> +	i915_sched_select_mode(&engine->sched, I915_SCHED_MODE_DEADLINE);
>   
>   	if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION) &&
>   	    intel_engine_has_preemption(engine))
> @@ -3193,7 +3193,8 @@ static const struct intel_context_ops virtual_context_ops = {
>   	.destroy = virtual_context_destroy,
>   };
>   
> -static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
> +static intel_engine_mask_t
> +virtual_submission_mask(struct virtual_engine *ve, u64 *deadline)
>   {
>   	struct i915_request *rq;
>   	intel_engine_mask_t mask;
> @@ -3210,9 +3211,11 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
>   		mask = ve->siblings[0]->mask;
>   	}
>   
> -	ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
> +	*deadline = rq_deadline(rq);
> +
> +	ENGINE_TRACE(&ve->base, "rq=%llx:%llu, mask=%x, dl=%llu\n",
>   		     rq->fence.context, rq->fence.seqno,
> -		     mask, ve->base.execlists.queue_priority_hint);
> +		     mask, *deadline);
>   
>   	return mask;
>   }
> @@ -3221,12 +3224,12 @@ static void virtual_submission_tasklet(struct tasklet_struct *t)
>   {
>   	struct virtual_engine * const ve =
>   		from_tasklet(ve, t, base.sched.tasklet);
> -	const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
>   	intel_engine_mask_t mask;
>   	unsigned int n;
> +	u64 deadline;
>   
>   	rcu_read_lock();
> -	mask = virtual_submission_mask(ve);
> +	mask = virtual_submission_mask(ve, &deadline);
>   	rcu_read_unlock();
>   	if (unlikely(!mask))
>   		return;
> @@ -3260,7 +3263,8 @@ static void virtual_submission_tasklet(struct tasklet_struct *t)
>   			 */
>   			first = rb_first_cached(&sibling->execlists.virtual) ==
>   				&node->rb;
> -			if (prio == node->prio || (prio > node->prio && first))
> +			if (deadline == node->deadline ||
> +			    (deadline < node->deadline && first))
>   				goto submit_engine;
>   
>   			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
> @@ -3274,7 +3278,7 @@ static void virtual_submission_tasklet(struct tasklet_struct *t)
>   
>   			rb = *parent;
>   			other = rb_entry(rb, typeof(*other), rb);
> -			if (prio > other->prio) {
> +			if (deadline < other->deadline) {
>   				parent = &rb->rb_left;
>   			} else {
>   				parent = &rb->rb_right;
> @@ -3289,8 +3293,8 @@ static void virtual_submission_tasklet(struct tasklet_struct *t)
>   
>   submit_engine:
>   		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
> -		node->prio = prio;
> -		if (first && prio > sibling->execlists.queue_priority_hint)
> +		node->deadline = deadline;
> +		if (first)
>   			i915_sched_kick(se);
>   
>   unlock_engine:
> @@ -3327,7 +3331,9 @@ static void virtual_submit_request(struct i915_request *rq)
>   		i915_request_put(ve->request);
>   	}
>   
> -	ve->base.execlists.queue_priority_hint = rq_prio(rq);
> +	rq->sched.deadline =
> +		min(rq->sched.deadline,
> +		    i915_scheduler_next_virtual_deadline(rq_prio(rq)));
>   	ve->request = i915_request_get(rq);
>   
>   	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
> @@ -3429,7 +3435,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
>   	ve->base.bond_execute = virtual_bond_execute;
>   
>   	INIT_LIST_HEAD(virtual_queue(ve));
> -	ve->base.execlists.queue_priority_hint = INT_MIN;
>   
>   	intel_context_init(&ve->context, &ve->base);
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> index be99fbd7cfab..112a09aa0d8d 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> @@ -868,7 +868,7 @@ semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
>   static int
>   release_queue(struct intel_engine_cs *engine,
>   	      struct i915_vma *vma,
> -	      int idx, int prio)
> +	      int idx, u64 deadline)
>   {
>   	struct i915_request *rq;
>   	u32 *cs;
> @@ -893,10 +893,7 @@ release_queue(struct intel_engine_cs *engine,
>   	i915_request_get(rq);
>   	i915_request_add(rq);
>   
> -	local_bh_disable();
> -	i915_request_set_priority(rq, prio);
> -	local_bh_enable(); /* kick tasklet */
> -
> +	i915_request_set_deadline(rq, deadline);
>   	i915_request_put(rq);
>   
>   	return 0;
> @@ -910,6 +907,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
>   	struct intel_engine_cs *engine;
>   	struct i915_request *head;
>   	enum intel_engine_id id;
> +	long timeout;
>   	int err, i, n = 0;
>   
>   	head = semaphore_queue(outer, vma, n++);
> @@ -933,12 +931,16 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
>   		}
>   	}
>   
> -	err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
> +	err = release_queue(outer, vma, n, 0);
>   	if (err)
>   		goto out;
>   
> -	if (i915_request_wait(head, 0,
> -			      2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
> +	/* Expected number of pessimal slices required */
> +	timeout = outer->gt->info.num_engines * (count + 2) * (count + 3);
> +	timeout *= 4; /* safety factor, including bucketing */
> +	timeout += HZ / 2; /* and include the request completion */
> +
> +	if (i915_request_wait(head, 0, timeout) < 0) {
>   		pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
>   		       outer->name, count, n);
>   		GEM_TRACE_DUMP();
> @@ -1043,6 +1045,8 @@ create_rewinder(struct intel_context *ce,
>   		err = i915_request_await_dma_fence(rq, &wait->fence);
>   		if (err)
>   			goto err;
> +
> +		i915_request_set_deadline(rq, rq_deadline(wait));
>   	}
>   
>   	cs = intel_ring_begin(rq, 14);
> @@ -1319,6 +1323,7 @@ static int live_timeslice_queue(void *arg)
>   			goto err_heartbeat;
>   		}
>   		i915_request_set_priority(rq, I915_PRIORITY_MAX);
> +		i915_request_set_deadline(rq, 0);
>   		err = wait_for_submit(engine, rq, HZ / 2);
>   		if (err) {
>   			pr_err("%s: Timed out trying to submit semaphores\n",
> @@ -1341,10 +1346,9 @@ static int live_timeslice_queue(void *arg)
>   		}
>   
>   		GEM_BUG_ON(i915_request_completed(rq));
> -		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
>   
>   		/* Queue: semaphore signal, matching priority as semaphore */
> -		err = release_queue(engine, vma, 1, effective_prio(rq));
> +		err = release_queue(engine, vma, 1, rq_deadline(rq));
>   		if (err)
>   			goto err_rq;
>   
> @@ -1455,6 +1459,7 @@ static int live_timeslice_nopreempt(void *arg)
>   			goto out_spin;
>   		}
>   
> +		rq->sched.deadline = 0;
>   		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
>   		i915_request_get(rq);
>   		i915_request_add(rq);
> @@ -1818,6 +1823,7 @@ static int live_late_preempt(void *arg)
>   
>   	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
>   	ctx_lo->sched.priority = 1;
> +	ctx_hi->sched.priority = I915_PRIORITY_MIN;
>   
>   	for_each_engine(engine, gt, id) {
>   		struct igt_live_test t;
> @@ -2985,6 +2991,9 @@ static int live_preempt_gang(void *arg)
>   		while (rq) { /* wait for each rq from highest to lowest prio */
>   			struct i915_request *n = list_next_entry(rq, mock.link);
>   
> +			/* With deadlines, no strict priority ordering */
> +			i915_request_set_deadline(rq, 0);
> +
>   			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
>   				struct drm_printer p =
>   					drm_info_printer(engine->i915->drm.dev);
> @@ -3207,6 +3216,7 @@ static int preempt_user(struct intel_engine_cs *engine,
>   	i915_request_add(rq);
>   
>   	i915_request_set_priority(rq, I915_PRIORITY_MAX);
> +	i915_request_set_deadline(rq, 0);
>   
>   	if (i915_request_wait(rq, 0, HZ / 2) < 0)
>   		err = -ETIME;
> diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> index cdb0ceff3be1..5323fd56efd6 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> @@ -1010,7 +1010,10 @@ static int __igt_reset_engines(struct intel_gt *gt,
>   					break;
>   				}
>   
> -				if (i915_request_wait(rq, 0, HZ / 5) < 0) {
> +				/* With deadlines, no strict priority */
> +				i915_request_set_deadline(rq, 0);
> +
> +				if (i915_request_wait(rq, 0, HZ / 2) < 0) {
>   					struct drm_printer p =
>   						drm_info_printer(gt->i915->drm.dev);
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 6d73add47109..b7dd5646c882 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1257,6 +1257,7 @@ poison_registers(struct intel_context *ce,
>   
>   	intel_ring_advance(rq, cs);
>   
> +	rq->sched.deadline = 0;
>   	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
>   err_rq:
>   	i915_request_add(rq);
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index c16393df42a0..79205e9a84ba 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -216,7 +216,6 @@ static void __guc_dequeue(struct intel_engine_cs *engine)
>   		last = rq;
>   	}
>   done:
> -	execlists->queue_priority_hint = pl->priority;
>   	if (submit) {
>   		*port = schedule_in(last, port - execlists->inflight);
>   		*++port = NULL;
> @@ -322,7 +321,6 @@ static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
>   
>   static void guc_reset_cancel(struct intel_engine_cs *engine)
>   {
> -	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct i915_sched *se = intel_engine_get_scheduler(engine);
>   	unsigned long flags;
>   
> @@ -346,8 +344,6 @@ static void guc_reset_cancel(struct intel_engine_cs *engine)
>   
>   	__i915_sched_cancel_queue(se);
>   
> -	execlists->queue_priority_hint = INT_MIN;
> -
>   	spin_unlock_irqrestore(&se->lock, flags);
>   	intel_engine_signal_breadcrumbs(engine);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
> index ee7482b9c813..542b47078104 100644
> --- a/drivers/gpu/drm/i915/i915_priolist_types.h
> +++ b/drivers/gpu/drm/i915/i915_priolist_types.h
> @@ -22,6 +22,8 @@ enum {
>   
>   	/* Interactive workload, scheduled for immediate pageflipping */
>   	I915_PRIORITY_DISPLAY,
> +
> +	__I915_PRIORITY_KERNEL__
>   };
>   
>   /* Smallest priority value that cannot be bumped. */
> @@ -35,8 +37,7 @@ enum {
>    * i.e. nothing can have higher priority and force us to usurp the
>    * active request.
>    */
> -#define I915_PRIORITY_UNPREEMPTABLE INT_MAX
> -#define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1)
> +#define I915_PRIORITY_BARRIER INT_MAX
>   
>   /*
>    * The slab returns power-of-two chunks of memory, so fill out the
> @@ -82,7 +83,7 @@ enum {
>    */
>   struct i915_priolist {
>   	struct list_head requests;
> -	int priority;
> +	u64 deadline;
>   
>   	int level;
>   	struct i915_priolist *next[I915_PRIOLIST_HEIGHT];
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index e7b4c4bc41a6..ce828dc73402 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -467,7 +467,7 @@ bool __i915_request_submit(struct i915_request *request)
>   	struct i915_sched *se = intel_engine_get_scheduler(engine);
>   	bool result = false;
>   
> -	RQ_TRACE(request, "\n");
> +	RQ_TRACE(request, "dl %llu\n", request->sched.deadline);
>   
>   	lockdep_assert_held(&se->lock);
>   
> @@ -650,6 +650,12 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
>   
>   	switch (state) {
>   	case FENCE_COMPLETE:
> +		/*
> +		 * The request is now ready to run; re-evaluate its deadline
> +		 * to remove the semaphore deprioritisation and to assign
> +		 * a deadline relative to its point-of-readiness [now].
> +		 */
> +		i915_request_update_deadline(rq);
>   		break;
>   
>   	case FENCE_FREE:
> @@ -1810,14 +1816,15 @@ long i915_request_wait(struct i915_request *rq,
>   	return timeout;
>   }
>   
> -static int print_sched_attr(const struct i915_sched_attr *attr,
> -			    char *buf, int x, int len)
> +static int print_sched(const struct i915_sched_node *node,
> +		       char *buf, int x, int len)
>   {
> -	if (attr->priority == I915_PRIORITY_INVALID)
> +	if (node->attr.priority == I915_PRIORITY_INVALID)
>   		return x;
>   
>   	x += snprintf(buf + x, len - x,
> -		      " prio=%d", attr->priority);
> +		      " prio=%d, dl=%llu",
> +		      node->attr.priority, node->deadline);
>   
>   	return x;
>   }
> @@ -1903,7 +1910,7 @@ void i915_request_show(struct drm_printer *m,
>   	 *    - the request has been temporarily suspended from execution
>   	 */
>   
> -	x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf));
> +	x = print_sched(&rq->sched, buf, x, sizeof(buf));
>   
>   	drm_printf(m, "%s%.*s%c %llx:%lld%s%s %s @ %dms: %s\n",
>   		   prefix, indent, "                ",
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 518eac67959e..1d77ece46241 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -54,6 +54,11 @@ static void node_put(struct i915_sched_node *node)
>   	i915_request_put(container_of(node, struct i915_request, sched));
>   }
>   
> +static inline u64 rq_deadline(const struct i915_request *rq)
> +{
> +	return READ_ONCE(rq->sched.deadline);
> +}
> +
>   static inline int rq_prio(const struct i915_request *rq)
>   {
>   	return READ_ONCE(rq->sched.attr.priority);
> @@ -67,6 +72,14 @@ static int ipi_get_prio(struct i915_request *rq)
>   	return xchg(&rq->sched.ipi_priority, I915_PRIORITY_INVALID);
>   }
>   
> +static u64 ipi_get_deadline(struct i915_request *rq)
> +{
> +	if (READ_ONCE(rq->sched.ipi_deadline) == I915_DEADLINE_NEVER)
> +		return I915_DEADLINE_NEVER;
> +
> +	return xchg64(&rq->sched.ipi_deadline, I915_DEADLINE_NEVER);
> +}
> +
>   static void ipi_schedule(struct work_struct *wrk)
>   {
>   	struct i915_sched_ipi *ipi = container_of(wrk, typeof(*ipi), work);
> @@ -74,9 +87,11 @@ static void ipi_schedule(struct work_struct *wrk)
>   
>   	do {
>   		struct i915_request *rn = xchg(&rq->sched.ipi_link, NULL);
> +		u64 deadline;
>   		int prio;
>   
>   		prio = ipi_get_prio(rq);
> +		deadline = ipi_get_deadline(rq);
>   
>   		/*
>   		 * For cross-engine scheduling to work we rely on one of two
> @@ -101,6 +116,7 @@ static void ipi_schedule(struct work_struct *wrk)
>   		 */
>   		local_bh_disable();
>   		i915_request_set_priority(rq, prio);
> +		i915_request_set_deadline(rq, deadline);
>   		local_bh_enable();
>   
>   		i915_request_put(rq);
> @@ -158,7 +174,10 @@ i915_sched_default_revoke_context(struct intel_context *ce,
>   
>   void i915_sched_select_mode(struct i915_sched *se, enum i915_sched_mode mode)
>   {
> -	switch (mode) {
> +	switch (min_t(int, mode, CONFIG_DRM_I915_SCHED)) {
> +	case I915_SCHED_MODE_DEADLINE:
> +		__set_bit(I915_SCHED_DEADLINE_BIT, &se->flags);
> +		fallthrough;
>   	case I915_SCHED_MODE_PRIORITY:
>   		__set_bit(I915_SCHED_PRIORITY_BIT, &se->flags);
>   		fallthrough;
> @@ -175,8 +194,8 @@ static void init_priolist(struct i915_priolist_root *const root)
>   	struct i915_priolist *pl = &root->sentinel;
>   
>   	memset_p((void **)pl->next, pl, ARRAY_SIZE(pl->next));
> +	pl->deadline = I915_DEADLINE_NEVER;
>   	pl->requests.prev = NULL;
> -	pl->priority = INT_MIN;
>   	pl->level = -1;
>   }
>   
> @@ -339,19 +358,20 @@ static inline unsigned int random_level(struct i915_priolist_root *root)
>   }
>   
>   static struct list_head *
> -lookup_priolist(struct i915_sched *se, int prio)
> +lookup_priolist(struct i915_sched * const se, u64 deadline)
>   {
>   	struct i915_priolist *update[I915_PRIOLIST_HEIGHT];
>   	struct i915_priolist_root *const root = &se->queue;
>   	struct i915_priolist *pl, *tmp;
>   	int lvl;
>   
> +	GEM_BUG_ON(deadline == I915_DEADLINE_NEVER);
>   	lockdep_assert_held(&se->lock);
>   	if (unlikely(se->no_priolist))
> -		prio = I915_PRIORITY_NORMAL;
> +		deadline = 0;
>   
>   	for_each_priolist(pl, root) { /* recycle any empty elements before us */
> -		if (pl->priority <= prio || !list_empty(&pl->requests))
> +		if (pl->deadline >= deadline || !list_empty(&pl->requests))
>   			break;
>   
>   		__i915_sched_dequeue_next(se);
> @@ -361,14 +381,14 @@ lookup_priolist(struct i915_sched *se, int prio)
>   	pl = &root->sentinel;
>   	lvl = pl->level;
>   	while (lvl >= 0) {
> -		while (tmp = pl->next[lvl], tmp->priority >= prio)
> +		while (tmp = pl->next[lvl], tmp->deadline <= deadline)
>   			pl = tmp;
> -		if (pl->priority == prio)
> +		if (pl->deadline == deadline)
>   			goto out;
>   		update[lvl--] = pl;
>   	}
>   
> -	if (prio == I915_PRIORITY_NORMAL) {
> +	if (!deadline) {
>   		pl = &se->default_priolist;
>   	} else if (!pl_empty(&root->sentinel.requests)) {
>   		pl = pl_pop(&root->sentinel.requests);
> @@ -376,7 +396,7 @@ lookup_priolist(struct i915_sched *se, int prio)
>   		pl = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
>   		/* Convert an allocation failure to a priority bump */
>   		if (unlikely(!pl)) {
> -			prio = I915_PRIORITY_NORMAL; /* recurses just once */
> +			deadline = 0; /* recurses just once */
>   
>   			/*
>   			 * To maintain ordering with all rendering, after an
> @@ -392,7 +412,7 @@ lookup_priolist(struct i915_sched *se, int prio)
>   		}
>   	}
>   
> -	pl->priority = prio;
> +	pl->deadline = deadline;
>   	INIT_LIST_HEAD(&pl->requests);
>   
>   	lvl = random_level(root);
> @@ -420,7 +440,7 @@ lookup_priolist(struct i915_sched *se, int prio)
>   		chk = &root->sentinel;
>   		lvl = chk->level;
>   		do {
> -			while (tmp = chk->next[lvl], tmp->priority >= prio)
> +			while (tmp = chk->next[lvl], tmp->deadline <= deadline)
>   				chk = tmp;
>   		} while (--lvl >= 0);
>   
> @@ -438,7 +458,7 @@ static void __remove_priolist(struct i915_sched *se, struct list_head *plist)
>   	struct i915_priolist *pl, *tmp;
>   	struct i915_priolist *old =
>   		container_of(plist, struct i915_priolist, requests);
> -	int prio = old->priority;
> +	u64 deadline = old->deadline;
>   	int lvl;
>   
>   	lockdep_assert_held(&se->lock);
> @@ -448,11 +468,11 @@ static void __remove_priolist(struct i915_sched *se, struct list_head *plist)
>   	lvl = pl->level;
>   	GEM_BUG_ON(lvl < 0);
>   
> -	if (prio != I915_PRIORITY_NORMAL)
> +	if (deadline)
>   		pl_push(old, &pl->requests);
>   
>   	do {
> -		while (tmp = pl->next[lvl], tmp->priority > prio)
> +		while (tmp = pl->next[lvl], tmp->deadline < deadline)
>   			pl = tmp;
>   		if (lvl <= old->level) {
>   			pl->next[lvl] = old->next[lvl];
> @@ -495,7 +515,7 @@ struct i915_priolist *__i915_sched_dequeue_next(struct i915_sched *se)
>   	GEM_BUG_ON(pl == s);
>   
>   	/* Keep pl->next[0] valid for for_each_priolist iteration */
> -	if (pl->priority != I915_PRIORITY_NORMAL)
> +	if (pl->deadline)
>   		pl_push(pl, &s->requests);
>   
>   	lvl = pl->level;
> @@ -531,52 +551,267 @@ stack_pop(struct i915_request *rq,
>   	return rq;
>   }
>   
> -static inline bool need_preempt(int prio, int active)
> +static void ipi_deadline(struct i915_request *rq, u64 deadline)
>   {
> -	/*
> -	 * Allow preemption of low -> normal -> high, but we do
> -	 * not allow low priority tasks to preempt other low priority
> -	 * tasks under the impression that latency for low priority
> -	 * tasks does not matter (as much as background throughput),
> -	 * so kiss.
> -	 */
> -	return prio >= max(I915_PRIORITY_NORMAL, active);
> +	u64 old = READ_ONCE(rq->sched.ipi_deadline);
> +
> +	do {
> +		if (deadline >= old)
> +			return;
> +	} while (!try_cmpxchg64(&rq->sched.ipi_deadline, &old, deadline));
> +
> +	__ipi_add(rq);
>   }
>   
> -static void kick_submission(struct intel_engine_cs *engine,
> -			    const struct i915_request *rq,
> -			    int prio)
> +static bool is_first_priolist(const struct i915_sched *se,
> +			      const struct list_head *requests)
>   {
> -	const struct i915_request *inflight;
> +	return requests == &se->queue.sentinel.next[0]->requests;
> +}
> +
> +static bool
> +__i915_request_set_deadline(struct i915_sched * const se,
> +			    struct i915_request *rq,
> +			    u64 deadline)
> +{
> +	struct intel_engine_cs *engine = rq->engine;
> +	struct list_head *pos = &rq->sched.signalers_list;
> +	struct list_head *plist;
> +
> +	if (unlikely(!i915_request_in_priority_queue(rq))) {
> +		rq->sched.deadline = deadline;
> +		return false;
> +	}
> +
> +	/* Fifo and depth-first replacement ensure our deps execute first */
> +	plist = lookup_priolist(se, deadline);
> +
> +	rq->sched.dfs.prev = NULL;
> +	do {
> +		if (i915_sched_has_deadlines(se)) {
> +			list_for_each_continue(pos, &rq->sched.signalers_list) {
> +				struct i915_dependency *p =
> +					list_entry(pos, typeof(*p), signal_link);
> +				struct i915_request *s =
> +					container_of(p->signaler, typeof(*s), sched);
> +
> +				if (rq_deadline(s) <= deadline)
> +					continue;
> +
> +				if (__i915_request_is_complete(s))
> +					continue;
> +
> +				if (s->engine != engine) {
> +					ipi_deadline(s, deadline);
> +					continue;
> +				}
> +
> +				/* Remember our position along this branch */
> +				rq = stack_push(s, rq, pos);
> +				pos = &rq->sched.signalers_list;
> +			}
> +		}
> +
> +		RQ_TRACE(rq, "set-deadline:%llu\n", deadline);
> +		WRITE_ONCE(rq->sched.deadline, deadline);
> +
> +		/*
> +		 * Once the request is ready, it will be placed into the
> +		 * priority lists and then onto the HW runlist. Before the
> +		 * request is ready, it does not contribute to our preemption
> +		 * decisions and we can safely ignore it, as it will, and
> +		 * any preemption required, be dealt with upon submission.
> +		 * See engine->submit_request()
> +		 */
> +		GEM_BUG_ON(i915_request_get_scheduler(rq) != se);
> +		if (i915_request_in_priority_queue(rq))
> +			remove_from_priolist(se, rq, plist, true);
> +	} while ((rq = stack_pop(rq, &pos)));
> +
> +	return is_first_priolist(se, plist);
> +}
> +
> +void i915_request_set_deadline(struct i915_request *rq, u64 deadline)
> +{
> +	struct intel_engine_cs *engine;
> +	unsigned long flags;
> +
> +	if (deadline >= rq_deadline(rq))
> +		return;
> +
> +	engine = lock_engine_irqsave(rq, flags);
> +	if (!i915_sched_has_deadlines(&engine->sched))
> +		goto unlock;
> +
> +	if (deadline >= rq_deadline(rq))
> +		goto unlock;
> +
> +	if (__i915_request_is_complete(rq))
> +		goto unlock;
> +
> +	rcu_read_lock();
> +	if (__i915_request_set_deadline(&engine->sched, rq, deadline))
> +		i915_sched_kick(&engine->sched);
> +	rcu_read_unlock();
> +	GEM_BUG_ON(rq_deadline(rq) != deadline);
> +
> +unlock:
> +	spin_unlock_irqrestore(&engine->sched.lock, flags);
> +}
> +
> +static u64 prio_slice(int prio)
> +{
> +	u64 slice;
> +	int sf;
>   
>   	/*
> -	 * We only need to kick the tasklet once for the high priority
> -	 * new context we add into the queue.
> +	 * This is the central heuristic to the virtual deadlines. By
> +	 * imposing that each task takes an equal amount of time, we
> +	 * let each client have an equal slice of the GPU time. By
> +	 * bringing the virtual deadline forward, that client will then
> +	 * have more GPU time, and vice versa a lower priority client will
> +	 * have a later deadline and receive less GPU time.
> +	 *
> +	 * In BFS/MuQSS, the prio_ratios[] are based on the task nice range of
> +	 * [-20, 20], with each lower priority having a ~10% longer deadline,
> +	 * with the note that the proportion of CPU time between two clients
> +	 * of different priority will be the square of the relative prio_slice.
> +	 *
> +	 * This property that the budget of each client is proportional to
> +	 * the relative priority, and that the scheduler fairly distributes
> +	 * work according to that budget, opens up a very powerful tool
> +	 * for managing clients.
> +	 *
> +	 * In contrast, this prio_slice() curve was chosen because it gave good
> +	 * results with igt/gem_exec_schedule. It may not be the best choice!
> +	 *
> +	 * With a 1ms scheduling quantum:
> +	 *
> +	 *   MAX USER:  ~32us deadline
> +	 *   0:         ~16ms deadline
> +	 *   MIN_USER: 1000ms deadline
>   	 */
> -	if (prio <= engine->execlists.queue_priority_hint)
> -		return;
>   
> -	/* Nothing currently active? We're overdue for a submission! */
> -	inflight = execlists_active(&engine->execlists);
> -	if (!inflight)
> -		return;
> +	if (prio >= __I915_PRIORITY_KERNEL__)
> +		return INT_MAX - prio;
> +
> +	slice = __I915_PRIORITY_KERNEL__ - prio;
> +	if (prio >= 0)
> +		sf = 20 - 6;
> +	else
> +		sf = 20 - 1;
> +
> +	return slice << sf;
> +}
> +
> +static u64 virtual_deadline(u64 kt, int priority)
> +{
> +	return i915_sched_to_ticks(kt + prio_slice(priority));
> +}
> +
> +u64 i915_scheduler_next_virtual_deadline(int priority)
> +{
> +	return virtual_deadline(ktime_get_mono_fast_ns(), priority);
> +}

This helper becomes a bit odd in that its only two callers are rewind 
and defer. And it queries ktime, whereas before the deadline was set based 
on signalers.

Where is the place which sets the ktime-based deadline (converted to 
ticks) for requests with no signalers?
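
For a request with no signalers, the closest I can trace from the hunks 
above is i915_request_enqueue() -> earliest_deadline() -> signal_deadline(), 
where 'last' starts at ktime_get_mono_fast_ns() and, with nothing to wait 
on, is never raised. As a sketch of that no-signaler path (my own folding 
of the call chain, not code from the patch):

	/* hypothetical helper, collapsing the hunks quoted above */
	static u64 no_signaler_deadline(const struct i915_request *rq)
	{
		/* signal_deadline(): nothing to wait for, so "ready" is now */
		u64 kt = ktime_get_mono_fast_ns();

		/* virtual_deadline(): push out by the priority's slice,
		 * then bucket into ticks */
		return i915_sched_to_ticks(kt + prio_slice(rq_prio(rq)));
	}

with i915_request_enqueue() then storing the min() of that and 
rq_deadline(rq). Is that the intended place, or is there another one I am 
missing?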

> +
> +static u64 signal_deadline(const struct i915_request *rq)
> +{
> +	u64 last = ktime_get_mono_fast_ns();
> +	const struct i915_dependency *p;
>   
>   	/*
> -	 * If we are already the currently executing context, don't
> -	 * bother evaluating if we should preempt ourselves.
> +	 * Find the earliest point at which we will become 'ready',
> +	 * which we infer from the deadline of all active signalers.
> +	 * We will position ourselves at the end of that chain of work.
>   	 */
> -	if (inflight->context == rq->context)
> -		return;
>   
> -	SCHED_TRACE(&engine->sched,
> -		    "bumping queue-priority-hint:%d for rq:" RQ_FMT ", inflight:" RQ_FMT " prio %d\n",
> -		    prio,
> -		    RQ_ARG(rq), RQ_ARG(inflight),
> -		    inflight->sched.attr.priority);
> +	rcu_read_lock();
> +	for_each_signaler(p, rq) {
> +		const struct i915_request *s =
> +			container_of(p->signaler, typeof(*s), sched);
> +		u64 deadline;
> +		int prio;
>   
> -	engine->execlists.queue_priority_hint = prio;
> -	if (need_preempt(prio, rq_prio(inflight)))
> -		intel_engine_kick_scheduler(engine);
> +		if (__i915_request_is_complete(s))
> +			continue;
> +
> +		if (s->timeline == rq->timeline &&
> +		    __i915_request_has_started(s))
> +			continue;
> +
> +		prio = rq_prio(s);
> +		if (prio < rq_prio(rq))
> +			continue;
> +
> +		deadline = rq_deadline(s);
> +		if (deadline == I915_DEADLINE_NEVER) /* retired & reused */
> +			continue;
> +
> +		if (s->context == rq->context) /* break ties in favour of hot */
> +			deadline--;
> +
> +		deadline = i915_sched_to_ns(deadline);
> +		if (p->flags & I915_DEPENDENCY_WEAK)
> +			deadline -= prio_slice(prio);
> +
> +		last = max(last, deadline);
> +	}
> +	rcu_read_unlock();
> +
> +	return last;
> +}
> +
> +static int adj_prio(const struct i915_request *rq)
> +{
> +	int prio = rq_prio(rq);
> +
> +	/*
> +	 * Deprioritize semaphore waiters. We only want to run these if there
> +	 * is nothing ready to run first.
> +	 *
> +	 * Note by giving a more distant deadline (due to a lower priority)
> +	 * we do not prevent them from having a slice of the GPU, and if there
> +	 * is still contention at that point, we expect to immediately yield
> +	 * on the semaphore.
> +	 *
> +	 * When all semaphores are signaled, we will update the request
> +	 * to remove the semaphore penalty.
> +	 */
> +	if (!i915_sw_fence_signaled(&rq->semaphore))
> +		prio -= __I915_PRIORITY_KERNEL__;
> +
> +	return prio;
> +}
> +
> +static u64
> +earliest_deadline(const struct i915_sched *se, const struct i915_request *rq)
> +{
> +	/*
> +	 * At its heart, the scheduler is simply a topological sort into
> +	 * a linear sequence of requests. As we use a single ascending index,
> +	 * we can repurpose the sort to achieve different goals, or to disable
> +	 * the sort entirely and funnel all requests onto a single list for
> +	 * immediate extraction.
> +	 */
> +	if (i915_sched_has_deadlines(se))
> +		return virtual_deadline(signal_deadline(rq), rq_prio(rq));
> +	else if (i915_sched_has_priorities(se))
> +		return INT_MAX - rq_prio(rq);
> +	else
> +		return 0;
> +}
> +
> +static bool
> +set_earliest_deadline(struct i915_sched *se, struct i915_request *rq, u64 old)
> +{
> +	u64 dl;
> +
> +	/* Recompute our deadlines and promote after a priority change */
> +	dl = min(earliest_deadline(se, rq), rq_deadline(rq));
> +	if (dl >= old)
> +		return false;
> +
> +	return __i915_request_set_deadline(se, rq, dl);
>   }
>   
>   static void ipi_priority(struct i915_request *rq, int prio)
> @@ -591,17 +826,16 @@ static void ipi_priority(struct i915_request *rq, int prio)
>   	__ipi_add(rq);
>   }
>   
> -static void __i915_request_set_priority(struct i915_request *rq, int prio)
> +static bool
> +__i915_request_set_priority(struct i915_sched * const se,
> +			    struct i915_request *rq,
> +			    int prio)
>   {
>   	struct intel_engine_cs *engine = rq->engine;
> -	struct i915_sched *se = intel_engine_get_scheduler(engine);
>   	struct list_head *pos = &rq->sched.signalers_list;
> -	struct list_head *plist;
> +	bool kick = false;
>   
> -	SCHED_TRACE(&engine->sched, "PI for " RQ_FMT ", prio:%d\n",
> -		    RQ_ARG(rq), prio);
> -
> -	plist = lookup_priolist(se, prio);
> +	SCHED_TRACE(se, "PI for " RQ_FMT ", prio:%d\n", RQ_ARG(rq), prio);
>   
>   	/*
>   	 * Recursively bump all dependent priorities to match the new request.
> @@ -623,31 +857,37 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio)
>   	 */
>   	rq->sched.dfs.prev = NULL;
>   	do {
> -		list_for_each_continue(pos, &rq->sched.signalers_list) {
> -			struct i915_dependency *p =
> -				list_entry(pos, typeof(*p), signal_link);
> -			struct i915_request *s =
> -				container_of(p->signaler, typeof(*s), sched);
> +		struct i915_request *next;
>   
> -			if (rq_prio(s) >= prio)
> -				continue;
> +		if (i915_sched_has_priorities(i915_request_get_scheduler(rq))) {
> +			list_for_each_continue(pos, &rq->sched.signalers_list) {
> +				struct i915_dependency *p =
> +					list_entry(pos, typeof(*p), signal_link);
> +				struct i915_request *s =
> +					container_of(p->signaler, typeof(*s), sched);
>   
> -			if (__i915_request_is_complete(s))
> -				continue;
> +				if (rq_prio(s) >= prio)
> +					continue;
>   
> -			if (s->engine != engine) {
> -				ipi_priority(s, prio);
> -				continue;
> +				if (__i915_request_is_complete(s))
> +					continue;
> +
> +				if (s->engine != engine) {
> +					ipi_priority(s, prio);
> +					continue;
> +				}
> +
> +				/* Remember our position along this branch */
> +				rq = stack_push(s, rq, pos);
> +				pos = &rq->sched.signalers_list;
>   			}
> -
> -			/* Remember our position along this branch */
> -			rq = stack_push(s, rq, pos);
> -			pos = &rq->sched.signalers_list;
>   		}
>   
>   		RQ_TRACE(rq, "set-priority:%d\n", prio);
>   		WRITE_ONCE(rq->sched.attr.priority, prio);
>   
> +		next = stack_pop(rq, &pos);
> +
>   		/*
>   		 * Once the request is ready, it will be placed into the
>   		 * priority lists and then onto the HW runlist. Before the
> @@ -656,16 +896,15 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio)
>   		 * any preemption required, be dealt with upon submission.
>   		 * See engine->submit_request()
>   		 */
> -		if (!i915_request_is_ready(rq))
> -			continue;
> -
>   		GEM_BUG_ON(rq->engine != engine);
> -		if (i915_request_in_priority_queue(rq))
> -			remove_from_priolist(se, rq, plist, true);
> +		if (i915_request_is_ready(rq) &&
> +		    set_earliest_deadline(se, rq, rq_deadline(rq)))
> +			kick = true;
>   
> -		/* Defer (tasklet) submission until after all updates. */
> -		kick_submission(engine, rq, prio);
> -	} while ((rq = stack_pop(rq, &pos)));
> +		rq = next;
> +	} while (rq);
> +
> +	return kick;
>   }
>   
>   #define all_signalers_checked(p, rq) \
> @@ -718,13 +957,9 @@ void i915_request_set_priority(struct i915_request *rq, int prio)
>   	if (__i915_request_is_complete(rq))
>   		goto unlock;
>   
> -	if (!i915_sched_has_priorities(&engine->sched)) {
> -		rq->sched.attr.priority = prio;
> -		goto unlock;
> -	}
> -
>   	rcu_read_lock();
> -	__i915_request_set_priority(rq, prio);
> +	if (__i915_request_set_priority(&engine->sched, rq, prio))
> +		i915_sched_kick(&engine->sched);
>   	rcu_read_unlock();
>   	GEM_BUG_ON(rq_prio(rq) != prio);
>   
> @@ -737,7 +972,7 @@ void __i915_sched_defer_request(struct intel_engine_cs *engine,
>   {
>   	struct list_head *pos = &rq->sched.waiters_list;
>   	struct i915_sched *se = intel_engine_get_scheduler(engine);
> -	const int prio = rq_prio(rq);
> +	u64 deadline = rq_deadline(rq);
>   	struct i915_request *rn;
>   	LIST_HEAD(dfs);
>   
> @@ -746,6 +981,9 @@ void __i915_sched_defer_request(struct intel_engine_cs *engine,
>   	lockdep_assert_held(&se->lock);
>   	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags));
>   
> +	if (i915_sched_has_deadlines(se))
> +		deadline = max(deadline, i915_scheduler_next_virtual_deadline(adj_prio(rq)));
> +
>   	/*
>   	 * When we defer a request, we must maintain its order with respect
>   	 * to those that are waiting upon it. So we traverse its chain of
> @@ -771,62 +1009,51 @@ void __i915_sched_defer_request(struct intel_engine_cs *engine,
>   				   __i915_request_has_started(w) &&
>   				   !__i915_request_is_complete(rq));
>   
> +			/* An unready waiter imposes no deadline */
>   			if (!i915_request_in_priority_queue(w))
>   				continue;
>   
>   			/*
> -			 * We also need to reorder within the same priority.
> +			 * We also need to reorder within the same deadline.
>   			 *
>   			 * This is unlike priority-inheritance, where if the
>   			 * signaler already has a higher priority [earlier
>   			 * deadline] than us, we can ignore as it will be
>   			 * scheduled first. If a waiter already has the
> -			 * same priority, we still have to push it to the end
> +			 * same deadline, we still have to push it to the end
>   			 * of the list. This unfortunately means we cannot
>   			 * use the rq_deadline() itself as a 'visited' bit.
>   			 */
> -			if (rq_prio(w) < prio)
> +			if (rq_deadline(w) > deadline)
>   				continue;
>   
> -			GEM_BUG_ON(rq_prio(w) != prio);
> -
>   			/* Remember our position along this branch */
>   			rq = stack_push(w, rq, pos);
>   			pos = &rq->sched.waiters_list;
>   		}
>   
> +		RQ_TRACE(rq, "set-deadline:%llu\n", deadline);
> +		WRITE_ONCE(rq->sched.deadline, deadline);
> +
>   		/* Note list is reversed for waiters wrt signal hierarchy */
> -		GEM_BUG_ON(rq->engine != engine);
> +		GEM_BUG_ON(i915_request_get_scheduler(rq) != se);
>   		remove_from_priolist(se, rq, &dfs, false);
>   
>   		/* Track our visit, and prevent duplicate processing */
>   		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
>   	} while ((rq = stack_pop(rq, &pos)));
>   
> -	pos = lookup_priolist(se, prio);
> +	pos = lookup_priolist(se, deadline);
>   	list_for_each_entry_safe(rq, rn, &dfs, sched.link) {
>   		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
>   		list_add_tail(&rq->sched.link, pos);
>   	}
>   }
>   
> -static void queue_request(struct i915_sched *se, struct i915_request *rq)
> +static bool queue_request(struct i915_sched *se, struct i915_request *rq)
>   {
> -	GEM_BUG_ON(!list_empty(&rq->sched.link));
> -	list_add_tail(&rq->sched.link, lookup_priolist(se, rq_prio(rq)));
>   	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> -}
> -
> -static bool submit_queue(struct intel_engine_cs *engine,
> -			 const struct i915_request *rq)
> -{
> -	struct intel_engine_execlists *execlists = &engine->execlists;
> -
> -	if (rq_prio(rq) <= execlists->queue_priority_hint)
> -		return false;
> -
> -	execlists->queue_priority_hint = rq_prio(rq);
> -	return true;
> +	return set_earliest_deadline(se, rq, I915_DEADLINE_NEVER);
>   }
>   
>   static bool hold_request(const struct i915_request *rq)
> @@ -864,8 +1091,8 @@ static bool ancestor_on_hold(const struct i915_sched *se,
>   
>   void i915_request_enqueue(struct i915_request *rq)
>   {
> -	struct intel_engine_cs *engine = rq->engine;
> -	struct i915_sched *se = intel_engine_get_scheduler(engine);
> +	struct i915_sched *se = i915_request_get_scheduler(rq);
> +	u64 dl = earliest_deadline(se, rq);
>   	unsigned long flags;
>   	bool kick = false;
>   
> @@ -880,11 +1107,11 @@ void i915_request_enqueue(struct i915_request *rq)
>   		list_add_tail(&rq->sched.link, &se->hold);
>   		i915_request_set_hold(rq);
>   	} else {
> -		queue_request(se, rq);
> -
> +		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> +		kick = __i915_request_set_deadline(se, rq,
> +						   min(dl, rq_deadline(rq)));

What is this min for? dl has already been computed above from rq, so I 
wonder why rq_deadline has to be considered again.

Because earliest_deadline does not actually consider rq->sched.deadline? 
So conceptually earliest_deadline would be described as what?
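
If I follow the earlier hunks right, earliest_deadline() only derives a 
value from the signalers and prio_slice(), and never reads 
rq->sched.deadline itself, so the min() presumably keeps any earlier 
deadline that was already written to the request before it became ready 
(e.g. a cross-engine set_deadline landing while the request was not yet in 
the priority queue). Roughly, as an annotated sketch of what ends up stored 
(names as in the hunk above):

	dl = min(earliest_deadline(se, rq), /* signalers + prio_slice() */
		 rq_deadline(rq));          /* anything assigned pre-readiness */

If that is the intent, a short comment there would help.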

> +		GEM_BUG_ON(rq_deadline(rq) == I915_DEADLINE_NEVER);
>   		GEM_BUG_ON(i915_sched_is_idle(se));
> -
> -		kick = submit_queue(engine, rq);
>   	}
>   
>   	GEM_BUG_ON(list_empty(&rq->sched.link));
> @@ -898,8 +1125,8 @@ __i915_sched_rewind_requests(struct intel_engine_cs *engine)
>   {
>   	struct i915_sched *se = intel_engine_get_scheduler(engine);
>   	struct i915_request *rq, *rn, *active = NULL;
> +	u64 deadline = I915_DEADLINE_NEVER;
>   	struct list_head *pl;
> -	int prio = I915_PRIORITY_INVALID;
>   
>   	lockdep_assert_held(&se->lock);
>   
> @@ -911,13 +1138,21 @@ __i915_sched_rewind_requests(struct intel_engine_cs *engine)
>   
>   		__i915_request_unsubmit(rq);
>   
> -		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> -		if (rq_prio(rq) != prio) {
> -			prio = rq_prio(rq);
> -			pl = lookup_priolist(se, prio);
> +		if (__i915_request_has_started(rq) &&
> +		    i915_sched_has_deadlines(se)) {
> +			u64 deadline =
> +				i915_scheduler_next_virtual_deadline(rq_prio(rq));
> +			rq->sched.deadline = min(rq_deadline(rq), deadline);
> +		}
> +		GEM_BUG_ON(rq_deadline(rq) == I915_DEADLINE_NEVER);
> +
> +		if (rq_deadline(rq) != deadline) {
> +			deadline = rq_deadline(rq);
> +			pl = lookup_priolist(se, deadline);
>   		}
>   		GEM_BUG_ON(i915_sched_is_idle(se));
>   
> +		GEM_BUG_ON(i915_request_in_priority_queue(rq));
>   		list_move(&rq->sched.link, pl);
>   		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
>   
> @@ -1023,14 +1258,10 @@ void __i915_sched_resume_request(struct intel_engine_cs *engine,
>   {
>   	struct i915_sched *se = intel_engine_get_scheduler(engine);
>   	LIST_HEAD(list);
> +	bool submit = false;
>   
>   	lockdep_assert_held(&se->lock);
>   
> -	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
> -		engine->execlists.queue_priority_hint = rq_prio(rq);
> -		i915_sched_kick(se);
> -	}
> -
>   	if (!i915_request_on_hold(rq))
>   		return;
>   
> @@ -1051,7 +1282,7 @@ void __i915_sched_resume_request(struct intel_engine_cs *engine,
>   		i915_request_clear_hold(rq);
>   		list_del_init(&rq->sched.link);
>   
> -		queue_request(se, rq);
> +		submit |= queue_request(se, rq);
>   
>   		/* Also release any children on this engine that are ready */
>   		for_each_waiter(p, rq) {
> @@ -1081,6 +1312,24 @@ void __i915_sched_resume_request(struct intel_engine_cs *engine,
>   
>   		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
>   	} while (rq);
> +
> +	if (submit)
> +		i915_sched_kick(se);
> +}
> +
> +static u64
> +update_deadline(const struct i915_request *rq)
> +{
> +	return earliest_deadline(i915_request_get_scheduler(rq), rq);
> +}
> +
> +void i915_request_update_deadline(struct i915_request *rq)
> +{
> +	if (!i915_request_in_priority_queue(rq))
> +		return;
> +
> +	/* Recompute our deadlines and promote after a priority change */
> +	i915_request_set_deadline(rq, update_deadline(rq));
>   }
>   
>   void i915_sched_resume_request(struct intel_engine_cs *engine,
> @@ -1134,10 +1383,12 @@ void i915_sched_node_init(struct i915_sched_node *node)
>   void i915_sched_node_reinit(struct i915_sched_node *node)
>   {
>   	node->attr.priority = I915_PRIORITY_INVALID;
> +	node->deadline = I915_DEADLINE_NEVER;
>   	node->semaphores = 0;
>   	node->flags = 0;
>   
>   	GEM_BUG_ON(node->ipi_link);
> +	node->ipi_deadline = I915_DEADLINE_NEVER;
>   	node->ipi_priority = I915_PRIORITY_INVALID;
>   
>   	GEM_BUG_ON(!list_empty(&node->signalers_list));
> @@ -1378,6 +1629,20 @@ print_request_ring(struct drm_printer *m, const struct i915_request *rq)
>   	}
>   }
>   
> +static const char *repr_mode(const struct i915_sched *se)
> +{
> +	if (i915_sched_has_deadlines(se))
> +		return "Deadline";
> +
> +	if (i915_sched_has_priorities(se))
> +		return "Priority";
> +
> +	if (i915_sched_is_active(se))
> +		return "FIFO";
> +
> +	return "None";
> +}
> +
>   void i915_sched_show(struct drm_printer *m,
>   		     struct i915_sched *se,
>   		     void (*show_request)(struct drm_printer *m,
> @@ -1419,6 +1684,9 @@ void i915_sched_show(struct drm_printer *m,
>   		}
>   	}
>   
> +	drm_printf(m, "Scheduler: %s (%s)\n", repr_mode(se),
> +		   enableddisabled(test_bit(I915_SCHED_ENABLE_BIT,
> +					    &se->flags)));
>   	drm_printf(m, "Tasklet queued? %s (%s)\n",
>   		   yesno(test_bit(TASKLET_STATE_SCHED, &se->tasklet.state)),
>   		   enableddisabled(!atomic_read(&se->tasklet.count)));
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 872d221f6ba7..14714e56ad80 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -47,7 +47,14 @@ void i915_sched_select_mode(struct i915_sched *se, enum i915_sched_mode mode);
>   void i915_sched_park(struct i915_sched *se);
>   void i915_sched_fini(struct i915_sched *se);
>   
> +void i915_sched_select_mode(struct i915_sched *se, enum i915_sched_mode mode);
> +
>   void i915_request_set_priority(struct i915_request *request, int prio);
> +void i915_request_set_deadline(struct i915_request *request, u64 deadline);
> +
> +void i915_request_update_deadline(struct i915_request *request);
> +
> +u64 i915_scheduler_next_virtual_deadline(int priority);
>   
>   void i915_request_enqueue(struct i915_request *request);
>   
> @@ -85,11 +92,14 @@ static inline void i915_sched_disable(struct i915_sched *se)
>   	clear_bit(I915_SCHED_ENABLE_BIT, &se->flags);
>   }
>   
> -void __i915_priolist_free(struct i915_priolist *p);
> -static inline void i915_priolist_free(struct i915_priolist *p)
> +static inline u64 i915_sched_to_ticks(ktime_t kt)
>   {
> -	if (p->priority != I915_PRIORITY_NORMAL)
> -		__i915_priolist_free(p);
> +	return ktime_to_ns(kt) >> I915_SCHED_DEADLINE_SHIFT;
> +}
> +
> +static inline u64 i915_sched_to_ns(u64 deadline)
> +{
> +	return deadline << I915_SCHED_DEADLINE_SHIFT;
>   }
>   
>   static inline bool i915_sched_is_idle(const struct i915_sched *se)
> diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
> index bc668f375097..89cccda35ecd 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler_types.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
> @@ -22,6 +22,7 @@ enum {
>   	I915_SCHED_ENABLE_BIT = 0,
>   	I915_SCHED_ACTIVE_BIT, /* can reorder the request flow */
>   	I915_SCHED_PRIORITY_BIT, /* priority sorting of queue */
> +	I915_SCHED_DEADLINE_BIT, /* sorting by virtual deadline */
>   	I915_SCHED_TIMESLICE_BIT, /* multitasking for long workloads */
>   	I915_SCHED_PREEMPT_RESET_BIT, /* reset if preemption times out */
>   	I915_SCHED_BUSYWAIT_BIT, /* preempt-to-busy */
> @@ -51,6 +52,7 @@ enum i915_sched_mode {
>   	I915_SCHED_MODE_NONE = -1, /* inactive, no bubble prevention */
>   	I915_SCHED_MODE_FIFO, /* pass-through of ready, first in first out */
>   	I915_SCHED_MODE_PRIORITY, /* reorder strictly by priority */
> +	I915_SCHED_MODE_DEADLINE, /* reorder to meet soft deadlines; fair */
>   };
>   
>   /**
> @@ -207,8 +209,31 @@ struct i915_sched_node {
>   #define I915_SCHED_HAS_EXTERNAL_CHAIN	BIT(0)
>   	unsigned long semaphores;
>   
> +	/**
> +	 * @deadline: [virtual] deadline
> +	 *
> +	 * When the request is ready for execution, it is given a quota
> +	 * (the engine's timeslice) and a virtual deadline. The virtual
> +	 * deadline is derived from the current time:
> +	 *     ktime_get() + (prio_ratio * timeslice)
> +	 *
> +	 * Requests are then executed in order of deadline completion.
> +	 * Requests with earlier deadlines than currently executing on
> +	 * the engine will preempt the active requests.
> +	 *
> +	 * By treating it as a virtual deadline, we use it as a hint for
> +	 * when it is appropriate for a request to start with respect to
> +	 * all other requests in the system. It is not a hard deadline, as
> +	 * we allow requests to miss them, and we do not account for the
> +	 * request runtime.
> +	 */
> +	u64 deadline;
> +#define I915_SCHED_DEADLINE_SHIFT 19 /* i.e. roughly 500us buckets */
> +#define I915_DEADLINE_NEVER U64_MAX
> +
>   	/* handle being scheduled for PI from outside of our active.lock */
>   	struct i915_request *ipi_link;
> +	u64 ipi_deadline;
>   	int ipi_priority;
>   };
>   
> @@ -236,14 +261,28 @@ struct i915_dependency {
>   
>   static inline bool i915_sched_is_active(const struct i915_sched *se)
>   {
> +	if (CONFIG_DRM_I915_SCHED < 0)
> +		return false;
> +
>   	return test_bit(I915_SCHED_ACTIVE_BIT, &se->flags);
>   }
>   
>   static inline bool i915_sched_has_priorities(const struct i915_sched *se)
>   {
> +	if (CONFIG_DRM_I915_SCHED < 1)
> +		return false;
> +
>   	return test_bit(I915_SCHED_PRIORITY_BIT, &se->flags);
>   }
>   
> +static inline bool i915_sched_has_deadlines(const struct i915_sched *se)
> +{
> +	if (CONFIG_DRM_I915_SCHED < 2)
> +		return false;
> +
> +	return test_bit(I915_SCHED_DEADLINE_BIT, &se->flags);
> +}
> +
>   static inline bool i915_sched_has_timeslices(const struct i915_sched *se)
>   {
>   	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index 8035ea7565ed..c5d7427bd429 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -2129,6 +2129,7 @@ static int measure_preemption(struct intel_context *ce)
>   
>   		intel_ring_advance(rq, cs);
>   		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
> +		rq->sched.deadline = 0;
>   
>   		elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
>   		i915_request_add(rq);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
> index 2bb2d3d07d06..59df7f834dad 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
> @@ -12,6 +12,40 @@
>   #include "selftests/igt_spinner.h"
>   #include "selftests/i915_random.h"
>   
> +static int mock_scheduler_slices(void *dummy)
> +{
> +	u64 min, max, normal, kernel;
> +
> +	min = prio_slice(I915_PRIORITY_MIN);
> +	pr_info("%8s slice: %lluus\n", "min", min >> 10);
> +
> +	normal = prio_slice(0);
> +	pr_info("%8s slice: %lluus\n", "normal", normal >> 10);
> +
> +	max = prio_slice(I915_PRIORITY_MAX);
> +	pr_info("%8s slice: %lluus\n", "max", max >> 10);
> +
> +	kernel = prio_slice(I915_PRIORITY_BARRIER);
> +	pr_info("%8s slice: %lluus\n", "kernel", kernel >> 10);
> +
> +	if (kernel != 0) {
> +		pr_err("kernel prio slice should be 0\n");
> +		return -EINVAL;
> +	}
> +
> +	if (max >= normal) {
> +		pr_err("maximum prio slice should be shorter than normal\n");
> +		return -EINVAL;
> +	}
> +
> +	if (min <= normal) {
> +		pr_err("minimum prio slice should be longer than normal\n");
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
>   static int mock_skiplist_levels(void *dummy)
>   {
>   	struct i915_priolist_root root = {};
> @@ -54,6 +88,7 @@ static int mock_skiplist_levels(void *dummy)
>   int i915_scheduler_mock_selftests(void)
>   {
>   	static const struct i915_subtest tests[] = {
> +		SUBTEST(mock_scheduler_slices),
>   		SUBTEST(mock_skiplist_levels),
>   	};
>   
> @@ -556,6 +591,53 @@ static int igt_priority_chains(void *arg)
>   	return igt_schedule_chains(arg, igt_priority);
>   }
>   
> +static bool igt_deadline(struct i915_request *rq,
> +			 unsigned long v, unsigned long e)
> +{
> +	i915_request_set_deadline(rq, 0);
> +	GEM_BUG_ON(rq_deadline(rq) != 0);
> +	return true;
> +}
> +
> +static int igt_deadline_chains(void *arg)
> +{
> +	return igt_schedule_chains(arg, igt_deadline);
> +}
> +
> +static bool igt_defer(struct i915_request *rq, unsigned long v, unsigned long e)
> +{
> +	struct intel_engine_cs *engine = rq->engine;
> +	struct i915_sched *se = intel_engine_get_scheduler(engine);
> +
> +	/* XXX No generic means to unwind incomplete requests yet */
> +	if (!i915_request_in_priority_queue(rq))
> +		return false;
> +
> +	if (!intel_engine_has_preemption(engine))
> +		return false;
> +
> +	spin_lock_irq(&se->lock);
> +
> +	/* Push all the requests to the same deadline */
> +	__i915_request_set_deadline(se, rq, 0);
> +	GEM_BUG_ON(rq_deadline(rq) != 0);
> +
> +	/* Then the very first request must be the one everyone depends on */
> +	rq = list_first_entry(lookup_priolist(se, 0), typeof(*rq), sched.link);
> +	GEM_BUG_ON(rq->engine != engine);
> +
> +	/* Deferring the first request will then have to defer all requests */
> +	__i915_sched_defer_request(engine, rq);
> +
> +	spin_unlock_irq(&se->lock);
> +	return true;
> +}
> +
> +static int igt_deadline_defer(void *arg)
> +{
> +	return igt_schedule_chains(arg, igt_defer);
> +}
> +
>   static struct i915_request *
>   __write_timestamp(struct intel_engine_cs *engine,
>   		  struct drm_i915_gem_object *obj,
> @@ -767,13 +849,22 @@ static int igt_priority_cycle(void *arg)
>   	return __igt_schedule_cycle(arg, igt_priority);
>   }
>   
> +static int igt_deadline_cycle(void *arg)
> +{
> +	return __igt_schedule_cycle(arg, igt_deadline);
> +}
> +
>   int i915_scheduler_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
> +		SUBTEST(igt_deadline_chains),
>   		SUBTEST(igt_priority_chains),
>   
>   		SUBTEST(igt_schedule_cycle),
> +		SUBTEST(igt_deadline_cycle),
>   		SUBTEST(igt_priority_cycle),
> +
> +		SUBTEST(igt_deadline_defer),
>   	};
>   
>   	return i915_subtests(tests, i915);
> @@ -909,9 +1000,54 @@ static int sparse_priority(void *arg)
>   	return sparse(arg, set_priority);
>   }
>   
> +static u64 __set_deadline(struct i915_request *rq, u64 deadline)
> +{
> +	u64 dt;
> +
> +	preempt_disable();
> +	dt = ktime_get_raw_fast_ns();
> +	i915_request_set_deadline(rq, deadline);
> +	dt = ktime_get_raw_fast_ns() - dt;
> +	preempt_enable();
> +
> +	return dt;
> +}
> +
> +static bool set_deadline(struct i915_request *rq,
> +			 unsigned long v, unsigned long e)
> +{
> +	report("set-deadline", v, e, __set_deadline(rq, 0));
> +	return true;
> +}
> +
> +static int single_deadline(void *arg)
> +{
> +	return single(arg, set_deadline);
> +}
> +
> +static int wide_deadline(void *arg)
> +{
> +	return wide(arg, set_deadline);
> +}
> +
> +static int inv_deadline(void *arg)
> +{
> +	return inv(arg, set_deadline);
> +}
> +
> +static int sparse_deadline(void *arg)
> +{
> +	return sparse(arg, set_deadline);
> +}
> +
>   int i915_scheduler_perf_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
> +		SUBTEST(single_deadline),
> +		SUBTEST(wide_deadline),
> +		SUBTEST(inv_deadline),
> +		SUBTEST(sparse_deadline),
> +
>   		SUBTEST(single_priority),
>   		SUBTEST(wide_priority),
>   		SUBTEST(inv_priority),
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index cda0f391d965..4efc5801173c 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -525,6 +525,7 @@ typedef struct drm_i915_irq_wait {
>   #define   I915_SCHEDULER_CAP_SEMAPHORES	(1ul << 3)
>   #define   I915_SCHEDULER_CAP_ENGINE_BUSY_STATS	(1ul << 4)
>   #define   I915_SCHEDULER_CAP_TIMESLICING	(1ul << 5)
> +#define   I915_SCHEDULER_CAP_FAIR	(1ul << 6)
>   
>   #define I915_PARAM_HUC_STATUS		 42
>   
> 

Regards,

Tvrtko

* Re: [Intel-gfx] [PATCH 09/31] drm/i915: Replace priolist rbtree with a skiplist
  2021-02-08 12:46     ` Chris Wilson
@ 2021-02-08 15:10       ` Tvrtko Ursulin
  0 siblings, 0 replies; 54+ messages in thread
From: Tvrtko Ursulin @ 2021-02-08 15:10 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 08/02/2021 12:46, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2021-02-08 12:29:14)
>>
>> On 08/02/2021 10:52, Chris Wilson wrote:
>>> +static void remove_from_priolist(struct i915_sched *se,
>>> +                              struct i915_request *rq,
>>> +                              struct list_head *list,
>>> +                              bool tail)
>>> +{
>>> +     struct list_head *prev = rq->sched.link.prev;
>>
>> This depends on rq being at the head of it's list?
> 
> Not depends. We are testing if the list is singular, that is by removing
> this request from the i915_priolist.requests that list becomes empty,
> and so the i915_priolist can be removed from the skiplist.

Ah so obvious now, thanks.
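
For anyone else following along, the picture as I now understand it 
(a comment-only sketch, not code from the patch):

	/*
	 * A priority level holding a single request:
	 *
	 *     pl->requests <-> rq->sched.link             (singular)
	 *
	 * Here prev == &pl->requests, so after __list_del_entry():
	 *
	 *     pl->requests <-> pl->requests               (empty)
	 *
	 * and list_empty(prev) tells us the i915_priolist can be reaped
	 * from the skiplist. If rq had neighbours, list_empty(prev) stays
	 * false and the level is kept.
	 */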

> 
>>> +
>>> +     GEM_BUG_ON(!i915_request_in_priority_queue(rq));
>>> +
>>> +     __list_del_entry(&rq->sched.link);
>>> +     if (tail)
>>> +             list_add_tail(&rq->sched.link, list);
>>> +     else
>>> +             list_add(&rq->sched.link, list);
>>
>> So it is more move than remove(_from_priolist) ?
> 
> Yes, we can quite happily just keep the list_move(), except we then end
> up with lots of empty levels. At first I thought the walk through those
> (during dequeue) would be cheaper than removing. The max lock holdtime
> strongly favours the removal as we move requests around (which will
> happen in dribs-and-drabs) over doing a bulk remove at dequeue.

Give it a name that reflects it is a move, like move_to_priolist?

> 
>>> +     /* If we just removed the last element in the old plist, delete it */
>>> +     if (list_empty(prev))
>>> +             __remove_priolist(se, prev);
>>> +}
>>> +
>>> +struct i915_priolist *__i915_sched_dequeue_next(struct i915_sched *se)
>>> +{
>>> +     struct i915_priolist * const s = &se->queue.sentinel;
>>> +     struct i915_priolist *pl = s->next[0];
>>> +     int lvl;
>>> +
>>> +     GEM_BUG_ON(!list_empty(&pl->requests));
>>
>> Lost as to why pl->requests has to be empty at this point. Considering:
>>
>> +#define i915_sched_dequeue(se, pl, rq, rn) \
>> +       for ((pl) = (se)->queue.sentinel.next[0]; \
>> +            (pl) != &(se)->queue.sentinel; \
>> +            (pl) = __i915_sched_dequeue_next(se)) \
>> +               priolist_for_each_request_safe(rq, rn, pl)
>> +
>>
>> I also don't understand what it would de-queue. A whole priolist worth of
>> requests at a time? But it can't be empty to dequeue something. And who
>> puts any unconsumed requests back somewhere in this case.
> 
> It's a double for-loop. I think the flattening of the logic is worth it.
> 
> During dequeue, we always move the very first request onto the next list
> (i.e. i915_sched.active). Then when we have finished with all the
> requests in one priority level, we move onto the next i915_priolist
> (calling __i915_sched_dequeue_next).
> 
> So in __i915_sched_dequeue_next, we are always dealing with an empty
> i915_priolist and want to advance the start of the skiplist to the next.

Ah yes, __i915_sched_dequeue_next is only reached if there isn't a "goto done" 
from within the inner loop (priolist_for_each_request_safe). Well, it's a 
bit fragile if someone does a break one day. But I guess the GEM_BUG_ON will 
be hit then, so it's okay.
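
For reference, the macro quoted above expands to roughly this (a sketch, 
not the literal preprocessor output):

	pl = se->queue.sentinel.next[0];
	while (pl != &se->queue.sentinel) {
		priolist_for_each_request_safe(rq, rn, pl) {
			/* caller's body: each rq is taken off pl->requests
			 * (or we "goto done" out of both loops) */
		}
		/* only reached once pl->requests has been emptied */
		pl = __i915_sched_dequeue_next(se);
	}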

Right, I have some more questions for which I'll start a new sub-thread.

Regards,

Tvrtko

> 
> I was thinking that in order to hide the double for-loop, we could
> handle the non-empty i915_priolist case causing it to break out of the
> outer loop. So we could get rid of the goto done.
> -Chris
> 

* Re: [Intel-gfx] [PATCH 09/31] drm/i915: Replace priolist rbtree with a skiplist
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 09/31] drm/i915: Replace priolist rbtree with a skiplist Chris Wilson
  2021-02-08 12:29   ` Tvrtko Ursulin
@ 2021-02-08 15:23   ` Tvrtko Ursulin
  2021-02-08 16:19     ` Chris Wilson
  1 sibling, 1 reply; 54+ messages in thread
From: Tvrtko Ursulin @ 2021-02-08 15:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 08/02/2021 10:52, Chris Wilson wrote:
> Replace the priolist rbtree with a skiplist. The crucial difference is
> that walking and removing the first element of a skiplist is O(1), but
> O(lgN) for an rbtree, as we need to rebalance on remove. This is a
> hindrance for submission latency as it occurs between picking a request
> for the priolist and submitting it to hardware, as well as effectively
> tripling the number of O(lgN) operations required under the irqoff lock.
> This is critical to reducing the latency jitter with multiple clients.
> 
> The downsides to skiplists are that lookup/insertion is only
> probabilistically O(lgN) and there is a significant memory penalty too,
> as each skip node is larger than the rbtree equivalent. Furthermore, we
> don't use dynamic arrays for the skiplist, so the allocation is fixed,
> and imposes an upper bound on the scalability wrt to the number of
> inflight requests.
> 
> In the following patches, we introduce a new sort key to the scheduler,
> a virtual deadline. This imposes a different structure to the tree.
> Using a priority sort, we have very few priority levels active at any
> time, most likely just the default priority and so the rbtree degenerates
> to a single element containing the list of all ready requests. The
> deadlines in contrast are very sparse, and typically each request has a
> unique deadline. Instead of being able to simply walk the list during
> dequeue, with the deadline scheduler we have to iterate through the bst
> on the critical submission path. Skiplists are vastly superior in this
> instance due to the O(1) iteration during dequeue, with very similar
> characteristics [on average] to the rbtree for insertion.
> 
> This means that by using skiplists we can introduce a sparse sort key
> without degrading latency on the critical submission path.
> 
> As an example, one simple case where we try to do lots of
> semi-independent work without any priority management (gem_exec_parallel),
> the lock hold times were:
> [worst]        [total]    [avg]
>   973.05     6301584.84     0.35 # plain rbtree
>   559.82     5424915.25     0.33 # best rbtree with pruning
>   208.21     3898784.09     0.24 # skiplist
>    34.05     5784106.01     0.32 # rbtree without deadlines
>    23.35     4152999.80     0.24 # skiplist without deadlines
> 
> Based on the skiplist implementation by Dr Con Kolivas for MuQSS.
> 
> References: https://en.wikipedia.org/wiki/Skip_list
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   .../drm/i915/gt/intel_execlists_submission.c  | 168 +++++-----
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  41 +--
>   drivers/gpu/drm/i915/i915_priolist_types.h    |  64 +++-
>   drivers/gpu/drm/i915/i915_scheduler.c         | 304 +++++++++++++-----
>   drivers/gpu/drm/i915/i915_scheduler.h         |  16 +-
>   drivers/gpu/drm/i915/i915_scheduler_types.h   |   2 +-
>   .../drm/i915/selftests/i915_mock_selftests.h  |   1 +
>   .../gpu/drm/i915/selftests/i915_scheduler.c   |  53 ++-
>   8 files changed, 454 insertions(+), 195 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index 78fda9b4f626..4a0258347c10 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -254,11 +254,6 @@ static void ring_set_paused(const struct intel_engine_cs *engine, int state)
>   		wmb();
>   }
>   
> -static struct i915_priolist *to_priolist(struct rb_node *rb)
> -{
> -	return rb_entry(rb, struct i915_priolist, node);
> -}
> -
>   static int rq_prio(const struct i915_request *rq)
>   {
>   	return READ_ONCE(rq->sched.attr.priority);
> @@ -282,15 +277,27 @@ static int effective_prio(const struct i915_request *rq)
>   	return prio;
>   }
>   
> +static struct i915_request *first_request(const struct i915_sched *se)
> +{
> +	struct i915_priolist *pl = se->queue.sentinel.next[0];
> +
> +	if (pl == &se->queue.sentinel)
> +		return NULL;
> +
> +	return list_first_entry_or_null(&pl->requests,
> +					struct i915_request,
> +					sched.link);
> +}
> +
>   static int queue_prio(const struct i915_sched *se)
>   {
> -	struct rb_node *rb;
> +	struct i915_request *rq;
>   
> -	rb = rb_first_cached(&se->queue);
> -	if (!rb)
> +	rq = first_request(se);
> +	if (!rq)
>   		return INT_MIN;
>   
> -	return to_priolist(rb)->priority;
> +	return rq_prio(rq);
>   }
>   
>   static int virtual_prio(const struct intel_engine_execlists *el)
> @@ -300,7 +307,7 @@ static int virtual_prio(const struct intel_engine_execlists *el)
>   	return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
>   }
>   
> -static bool need_preempt(const struct intel_engine_cs *engine,
> +static bool need_preempt(struct intel_engine_cs *engine,
>   			 const struct i915_request *rq)
>   {
>   	const struct i915_sched *se = &engine->sched;
> @@ -1144,7 +1151,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	struct i915_request **port = execlists->pending;
>   	struct i915_request ** const last_port = port + execlists->port_mask;
>   	struct i915_request *last, * const *active;
> +	struct i915_request *rq, *rn;
>   	struct virtual_engine *ve;
> +	struct i915_priolist *pl;
>   	struct rb_node *rb;
>   	bool submit = false;
>   
> @@ -1355,87 +1364,79 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   			break;
>   	}
>   
> -	while ((rb = rb_first_cached(&se->queue))) {
> -		struct i915_priolist *p = to_priolist(rb);
> -		struct i915_request *rq, *rn;
> +	i915_sched_dequeue(se, pl, rq, rn) {
> +		bool merge = true;
>   
> -		priolist_for_each_request_consume(rq, rn, p) {
> -			bool merge = true;
> +		/*
> +		 * Can we combine this request with the current port?
> +		 * It has to be the same context/ringbuffer and not
> +		 * have any exceptions (e.g. GVT saying never to
> +		 * combine contexts).
> +		 *
> +		 * If we can combine the requests, we can execute both
> +		 * by updating the RING_TAIL to point to the end of the
> +		 * second request, and so we never need to tell the
> +		 * hardware about the first.
> +		 */
> +		if (last && !can_merge_rq(last, rq)) {
> +			/*
> +			 * If we are on the second port and cannot
> +			 * combine this request with the last, then we
> +			 * are done.
> +			 */
> +			if (port == last_port)
> +				goto done;
>   
>   			/*
> -			 * Can we combine this request with the current port?
> -			 * It has to be the same context/ringbuffer and not
> -			 * have any exceptions (e.g. GVT saying never to
> -			 * combine contexts).
> -			 *
> -			 * If we can combine the requests, we can execute both
> -			 * by updating the RING_TAIL to point to the end of the
> -			 * second request, and so we never need to tell the
> -			 * hardware about the first.
> +			 * We must not populate both ELSP[] with the
> +			 * same LRCA, i.e. we must submit 2 different
> +			 * contexts if we submit 2 ELSP.
>   			 */
> -			if (last && !can_merge_rq(last, rq)) {
> -				/*
> -				 * If we are on the second port and cannot
> -				 * combine this request with the last, then we
> -				 * are done.
> -				 */
> -				if (port == last_port)
> -					goto done;
> +			if (last->context == rq->context)
> +				goto done;
>   
> -				/*
> -				 * We must not populate both ELSP[] with the
> -				 * same LRCA, i.e. we must submit 2 different
> -				 * contexts if we submit 2 ELSP.
> -				 */
> -				if (last->context == rq->context)
> -					goto done;
> +			if (i915_request_has_sentinel(last))
> +				goto done;
>   
> -				if (i915_request_has_sentinel(last))
> -					goto done;
> +			/*
> +			 * We avoid submitting virtual requests into
> +			 * the secondary ports so that we can migrate
> +			 * the request immediately to another engine
> +			 * rather than wait for the primary request.
> +			 */
> +			if (rq->execution_mask != engine->mask)
> +				goto done;
>   
> -				/*
> -				 * We avoid submitting virtual requests into
> -				 * the secondary ports so that we can migrate
> -				 * the request immediately to another engine
> -				 * rather than wait for the primary request.
> -				 */
> -				if (rq->execution_mask != engine->mask)
> -					goto done;
> +			/*
> +			 * If GVT overrides us we only ever submit
> +			 * port[0], leaving port[1] empty. Note that we
> +			 * also have to be careful that we don't queue
> +			 * the same context (even though a different
> +			 * request) to the second port.
> +			 */
> +			if (ctx_single_port_submission(last->context) ||
> +			    ctx_single_port_submission(rq->context))
> +				goto done;
>   
> -				/*
> -				 * If GVT overrides us we only ever submit
> -				 * port[0], leaving port[1] empty. Note that we
> -				 * also have to be careful that we don't queue
> -				 * the same context (even though a different
> -				 * request) to the second port.
> -				 */
> -				if (ctx_single_port_submission(last->context) ||
> -				    ctx_single_port_submission(rq->context))
> -					goto done;
> -
> -				merge = false;
> -			}
> -
> -			if (__i915_request_submit(rq)) {
> -				if (!merge) {
> -					*port++ = i915_request_get(last);
> -					last = NULL;
> -				}
> -
> -				GEM_BUG_ON(last &&
> -					   !can_merge_ctx(last->context,
> -							  rq->context));
> -				GEM_BUG_ON(last &&
> -					   i915_seqno_passed(last->fence.seqno,
> -							     rq->fence.seqno));
> -
> -				submit = true;
> -				last = rq;
> -			}
> +			merge = false;
>   		}
>   
> -		rb_erase_cached(&p->node, &se->queue);
> -		i915_priolist_free(p);
> +		if (__i915_request_submit(rq)) {
> +			if (!merge) {
> +				*port++ = i915_request_get(last);
> +				last = NULL;
> +			}
> +
> +			GEM_BUG_ON(last &&
> +				   !can_merge_ctx(last->context,
> +						  rq->context));
> +			GEM_BUG_ON(last &&
> +				   i915_seqno_passed(last->fence.seqno,
> +						     rq->fence.seqno));
> +
> +			submit = true;
> +			last = rq;
> +		}
>   	}
>   done:
>   	*port++ = i915_request_get(last);
> @@ -1456,7 +1457,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	 * request triggering preemption on the next dequeue (or subsequent
>   	 * interrupt for secondary ports).
>   	 */
> -	execlists->queue_priority_hint = queue_prio(se);
> +	execlists->queue_priority_hint = pl->priority;
>   	spin_unlock(&se->lock);
>   
>   	/*
> @@ -2716,7 +2717,6 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
>   	}
>   
>   	execlists->queue_priority_hint = INT_MIN;
> -	se->queue = RB_ROOT_CACHED;
>   
>   	GEM_BUG_ON(__tasklet_is_enabled(&se->tasklet));
>   	se->tasklet.callback = nop_submission_tasklet;
> @@ -3173,6 +3173,8 @@ static void virtual_context_exit(struct intel_context *ce)
>   
>   	for (n = 0; n < ve->num_siblings; n++)
>   		intel_engine_pm_put(ve->siblings[n]);
> +
> +	i915_sched_park(intel_engine_get_scheduler(&ve->base));
>   }
>   
>   static const struct intel_context_ops virtual_context_ops = {
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index d14b9db77df8..c16393df42a0 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -60,11 +60,6 @@
>   
>   #define GUC_REQUEST_SIZE 64 /* bytes */
>   
> -static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> -{
> -	return rb_entry(rb, struct i915_priolist, node);
> -}
> -
>   static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id)
>   {
>   	struct guc_stage_desc *base = guc->stage_desc_pool_vaddr;
> @@ -186,9 +181,10 @@ static void __guc_dequeue(struct intel_engine_cs *engine)
>   	struct i915_request **first = execlists->inflight;
>   	struct i915_request ** const last_port = first + execlists->port_mask;
>   	struct i915_request *last = first[0];
> +	struct i915_request *rq, *rn;
>   	struct i915_request **port;
> +	struct i915_priolist *pl;
>   	bool submit = false;
> -	struct rb_node *rb;
>   
>   	lockdep_assert_held(&se->lock);
>   
> @@ -205,32 +201,22 @@ static void __guc_dequeue(struct intel_engine_cs *engine)
>   	 * event.
>   	 */
>   	port = first;
> -	while ((rb = rb_first_cached(&se->queue))) {
> -		struct i915_priolist *p = to_priolist(rb);
> -		struct i915_request *rq, *rn;
> +	i915_sched_dequeue(se, pl, rq, rn) {
> +		if (last && rq->context != last->context) {
> +			if (port == last_port)
> +				goto done;
>   
> -		priolist_for_each_request_consume(rq, rn, p) {
> -			if (last && rq->context != last->context) {
> -				if (port == last_port)
> -					goto done;
> -
> -				*port = schedule_in(last,
> -						    port - execlists->inflight);
> -				port++;
> -			}
> -
> -			list_del_init(&rq->sched.link);
> -			__i915_request_submit(rq);
> -			submit = true;
> -			last = rq;
> +			*port = schedule_in(last, port - execlists->inflight);
> +			port++;
>   		}
>   
> -		rb_erase_cached(&p->node, &se->queue);
> -		i915_priolist_free(p);
> +		list_del_init(&rq->sched.link);
> +		__i915_request_submit(rq);
> +		submit = true;
> +		last = rq;
>   	}
>   done:
> -	execlists->queue_priority_hint =
> -		rb ? to_priolist(rb)->priority : INT_MIN;
> +	execlists->queue_priority_hint = pl->priority;
>   	if (submit) {
>   		*port = schedule_in(last, port - execlists->inflight);
>   		*++port = NULL;
> @@ -361,7 +347,6 @@ static void guc_reset_cancel(struct intel_engine_cs *engine)
>   	__i915_sched_cancel_queue(se);
>   
>   	execlists->queue_priority_hint = INT_MIN;
> -	se->queue = RB_ROOT_CACHED;
>   
>   	spin_unlock_irqrestore(&se->lock, flags);
>   	intel_engine_signal_breadcrumbs(engine);
> diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
> index bc2fa84f98a8..ee7482b9c813 100644
> --- a/drivers/gpu/drm/i915/i915_priolist_types.h
> +++ b/drivers/gpu/drm/i915/i915_priolist_types.h
> @@ -38,10 +38,72 @@ enum {
>   #define I915_PRIORITY_UNPREEMPTABLE INT_MAX
>   #define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1)
>   
> +/*
> + * The slab returns power-of-two chunks of memory, so fill out the
> + * node to the next cacheline.
> + *
> + * We can estimate how many requests the skiplist will scale to based
> + * on its height:
> + *   11 =>  4 million requests
> + *   12 => 16 million requests
> + */
> +#ifdef CONFIG_64BIT
> +#define I915_PRIOLIST_HEIGHT 12
> +#else
> +#define I915_PRIOLIST_HEIGHT 11
> +#endif
> +
> +/*
> + * i915_priolist forms a skiplist. The skiplist is built in layers,
> + * starting at the base [0] is a singly linked list of all i915_priolist.
> + * Each higher layer contains a fraction of the i915_priolist from the
> + * previous layer:
> + *
> + * S[0] 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF S
> + * E[1] >1>3>5>7>9>B>D>F>1>3>5>7>9>B>D>F>1>3>5>7>9>B>D>F>1>3>5>7>9>B>D>F E
> + * N[2] -->3-->7-->B-->F-->3-->7-->B-->F-->3-->7-->B-->F-->3-->7-->B-->F N
> + * T[3] ------>7------>F-------7------>F------>7------>F------>7------>F T
> + * I[4] -------------->F-------------->F-------------->F-------------->F I
> + * N[5] ------------------------------>F------------------------------>F N
> + * E[6] ------------------------------>F-------------------------------> E
> + * L[7] ---------------------------------------------------------------> L
> + *
> + * To iterate through all active i915_priolist, we only need to follow
> + * the chain in i915_priolist.next[0] (see for_each_priolist()).
> + *
> + * To quickly find a specific key (or insert point), we can perform a binary
> + * search by starting at the highest level and following the linked list
> + * at that level until we either find the node, or have gone passed the key.
> + * Then we descend a level, and start walking the list again starting from
> + * the current position, until eventually we find our key, or we run out of
> + * levels.
> + *
> + * https://en.wikipedia.org/wiki/Skip_list
> + */
>   struct i915_priolist {
>   	struct list_head requests;
> -	struct rb_node node;
>   	int priority;
> +
> +	int level;
> +	struct i915_priolist *next[I915_PRIOLIST_HEIGHT];
>   };
>   
> +struct i915_priolist_root {
> +	struct i915_priolist sentinel;
> +	u32 prng;
> +};
> +
> +#define i915_priolist_is_empty(root) ((root)->sentinel.level < 0)
> +
> +#define for_each_priolist(p, root) \
> +	for ((p) = (root)->sentinel.next[0]; \
> +	     (p) != &(root)->sentinel; \
> +	     (p) = (p)->next[0])
> +
> +#define priolist_for_each_request(it, plist) \
> +	list_for_each_entry(it, &(plist)->requests, sched.link)
> +
> +#define priolist_for_each_request_safe(it, n, plist) \
> +	list_for_each_entry_safe(it, n, &(plist)->requests, sched.link)
> +
>   #endif /* _I915_PRIOLIST_TYPES_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 312e1538d001..518eac67959e 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -4,7 +4,9 @@
>    * Copyright © 2018 Intel Corporation
>    */
>   
> +#include <linux/bitops.h>
>   #include <linux/mutex.h>
> +#include <linux/prandom.h>
>   
>   #include "gt/intel_ring.h"
>   #include "gt/intel_lrc_reg.h"
> @@ -168,6 +170,16 @@ void i915_sched_select_mode(struct i915_sched *se, enum i915_sched_mode mode)
>   	}
>   }
>   
> +static void init_priolist(struct i915_priolist_root *const root)
> +{
> +	struct i915_priolist *pl = &root->sentinel;
> +
> +	memset_p((void **)pl->next, pl, ARRAY_SIZE(pl->next));
> +	pl->requests.prev = NULL;
> +	pl->priority = INT_MIN;
> +	pl->level = -1;
> +}
> +
>   void i915_sched_init(struct i915_sched *se,
>   		     struct device *dev,
>   		     const char *name,
> @@ -183,9 +195,9 @@ void i915_sched_init(struct i915_sched *se,
>   
>   	se->mask = mask;
>   
> +	init_priolist(&se->queue);
>   	INIT_LIST_HEAD(&se->requests);
>   	INIT_LIST_HEAD(&se->hold);
> -	se->queue = RB_ROOT_CACHED;
>   
>   	init_ipi(&se->ipi);
>   
> @@ -194,8 +206,60 @@ void i915_sched_init(struct i915_sched *se,
>   	se->revoke_context = i915_sched_default_revoke_context;
>   }
>   
> +__maybe_unused static bool priolist_idle(struct i915_priolist_root *root)
> +{
> +	struct i915_priolist *pl = &root->sentinel;
> +	int lvl;
> +
> +	for (lvl = 0; lvl < ARRAY_SIZE(pl->next); lvl++) {
> +		if (pl->next[lvl] != pl) {
> +			GEM_TRACE_ERR("root[%d] is not empty\n", lvl);
> +			return false;
> +		}
> +	}
> +
> +	if (pl->level != -1) {
> +		GEM_TRACE_ERR("root is not clear: %d\n", pl->level);
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +static bool pl_empty(struct list_head *st)
> +{
> +	return !st->prev;
> +}
> +
> +static void pl_push(struct i915_priolist *pl, struct list_head *st)
> +{
> +	/* Keep list_empty(&pl->requests) valid for concurrent readers */
> +	pl->requests.prev = st->prev;
> +	st->prev = &pl->requests;
> +	GEM_BUG_ON(pl_empty(st));
> +}
> +
> +static struct i915_priolist *pl_pop(struct list_head *st)
> +{
> +	struct i915_priolist *pl;
> +
> +	GEM_BUG_ON(pl_empty(st));
> +	pl = container_of(st->prev, typeof(*pl), requests);
> +	st->prev = pl->requests.prev;
> +
> +	return pl;
> +}
> +
>   void i915_sched_park(struct i915_sched *se)
>   {
> +	struct i915_priolist_root *root = &se->queue;
> +	struct list_head *list = &root->sentinel.requests;
> +
> +	GEM_BUG_ON(!priolist_idle(root));
> +
> +	while (!pl_empty(list))
> +		kmem_cache_free(global.slab_priorities, pl_pop(list));
> +
>   	GEM_BUG_ON(!i915_sched_is_idle(se));
>   	se->no_priolist = false;
>   }
> @@ -251,70 +315,71 @@ static inline bool node_signaled(const struct i915_sched_node *node)
>   	return i915_request_completed(node_to_request(node));
>   }
>   
> -static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +static inline unsigned int random_level(struct i915_priolist_root *root)
>   {
> -	return rb_entry(rb, struct i915_priolist, node);
> -}
> -
> -static void assert_priolists(struct i915_sched * const se)
> -{
> -	struct rb_node *rb;
> -	long last_prio;
> -
> -	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> -		return;
> -
> -	GEM_BUG_ON(rb_first_cached(&se->queue) !=
> -		   rb_first(&se->queue.rb_root));
> -
> -	last_prio = INT_MAX;
> -	for (rb = rb_first_cached(&se->queue); rb; rb = rb_next(rb)) {
> -		const struct i915_priolist *p = to_priolist(rb);
> -
> -		GEM_BUG_ON(p->priority > last_prio);
> -		last_prio = p->priority;
> -	}
> +	/*
> +	 * Given a uniform distribution of random numbers over the u32, then
> +	 * the probability each bit being unset is P=0.5. The probability of a
> +	 * successive sequence of bits being unset is P(n) = 0.5^n [n > 0].
> +	 *   P(level:1) = 0.5
> +	 *   P(level:2) = 0.25
> +	 *   P(level:3) = 0.125
> +	 *   P(level:4) = 0.0625
> +	 *   ...
> +	 * So we can use ffs() on a good random number generator to pick our
> +	 * level. We divide by two to reduce the probability of choosing a
> +	 * level to .25, as the cost of descending a level is the same as
> +	 * following an extra link in the chain at that level (so we can
> +	 * pack more nodes into fewer levels without incurring extra cost,
> +	 * and allow scaling to higher volumes of requests without expanding
> +	 * the height of the skiplist).
> +	 */
> +	root->prng = next_pseudo_random32(root->prng);
> +	return  __ffs(root->prng) / 2;
>   }
>   
>   static struct list_head *
>   lookup_priolist(struct i915_sched *se, int prio)
>   {
> -	struct i915_priolist *p;
> -	struct rb_node **parent, *rb;
> -	bool first = true;
> +	struct i915_priolist *update[I915_PRIOLIST_HEIGHT];
> +	struct i915_priolist_root *const root = &se->queue;
> +	struct i915_priolist *pl, *tmp;
> +	int lvl;
>   
>   	lockdep_assert_held(&se->lock);
> -	assert_priolists(se);
> -
>   	if (unlikely(se->no_priolist))
>   		prio = I915_PRIORITY_NORMAL;
>   
> +	for_each_priolist(pl, root) { /* recycle any empty elements before us */
> +		if (pl->priority <= prio || !list_empty(&pl->requests))
> +			break;

The 'less' part of the less-or-equal condition keeps confusing me as a 
break criterion. If the premise is cleaning up, why break on the first smaller 
prio? Would the idea be to prune all empty lists up to, but not including, 
the lookup prio?
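
To spell out my reading of it (just the quoted loop rewritten, no
functional difference intended):

	for_each_priolist(pl, root) {
		if (pl->priority <= prio)	/* reached (or passed) the lookup prio */
			break;
		if (!list_empty(&pl->requests))	/* higher prio level still in use */
			break;
		__i915_sched_dequeue_next(se);	/* empty level above us, recycle it */
	}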

> +
> +		__i915_sched_dequeue_next(se);
> +	}
> +
>   find_priolist:
> -	/* most positive priority is scheduled first, equal priorities fifo */
> -	rb = NULL;
> -	parent = &se->queue.rb_root.rb_node;
> -	while (*parent) {
> -		rb = *parent;
> -		p = to_priolist(rb);
> -		if (prio > p->priority) {
> -			parent = &rb->rb_left;
> -		} else if (prio < p->priority) {
> -			parent = &rb->rb_right;
> -			first = false;
> -		} else {
> -			return &p->requests;
> -		}
> +	pl = &root->sentinel;
> +	lvl = pl->level;
> +	while (lvl >= 0) {
> +		while (tmp = pl->next[lvl], tmp->priority >= prio)
> +			pl = tmp;
> +		if (pl->priority == prio)
> +			goto out;
> +		update[lvl--] = pl;
>   	}
>   
>   	if (prio == I915_PRIORITY_NORMAL) {
> -		p = &se->default_priolist;
> +		pl = &se->default_priolist;
> +	} else if (!pl_empty(&root->sentinel.requests)) {
> +		pl = pl_pop(&root->sentinel.requests);
>   	} else {
> -		p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
> +		pl = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
>   		/* Convert an allocation failure to a priority bump */
> -		if (unlikely(!p)) {
> +		if (unlikely(!pl)) {
>   			prio = I915_PRIORITY_NORMAL; /* recurses just once */
>   
> -			/* To maintain ordering with all rendering, after an
> +			/*
> +			 * To maintain ordering with all rendering, after an
>   			 * allocation failure we have to disable all scheduling.
>   			 * Requests will then be executed in fifo, and schedule
>   			 * will ensure that dependencies are emitted in fifo.
> @@ -327,18 +392,123 @@ lookup_priolist(struct i915_sched *se, int prio)
>   		}
>   	}
>   
> -	p->priority = prio;
> -	INIT_LIST_HEAD(&p->requests);
> +	pl->priority = prio;
> +	INIT_LIST_HEAD(&pl->requests);
>   
> -	rb_link_node(&p->node, rb, parent);
> -	rb_insert_color_cached(&p->node, &se->queue, first);
> +	lvl = random_level(root);
> +	if (lvl > root->sentinel.level) {
> +		if (root->sentinel.level < I915_PRIOLIST_HEIGHT - 1) {
> +			lvl = ++root->sentinel.level;
> +			update[lvl] = &root->sentinel;
> +		} else {
> +			lvl = I915_PRIOLIST_HEIGHT - 1;
> +		}
> +	}
> +	GEM_BUG_ON(lvl < 0);
> +	GEM_BUG_ON(lvl >= ARRAY_SIZE(pl->next));
>   
> -	return &p->requests;
> +	pl->level = lvl;
> +	do {
> +		tmp = update[lvl];
> +		pl->next[lvl] = tmp->next[lvl];
> +		tmp->next[lvl] = pl;
> +	} while (--lvl >= 0);
> +
> +	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
> +		struct i915_priolist *chk;
> +
> +		chk = &root->sentinel;
> +		lvl = chk->level;
> +		do {
> +			while (tmp = chk->next[lvl], tmp->priority >= prio)
> +				chk = tmp;
> +		} while (--lvl >= 0);
> +
> +		GEM_BUG_ON(chk != pl);
> +	}
> +
> +out:
> +	GEM_BUG_ON(pl == &root->sentinel);
> +	return &pl->requests;
>   }
>   
> -void __i915_priolist_free(struct i915_priolist *p)
> +static void __remove_priolist(struct i915_sched *se, struct list_head *plist)
>   {
> -	kmem_cache_free(global.slab_priorities, p);
> +	struct i915_priolist_root *root = &se->queue;
> +	struct i915_priolist *pl, *tmp;
> +	struct i915_priolist *old =
> +		container_of(plist, struct i915_priolist, requests);
> +	int prio = old->priority;
> +	int lvl;
> +
> +	lockdep_assert_held(&se->lock);
> +	GEM_BUG_ON(!list_empty(plist));
> +
> +	pl = &root->sentinel;
> +	lvl = pl->level;
> +	GEM_BUG_ON(lvl < 0);
> +
> +	if (prio != I915_PRIORITY_NORMAL)
> +		pl_push(old, &pl->requests);
> +
> +	do {
> +		while (tmp = pl->next[lvl], tmp->priority > prio)
> +			pl = tmp;
> +		if (lvl <= old->level) {
> +			pl->next[lvl] = old->next[lvl];
> +			if (pl == &root->sentinel && old->next[lvl] == pl) {
> +				GEM_BUG_ON(pl->level != lvl);
> +				pl->level--;
> +			}
> +		}
> +	} while (--lvl >= 0);
> +	GEM_BUG_ON(tmp != old);
> +}
> +
> +static void remove_from_priolist(struct i915_sched *se,
> +				 struct i915_request *rq,
> +				 struct list_head *list,
> +				 bool tail)
> +{
> +	struct list_head *prev = rq->sched.link.prev;
> +
> +	GEM_BUG_ON(!i915_request_in_priority_queue(rq));
> +
> +	__list_del_entry(&rq->sched.link);
> +	if (tail)
> +		list_add_tail(&rq->sched.link, list);
> +	else
> +		list_add(&rq->sched.link, list);
> +
> +	/* If we just removed the last element in the old plist, delete it */
> +	if (list_empty(prev))
> +		__remove_priolist(se, prev);
> +}
> +
> +struct i915_priolist *__i915_sched_dequeue_next(struct i915_sched *se)
> +{
> +	struct i915_priolist * const s = &se->queue.sentinel;
> +	struct i915_priolist *pl = s->next[0];
> +	int lvl;
> +
> +	GEM_BUG_ON(!list_empty(&pl->requests));
> +	GEM_BUG_ON(pl == s);
> +
> +	/* Keep pl->next[0] valid for for_each_priolist iteration */
> +	if (pl->priority != I915_PRIORITY_NORMAL)
> +		pl_push(pl, &s->requests);
> +
> +	lvl = pl->level;
> +	GEM_BUG_ON(lvl < 0);
> +	do {
> +		s->next[lvl] = pl->next[lvl];
> +		if (pl->next[lvl] == s) {
> +			GEM_BUG_ON(s->level != lvl);
> +			s->level--;
> +		}
> +	} while (--lvl >= 0);
> +
> +	return pl->next[0];
>   }

If both __i915_sched_dequeue_next and __remove_priolist are removing an 
empty list from the hierarchy, why can't they share some code?

Regards,

Tvrtko

>   
>   static struct i915_request *
> @@ -491,7 +661,7 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio)
>   
>   		GEM_BUG_ON(rq->engine != engine);
>   		if (i915_request_in_priority_queue(rq))
> -			list_move_tail(&rq->sched.link, plist);
> +			remove_from_priolist(se, rq, plist, true);
>   
>   		/* Defer (tasklet) submission until after all updates. */
>   		kick_submission(engine, rq, prio);
> @@ -627,8 +797,7 @@ void __i915_sched_defer_request(struct intel_engine_cs *engine,
>   
>   		/* Note list is reversed for waiters wrt signal hierarchy */
>   		GEM_BUG_ON(rq->engine != engine);
> -		GEM_BUG_ON(!i915_request_in_priority_queue(rq));
> -		list_move(&rq->sched.link, &dfs);
> +		remove_from_priolist(se, rq, &dfs, false);
>   
>   		/* Track our visit, and prevent duplicate processing */
>   		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> @@ -927,7 +1096,7 @@ void i915_sched_resume_request(struct intel_engine_cs *engine,
>   void __i915_sched_cancel_queue(struct i915_sched *se)
>   {
>   	struct i915_request *rq, *rn;
> -	struct rb_node *rb;
> +	struct i915_priolist *pl;
>   
>   	lockdep_assert_held(&se->lock);
>   
> @@ -936,16 +1105,9 @@ void __i915_sched_cancel_queue(struct i915_sched *se)
>   		i915_request_put(i915_request_mark_eio(rq));
>   
>   	/* Flush the queued requests to the timeline list (for retiring). */
> -	while ((rb = rb_first_cached(&se->queue))) {
> -		struct i915_priolist *p = to_priolist(rb);
> -
> -		priolist_for_each_request_consume(rq, rn, p) {
> -			i915_request_put(i915_request_mark_eio(rq));
> -			__i915_request_submit(rq);
> -		}
> -
> -		rb_erase_cached(&p->node, &se->queue);
> -		i915_priolist_free(p);
> +	i915_sched_dequeue(se, pl, rq, rn) {
> +		i915_request_put(i915_request_mark_eio(rq));
> +		__i915_request_submit(rq);
>   	}
>   	GEM_BUG_ON(!i915_sched_is_idle(se));
>   
> @@ -1225,9 +1387,9 @@ void i915_sched_show(struct drm_printer *m,
>   		     unsigned int max)
>   {
>   	const struct i915_request *rq, *last;
> +	struct i915_priolist *pl;
>   	unsigned long flags;
>   	unsigned int count;
> -	struct rb_node *rb;
>   
>   	rcu_read_lock();
>   	spin_lock_irqsave(&se->lock, flags);
> @@ -1282,10 +1444,8 @@ void i915_sched_show(struct drm_printer *m,
>   
>   	last = NULL;
>   	count = 0;
> -	for (rb = rb_first_cached(&se->queue); rb; rb = rb_next(rb)) {
> -		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> -
> -		priolist_for_each_request(rq, p) {
> +	for_each_priolist(pl, &se->queue) {
> +		priolist_for_each_request(rq, pl) {
>   			if (count++ < max - 1)
>   				show_request(m, rq, "\t", 0);
>   			else
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index fe392109b112..872d221f6ba7 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -24,12 +24,6 @@ struct intel_engine_cs;
>   		  ##__VA_ARGS__);					\
>   } while (0)
>   
> -#define priolist_for_each_request(it, plist) \
> -	list_for_each_entry(it, &(plist)->requests, sched.link)
> -
> -#define priolist_for_each_request_consume(it, n, plist) \
> -	list_for_each_entry_safe(it, n, &(plist)->requests, sched.link)
> -
>   void i915_sched_node_init(struct i915_sched_node *node);
>   void i915_sched_node_reinit(struct i915_sched_node *node);
>   
> @@ -100,7 +94,7 @@ static inline void i915_priolist_free(struct i915_priolist *p)
>   
>   static inline bool i915_sched_is_idle(const struct i915_sched *se)
>   {
> -	return RB_EMPTY_ROOT(&se->queue.rb_root);
> +	return i915_priolist_is_empty(&se->queue);
>   }
>   
>   static inline bool
> @@ -168,6 +162,14 @@ i915_sched_get_active_request(const struct i915_sched *se)
>   	return NULL;
>   }
>   
> +/* Walk the scheduler queue of requests (in submission order) and remove them */
> +struct i915_priolist *__i915_sched_dequeue_next(struct i915_sched *se);
> +#define i915_sched_dequeue(se, pl, rq, rn) \
> +	for ((pl) = (se)->queue.sentinel.next[0]; \
> +	     (pl) != &(se)->queue.sentinel; \
> +	     (pl) = __i915_sched_dequeue_next(se)) \
> +		priolist_for_each_request_safe(rq, rn, pl)
> +
>   void i915_request_show_with_schedule(struct drm_printer *m,
>   				     const struct i915_request *rq,
>   				     const char *prefix,
> diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
> index 5ca2dc1b4fb5..bc668f375097 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler_types.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
> @@ -115,7 +115,7 @@ struct i915_sched {
>   	 * @queue is only used to transfer requests from the scheduler
>   	 * frontend to the back.
>   	 */
> -	struct rb_root_cached queue;
> +	struct i915_priolist_root queue;
>   
>   	/**
>   	 * @tasklet: softirq tasklet for bottom half
> diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> index 3db34d3eea58..946c93441c1f 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> @@ -25,6 +25,7 @@ selftest(ring, intel_ring_mock_selftests)
>   selftest(engine, intel_engine_cs_mock_selftests)
>   selftest(timelines, intel_timeline_mock_selftests)
>   selftest(requests, i915_request_mock_selftests)
> +selftest(scheduler, i915_scheduler_mock_selftests)
>   selftest(objects, i915_gem_object_mock_selftests)
>   selftest(phys, i915_gem_phys_mock_selftests)
>   selftest(dmabuf, i915_gem_dmabuf_mock_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
> index f54bdbeaa48b..2bb2d3d07d06 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
> @@ -12,6 +12,54 @@
>   #include "selftests/igt_spinner.h"
>   #include "selftests/i915_random.h"
>   
> +static int mock_skiplist_levels(void *dummy)
> +{
> +	struct i915_priolist_root root = {};
> +	struct i915_priolist *pl = &root.sentinel;
> +	IGT_TIMEOUT(end_time);
> +	unsigned long total;
> +	int count, lvl;
> +
> +	total = 0;
> +	do {
> +		for (count = 0; count < 16384; count++) {
> +			lvl = random_level(&root);
> +			if (lvl > pl->level) {
> +				if (lvl < I915_PRIOLIST_HEIGHT - 1)
> +					lvl = ++pl->level;
> +				else
> +					lvl = I915_PRIOLIST_HEIGHT - 1;
> +			}
> +
> +			pl->next[lvl] = ptr_inc(pl->next[lvl]);
> +		}
> +		total += count;
> +	} while (!__igt_timeout(end_time, NULL));
> +
> +	pr_info("Total %9lu\n", total);
> +	for (lvl = 0; lvl <= pl->level; lvl++) {
> +		int x = ilog2((unsigned long)pl->next[lvl]);
> +		char row[80];
> +
> +		memset(row, '*', x);
> +		row[x] = '\0';
> +
> +		pr_info(" [%2d] %9lu %s\n",
> +			lvl, (unsigned long)pl->next[lvl], row);
> +	}
> +
> +	return 0;
> +}
> +
> +int i915_scheduler_mock_selftests(void)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(mock_skiplist_levels),
> +	};
> +
> +	return i915_subtests(tests, NULL);
> +}
> +
>   static void scheduling_disable(struct intel_engine_cs *engine)
>   {
>   	engine->props.preempt_timeout_ms = 0;
> @@ -80,9 +128,9 @@ static int all_engines(struct drm_i915_private *i915,
>   static bool check_context_order(struct i915_sched *se)
>   {
>   	u64 last_seqno, last_context;
> +	struct i915_priolist *p;
>   	unsigned long count;
>   	bool result = false;
> -	struct rb_node *rb;
>   	int last_prio;
>   
>   	/* We expect the execution order to follow ascending fence-context */
> @@ -92,8 +140,7 @@ static bool check_context_order(struct i915_sched *se)
>   	last_context = 0;
>   	last_seqno = 0;
>   	last_prio = 0;
> -	for (rb = rb_first_cached(&se->queue); rb; rb = rb_next(rb)) {
> -		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> +	for_each_priolist(p, &se->queue) {
>   		struct i915_request *rq;
>   
>   		priolist_for_each_request(rq, p) {
> 

* Re: [Intel-gfx] [PATCH 10/31] drm/i915: Fair low-latency scheduling
  2021-02-08 14:56   ` Tvrtko Ursulin
@ 2021-02-08 15:29     ` Chris Wilson
  2021-02-08 16:03       ` Tvrtko Ursulin
  0 siblings, 1 reply; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 15:29 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2021-02-08 14:56:31)
> On 08/02/2021 10:52, Chris Wilson wrote:
> > +static bool need_preempt(const struct intel_engine_cs *engine,
> >                        const struct i915_request *rq)
> >   {
> >       const struct i915_sched *se = &engine->sched;
> > -     int last_prio;
> > +     const struct i915_request *first = NULL;
> > +     const struct i915_request *next;
> >   
> >       if (!i915_sched_use_busywait(se))
> >               return false;
> >   
> >       /*
> > -      * Check if the current priority hint merits a preemption attempt.
> > -      *
> > -      * We record the highest value priority we saw during rescheduling
> > -      * prior to this dequeue, therefore we know that if it is strictly
> > -      * less than the current tail of ESLP[0], we do not need to force
> > -      * a preempt-to-idle cycle.
> > -      *
> > -      * However, the priority hint is a mere hint that we may need to
> > -      * preempt. If that hint is stale or we may be trying to preempt
> > -      * ourselves, ignore the request.
> > -      *
> > -      * More naturally we would write
> > -      *      prio >= max(0, last);
> > -      * except that we wish to prevent triggering preemption at the same
> > -      * priority level: the task that is running should remain running
> > -      * to preserve FIFO ordering of dependencies.
> > +      * If this request is special and must not be interrupted at any
> > +      * cost, so be it. Note we are only checking the most recent request
> > +      * in the context and so may be masking an earlier vip request. It
> > +      * is hoped that under the conditions where nopreempt is used, this
> > +      * will not matter (i.e. all requests to that context will be
> > +      * nopreempt for as long as desired).
> >        */
> > -     last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
> > -     if (engine->execlists.queue_priority_hint <= last_prio)
> > +     if (i915_request_has_nopreempt(rq))
> >               return false;
> >   
> >       /*
> >        * Check against the first request in ELSP[1], it will, thanks to the
> >        * power of PI, be the highest priority of that context.
> >        */
> > -     if (!list_is_last(&rq->sched.link, &se->requests) &&
> > -         rq_prio(list_next_entry(rq, sched.link)) > last_prio)
> > -             return true;
> > +     next = next_elsp_request(se, rq);
> > +     if (dl_before(next, first))
> 
> Here first is always NULL so dl_before always returns true, meaning it 
> appears redundant to call it.

I was applying a pattern :)

> 
> > +             first = next;
> >   
> >       /*
> >        * If the inflight context did not trigger the preemption, then maybe
> > @@ -356,8 +343,31 @@ static bool need_preempt(struct intel_engine_cs *engine,
> >        * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
> >        * context, it's priority would not exceed ELSP[0] aka last_prio.
> >        */
> > -     return max(virtual_prio(&engine->execlists),
> > -                queue_prio(se)) > last_prio;
> > +     next = first_request(se);
> > +     if (dl_before(next, first))
> > +             first = next;
> > +
> > +     next = first_virtual(engine);
> > +     if (dl_before(next, first))
> > +             first = next;
> > +
> > +     if (!dl_before(first, rq))
> > +             return false;
> 
> Ends up earliest deadline between list of picks: elsp[1] (or maybe next 
> in context, depends on coalescing criteria), first in the priolist, 
> first virtual.
> 
> Virtual has a separate queue so that's understandable, but can "elsp[1]" 
> really have an earlier deadline than first_request() (head of the priolist)?

elsp[1] could have been promoted and thus now have an earlier deadline
than elsp[0]. Consider the heartbeat as a trivial example that is first
submitted at very low priority, but by the end has absolute priority.

> > +static u64 virtual_deadline(u64 kt, int priority)
> > +{
> > +     return i915_sched_to_ticks(kt + prio_slice(priority));
> > +}
> > +
> > +u64 i915_scheduler_next_virtual_deadline(int priority)
> > +{
> > +     return virtual_deadline(ktime_get_mono_fast_ns(), priority);
> > +}
> 
> This helper becomes a bit odd in that the only two callers are rewind 
> and defer. And it queries ktime, while before deadline was set based on 
> signalers.
> 
> Where is the place which set the ktime based deadline (converted to 
> ticks) for requests with no signalers?

signal_deadline() with no signalers returns now. So the first request in
a sequence is queued with virtual_deadline(now() + prio_slice()).
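
i.e. for the head of a sequence it boils down to roughly this (a
sketch, eliding the exact call chain):

	/* no signalers: the sequence starts now */
	u64 dl = virtual_deadline(ktime_get_mono_fast_ns(), rq_prio(rq));
	/* == i915_sched_to_ticks(now + prio_slice(rq_prio(rq))) */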

> >   void i915_request_enqueue(struct i915_request *rq)
> >   {
> > -     struct intel_engine_cs *engine = rq->engine;
> > -     struct i915_sched *se = intel_engine_get_scheduler(engine);
> > +     struct i915_sched *se = i915_request_get_scheduler(rq);
> > +     u64 dl = earliest_deadline(se, rq);
> >       unsigned long flags;
> >       bool kick = false;
> >   
> > @@ -880,11 +1107,11 @@ void i915_request_enqueue(struct i915_request *rq)
> >               list_add_tail(&rq->sched.link, &se->hold);
> >               i915_request_set_hold(rq);
> >       } else {
> > -             queue_request(se, rq);
> > -
> > +             set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> > +             kick = __i915_request_set_deadline(se, rq,
> > +                                                min(dl, rq_deadline(rq)));
> 
> What is this min for? Dl has been computed above based on rq, so I 
> wonder why rq_deadline has to be considered again.

earliest_deadline() only looks at the signalers (or now if none) and
picks the next deadline in that sequence. However, for some requests we may
set the deadline explicitly (e.g. the heartbeat has a known deadline, and for
vblank rendering we can approximate one), and so we also consider whatever
deadline has already been specified.
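
So the enqueue path effectively takes the earlier of the two, roughly:

	u64 dl = earliest_deadline(se, rq);	/* from the signaler chain, or now */

	dl = min(dl, rq_deadline(rq));		/* vs any explicitly set deadline */
	__i915_request_set_deadline(se, rq, dl);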

> Because earliest_deadline does not actually consider rq->sched.deadline? 
> So conceptually earliest_deadline would be described as what?

sequence_deadline() ?

earliest_deadline_for_this_sequence() ?
-Chris

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/31] drm/i915/gt: Ratelimit heartbeat completion probing
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (29 preceding siblings ...)
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 31/31] drm/i915/gt: Limit C-states while waiting for requests Chris Wilson
@ 2021-02-08 15:43 ` Patchwork
  2021-02-08 15:45 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
                   ` (2 subsequent siblings)
  33 siblings, 0 replies; 54+ messages in thread
From: Patchwork @ 2021-02-08 15:43 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/31] drm/i915/gt: Ratelimit heartbeat completion probing
URL   : https://patchwork.freedesktop.org/series/86841/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
206aa3c9677c drm/i915/gt: Ratelimit heartbeat completion probing
2494ed145240 drm/i915: Move context revocation to scheduler
3dbe5f872455 drm/i915: Introduce the scheduling mode
5b350d2836cf drm/i915: Move timeslicing flag to scheduler
4ab52e53c2c0 drm/i915/gt: Declare when we enabled timeslicing
-:15: WARNING:BAD_SIGN_OFF: Duplicate signature
#15: 
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

total: 0 errors, 1 warnings, 0 checks, 14 lines checked
483235989602 drm/i915: Move busywaiting control to the scheduler
7a092999c7b2 drm/i915: Move preempt-reset flag to the scheduler
b5a1080e1523 drm/i915: Fix the iterative dfs for defering requests
029c20aa2d56 drm/i915: Replace priolist rbtree with a skiplist
-:439: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'p' - possible side-effects?
#439: FILE: drivers/gpu/drm/i915/i915_priolist_types.h:98:
+#define for_each_priolist(p, root) \
+	for ((p) = (root)->sentinel.next[0]; \
+	     (p) != &(root)->sentinel; \
+	     (p) = (p)->next[0])

-:439: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'root' - possible side-effects?
#439: FILE: drivers/gpu/drm/i915/i915_priolist_types.h:98:
+#define for_each_priolist(p, root) \
+	for ((p) = (root)->sentinel.next[0]; \
+	     (p) != &(root)->sentinel; \
+	     (p) = (p)->next[0])

-:906: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'se' - possible side-effects?
#906: FILE: drivers/gpu/drm/i915/i915_scheduler.h:167:
+#define i915_sched_dequeue(se, pl, rq, rn) \
+	for ((pl) = (se)->queue.sentinel.next[0]; \
+	     (pl) != &(se)->queue.sentinel; \
+	     (pl) = __i915_sched_dequeue_next(se)) \
+		priolist_for_each_request_safe(rq, rn, pl)

-:906: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'pl' - possible side-effects?
#906: FILE: drivers/gpu/drm/i915/i915_scheduler.h:167:
+#define i915_sched_dequeue(se, pl, rq, rn) \
+	for ((pl) = (se)->queue.sentinel.next[0]; \
+	     (pl) != &(se)->queue.sentinel; \
+	     (pl) = __i915_sched_dequeue_next(se)) \
+		priolist_for_each_request_safe(rq, rn, pl)

-:952: WARNING:LINE_SPACING: Missing a blank line after declarations
#952: FILE: drivers/gpu/drm/i915/selftests/i915_scheduler.c:19:
+	struct i915_priolist *pl = &root.sentinel;
+	IGT_TIMEOUT(end_time);

total: 0 errors, 1 warnings, 4 checks, 904 lines checked
dbfb352bf7b0 drm/i915: Fair low-latency scheduling
9a611ddd1f7a drm/i915/gt: Specify a deadline for the heartbeat
022c4065c1a9 drm/i915: Extend the priority boosting for the display with a deadline
d8b43dca33f0 drm/i915/gt: Support virtual engine queues
27f940af2d22 drm/i915: Move saturated workload detection back to the context
-:29: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#29: 
References: 44d89409a12e ("drm/i915: Make the semaphore saturation mask global")

-:29: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 44d89409a12e ("drm/i915: Make the semaphore saturation mask global")'
#29: 
References: 44d89409a12e ("drm/i915: Make the semaphore saturation mask global")

total: 1 errors, 1 warnings, 0 checks, 78 lines checked
4109a26f879f drm/i915: Bump default timeslicing quantum to 5ms
2091c45fb062 drm/i915/gt: Delay taking irqoff for execlists submission
8b8ee05f1953 drm/i915/gt: Convert the legacy ring submission to use the scheduling interface
b8c672225e3d drm/i915/gt: Wrap intel_timeline.has_initial_breadcrumb
186491248d9b drm/i915/gt: Track timeline GGTT offset separately from subpage offset
cbdded299ad1 drm/i915/gt: Add timeline "mode"
75d076c67057 drm/i915/gt: Use indices for writing into relative timelines
b755887ce2eb drm/i915/selftests: Exercise relative timeline modes
3510121480a9 drm/i915/gt: Use ppHWSP for unshared non-semaphore related timelines
1f38aec71c0a Restore "drm/i915: drop engine_pin/unpin_breadcrumbs_irq"
681705c8f00d drm/i915/gt: Support creation of 'internal' rings
a1edb8d6fe76 drm/i915/gt: Use client timeline address for seqno writes
d11da5d6110b drm/i915/gt: Infrastructure for ring scheduling
-:79: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#79: 
new file mode 100644

total: 0 errors, 1 warnings, 0 checks, 844 lines checked
282aad02df82 drm/i915/gt: Implement ring scheduler for gen4-7
-:70: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#70: FILE: drivers/gpu/drm/i915/gt/intel_ring_scheduler.c:221:
+				*cs++ = i915_mmio_reg_offset(

-:72: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#72: FILE: drivers/gpu/drm/i915/gt/intel_ring_scheduler.c:223:
+				*cs++ = _MASKED_BIT_ENABLE(

-:107: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#107: FILE: drivers/gpu/drm/i915/gt/intel_ring_scheduler.c:258:
+				*cs++ = _MASKED_BIT_DISABLE(

total: 0 errors, 0 warnings, 3 checks, 585 lines checked
ce6065610e18 drm/i915/gt: Enable ring scheduling for gen5-7
-:32: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#32: 
References: 0f46832fab77 ("drm/i915: Mask USER interrupts on gen6 (until required)")

-:32: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 0f46832fab77 ("drm/i915: Mask USER interrupts on gen6 (until required)")'
#32: 
References: 0f46832fab77 ("drm/i915: Mask USER interrupts on gen6 (until required)")

total: 1 errors, 1 warnings, 0 checks, 45 lines checked
8412a0fc0ad0 drm/i915: Support secure dispatch on gen6/gen7
c652a65f34ac drm/i915/gt: Limit C-states while waiting for requests



* [Intel-gfx] ✗ Fi.CI.SPARSE: warning for series starting with [01/31] drm/i915/gt: Ratelimit heartbeat completion probing
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (30 preceding siblings ...)
  2021-02-08 15:43 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/31] drm/i915/gt: Ratelimit heartbeat completion probing Patchwork
@ 2021-02-08 15:45 ` Patchwork
  2021-02-08 16:13 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
  2021-02-09 17:52 ` [Intel-gfx] [PATCH 01/31] " Mika Kuoppala
  33 siblings, 0 replies; 54+ messages in thread
From: Patchwork @ 2021-02-08 15:45 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/31] drm/i915/gt: Ratelimit heartbeat completion probing
URL   : https://patchwork.freedesktop.org/series/86841/
State : warning

== Summary ==

$ dim sparse --fast origin/drm-tip
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.
+drivers/gpu/drm/i915/selftests/i915_syncmap.c:80:54: warning: dubious: x | !y



* Re: [Intel-gfx] [PATCH 10/31] drm/i915: Fair low-latency scheduling
  2021-02-08 15:29     ` Chris Wilson
@ 2021-02-08 16:03       ` Tvrtko Ursulin
  2021-02-08 16:11         ` Chris Wilson
  0 siblings, 1 reply; 54+ messages in thread
From: Tvrtko Ursulin @ 2021-02-08 16:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 08/02/2021 15:29, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2021-02-08 14:56:31)
>> On 08/02/2021 10:52, Chris Wilson wrote:
>>> +static bool need_preempt(const struct intel_engine_cs *engine,
>>>                         const struct i915_request *rq)
>>>    {
>>>        const struct i915_sched *se = &engine->sched;
>>> -     int last_prio;
>>> +     const struct i915_request *first = NULL;
>>> +     const struct i915_request *next;
>>>    
>>>        if (!i915_sched_use_busywait(se))
>>>                return false;
>>>    
>>>        /*
>>> -      * Check if the current priority hint merits a preemption attempt.
>>> -      *
>>> -      * We record the highest value priority we saw during rescheduling
>>> -      * prior to this dequeue, therefore we know that if it is strictly
>>> -      * less than the current tail of ESLP[0], we do not need to force
>>> -      * a preempt-to-idle cycle.
>>> -      *
>>> -      * However, the priority hint is a mere hint that we may need to
>>> -      * preempt. If that hint is stale or we may be trying to preempt
>>> -      * ourselves, ignore the request.
>>> -      *
>>> -      * More naturally we would write
>>> -      *      prio >= max(0, last);
>>> -      * except that we wish to prevent triggering preemption at the same
>>> -      * priority level: the task that is running should remain running
>>> -      * to preserve FIFO ordering of dependencies.
>>> +      * If this request is special and must not be interrupted at any
>>> +      * cost, so be it. Note we are only checking the most recent request
>>> +      * in the context and so may be masking an earlier vip request. It
>>> +      * is hoped that under the conditions where nopreempt is used, this
>>> +      * will not matter (i.e. all requests to that context will be
>>> +      * nopreempt for as long as desired).
>>>         */
>>> -     last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
>>> -     if (engine->execlists.queue_priority_hint <= last_prio)
>>> +     if (i915_request_has_nopreempt(rq))
>>>                return false;
>>>    
>>>        /*
>>>         * Check against the first request in ELSP[1], it will, thanks to the
>>>         * power of PI, be the highest priority of that context.
>>>         */
>>> -     if (!list_is_last(&rq->sched.link, &se->requests) &&
>>> -         rq_prio(list_next_entry(rq, sched.link)) > last_prio)
>>> -             return true;
>>> +     next = next_elsp_request(se, rq);
>>> +     if (dl_before(next, first))
>>
>> Here first is always NULL so dl_before always returns true, meaning it
>> appears redundant to call it.
> 
> I was applying a pattern :)

Yeah, thought so. It's fine.

> 
>>
>>> +             first = next;
>>>    
>>>        /*
>>>         * If the inflight context did not trigger the preemption, then maybe
>>> @@ -356,8 +343,31 @@ static bool need_preempt(struct intel_engine_cs *engine,
>>>         * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
>>>         * context, it's priority would not exceed ELSP[0] aka last_prio.
>>>         */
>>> -     return max(virtual_prio(&engine->execlists),
>>> -                queue_prio(se)) > last_prio;
>>> +     next = first_request(se);
>>> +     if (dl_before(next, first))
>>> +             first = next;
>>> +
>>> +     next = first_virtual(engine);
>>> +     if (dl_before(next, first))
>>> +             first = next;
>>> +
>>> +     if (!dl_before(first, rq))
>>> +             return false;
>>
>> Ends up earliest deadline between list of picks: elsp[1] (or maybe next
>> in context, depends on coalescing criteria), first in the priolist,
>> first virtual.
>>
>> Virtual has a separate queue so that's understandable, but can "elsp[1]"
>> really have an earlier deadline than first_request() (head of the priolist)?
> 
> elsp[1] could have been promoted and thus now have an earlier deadline
> than elsp[0]. Consider the heartbeat as a trivial example that is first
> submitted at very low priority, but by the end has absolute priority.

So the tree is not kept sorted at all times, or at least not at the time 
need_preempt peeks at it?

> 
>>> +static u64 virtual_deadline(u64 kt, int priority)
>>> +{
>>> +     return i915_sched_to_ticks(kt + prio_slice(priority));
>>> +}
>>> +
>>> +u64 i915_scheduler_next_virtual_deadline(int priority)
>>> +{
>>> +     return virtual_deadline(ktime_get_mono_fast_ns(), priority);
>>> +}
>>
>> This helper becomes a bit odd in that the only two callers are rewind
>> and defer. And it queries ktime, while before deadline was set based on
>> signalers.
>>
>> Where is the place which set the ktime based deadline (converted to
>> ticks) for requests with no signalers?
> 
> signal_deadline() with no signalers returns now. So the first request in
> a sequence is queued with virtual_deadline(now() + prio_slice()).

Ah ok.

> 
>>>    void i915_request_enqueue(struct i915_request *rq)
>>>    {
>>> -     struct intel_engine_cs *engine = rq->engine;
>>> -     struct i915_sched *se = intel_engine_get_scheduler(engine);
>>> +     struct i915_sched *se = i915_request_get_scheduler(rq);
>>> +     u64 dl = earliest_deadline(se, rq);
>>>        unsigned long flags;
>>>        bool kick = false;
>>>    
>>> @@ -880,11 +1107,11 @@ void i915_request_enqueue(struct i915_request *rq)
>>>                list_add_tail(&rq->sched.link, &se->hold);
>>>                i915_request_set_hold(rq);
>>>        } else {
>>> -             queue_request(se, rq);
>>> -
>>> +             set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
>>> +             kick = __i915_request_set_deadline(se, rq,
>>> +                                                min(dl, rq_deadline(rq)));
>>
>> What is this min for? Dl has been computed above based on rq, so I
>> wonder why rq_deadline has to be considered again.
> 
> earliest_deadline() only looks at the signalers (or now if none) and
> picks the next deadline in that sequence. However, some requests we may
> set the deadline explicitly (e.g. heartbeat has a known deadline, vblank
> rendering we can approximate a deadline) and so we also consider what
> deadline has already been specified.
> 
>> Because earliest_deadline does not actually consider rq->sched.deadline?
>> So conceptually earliest_deadline would be described as what?
> 
> sequence_deadline() ?
> 
> earliest_deadline_for_this_sequence() ?

Don't know really. I don't think it's a matter of names, just me building a 
good mental image of the operation.

But "earliest" does imply earliest, and it then potentially gets 
overwritten with something even earlier, hm.. baseline? :) Default? 
Nah.. Scheduling_deadline? Tree deadline? Sorted deadline?

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [Intel-gfx] [PATCH 10/31] drm/i915: Fair low-latency scheduling
  2021-02-08 16:03       ` Tvrtko Ursulin
@ 2021-02-08 16:11         ` Chris Wilson
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 16:11 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2021-02-08 16:03:03)
> 
> On 08/02/2021 15:29, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2021-02-08 14:56:31)
> >> On 08/02/2021 10:52, Chris Wilson wrote:
> >>> +static bool need_preempt(const struct intel_engine_cs *engine,
> >>>                         const struct i915_request *rq)
> >>>    {
> >>>        const struct i915_sched *se = &engine->sched;
> >>> -     int last_prio;
> >>> +     const struct i915_request *first = NULL;
> >>> +     const struct i915_request *next;
> >>>    
> >>>        if (!i915_sched_use_busywait(se))
> >>>                return false;
> >>>    
> >>>        /*
> >>> -      * Check if the current priority hint merits a preemption attempt.
> >>> -      *
> >>> -      * We record the highest value priority we saw during rescheduling
> >>> -      * prior to this dequeue, therefore we know that if it is strictly
> >>> -      * less than the current tail of ESLP[0], we do not need to force
> >>> -      * a preempt-to-idle cycle.
> >>> -      *
> >>> -      * However, the priority hint is a mere hint that we may need to
> >>> -      * preempt. If that hint is stale or we may be trying to preempt
> >>> -      * ourselves, ignore the request.
> >>> -      *
> >>> -      * More naturally we would write
> >>> -      *      prio >= max(0, last);
> >>> -      * except that we wish to prevent triggering preemption at the same
> >>> -      * priority level: the task that is running should remain running
> >>> -      * to preserve FIFO ordering of dependencies.
> >>> +      * If this request is special and must not be interrupted at any
> >>> +      * cost, so be it. Note we are only checking the most recent request
> >>> +      * in the context and so may be masking an earlier vip request. It
> >>> +      * is hoped that under the conditions where nopreempt is used, this
> >>> +      * will not matter (i.e. all requests to that context will be
> >>> +      * nopreempt for as long as desired).
> >>>         */
> >>> -     last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
> >>> -     if (engine->execlists.queue_priority_hint <= last_prio)
> >>> +     if (i915_request_has_nopreempt(rq))
> >>>                return false;
> >>>    
> >>>        /*
> >>>         * Check against the first request in ELSP[1], it will, thanks to the
> >>>         * power of PI, be the highest priority of that context.
> >>>         */
> >>> -     if (!list_is_last(&rq->sched.link, &se->requests) &&
> >>> -         rq_prio(list_next_entry(rq, sched.link)) > last_prio)
> >>> -             return true;
> >>> +     next = next_elsp_request(se, rq);
> >>> +     if (dl_before(next, first))
> >>
> >> Here first is always NULL so dl_before always returns true, meaning it
> >> appears redundant to call it.
> > 
> > I was applying a pattern :)
> 
> Yeah, thought so. It's fine.
> 
> > 
> >>
> >>> +             first = next;
> >>>    
> >>>        /*
> >>>         * If the inflight context did not trigger the preemption, then maybe
> >>> @@ -356,8 +343,31 @@ static bool need_preempt(struct intel_engine_cs *engine,
> >>>         * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
> >>>         * context, its priority would not exceed ELSP[0] aka last_prio.
> >>>         */
> >>> -     return max(virtual_prio(&engine->execlists),
> >>> -                queue_prio(se)) > last_prio;
> >>> +     next = first_request(se);
> >>> +     if (dl_before(next, first))
> >>> +             first = next;
> >>> +
> >>> +     next = first_virtual(engine);
> >>> +     if (dl_before(next, first))
> >>> +             first = next;
> >>> +
> >>> +     if (!dl_before(first, rq))
> >>> +             return false;
> >>
> >> Ends up earliest deadline between list of picks: elsp[1] (or maybe next
> >> in context, depends on coalescing criteria), first in the priolist,
> >> first virtual.
> >>
> >> Virtual has a separate queue so that's understandable, but can "elsp[1]"
> >> really have an earlier deadline than first_request() (head of the priolist)?
> > 
> > elsp[1] could have been promoted and thus now have an earlier deadline
> > than elsp[0]. Consider the heartbeat as a trivial example that is first
> > submitted at very low priority, but by the end has absolute priority.
> 
> The tree is not kept sorted at all times, or at least at the time 
> need_preempt peeks at it?

The tree of priorities/deadlines itself is sorted. ELSP[] is the HW
runlist, which is a snapshot at the time of submission (and while it
should have been in order then, it may not be now).

need_preempt() tries to answer the question of "if I were to unwind
everything, would the first request in the resulting priority tree be of
earlier deadline & higher priority than the currently running request?".
So we have to guess the future shape of the tree.
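
As a minimal sketch of the comparison itself (not the exact implementation
in the patch, just the shape implied above: earlier deadline wins, and a
NULL "no candidate yet" always loses):

        static bool dl_before(const struct i915_request *next,
                              const struct i915_request *prev)
        {
                /* a missing candidate never wins; a missing incumbent always loses */
                if (!next)
                        return false;
                if (!prev)
                        return true;

                return rq_deadline(next) < rq_deadline(prev);
        }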

> >>> +static u64 virtual_deadline(u64 kt, int priority)
> >>> +{
> >>> +     return i915_sched_to_ticks(kt + prio_slice(priority));
> >>> +}
> >>> +
> >>> +u64 i915_scheduler_next_virtual_deadline(int priority)
> >>> +{
> >>> +     return virtual_deadline(ktime_get_mono_fast_ns(), priority);
> >>> +}
> >>
> >> This helpers becomes a bit odd in that the only two callers are rewind
> >> and defer. And it queries ktime, while before deadline was set based on
> >> signalers.
> >>
> >> Where is the place which set the ktime based deadline (converted to
> >> ticks) for requests with no signalers?
> > 
> > signal_deadline() with no signalers returns now. So the first request in
> > a sequence is queued with virtual_deadline(now() + prio_slice()).
> 
> Ah ok.
> 
> > 
> >>>    void i915_request_enqueue(struct i915_request *rq)
> >>>    {
> >>> -     struct intel_engine_cs *engine = rq->engine;
> >>> -     struct i915_sched *se = intel_engine_get_scheduler(engine);
> >>> +     struct i915_sched *se = i915_request_get_scheduler(rq);
> >>> +     u64 dl = earliest_deadline(se, rq);
> >>>        unsigned long flags;
> >>>        bool kick = false;
> >>>    
> >>> @@ -880,11 +1107,11 @@ void i915_request_enqueue(struct i915_request *rq)
> >>>                list_add_tail(&rq->sched.link, &se->hold);
> >>>                i915_request_set_hold(rq);
> >>>        } else {
> >>> -             queue_request(se, rq);
> >>> -
> >>> +             set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> >>> +             kick = __i915_request_set_deadline(se, rq,
> >>> +                                                min(dl, rq_deadline(rq)));
> >>
> >> What is this min for? Dl has been computed above based on rq, so I
> >> wonder why rq_deadline has to be considered again.
> > 
> > earliest_deadline() only looks at the signalers (or now if none) and
> > picks the next deadline in that sequence. However, some requests we may
> > set the deadline explicitly (e.g. heartbeat has a known deadline, vblank
> > rendering we can approximate a deadline) and so we also consider what
> > deadline has already been specified.
> > 
> >> Because earliest_deadline does not actually consider rq->sched.deadline?
> >> So conceptually earliest_deadline would be described as what?
> > 
> > sequence_deadline() ?
> > 
> > earliest_deadline_for_this_sequence() ?
> 
> Don't know really. Don't think it's a matter of names, just me building a 
> good image of the operation.
> 
> But as earliest does imply earliest, which then gets potentially 
> overwritten with something even earlier, hm.. baseline? :) Default? 
> Nah.. Scheduling_deadline? Tree deadline? Sorted deadline?

Maybe current_deadline().
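
Something like this, using that name purely for illustration (the patch
computes it inline at enqueue):

        static u64 current_deadline(struct i915_sched *se,
                                    const struct i915_request *rq)
        {
                /*
                 * Earlier of the deadline inherited through the signalers
                 * (or now, if there are none) and any deadline already set
                 * explicitly on the request (e.g. heartbeat, vblank).
                 */
                return min(earliest_deadline(se, rq), rq_deadline(rq));
        }
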
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [01/31] drm/i915/gt: Ratelimit heartbeat completion probing
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (31 preceding siblings ...)
  2021-02-08 15:45 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
@ 2021-02-08 16:13 ` Patchwork
  2021-02-09 17:52 ` [Intel-gfx] [PATCH 01/31] " Mika Kuoppala
  33 siblings, 0 replies; 54+ messages in thread
From: Patchwork @ 2021-02-08 16:13 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx


== Series Details ==

Series: series starting with [01/31] drm/i915/gt: Ratelimit heartbeat completion probing
URL   : https://patchwork.freedesktop.org/series/86841/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_9747 -> Patchwork_19626
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_19626 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_19626, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19626/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_19626:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live@execlists:
    - fi-bsw-kefka:       [PASS][1] -> [DMESG-FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9747/fi-bsw-kefka/igt@i915_selftest@live@execlists.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19626/fi-bsw-kefka/igt@i915_selftest@live@execlists.html

  
Known issues
------------

  Here are the changes found in Patchwork_19626 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@amdgpu/amd_cs_nop@sync-fork-gfx0:
    - fi-cfl-8700k:       NOTRUN -> [SKIP][3] ([fdo#109271]) +25 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19626/fi-cfl-8700k/igt@amdgpu/amd_cs_nop@sync-fork-gfx0.html

  * igt@gem_exec_fence@basic-busy@bcs0:
    - fi-kbl-soraka:      NOTRUN -> [SKIP][4] ([fdo#109271]) +23 similar issues
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19626/fi-kbl-soraka/igt@gem_exec_fence@basic-busy@bcs0.html

  * igt@gem_exec_suspend@basic-s3:
    - fi-tgl-y:           [PASS][5] -> [DMESG-WARN][6] ([i915#2411] / [i915#402])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9747/fi-tgl-y/igt@gem_exec_suspend@basic-s3.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19626/fi-tgl-y/igt@gem_exec_suspend@basic-s3.html

  * igt@gem_huc_copy@huc-copy:
    - fi-kbl-soraka:      NOTRUN -> [SKIP][7] ([fdo#109271] / [i915#2190])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19626/fi-kbl-soraka/igt@gem_huc_copy@huc-copy.html
    - fi-cfl-8700k:       NOTRUN -> [SKIP][8] ([fdo#109271] / [i915#2190])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19626/fi-cfl-8700k/igt@gem_huc_copy@huc-copy.html

  * igt@i915_selftest@live@gt_pm:
    - fi-kbl-soraka:      NOTRUN -> [DMESG-FAIL][9] ([i915#1886] / [i915#2291])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19626/fi-kbl-soraka/igt@i915_selftest@live@gt_pm.html

  * igt@kms_chamelium@common-hpd-after-suspend:
    - fi-kbl-soraka:      NOTRUN -> [SKIP][10] ([fdo#109271] / [fdo#111827]) +8 similar issues
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19626/fi-kbl-soraka/igt@kms_chamelium@common-hpd-after-suspend.html

  * igt@kms_chamelium@hdmi-edid-read:
    - fi-kbl-7500u:       [PASS][11] -> [FAIL][12] ([i915#2128])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9747/fi-kbl-7500u/igt@kms_chamelium@hdmi-edid-read.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19626/fi-kbl-7500u/igt@kms_chamelium@hdmi-edid-read.html

  * igt@kms_chamelium@vga-edid-read:
    - fi-cfl-8700k:       NOTRUN -> [SKIP][13] ([fdo#109271] / [fdo#111827]) +8 similar issues
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19626/fi-cfl-8700k/igt@kms_chamelium@vga-edid-read.html

  * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d:
    - fi-cfl-8700k:       NOTRUN -> [SKIP][14] ([fdo#109271] / [i915#533])
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19626/fi-cfl-8700k/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html
    - fi-kbl-soraka:      NOTRUN -> [SKIP][15] ([fdo#109271] / [i915#533])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19626/fi-kbl-soraka/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html

  * igt@prime_self_import@basic-with_one_bo_two_files:
    - fi-tgl-y:           [PASS][16] -> [DMESG-WARN][17] ([i915#402]) +1 similar issue
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9747/fi-tgl-y/igt@prime_self_import@basic-with_one_bo_two_files.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19626/fi-tgl-y/igt@prime_self_import@basic-with_one_bo_two_files.html

  
#### Possible fixes ####

  * igt@prime_self_import@basic-with_two_bos:
    - fi-tgl-y:           [DMESG-WARN][18] ([i915#402]) -> [PASS][19] +1 similar issue
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9747/fi-tgl-y/igt@prime_self_import@basic-with_two_bos.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19626/fi-tgl-y/igt@prime_self_import@basic-with_two_bos.html

  
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#1886]: https://gitlab.freedesktop.org/drm/intel/issues/1886
  [i915#2128]: https://gitlab.freedesktop.org/drm/intel/issues/2128
  [i915#2190]: https://gitlab.freedesktop.org/drm/intel/issues/2190
  [i915#2291]: https://gitlab.freedesktop.org/drm/intel/issues/2291
  [i915#2411]: https://gitlab.freedesktop.org/drm/intel/issues/2411
  [i915#402]: https://gitlab.freedesktop.org/drm/intel/issues/402
  [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533


Participating hosts (42 -> 38)
------------------------------

  Additional (2): fi-kbl-soraka fi-cfl-8700k 
  Missing    (6): fi-jsl-1 fi-ilk-m540 fi-hsw-4200u fi-bsw-cyan fi-ctg-p8600 fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_9747 -> Patchwork_19626

  CI-20190529: 20190529
  CI_DRM_9747: 65d67e70f9f78c7f8c2796956fdbdb69cffc7c98 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5998: b0160aad9e547d2205341e0b6783e12aa143566e @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_19626: c652a65f34ac9ee26182b9924bb868d889a13f30 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

c652a65f34ac drm/i915/gt: Limit C-states while waiting for requests
8412a0fc0ad0 drm/i915: Support secure dispatch on gen6/gen7
ce6065610e18 drm/i915/gt: Enable ring scheduling for gen5-7
282aad02df82 drm/i915/gt: Implement ring scheduler for gen4-7
d11da5d6110b drm/i915/gt: Infrastructure for ring scheduling
a1edb8d6fe76 drm/i915/gt: Use client timeline address for seqno writes
681705c8f00d drm/i915/gt: Support creation of 'internal' rings
1f38aec71c0a Restore "drm/i915: drop engine_pin/unpin_breadcrumbs_irq"
3510121480a9 drm/i915/gt: Use ppHWSP for unshared non-semaphore related timelines
b755887ce2eb drm/i915/selftests: Exercise relative timeline modes
75d076c67057 drm/i915/gt: Use indices for writing into relative timelines
cbdded299ad1 drm/i915/gt: Add timeline "mode"
186491248d9b drm/i915/gt: Track timeline GGTT offset separately from subpage offset
b8c672225e3d drm/i915/gt: Wrap intel_timeline.has_initial_breadcrumb
8b8ee05f1953 drm/i915/gt: Convert the legacy ring submission to use the scheduling interface
2091c45fb062 drm/i915/gt: Delay taking irqoff for execlists submission
4109a26f879f drm/i915: Bump default timeslicing quantum to 5ms
27f940af2d22 drm/i915: Move saturated workload detection back to the context
d8b43dca33f0 drm/i915/gt: Support virtual engine queues
022c4065c1a9 drm/i915: Extend the priority boosting for the display with a deadline
9a611ddd1f7a drm/i915/gt: Specify a deadline for the heartbeat
dbfb352bf7b0 drm/i915: Fair low-latency scheduling
029c20aa2d56 drm/i915: Replace priolist rbtree with a skiplist
b5a1080e1523 drm/i915: Fix the iterative dfs for defering requests
7a092999c7b2 drm/i915: Move preempt-reset flag to the scheduler
483235989602 drm/i915: Move busywaiting control to the scheduler
4ab52e53c2c0 drm/i915/gt: Declare when we enabled timeslicing
5b350d2836cf drm/i915: Move timeslicing flag to scheduler
3dbe5f872455 drm/i915: Introduce the scheduling mode
2494ed145240 drm/i915: Move context revocation to scheduler
206aa3c9677c drm/i915/gt: Ratelimit heartbeat completion probing

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19626/index.html

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [Intel-gfx] [PATCH 09/31] drm/i915: Replace priolist rbtree with a skiplist
  2021-02-08 15:23   ` Tvrtko Ursulin
@ 2021-02-08 16:19     ` Chris Wilson
  2021-02-09 16:11       ` Tvrtko Ursulin
  0 siblings, 1 reply; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 16:19 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2021-02-08 15:23:17)
> 
> On 08/02/2021 10:52, Chris Wilson wrote:
> >   static struct list_head *
> >   lookup_priolist(struct i915_sched *se, int prio)
> >   {
> > -     struct i915_priolist *p;
> > -     struct rb_node **parent, *rb;
> > -     bool first = true;
> > +     struct i915_priolist *update[I915_PRIOLIST_HEIGHT];
> > +     struct i915_priolist_root *const root = &se->queue;
> > +     struct i915_priolist *pl, *tmp;
> > +     int lvl;
> >   
> >       lockdep_assert_held(&se->lock);
> > -     assert_priolists(se);
> > -
> >       if (unlikely(se->no_priolist))
> >               prio = I915_PRIORITY_NORMAL;
> >   
> > +     for_each_priolist(pl, root) { /* recycle any empty elements before us */
> > +             if (pl->priority <= prio || !list_empty(&pl->requests))
> > +                     break;
> 
> This less part of the less-or-equal condition keeps confusing me as a 
> break criteria. If premise is cleaning up, why break on first smaller 
> prio? Would the idea be to prune all empty lists up to, not including, 
> the lookup prio?

Just parcelling up the work. If we tidy up all the unused nodes before
us, we insert ourselves at the head of the tree, and all the cheap
checks to see if this is the first request, or to find the first
request are happy.

It's not expected to find anything unused with the tweaks to tidy up
empty elements as we move between i915_priolist.requests, but it seems
sensible to keep as then it should be just checking the first
i915_priolist and breaking out.

> > -void __i915_priolist_free(struct i915_priolist *p)
> > +static void __remove_priolist(struct i915_sched *se, struct list_head *plist)
> >   {
> > -     kmem_cache_free(global.slab_priorities, p);
> > +     struct i915_priolist_root *root = &se->queue;
> > +     struct i915_priolist *pl, *tmp;
> > +     struct i915_priolist *old =
> > +             container_of(plist, struct i915_priolist, requests);
> > +     int prio = old->priority;
> > +     int lvl;
> > +
> > +     lockdep_assert_held(&se->lock);
> > +     GEM_BUG_ON(!list_empty(plist));
> > +
> > +     pl = &root->sentinel;
> > +     lvl = pl->level;
> > +     GEM_BUG_ON(lvl < 0);
> > +
> > +     if (prio != I915_PRIORITY_NORMAL)
> > +             pl_push(old, &pl->requests);
> > +
> > +     do {
> > +             while (tmp = pl->next[lvl], tmp->priority > prio)
> > +                     pl = tmp;
> > +             if (lvl <= old->level) {
> > +                     pl->next[lvl] = old->next[lvl];
> > +                     if (pl == &root->sentinel && old->next[lvl] == pl) {
> > +                             GEM_BUG_ON(pl->level != lvl);
> > +                             pl->level--;
> > +                     }
> > +             }
> > +     } while (--lvl >= 0);
> > +     GEM_BUG_ON(tmp != old);
> > +}

> > +struct i915_priolist *__i915_sched_dequeue_next(struct i915_sched *se)
> > +{
> > +     struct i915_priolist * const s = &se->queue.sentinel;
> > +     struct i915_priolist *pl = s->next[0];
> > +     int lvl;
> > +
> > +     GEM_BUG_ON(!list_empty(&pl->requests));
> > +     GEM_BUG_ON(pl == s);
> > +
> > +     /* Keep pl->next[0] valid for for_each_priolist iteration */
> > +     if (pl->priority != I915_PRIORITY_NORMAL)
> > +             pl_push(pl, &s->requests);
> > +
> > +     lvl = pl->level;
> > +     GEM_BUG_ON(lvl < 0);
> > +     do {
> > +             s->next[lvl] = pl->next[lvl];
> > +             if (pl->next[lvl] == s) {
> > +                     GEM_BUG_ON(s->level != lvl);
> > +                     s->level--;
> > +             }
> > +     } while (--lvl >= 0);
> > +
> > +     return pl->next[0];
> >   }
> 
> If both __i915_sched_dequeue_next and __remove_priolist are removing an 
> empty list from the hieararchy, why can't they shared some code?

The __remove_priolist does the general search and remove, whereas
dequeue_next is trying to keep O(1) remove-from-head. dequeue_next is
meant to be called many, many more times than __remove_priolist.
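
In caller terms (illustration only; pl/plist stand for a priolist and its
request list):

        /* hot path (every dequeue): retire the emptied head bucket and
         * return the new head to keep iterating, O(1) */
        pl = __i915_sched_dequeue_next(se);

        /* cold path: the general search-and-unlink, walking the levels to
         * find the emptied bucket wherever it sits */
        __remove_priolist(se, plist);
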
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [Intel-gfx] [PATCH 30/31] drm/i915: Support secure dispatch on gen6/gen7
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 30/31] drm/i915: Support secure dispatch on gen6/gen7 Chris Wilson
@ 2021-02-08 20:55   ` Dave Airlie
  2021-02-08 22:49     ` Chris Wilson
  2021-02-09 11:02     ` Tvrtko Ursulin
  0 siblings, 2 replies; 54+ messages in thread
From: Dave Airlie @ 2021-02-08 20:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Matthew Auld

On Mon, 8 Feb 2021 at 20:53, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Re-enable secure dispatch for gen6/gen7, primarily to workaround the
> command parser and overly zealous command validation on Haswell. For
> example this prevents making accurate measurements using a journal for
> store results from the GPU without CPU intervention.

There's 31 patches in this series, and I can't find any 00/31 or
justification for any of this work.

I see patches like this which seem to undo work done for security
reasons under CVE patches with no oversight.

Again, the GT team is not doing the right thing here, stop focusing on
individual pieces of Chris's work, push back for high level
architectural reviews and I want them on the list in public.

All I want from the GT team in the next pull request is dma_resv
locking work and restoring the hangcheck timers that seems like a
regression that Chris found acceptable and nobody has pushed back on.

For like the 500th time, if you want DG1 and stuff in the tree, stop
this shit already, real reviewers, high-level architectural reviews,
NAK the bullshit in public on the list.

Dave.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [Intel-gfx] [PATCH 30/31] drm/i915: Support secure dispatch on gen6/gen7
  2021-02-08 20:55   ` Dave Airlie
@ 2021-02-08 22:49     ` Chris Wilson
  2021-02-09 11:02     ` Tvrtko Ursulin
  1 sibling, 0 replies; 54+ messages in thread
From: Chris Wilson @ 2021-02-08 22:49 UTC (permalink / raw)
  To: Dave Airlie; +Cc: Intel Graphics Development, Matthew Auld

Quoting Dave Airlie (2021-02-08 20:55:19)
> On Mon, 8 Feb 2021 at 20:53, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >
> > Re-enable secure dispatch for gen6/gen7, primarily to workaround the
> > command parser and overly zealous command validation on Haswell. For
> > example this prevents making accurate measurements using a journal for
> > store results from the GPU without CPU intervention.
> 
> There's 31 patches in this series, and I can't find any 00/31 or
> justification for any of this work.

You don't agree with the overview in 11? Or the test design to reproduce
the reported problems with multiple clients?

There's some code motion to align with upstreaming guc patches later on;
a bug fix for an iterative depth-first-search not being a
depth-first-search; the change in sort key for scheduling policy;
switching the late greedy virtual engine to work on the same interface
as execlists/guc; the CS emitters to switch off absolute addressing for
breadcrumbs; and finally request reordering for the ringbuffer.
 
> I see patches like this which seem to undo work done for security
> reasons under CVE patches with no oversight.

Seems to remove clear_residuals? The same clear_residuals between contexts
on gen7 is still there.

> Again, the GT team is not doing the right thing here, stop focusing on
> individual pieces of Chris's work, push back for high level
> architectural reviews and I want them on the list in public.

The architectural bit here is the code motion; getting the backend
agnostic list management all into a common layer. Trying to align that
with what drm_sched offers, with the optimistic view that one day
drm_sched may offer enough to start replacing it.

> All I want from the GT team in the next pull request is dma_resv
> locking work and restoring the hangcheck timers that seems like a
> regression that Chris found acceptable and nobody has pushed back on.

The choice here in sort key is still entirely orthogonal to dma-resv. The
hangcheck is still driven off a timer. The behaviour of the current code
is still the same as the much older global seqno hangcheck around
preemption (hangcheck being postponed whenever the seqno changed and/or
RING_START changed). The direction to use periodic pulses for issuing
resets (which is actually much faster at detecting hangs than the older
seqno hangchecking allowed for), power management and tracking of GPU
resources was not mine alone, but yes I did find it acceptable.

> For like the 500th time, if you want DG1 and stuff in the tree, stop
> this shit already, real reviewers, high-level architectural reviews,
> NAK the bullshit in public on the list.

I do not understand the hostility to fixing user issues, and improving
both existing and future products when it does not interfere with
anything else.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [Intel-gfx] [PATCH 10/31] drm/i915: Fair low-latency scheduling
  2021-02-08 10:52 ` [Intel-gfx] [PATCH 10/31] drm/i915: Fair low-latency scheduling Chris Wilson
  2021-02-08 14:56   ` Tvrtko Ursulin
@ 2021-02-09  9:37   ` Tvrtko Ursulin
  2021-02-09 10:31     ` Chris Wilson
  1 sibling, 1 reply; 54+ messages in thread
From: Tvrtko Ursulin @ 2021-02-09  9:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 08/02/2021 10:52, Chris Wilson wrote:

> diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
> index 35bbe2b80596..f1d009906f71 100644
> --- a/drivers/gpu/drm/i915/Kconfig.profile
> +++ b/drivers/gpu/drm/i915/Kconfig.profile
> @@ -1,3 +1,65 @@
> +choice
> +	prompt "Preferred scheduler"
> +	default DRM_I915_SCHED_VIRTUAL_DEADLINE
> +	help
> +	  Select the preferred method to decide the order of execution.
> +
> +	  The scheduler is used for two purposes. First to defer unready
> +	  jobs to not block execution of independent ready clients, so
> +	  preventing GPU stalls while work waits for other tasks. The second
> +	  purpose is to decide which task to run next, as well as decide
> +	  if that task should preempt the currently running task, or if
> +	  the current task has exceeded its allotment of GPU time and should
> +	  be replaced.
> +
> +	config DRM_I915_SCHED_FIFO
> +	bool "FIFO"
> +	help
> +	  No task reordering, tasks are executed in order of readiness.
> +	  First in, first out.
> +
> +	  Unready tasks do not block execution of other, independent clients.
> +	  A client will not be scheduled for execution until all of its
> +	  prerequisite work has completed.
> +
> +	  This disables the scheduler and puts it into a pass-through mode.
> +
> +	config DRM_I915_SCHED_PRIORITY
> +	bool "Priority"
> +	help
> +	  Strict priority ordering, equal priority tasks are executed
> +	  in order of readiness. Clients are liable to starve other clients,
> +	  causing uneven execution and excess task latency. High priority
> +	  clients will preempt lower priority clients and will run
> +	  uninterrupted.
> +
> +	  Note that interactive desktops will implicitly perform priority
> +	  boosting to minimise frame jitter.
> +
> +	config DRM_I915_SCHED_VIRTUAL_DEADLINE
> +	bool "Virtual Deadline"
> +	help
> +	  A fair scheduler based on MuQSS with priority-hinting.
> +
> +	  When a task is ready for execution, it is given a quota (from the
> +	  engine's timeslice) and a virtual deadline. The virtual deadline is
> +	  derived from the current time and the timeslice scaled by the
> +	  task's priority. Higher priority tasks are given an earlier
> +	  deadline and receive a large portion of the execution bandwidth.
> +
> +	  Requests are then executed in order of deadline completion.
> +	  Requests with earlier deadlines and higher priority than currently
> +	  executing on the engine will preempt the active task.
> +
> +endchoice
> +
> +config DRM_I915_SCHED
> +	int
> +	default 2 if DRM_I915_SCHED_VIRTUAL_DEADLINE
> +	default 1 if DRM_I915_SCHED_PRIORITY
> +	default 0 if DRM_I915_SCHED_FIFO
> +	default -1

Default -1 would mean it would ask the user and not default to deadline?

Implementation wise it is very neat how you did it so there is basically 
very little cost for the compiled out options. And code maintenance cost 
to support multiple options is pretty trivial as well.

Only cost I can see is potential bug reports if "wrong" scheduler was 
picked by someone. What do you envisage, or who, would be the use cases 
for not going with deadline? (I think deadline should be default.)

Then there is a question of how these kconfig will interact, or at least 
what their semantics would be, considering the GuC.

I think we can modify the kconfig blurb to say they only apply to 
execlists platforms, once we get a GuC scheduling platform upstream. And 
fudge some sched mode bits for sysfs reporting in that case.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [Intel-gfx] [PATCH 10/31] drm/i915: Fair low-latency scheduling
  2021-02-09  9:37   ` Tvrtko Ursulin
@ 2021-02-09 10:31     ` Chris Wilson
  2021-02-09 10:40       ` Tvrtko Ursulin
  0 siblings, 1 reply; 54+ messages in thread
From: Chris Wilson @ 2021-02-09 10:31 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2021-02-09 09:37:19)
> 
> On 08/02/2021 10:52, Chris Wilson wrote:
> 
> > diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
> > index 35bbe2b80596..f1d009906f71 100644
> > --- a/drivers/gpu/drm/i915/Kconfig.profile
> > +++ b/drivers/gpu/drm/i915/Kconfig.profile
> > @@ -1,3 +1,65 @@
> > +choice
> > +     prompt "Preferred scheduler"
> > +     default DRM_I915_SCHED_VIRTUAL_DEADLINE
> > +     help
> > +       Select the preferred method to decide the order of execution.
> > +
> > +       The scheduler is used for two purposes. First to defer unready
> > +       jobs to not block execution of independent ready clients, so
> > +       preventing GPU stalls while work waits for other tasks. The second
> > +       purpose is to decide which task to run next, as well as decide
> > +       if that task should preempt the currently running task, or if
> > +       the current task has exceeded its allotment of GPU time and should
> > +       be replaced.
> > +
> > +     config DRM_I915_SCHED_FIFO
> > +     bool "FIFO"
> > +     help
> > +       No task reordering, tasks are executed in order of readiness.
> > +       First in, first out.
> > +
> > +       Unready tasks do not block execution of other, independent clients.
> > +       A client will not be scheduled for execution until all of its
> > +       prerequisite work has completed.
> > +
> > +       This disables the scheduler and puts it into a pass-through mode.
> > +
> > +     config DRM_I915_SCHED_PRIORITY
> > +     bool "Priority"
> > +     help
> > +       Strict priority ordering, equal priority tasks are executed
> > +       in order of readiness. Clients are liable to starve other clients,
> > +       causing uneven execution and excess task latency. High priority
> > +       clients will preempt lower priority clients and will run
> > +       uninterrupted.
> > +
> > +       Note that interactive desktops will implicitly perform priority
> > +       boosting to minimise frame jitter.
> > +
> > +     config DRM_I915_SCHED_VIRTUAL_DEADLINE
> > +     bool "Virtual Deadline"
> > +     help
> > +       A fair scheduler based on MuQSS with priority-hinting.
> > +
> > +       When a task is ready for execution, it is given a quota (from the
> > +       engine's timeslice) and a virtual deadline. The virtual deadline is
> > +       derived from the current time and the timeslice scaled by the
> > +       task's priority. Higher priority tasks are given an earlier
> > +       deadline and receive a large portion of the execution bandwidth.
> > +
> > +       Requests are then executed in order of deadline completion.
> > +       Requests with earlier deadlines and higher priority than currently
> > +       executing on the engine will preempt the active task.
> > +
> > +endchoice
> > +
> > +config DRM_I915_SCHED
> > +     int
> > +     default 2 if DRM_I915_SCHED_VIRTUAL_DEADLINE
> > +     default 1 if DRM_I915_SCHED_PRIORITY
> > +     default 0 if DRM_I915_SCHED_FIFO
> > +     default -1
> 
> Default -1 would mean it would ask the user and not default to deadline?

CONFIG_DRM_I915_SCHED is unnamed, it is never itself presented to the
user. The choice is, and that ends up setting one of the 3 values, which
is then mapped to an integer value by DRM_I915_SCHED. That was done to
give the hierarchy to the policies which resulted in the cascade of
supporting fifo as a subset of priorities and priorities as a subset of
deadlines. Which also ties nicely into the different backends being able
to select different scheduling levels for themselves (no scheduling at
all for legacy ringbuffer and mock, deadlines for execlists/ringscheduler,
and fifo for guc).
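
To illustrate (only CONFIG_DRM_I915_SCHED and the 0/1/2 mapping above come
from the patch; the helper name and "max level" notion here are made up):

        /* 2: virtual deadlines, 1: priorities, 0: fifo (see Kconfig.profile) */
        static int i915_sched_level(int backend_max_level)
        {
                /*
                 * Never exceed what the backend implements, nor what the
                 * user selected as the preferred scheduler.
                 */
                return min(CONFIG_DRM_I915_SCHED, backend_max_level);
        }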

> Implementation wise it is very neat how you did it so there is basically 
> very little cost for the compiled out options. And code maintenance cost 
> to support multiple options is pretty trivial as well.
> 
> Only cost I can see is potential bug reports if "wrong" scheduler was 
> picked by someone. What do you envisage, or who, would be the use cases 
> for not going with deadline? (I think deadline should be default.)

The first thing I did with it was compare none/priority/deadlines with
wsim and ift, that's what I would expect most to try as well (replace
wsim with their favourite benchmark). For instance, it was reassuring
that timeslicing just worked, even without priorities. Beyond testing, it
is a gesture to putting policy back into the hands of the user, though
to truly do that we would make it a sysfs attribute.

That found a couple of bugs to make sure i915_sched_defer_request
degraded back into sorting by priorities (or not). And suggested maybe
we should try harder to avoid semaphores without the more adaptable
scheduling modes.

As for feedback in bugs, the choice should be included with the engine
state dump.

> Then there is a question of how these kconfig will interact, or at least 
> what their semantics would be, considering the GuC.

Hence the weasel word of "preferred". This config is the maximum
scheduling level; if the backend does not provide for request reordering
at all (e.g. the ringbuffer), then the user wishing to use a different
scheduler is out of luck. Also being a module level parameter, different
devices within the system may support different schedulers, and yet we
still want them to interact. Which poses a very real risk of priority
inversion across the boundaries. That I do not have an answer for, just
the intention to write tests to demonstrate the issue.

> I think we can modify the kconfig blurb to say they only apply to 
> execlists platforms, once we get a GuC scheduling platform upstream. And 
> fudge some sched mode bits for sysfs reporting in that case.

Aye, we will need some fudging for the GuC as it presents a very limited
interface and probably merits some unique caps.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [Intel-gfx] [PATCH 10/31] drm/i915: Fair low-latency scheduling
  2021-02-09 10:31     ` Chris Wilson
@ 2021-02-09 10:40       ` Tvrtko Ursulin
  0 siblings, 0 replies; 54+ messages in thread
From: Tvrtko Ursulin @ 2021-02-09 10:40 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 09/02/2021 10:31, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2021-02-09 09:37:19)
>>
>> On 08/02/2021 10:52, Chris Wilson wrote:
>>
>>> diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
>>> index 35bbe2b80596..f1d009906f71 100644
>>> --- a/drivers/gpu/drm/i915/Kconfig.profile
>>> +++ b/drivers/gpu/drm/i915/Kconfig.profile
>>> @@ -1,3 +1,65 @@
>>> +choice
>>> +     prompt "Preferred scheduler"
>>> +     default DRM_I915_SCHED_VIRTUAL_DEADLINE
>>> +     help
>>> +       Select the preferred method to decide the order of execution.
>>> +
>>> +       The scheduler is used for two purposes. First to defer unready
>>> +       jobs to not block execution of independent ready clients, so
>>> +       preventing GPU stalls while work waits for other tasks. The second
>>> +       purpose is to decide which task to run next, as well as decide
>>> +       if that task should preempt the currently running task, or if
>>> +       the current task has exceeded its allotment of GPU time and should
>>> +       be replaced.
>>> +
>>> +     config DRM_I915_SCHED_FIFO
>>> +     bool "FIFO"
>>> +     help
>>> +       No task reordering, tasks are executed in order of readiness.
>>> +       First in, first out.
>>> +
>>> +       Unready tasks do not block execution of other, independent clients.
>>> +       A client will not be scheduled for execution until all of its
>>> +       prerequisite work has completed.
>>> +
>>> +       This disables the scheduler and puts it into a pass-through mode.
>>> +
>>> +     config DRM_I915_SCHED_PRIORITY
>>> +     bool "Priority"
>>> +     help
>>> +       Strict priority ordering, equal priority tasks are executed
>>> +       in order of readiness. Clients are liable to starve other clients,
>>> +       causing uneven execution and excess task latency. High priority
>>> +       clients will preempt lower priority clients and will run
>>> +       uninterrupted.
>>> +
>>> +       Note that interactive desktops will implicitly perform priority
>>> +       boosting to minimise frame jitter.
>>> +
>>> +     config DRM_I915_SCHED_VIRTUAL_DEADLINE
>>> +     bool "Virtual Deadline"
>>> +     help
>>> +       A fair scheduler based on MuQSS with priority-hinting.
>>> +
>>> +       When a task is ready for execution, it is given a quota (from the
>>> +       engine's timeslice) and a virtual deadline. The virtual deadline is
>>> +       derived from the current time and the timeslice scaled by the
>>> +       task's priority. Higher priority tasks are given an earlier
>>> +       deadline and receive a large portion of the execution bandwidth.
>>> +
>>> +       Requests are then executed in order of deadline completion.
>>> +       Requests with earlier deadlines and higher priority than currently
>>> +       executing on the engine will preempt the active task.
>>> +
>>> +endchoice
>>> +
>>> +config DRM_I915_SCHED
>>> +     int
>>> +     default 2 if DRM_I915_SCHED_VIRTUAL_DEADLINE
>>> +     default 1 if DRM_I915_SCHED_PRIORITY
>>> +     default 0 if DRM_I915_SCHED_FIFO
>>> +     default -1
>>
>> Default -1 would mean it would ask the user and not default to deadline?
> 
> CONFIG_DRM_I915_SCHED is unnamed, it is never itself presented to the
> user. The choice is, and that ends up setting one of the 3 values, which
> is then mapped to an integer value by DRM_I915_SCHED. That was done to
> give the hierarchy to the policies which resulted in the cascade of
> supporting fifo as a subset of priorites and priorities as a subset of
> deadlines. Which also ties nicely into the different backends being able
> to select different scheduling levels for themselves (no scheduling at
> all for legacy ringbuffer and mock, deadlines for execlists/ringscheduler,
> and fifo for guc).

Yes sorry, there is "default DRM_I915_SCHED_VIRTUAL_DEADLINE" above 
which I missed.

>> Implementation wise it is very neat how you did it so there is basically
>> very little cost for the compiled out options. And code maintenance cost
>> to support multiple options is pretty trivial as well.
>>
>> Only cost I can see is potential bug reports if "wrong" scheduler was
>> picked by someone. What do you envisage, or who, would be the use cases
>> for not going with deadline? (I think deadline should be default.)
> 
> The first thing I did with it was compare none/priority/deadlines with
> wsim and ift, that's what I would expect most to try as well (replace
> wsim with their favourite benchmark). For instance, it was reassuring
> that timeslicing just worked, even without priorities. Beyond testing, it
> is a gesture to putting policy back into the hands of the user, though
> to truly do that we would make it a sysfs attribute.
> 
> That found a couple of bugs to make sure i915_sched_defer_request
> degraded back into sorting by priorities (or not). And suggested maybe
> we should try harder to avoid semaphores without the more adaptable
> scheduling modes.
> 
> As for feedback in bugs, the choice should be included with the engine
> state dump.

I think as a minimum some strong sentences should be put into the 
"Preferred scheduler" kconfig help saying not to change the default away 
from deadline unless one really really knows what they are doing. You 
know the usual kconfig language for this sort of situation.

>> Then there is a question of how these kconfig will interact, or at least
>> what their semantics would be, considering the GuC.
> 
> Hence the weasel word of "preferred". This config is the maximum
> scheduling level, if the backend does not provide for request reordering
> at all (e.g. the ringbuffer), then the user wishing to use a different
> scheduler is out of luck. Also being a module level parameter, different
> devices within the system may support different schedulers, and yet we
> still want them to interact. Which poses a very real risk of priority
> inversion across the boundaries. That I do not have an answer for, just
> the intention to write tests to demonstrate the issue.

Yes modparam vs multi-gpu we can solve in a generic fashion one day.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [Intel-gfx] [PATCH 30/31] drm/i915: Support secure dispatch on gen6/gen7
  2021-02-08 20:55   ` Dave Airlie
  2021-02-08 22:49     ` Chris Wilson
@ 2021-02-09 11:02     ` Tvrtko Ursulin
  1 sibling, 0 replies; 54+ messages in thread
From: Tvrtko Ursulin @ 2021-02-09 11:02 UTC (permalink / raw)
  To: Dave Airlie, Chris Wilson; +Cc: Intel Graphics Development, Matthew Auld


On 08/02/2021 20:55, Dave Airlie wrote:
> On Mon, 8 Feb 2021 at 20:53, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>>
>> Re-enable secure dispatch for gen6/gen7, primarily to workaround the
>> command parser and overly zealous command validation on Haswell. For
>> example this prevents making accurate measurements using a journal for
>> store results from the GPU without CPU intervention.
> 
> There's 31 patches in this series, and I can't find any 00/31 or
> justification for any of this work.
> 
> I see patches like this which seem to undo work done for security
> reasons under CVE patches with no oversight.
> 
> Again, the GT team is not doing the right thing here, stop focusing on
> individual pieces of Chris's work, push back for high level
> architectural reviews and I want them on the list in public.
> 
> All I want from the GT team in the next pull request is dma_resv
> locking work and restoring the hangcheck timers that seems like a
> regression that Chris found acceptable and nobody has pushed back on.
> 
> For like the 500th time, if you want DG1 and stuff in the tree, stop
> this shit already, real reviewers, high-level architectural reviews,
> NAK the bullshit in public on the list.

Since it's mostly been me reviewing the scheduler improvements in this 
series, I gather we have met and talked, or that you have at least 
been following me closely enough to conclude I am not a "real" reviewer. 
Fair?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [Intel-gfx] [PATCH 09/31] drm/i915: Replace priolist rbtree with a skiplist
  2021-02-08 16:19     ` Chris Wilson
@ 2021-02-09 16:11       ` Tvrtko Ursulin
  0 siblings, 0 replies; 54+ messages in thread
From: Tvrtko Ursulin @ 2021-02-09 16:11 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 08/02/2021 16:19, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2021-02-08 15:23:17)
>>
>> On 08/02/2021 10:52, Chris Wilson wrote:
>>>    static struct list_head *
>>>    lookup_priolist(struct i915_sched *se, int prio)
>>>    {
>>> -     struct i915_priolist *p;
>>> -     struct rb_node **parent, *rb;
>>> -     bool first = true;
>>> +     struct i915_priolist *update[I915_PRIOLIST_HEIGHT];
>>> +     struct i915_priolist_root *const root = &se->queue;
>>> +     struct i915_priolist *pl, *tmp;
>>> +     int lvl;
>>>    
>>>        lockdep_assert_held(&se->lock);
>>> -     assert_priolists(se);
>>> -
>>>        if (unlikely(se->no_priolist))
>>>                prio = I915_PRIORITY_NORMAL;
>>>    
>>> +     for_each_priolist(pl, root) { /* recycle any empty elements before us */
>>> +             if (pl->priority <= prio || !list_empty(&pl->requests))
>>> +                     break;
>>
>> This less part of the less-or-equal condition keeps confusing me as a
>> break criteria. If premise is cleaning up, why break on first smaller
>> prio? Would the idea be to prune all empty lists up to, not including,
>> the lookup prio?
> 
> Just parcelling up the work. If we tidy up all the unused nodes before
> us, we insert ourselves at the head of the tree, and all the cheap
> checks to see if this is the first request, or to find the first
> request are happy.
> 
> It's not expected to find anything unused with the tweaks to tidy up
> empty elements as we move between i915_priolist.requests, but it seems
> sensible to keep as then it should be just checking the first
> i915_priolist and breaking out.

It's fine; for some reason I missed that the order is descending. Probably 
thinking about deadlines already. Need to see how that works there then. 
But a comment indicating the order would be cool.
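
Something as simple as this above the loop would do (wording not
important):

        /* the skiplist is kept in descending priority order */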

>>> -void __i915_priolist_free(struct i915_priolist *p)
>>> +static void __remove_priolist(struct i915_sched *se, struct list_head *plist)
>>>    {
>>> -     kmem_cache_free(global.slab_priorities, p);
>>> +     struct i915_priolist_root *root = &se->queue;
>>> +     struct i915_priolist *pl, *tmp;
>>> +     struct i915_priolist *old =
>>> +             container_of(plist, struct i915_priolist, requests);
>>> +     int prio = old->priority;
>>> +     int lvl;
>>> +
>>> +     lockdep_assert_held(&se->lock);
>>> +     GEM_BUG_ON(!list_empty(plist));
>>> +
>>> +     pl = &root->sentinel;
>>> +     lvl = pl->level;
>>> +     GEM_BUG_ON(lvl < 0);
>>> +
>>> +     if (prio != I915_PRIORITY_NORMAL)
>>> +             pl_push(old, &pl->requests);
>>> +
>>> +     do {
>>> +             while (tmp = pl->next[lvl], tmp->priority > prio)
>>> +                     pl = tmp;

Ah okay, this is needed because the list is singly linked. I suggest a 
comment.

Doubly linked would not be interesting?

>>> +             if (lvl <= old->level) {
>>> +                     pl->next[lvl] = old->next[lvl];
>>> +                     if (pl == &root->sentinel && old->next[lvl] == pl) {
>>> +                             GEM_BUG_ON(pl->level != lvl);
>>> +                             pl->level--;
>>> +                     }
>>> +             }
>>> +     } while (--lvl >= 0);
>>> +     GEM_BUG_ON(tmp != old);
>>> +}
> 
>>> +struct i915_priolist *__i915_sched_dequeue_next(struct i915_sched *se)
>>> +{
>>> +     struct i915_priolist * const s = &se->queue.sentinel;
>>> +     struct i915_priolist *pl = s->next[0];
>>> +     int lvl;
>>> +
>>> +     GEM_BUG_ON(!list_empty(&pl->requests));
>>> +     GEM_BUG_ON(pl == s);
>>> +
>>> +     /* Keep pl->next[0] valid for for_each_priolist iteration */
>>> +     if (pl->priority != I915_PRIORITY_NORMAL)
>>> +             pl_push(pl, &s->requests);
>>> +
>>> +     lvl = pl->level;
>>> +     GEM_BUG_ON(lvl < 0);
>>> +     do {
>>> +             s->next[lvl] = pl->next[lvl];
>>> +             if (pl->next[lvl] == s) {
>>> +                     GEM_BUG_ON(s->level != lvl);
>>> +                     s->level--;
>>> +             }
>>> +     } while (--lvl >= 0);
>>> +
>>> +     return pl->next[0];
>>>    }
>>
>> If both __i915_sched_dequeue_next and __remove_priolist are removing an
>> empty list from the hieararchy, why can't they shared some code?
> 
> The __remove_priolist does the general search and remove, whereas
> dequeue_next is trying to keep O(1) remove-from-head. dequeue_next is
> meant to be called many, many more times than __remove_priolist.

Ok.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing
  2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
                   ` (32 preceding siblings ...)
  2021-02-08 16:13 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
@ 2021-02-09 17:52 ` Mika Kuoppala
  33 siblings, 0 replies; 54+ messages in thread
From: Mika Kuoppala @ 2021-02-09 17:52 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Chris Wilson

Chris Wilson <chris@chris-wilson.co.uk> writes:

> The heartbeat runs through a few phases that we expect to complete
> within a certain number of heartbeat intervals. First we must submit the
> heartbeat to the queue, and if the queue is occupied it may take a
> couple of intervals before the heartbeat preempts the workload and is
> submitted to HW. Once running on HW, completion is not instantaneous as
> it may have to first reset the current workload before it itself runs
> through the empty request and signals completion. As such, we know that
> the heartbeat must take at least the preempt reset timeout and before we
> have had a chance to reset the engine, we do not want to issue a global
> reset ourselves (simply so that we only try to do one reset at a time
> and not confuse ourselves by resetting twice and hitting an innocent.)
>
> So by taking into consideration that once running the request must take
> a finite amount of time, we can delay the final completion check to
> accommodate that and avoid checking too early (before we've had a chance
> to handle any engine resets required).
>
> v2: Attach a callback to flush the work immediately upon the heartbeat
> completion and insert the delay before the next.
>
> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2853
> Suggested-by: CQ Tang <cq.tang@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 95 ++++++++++++++++---
>  drivers/gpu/drm/i915/gt/intel_engine_types.h  |  1 +
>  .../drm/i915/gt/selftest_engine_heartbeat.c   | 65 ++++++-------
>  drivers/gpu/drm/i915/gt/selftest_execlists.c  |  5 +-
>  4 files changed, 117 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> index 0b062fad1837..209a477af412 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> @@ -20,6 +20,18 @@
>   * issue a reset -- in the hope that restores progress.
>   */
>  
> +#define HEARTBEAT_COMPLETION 50u /* milliseconds */
> +
> +static long completion_timeout(const struct intel_engine_cs *engine)
> +{
> +	long timeout = HEARTBEAT_COMPLETION;
> +
> +	if (intel_engine_has_preempt_reset(engine))
> +		timeout += READ_ONCE(engine->props.preempt_timeout_ms);

Was pondering that we don't add slack, but the slack is in
the initial value.

> +
> +	return msecs_to_jiffies(timeout);
> +}
> +
>  static bool next_heartbeat(struct intel_engine_cs *engine)
>  {
>  	long delay;
> @@ -29,6 +41,26 @@ static bool next_heartbeat(struct intel_engine_cs *engine)
>  		return false;
>  
>  	delay = msecs_to_jiffies_timeout(delay);
> +
> +	/*
> +	 * Once we submit a heartbeat to the HW, we know that it will take
> +	 * at least a certain amount of time to complete. On a hanging system
> +	 * it will first have to wait for the preempt reset timeout, and
> +	 * then it will take some time for the reset to resume with the
> +	 * heartbeat and for it to complete. So once we have submitted the
> +	 * heartbeat to HW, we can wait a while longer before declaring the
> +	 * engine stuck and forcing a reset ourselves. If we do a reset
> +	 * and the engine is also doing a reset, it is possible that we
> +	 * reset the engine twice, harming an innocent.
> +	 *
> +	 * Before we have sumitted the heartbeat, we do not want to change

s/sumitted/submitted.

> +	 * the interval as we to promote the heartbeat and trigger preemption

s/we to/we want to/ ?
> +	 * in a deterministic time frame.
> +	 */
> +	if (engine->heartbeat.systole &&
> +	    i915_request_is_active(engine->heartbeat.systole))
> +		delay = max(delay, completion_timeout(engine));

I see no harm to just always switch to the max.
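
That is, simply

        delay = max(delay, completion_timeout(engine));

unconditionally, without the systole/is_active() check.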

> +
>  	if (delay >= HZ)
>  		delay = round_jiffies_up_relative(delay);
>  	mod_delayed_work(system_highpri_wq, &engine->heartbeat.work, delay + 1);
> @@ -48,12 +80,49 @@ heartbeat_create(struct intel_context *ce, gfp_t gfp)
>  	return rq;
>  }
>  
> +static void defibrillator(struct dma_fence *f, struct dma_fence_cb *cb)
> +{
> +	struct intel_engine_cs *engine =
> +		container_of(cb, typeof(*engine), heartbeat.cb);
> +
> +	if (READ_ONCE(engine->heartbeat.systole))

This particular spot is not a problem but we do manipulate
the systole, without lock, in heartbeat().

So I see a race in idle_pulse() where we swap the systole.

-Mika

> +		mod_delayed_work(system_highpri_wq, &engine->heartbeat.work, 0);
> +}
> +
> +static void
> +untrack_heartbeat(struct intel_engine_cs *engine)
> +{
> +	struct i915_request *rq;
> +
> +	rq = fetch_and_zero(&engine->heartbeat.systole);
> +	if (!rq)
> +		return;
> +
> +	ENGINE_TRACE(engine, "heartbeat " RQ_FMT "completed\n", RQ_ARG(rq));
> +
> +	dma_fence_remove_callback(&rq->fence, &engine->heartbeat.cb);
> +	i915_request_put(rq);
> +}
> +
> +static void
> +track_heartbeat(struct intel_engine_cs *engine, struct i915_request *rq)
> +{
> +	ENGINE_TRACE(engine, "heartbeat " RQ_FMT "started\n", RQ_ARG(rq));
> +
> +	dma_fence_add_callback(&rq->fence,
> +			       &engine->heartbeat.cb,
> +			       defibrillator);
> +	engine->heartbeat.systole = i915_request_get(rq);
> +	if (!next_heartbeat(engine))
> +		untrack_heartbeat(engine);
> +}
> +
>  static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
>  {
>  	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
>  	i915_request_add_active_barriers(rq);
>  	if (!engine->heartbeat.systole && intel_engine_has_heartbeat(engine))
> -		engine->heartbeat.systole = i915_request_get(rq);
> +		track_heartbeat(engine, rq);
>  }
>  
>  static void heartbeat_commit(struct i915_request *rq,
> @@ -106,13 +175,8 @@ static void heartbeat(struct work_struct *wrk)
>  	intel_engine_flush_scheduler(engine);
>  
>  	rq = engine->heartbeat.systole;
> -	if (rq && i915_request_completed(rq)) {
> -		ENGINE_TRACE(engine,
> -			     "heartbeat " RQ_FMT "completed\n",
> -			     RQ_ARG(rq));
> -		i915_request_put(rq);
> -		engine->heartbeat.systole = NULL;
> -	}
> +	if (rq && i915_request_completed(rq))
> +		untrack_heartbeat(engine);
>  
>  	if (!intel_engine_pm_get_if_awake(engine))
>  		return;
> @@ -180,6 +244,11 @@ static void heartbeat(struct work_struct *wrk)
>  		goto out;
>  	}
>  
> +	/* Just completed one heartbeat, wait a tick before the next */
> +	if (rq)
> +		goto out;
> +
> +	/* The engine is parking. We can rest until the next user */
>  	serial = READ_ONCE(engine->serial);
>  	if (engine->wakeref_serial == serial)
>  		goto out;
> @@ -198,14 +267,14 @@ static void heartbeat(struct work_struct *wrk)
>  	if (IS_ERR(rq))
>  		goto unlock;
>  
> -	ENGINE_TRACE(engine, "heartbeat " RQ_FMT "started\n", RQ_ARG(rq));
>  	heartbeat_commit(rq, &attr);
>  
>  unlock:
>  	mutex_unlock(&ce->timeline->mutex);
>  out:
> +	intel_engine_flush_scheduler(engine);
>  	if (!engine->i915->params.enable_hangcheck || !next_heartbeat(engine))
> -		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
> +		untrack_heartbeat(engine);
>  	intel_engine_pm_put(engine);
>  }
>  
> @@ -219,8 +288,10 @@ void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
>  
>  void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
>  {
> -	if (cancel_delayed_work(&engine->heartbeat.work))
> -		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
> +	/* completion may rearm work */
> +	while (cancel_delayed_work(&engine->heartbeat.work))
> +		;
> +	untrack_heartbeat(engine);
>  }
>  
>  void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 7efa6290cc3e..d27a44070cb1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -322,6 +322,7 @@ struct intel_engine_cs {
>  	struct {
>  		struct delayed_work work;
>  		struct i915_request *systole;
> +		struct dma_fence_cb cb;
>  		unsigned long blocked;
>  	} heartbeat;
>  
> diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
> index b2c369317bf1..812c4a168b01 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
> @@ -202,47 +202,44 @@ static int cmp_u32(const void *_a, const void *_b)
>  
>  static int __live_heartbeat_fast(struct intel_engine_cs *engine)
>  {
> -	const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6));
> -	struct intel_context *ce;
> -	struct i915_request *rq;
> -	ktime_t t0, t1;
> +	const unsigned int error_threshold =
> +		max(3 * HEARTBEAT_COMPLETION * 1000, jiffies_to_usecs(6));
> +	struct intel_context *ce = engine->kernel_context;
>  	u32 times[5];
>  	int err;
>  	int i;
>  
> -	ce = intel_context_create(engine);
> -	if (IS_ERR(ce))
> -		return PTR_ERR(ce);
> -
>  	intel_engine_pm_get(engine);
>  
>  	err = intel_engine_set_heartbeat(engine, 1);
>  	if (err)
>  		goto err_pm;
>  
> +	flush_delayed_work(&engine->heartbeat.work);
> +	while (engine->heartbeat.systole)
> +		intel_engine_park_heartbeat(engine);
> +
>  	for (i = 0; i < ARRAY_SIZE(times); i++) {
> -		do {
> -			/* Manufacture a tick */
> -			intel_engine_park_heartbeat(engine);
> -			GEM_BUG_ON(engine->heartbeat.systole);
> -			engine->serial++; /*  pretend we are not idle! */
> -			intel_engine_unpark_heartbeat(engine);
> +		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN };
> +		struct i915_request *rq;
> +		ktime_t t0, t1;
>  
> -			flush_delayed_work(&engine->heartbeat.work);
> -			if (!delayed_work_pending(&engine->heartbeat.work)) {
> -				pr_err("%s: heartbeat %d did not start\n",
> -				       engine->name, i);
> -				err = -EINVAL;
> -				goto err_pm;
> -			}
> +		GEM_BUG_ON(READ_ONCE(engine->heartbeat.systole));
>  
> -			rcu_read_lock();
> -			rq = READ_ONCE(engine->heartbeat.systole);
> -			if (rq)
> -				rq = i915_request_get_rcu(rq);
> -			rcu_read_unlock();
> -		} while (!rq);
> +		/* Manufacture a tick */
> +		mutex_lock(&ce->timeline->mutex);
> +		rq = heartbeat_create(ce, GFP_KERNEL);
> +		if (!IS_ERR(rq)) {
> +			i915_request_get(rq);
> +			heartbeat_commit(rq, &attr);
> +		}
> +		mutex_unlock(&ce->timeline->mutex);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			goto err_reset;
> +		}
>  
> +		/* Time how long before the heartbeat monitor checks */
>  		t0 = ktime_get();
>  		while (rq == READ_ONCE(engine->heartbeat.systole))
>  			yield(); /* work is on the local cpu! */
> @@ -275,10 +272,10 @@ static int __live_heartbeat_fast(struct intel_engine_cs *engine)
>  		err = -EINVAL;
>  	}
>  
> +err_reset:
>  	reset_heartbeat(engine);
>  err_pm:
>  	intel_engine_pm_put(engine);
> -	intel_context_put(ce);
>  	return err;
>  }
>  
> @@ -308,20 +305,16 @@ static int __live_heartbeat_off(struct intel_engine_cs *engine)
>  
>  	intel_engine_pm_get(engine);
>  
> +	/* Kick once, so that we change an active heartbeat */
>  	engine->serial++;
> -	flush_delayed_work(&engine->heartbeat.work);
> -	if (!delayed_work_pending(&engine->heartbeat.work)) {
> -		pr_err("%s: heartbeat not running\n",
> -		       engine->name);
> -		err = -EINVAL;
> -		goto err_pm;
> -	}
> +	intel_engine_unpark_heartbeat(engine);
>  
>  	err = intel_engine_set_heartbeat(engine, 0);
>  	if (err)
>  		goto err_pm;
>  
> -	engine->serial++;
> +	/* The next heartbeat work should cancel the heartbeat */
> +	engine->serial++; /* pretend the engine is still active */
>  	flush_delayed_work(&engine->heartbeat.work);
>  	if (delayed_work_pending(&engine->heartbeat.work)) {
>  		pr_err("%s: heartbeat still running\n",
> diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> index f625c29023ea..04ded3a2d491 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> @@ -2325,13 +2325,16 @@ static int __cancel_fail(struct live_preempt_cancel *arg)
>  	del_timer_sync(&engine->execlists.preempt);
>  	intel_engine_flush_scheduler(engine);
>  
> +	engine->props.preempt_timeout_ms = 0;
>  	cancel_reset_timeout(engine);
>  
> -	/* after failure, require heartbeats to reset device */
> +	/* after failure, require fast heartbeats to reset device */
>  	intel_engine_set_heartbeat(engine, 1);
>  	err = wait_for_reset(engine, rq, HZ / 2);
>  	intel_engine_set_heartbeat(engine,
>  				   engine->defaults.heartbeat_interval_ms);
> +
> +	engine->props.preempt_timeout_ms = engine->defaults.preempt_timeout_ms;
>  	if (err) {
>  		pr_err("Cancelled inflight0 request did not reset\n");
>  		goto out;
> -- 
> 2.20.1
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


end of thread, other threads:[~2021-02-09 17:52 UTC | newest]

Thread overview: 54+ messages
2021-02-08 10:52 [Intel-gfx] [PATCH 01/31] drm/i915/gt: Ratelimit heartbeat completion probing Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 02/31] drm/i915: Move context revocation to scheduler Chris Wilson
2021-02-08 11:18   ` Tvrtko Ursulin
2021-02-08 10:52 ` [Intel-gfx] [PATCH 03/31] drm/i915: Introduce the scheduling mode Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 04/31] drm/i915: Move timeslicing flag to scheduler Chris Wilson
2021-02-08 11:43   ` Tvrtko Ursulin
2021-02-08 10:52 ` [Intel-gfx] [PATCH 05/31] drm/i915/gt: Declare when we enabled timeslicing Chris Wilson
2021-02-08 11:44   ` Tvrtko Ursulin
2021-02-08 10:52 ` [Intel-gfx] [PATCH 06/31] drm/i915: Move busywaiting control to the scheduler Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 07/31] drm/i915: Move preempt-reset flag " Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 08/31] drm/i915: Fix the iterative dfs for defering requests Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 09/31] drm/i915: Replace priolist rbtree with a skiplist Chris Wilson
2021-02-08 12:29   ` Tvrtko Ursulin
2021-02-08 12:46     ` Chris Wilson
2021-02-08 15:10       ` Tvrtko Ursulin
2021-02-08 15:23   ` Tvrtko Ursulin
2021-02-08 16:19     ` Chris Wilson
2021-02-09 16:11       ` Tvrtko Ursulin
2021-02-08 10:52 ` [Intel-gfx] [PATCH 10/31] drm/i915: Fair low-latency scheduling Chris Wilson
2021-02-08 14:56   ` Tvrtko Ursulin
2021-02-08 15:29     ` Chris Wilson
2021-02-08 16:03       ` Tvrtko Ursulin
2021-02-08 16:11         ` Chris Wilson
2021-02-09  9:37   ` Tvrtko Ursulin
2021-02-09 10:31     ` Chris Wilson
2021-02-09 10:40       ` Tvrtko Ursulin
2021-02-08 10:52 ` [Intel-gfx] [PATCH 11/31] drm/i915/gt: Specify a deadline for the heartbeat Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 12/31] drm/i915: Extend the priority boosting for the display with a deadline Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 13/31] drm/i915/gt: Support virtual engine queues Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 14/31] drm/i915: Move saturated workload detection back to the context Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 15/31] drm/i915: Bump default timeslicing quantum to 5ms Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 16/31] drm/i915/gt: Delay taking irqoff for execlists submission Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 17/31] drm/i915/gt: Convert the legacy ring submission to use the scheduling interface Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 18/31] drm/i915/gt: Wrap intel_timeline.has_initial_breadcrumb Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 19/31] drm/i915/gt: Track timeline GGTT offset separately from subpage offset Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 20/31] drm/i915/gt: Add timeline "mode" Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 21/31] drm/i915/gt: Use indices for writing into relative timelines Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 22/31] drm/i915/selftests: Exercise relative timeline modes Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 23/31] drm/i915/gt: Use ppHWSP for unshared non-semaphore related timelines Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 24/31] Restore "drm/i915: drop engine_pin/unpin_breadcrumbs_irq" Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 25/31] drm/i915/gt: Support creation of 'internal' rings Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 26/31] drm/i915/gt: Use client timeline address for seqno writes Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 27/31] drm/i915/gt: Infrastructure for ring scheduling Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 28/31] drm/i915/gt: Implement ring scheduler for gen4-7 Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 29/31] drm/i915/gt: Enable ring scheduling for gen5-7 Chris Wilson
2021-02-08 10:52 ` [Intel-gfx] [PATCH 30/31] drm/i915: Support secure dispatch on gen6/gen7 Chris Wilson
2021-02-08 20:55   ` Dave Airlie
2021-02-08 22:49     ` Chris Wilson
2021-02-09 11:02     ` Tvrtko Ursulin
2021-02-08 10:52 ` [Intel-gfx] [PATCH 31/31] drm/i915/gt: Limit C-states while waiting for requests Chris Wilson
2021-02-08 15:43 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/31] drm/i915/gt: Ratelimit heartbeat completion probing Patchwork
2021-02-08 15:45 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2021-02-08 16:13 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2021-02-09 17:52 ` [Intel-gfx] [PATCH 01/31] " Mika Kuoppala
