* [PATCH 1/7] drm/i915/selftests: Smoketest preemption
@ 2018-09-25  8:31 Chris Wilson
  2018-09-25  8:31 ` [PATCH 2/7] drm/i915/execlists: Avoid kicking priority on the current context Chris Wilson
                   ` (13 more replies)
  0 siblings, 14 replies; 24+ messages in thread
From: Chris Wilson @ 2018-09-25  8:31 UTC (permalink / raw)
  To: intel-gfx

Very light stress test to bombard the submission backends with a large
stream of requests of randomly assigned priorities. Preemption will
occasionally be requested, but never succeed!

v2: Include a second pattern with more frequent preemption

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
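Note to reviewers (not part of the commit message): the two submission
patterns below, in outline only (the real loops are in the diff):

   smoke_crescendo(): for each engine in turn, keep submitting until the
                      timeout with ctx->sched.priority stepping through
                      count++ % I915_PRIORITY_MAX;

   smoke_random():    until the timeout, walk all engines and give each
                      request a context priority drawn at random from
                      [I915_PRIORITY_MIN, I915_PRIORITY_MAX).
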
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 137 +++++++++++++++++++++
 1 file changed, 137 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 1aea7a8f2224..3a474bb64c05 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -6,6 +6,7 @@
 
 #include "../i915_selftest.h"
 #include "igt_flush_test.h"
+#include "i915_random.h"
 
 #include "mock_context.h"
 
@@ -573,6 +574,141 @@ static int live_preempt_hang(void *arg)
 	return err;
 }
 
+static int random_range(struct rnd_state *rnd, int min, int max)
+{
+	return i915_prandom_u32_max_state(max - min, rnd) + min;
+}
+
+static int random_priority(struct rnd_state *rnd)
+{
+	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
+}
+
+struct preempt_smoke {
+	struct drm_i915_private *i915;
+	struct i915_gem_context **contexts;
+	unsigned int ncontext;
+	struct rnd_state prng;
+};
+
+static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
+{
+	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
+							  &smoke->prng)];
+}
+
+static int smoke_crescendo(struct preempt_smoke *smoke)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned long count;
+
+	count = 0;
+	for_each_engine(engine, smoke->i915, id) {
+		IGT_TIMEOUT(end_time);
+
+		do {
+			struct i915_gem_context *ctx = smoke_context(smoke);
+			struct i915_request *rq;
+
+			ctx->sched.priority = count % I915_PRIORITY_MAX;
+
+			rq = i915_request_alloc(engine, ctx);
+			if (IS_ERR(rq))
+				return PTR_ERR(rq);
+
+			i915_request_add(rq);
+			count++;
+		} while (!__igt_timeout(end_time, NULL));
+	}
+
+	pr_info("Submitted %lu crescendo requests across %d engines and %d contexts\n",
+		count, INTEL_INFO(smoke->i915)->num_rings, smoke->ncontext);
+	return 0;
+}
+
+static int smoke_random(struct preempt_smoke *smoke)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+
+	count = 0;
+	do {
+		for_each_engine(engine, smoke->i915, id) {
+			struct i915_gem_context *ctx = smoke_context(smoke);
+			struct i915_request *rq;
+
+			ctx->sched.priority = random_priority(&smoke->prng);
+
+			rq = i915_request_alloc(engine, ctx);
+			if (IS_ERR(rq))
+				return PTR_ERR(rq);
+
+			i915_request_add(rq);
+			count++;
+		}
+	} while (!__igt_timeout(end_time, NULL));
+
+	pr_info("Submitted %lu random requests across %d engines and %d contexts\n",
+		count, INTEL_INFO(smoke->i915)->num_rings, smoke->ncontext);
+	return 0;
+}
+
+static int live_preempt_smoke(void *arg)
+{
+	struct preempt_smoke smoke = {
+		.i915 = arg,
+		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
+		.ncontext = 1024,
+	};
+	int err = -ENOMEM;
+	int n;
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915))
+		return 0;
+
+	smoke.contexts = kmalloc_array(smoke.ncontext,
+				       sizeof(*smoke.contexts),
+				       GFP_KERNEL);
+	if (!smoke.contexts)
+		return -ENOMEM;
+
+	mutex_lock(&smoke.i915->drm.struct_mutex);
+	intel_runtime_pm_get(smoke.i915);
+
+	for (n = 0; n < smoke.ncontext; n++) {
+		smoke.contexts[n] = kernel_context(smoke.i915);
+		if (!smoke.contexts[n])
+			goto err_ctx;
+	}
+
+	err = smoke_crescendo(&smoke);
+	if (err)
+		goto err_ctx;
+
+	err = smoke_random(&smoke);
+	if (err)
+		goto err_ctx;
+
+err_ctx:
+	if (igt_flush_test(smoke.i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	for (n = 0; n < smoke.ncontext; n++) {
+		if (!smoke.contexts[n])
+			break;
+		kernel_context_close(smoke.contexts[n]);
+	}
+
+	intel_runtime_pm_put(smoke.i915);
+	mutex_unlock(&smoke.i915->drm.struct_mutex);
+	kfree(smoke.contexts);
+
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -580,6 +716,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_preempt),
 		SUBTEST(live_late_preempt),
 		SUBTEST(live_preempt_hang),
+		SUBTEST(live_preempt_smoke),
 	};
 
 	if (!HAS_EXECLISTS(i915))
-- 
2.19.0

* [PATCH 2/7] drm/i915/execlists: Avoid kicking priority on the current context
  2018-09-25  8:31 [PATCH 1/7] drm/i915/selftests: Smoketest preemption Chris Wilson
@ 2018-09-25  8:31 ` Chris Wilson
  2018-09-25 10:14   ` Tvrtko Ursulin
  2018-09-25  8:32 ` [PATCH 3/7] drm/i915: Reserve some priority bits for internal use Chris Wilson
                   ` (12 subsequent siblings)
  13 siblings, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2018-09-25  8:31 UTC (permalink / raw)
  To: intel-gfx

If the request is currently on the HW (in port 0), then we do not need
to kick the submission tasklet to evaluate whether we should preempt
ourselves just to execute it again.

In the case that was annoying me:

   execlists_schedule: rq(18:211173).prio=0 -> 2
   need_preempt: last(18:211174).prio=0, queue.prio=2

We are bumping the priority of the first of a pair of requests running
in the current context. Then, when evaluating whether to preempt, we
would see that our boosted request has a higher priority than the last
executing request in ELSP0 and so trigger preemption, not realising that
the intended request was already executing.

v2: As we assume a state of execlists->port[] that is only valid while
we hold the timeline lock, we have to repeat some of the earlier tests
on the validity of the node.
v3: Wrap guc submission under the timeline.lock, as is now the way of
all things.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
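Note to reviewers (not part of the commit message): the gist of the new
bail-out in execlists_schedule(), with rq standing for
sched_to_request(node), is a sketch along the lines of

   /* under engine->timeline.lock */
   running = port_request(engine->execlists.port);   /* ELSP[0] */
   if (rq->global_seqno &&                           /* already submitted to HW */
       i915_seqno_passed(running->global_seqno, rq->global_seqno))
           continue; /* rq is at or before the request in ELSP[0], i.e.
                      * already in flight; preempting would only re-run
                      * what the GPU is executing, so don't kick the
                      * tasklet */

i.e. we only schedule the tasklet when the newly boosted request is not
the one already occupying port 0.
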
 drivers/gpu/drm/i915/intel_guc_submission.c | 18 +++------
 drivers/gpu/drm/i915/intel_lrc.c            | 41 +++++++++++++++------
 2 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index a81f04d46e87..4874a212754c 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -791,19 +791,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 
 static void guc_dequeue(struct intel_engine_cs *engine)
 {
-	unsigned long flags;
-	bool submit;
-
-	local_irq_save(flags);
-
-	spin_lock(&engine->timeline.lock);
-	submit = __guc_dequeue(engine);
-	spin_unlock(&engine->timeline.lock);
-
-	if (submit)
+	if (__guc_dequeue(engine))
 		guc_submit(engine);
-
-	local_irq_restore(flags);
 }
 
 static void guc_submission_tasklet(unsigned long data)
@@ -812,6 +801,9 @@ static void guc_submission_tasklet(unsigned long data)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
 	struct i915_request *rq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	rq = port_request(port);
 	while (rq && i915_request_completed(rq)) {
@@ -835,6 +827,8 @@ static void guc_submission_tasklet(unsigned long data)
 
 	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
 		guc_dequeue(engine);
+
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static struct i915_request *
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5b58c10bc600..e8de250c3413 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -356,13 +356,8 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
 {
 	struct intel_engine_cs *engine =
 		container_of(execlists, typeof(*engine), execlists);
-	unsigned long flags;
-
-	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	__unwind_incomplete_requests(engine);
-
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static inline void
@@ -1234,9 +1229,13 @@ static void execlists_schedule(struct i915_request *request,
 
 		engine = sched_lock_engine(node, engine);
 
+		/* Recheck after acquiring the engine->timeline.lock */
 		if (prio <= node->attr.priority)
 			continue;
 
+		if (i915_sched_node_signaled(node))
+			continue;
+
 		node->attr.priority = prio;
 		if (!list_empty(&node->link)) {
 			if (last != engine) {
@@ -1245,14 +1244,34 @@ static void execlists_schedule(struct i915_request *request,
 			}
 			GEM_BUG_ON(pl->priority != prio);
 			list_move_tail(&node->link, &pl->requests);
+		} else {
+			/*
+			 * If the request is not in the priolist queue because
+			 * it is not yet runnable, then it doesn't contribute
+			 * to our preemption decisions. On the other hand,
+			 * if the request is on the HW, it too is not in the
+			 * queue; but in that case we may still need to reorder
+			 * the inflight requests.
+			 */
+			if (!i915_sw_fence_done(&sched_to_request(node)->submit))
+				continue;
 		}
 
-		if (prio > engine->execlists.queue_priority &&
-		    i915_sw_fence_done(&sched_to_request(node)->submit)) {
-			/* defer submission until after all of our updates */
-			__update_queue(engine, prio);
-			tasklet_hi_schedule(&engine->execlists.tasklet);
-		}
+		if (prio <= engine->execlists.queue_priority)
+			continue;
+
+		/*
+		 * If we are already the currently executing context, don't
+		 * bother evaluating if we should preempt ourselves.
+		 */
+		if (sched_to_request(node)->global_seqno &&
+		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
+				      sched_to_request(node)->global_seqno))
+			continue;
+
+		/* Defer (tasklet) submission until after all of our updates. */
+		__update_queue(engine, prio);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
 	}
 
 	spin_unlock_irq(&engine->timeline.lock);
-- 
2.19.0

* [PATCH 3/7] drm/i915: Reserve some priority bits for internal use
  2018-09-25  8:31 [PATCH 1/7] drm/i915/selftests: Smoketest preemption Chris Wilson
  2018-09-25  8:31 ` [PATCH 2/7] drm/i915/execlists: Avoid kicking priority on the current context Chris Wilson
@ 2018-09-25  8:32 ` Chris Wilson
  2018-09-25  8:32 ` [PATCH 4/7] drm/i915: Combine multiple internal plists into the same i915_priolist bucket Chris Wilson
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Chris Wilson @ 2018-09-25  8:32 UTC (permalink / raw)
  To: intel-gfx

In the next few patches, we will want to give a small priority boost to
some requests/queues, but not so much that we perturb the user-controlled
order. As such, we shift the user priority bits higher, leaving ourselves
a few low priority bits for our internal bumping.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
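Worked example (not part of the patch): with I915_USER_PRIORITY_SHIFT
still 0 nothing changes yet; once patch 5 of this series raises it to 1,
an effective priority packs as

   user -1                   -> I915_USER_PRIORITY(-1) == -2
   user -1 + internal bit 0  ->                           -1   (still below user 0)
   user  0                   -> I915_USER_PRIORITY(0)  ==  0
   user  0 + internal bit 0  ->                            1   (still below user 1)
   user  1                   -> I915_USER_PRIORITY(1)  ==  2

so any internal bump stays strictly within one user level, and getparam
recovers the user value with sched.priority >> I915_USER_PRIORITY_SHIFT.
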
 drivers/gpu/drm/i915/i915_drv.h            | 2 +-
 drivers/gpu/drm/i915/i915_gem_context.c    | 9 +++++----
 drivers/gpu/drm/i915/i915_scheduler.h      | 6 ++++++
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 8 +++++---
 4 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8624b4bdc242..98ca7c0d598e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3230,7 +3230,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj,
 int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 				  unsigned int flags,
 				  const struct i915_sched_attr *attr);
-#define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX
+#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
 
 int __must_check
 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index f772593b99ab..150d7a6b2bd3 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -337,7 +337,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	kref_init(&ctx->ref);
 	list_add_tail(&ctx->link, &dev_priv->contexts.list);
 	ctx->i915 = dev_priv;
-	ctx->sched.priority = I915_PRIORITY_NORMAL;
+	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
 
 	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
 		struct intel_context *ce = &ctx->__engine[n];
@@ -504,7 +504,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	}
 
 	i915_gem_context_clear_bannable(ctx);
-	ctx->sched.priority = prio;
+	ctx->sched.priority = I915_USER_PRIORITY(prio);
 	ctx->ring_size = PAGE_SIZE;
 
 	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
@@ -879,7 +879,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		args->value = i915_gem_context_is_bannable(ctx);
 		break;
 	case I915_CONTEXT_PARAM_PRIORITY:
-		args->value = ctx->sched.priority;
+		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
 		break;
 	default:
 		ret = -EINVAL;
@@ -948,7 +948,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				 !capable(CAP_SYS_NICE))
 				ret = -EPERM;
 			else
-				ctx->sched.priority = priority;
+				ctx->sched.priority =
+					I915_USER_PRIORITY(priority);
 		}
 		break;
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 70a42220358d..89d456312557 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -19,6 +19,12 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
+#define I915_USER_PRIORITY_SHIFT 0
+#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
+
+#define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
+#define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1)
+
 struct i915_sched_attr {
 	/**
 	 * @priority: execution and service priority
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 3a474bb64c05..b8beee8551d5 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -292,12 +292,14 @@ static int live_preempt(void *arg)
 	ctx_hi = kernel_context(i915);
 	if (!ctx_hi)
 		goto err_spin_lo;
-	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
+	ctx_hi->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
 	ctx_lo = kernel_context(i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
-	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
+	ctx_lo->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
@@ -418,7 +420,7 @@ static int live_late_preempt(void *arg)
 			goto err_wedged;
 		}
 
-		attr.priority = I915_PRIORITY_MAX;
+		attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
 		engine->schedule(rq, &attr);
 
 		if (!wait_for_spinner(&spin_hi, rq)) {
-- 
2.19.0

* [PATCH 4/7] drm/i915: Combine multiple internal plists into the same i915_priolist bucket
  2018-09-25  8:31 [PATCH 1/7] drm/i915/selftests: Smoketest preemption Chris Wilson
  2018-09-25  8:31 ` [PATCH 2/7] drm/i915/execlists: Avoid kicking priority on the current context Chris Wilson
  2018-09-25  8:32 ` [PATCH 3/7] drm/i915: Reserve some priority bits for internal use Chris Wilson
@ 2018-09-25  8:32 ` Chris Wilson
  2018-09-25  9:48   ` Tvrtko Ursulin
  2018-09-25  8:32 ` [PATCH 5/7] drm/i915: Priority boost for new clients Chris Wilson
                   ` (10 subsequent siblings)
  13 siblings, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2018-09-25  8:32 UTC (permalink / raw)
  To: intel-gfx

As we are about to allow ourselves to slightly bump the user priority
into a few different sublevels, pack those internal priority lists
into the same i915_priolist to keep the rbtree compact and avoid having
to allocate the default user priority even after the internal bumping.
The downside to having a requests[] array rather than a node per active
list is that we then have to walk over the empty higher priority lists.
To compensate, we track the active buckets and use a small bitmap to
skip over any inactive ones.

v2: Use MASK of internal levels to simplify our usage.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
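Reading aid (not part of the patch): with the 1-bit shift this series
ends up with, each i915_priolist carries two request lists, and an
effective priority maps onto (rbtree key, bucket index) as

   idx   = I915_PRIORITY_MASK - (prio & I915_PRIORITY_MASK); /* bucket 0 = highest sublevel */
   level = prio >> I915_USER_PRIORITY_SHIFT;                 /* user level keys the rbtree */

   e.g. effective 3 (user 1, boosted) -> level 1, bucket 0
        effective 2 (user 1)          -> level 1, bucket 1

The consuming iterator then visits only the populated buckets via
ffs(p->used), clearing each bit as that bucket is drained.
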
 drivers/gpu/drm/i915/intel_engine_cs.c      |  6 +-
 drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++-
 drivers/gpu/drm/i915/intel_lrc.c            | 87 ++++++++++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.h     | 13 ++-
 4 files changed, 80 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 217ed3ee1cab..83f2f7774c1f 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1534,10 +1534,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	count = 0;
 	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
 	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
-		struct i915_priolist *p =
-			rb_entry(rb, typeof(*p), node);
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		int i;
 
-		list_for_each_entry(rq, &p->requests, sched.link) {
+		priolist_for_each_request(rq, p, i) {
 			if (count++ < MAX_REQUESTS_TO_SHOW - 1)
 				print_request(m, rq, "\t\tQ ");
 			else
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 4874a212754c..ac862b42f6a1 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -746,30 +746,28 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
+		int i;
 
-		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
+		priolist_for_each_request_consume(rq, rn, p, i) {
 			if (last && rq->hw_context != last->hw_context) {
-				if (port == last_port) {
-					__list_del_many(&p->requests,
-							&rq->sched.link);
+				if (port == last_port)
 					goto done;
-				}
 
 				if (submit)
 					port_assign(port, last);
 				port++;
 			}
 
-			INIT_LIST_HEAD(&rq->sched.link);
+			list_del_init(&rq->sched.link);
 
 			__i915_request_submit(rq);
 			trace_i915_request_in(rq, port_index(port, execlists));
+
 			last = rq;
 			submit = true;
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
-		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e8de250c3413..b1b3f67d1120 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -259,14 +259,49 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
 	ce->lrc_desc = desc;
 }
 
-static struct i915_priolist *
+static void assert_priolists(struct intel_engine_execlists * const execlists,
+			     int queue_priority)
+{
+	struct rb_node *rb;
+	int last_prio, i;
+
+	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+		return;
+
+	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
+		   rb_first(&execlists->queue.rb_root));
+
+	last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
+	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
+		struct i915_priolist *p = to_priolist(rb);
+
+		GEM_BUG_ON(p->priority >= last_prio);
+		last_prio = p->priority;
+
+		GEM_BUG_ON(!p->used);
+		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
+			if (list_empty(&p->requests[i]))
+				continue;
+
+			GEM_BUG_ON(!(p->used & BIT(i)));
+		}
+	}
+}
+
+static struct list_head *
 lookup_priolist(struct intel_engine_cs *engine, int prio)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_priolist *p;
 	struct rb_node **parent, *rb;
 	bool first = true;
+	int idx, i;
+
+	assert_priolists(execlists, INT_MAX);
 
+	/* buckets sorted from highest [in slot 0] to lowest priority */
+	idx = I915_PRIORITY_MASK - (prio & I915_PRIORITY_MASK);
+	prio >>= I915_USER_PRIORITY_SHIFT;
 	if (unlikely(execlists->no_priolist))
 		prio = I915_PRIORITY_NORMAL;
 
@@ -283,7 +318,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
 			parent = &rb->rb_right;
 			first = false;
 		} else {
-			return p;
+			goto out;
 		}
 	}
 
@@ -309,11 +344,15 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
 	}
 
 	p->priority = prio;
-	INIT_LIST_HEAD(&p->requests);
+	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
+		INIT_LIST_HEAD(&p->requests[i]);
 	rb_link_node(&p->node, rb, parent);
 	rb_insert_color_cached(&p->node, &execlists->queue, first);
+	p->used = 0;
 
-	return p;
+out:
+	p->used |= BIT(idx);
+	return &p->requests[idx];
 }
 
 static void unwind_wa_tail(struct i915_request *rq)
@@ -325,7 +364,7 @@ static void unwind_wa_tail(struct i915_request *rq)
 static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq, *rn;
-	struct i915_priolist *uninitialized_var(p);
+	struct list_head *uninitialized_var(pl);
 	int last_prio = I915_PRIORITY_INVALID;
 
 	lockdep_assert_held(&engine->timeline.lock);
@@ -342,12 +381,11 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 		if (rq_prio(rq) != last_prio) {
 			last_prio = rq_prio(rq);
-			p = lookup_priolist(engine, last_prio);
+			pl = lookup_priolist(engine, last_prio);
 		}
 		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
-		GEM_BUG_ON(p->priority != rq_prio(rq));
-		list_add(&rq->sched.link, &p->requests);
+		list_add(&rq->sched.link, pl);
 	}
 }
 
@@ -665,8 +703,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
+		int i;
 
-		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
+		priolist_for_each_request_consume(rq, rn, p, i) {
 			/*
 			 * Can we combine this request with the current port?
 			 * It has to be the same context/ringbuffer and not
@@ -685,11 +724,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * combine this request with the last, then we
 				 * are done.
 				 */
-				if (port == last_port) {
-					__list_del_many(&p->requests,
-							&rq->sched.link);
+				if (port == last_port)
 					goto done;
-				}
 
 				/*
 				 * If GVT overrides us we only ever submit
@@ -699,11 +735,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * request) to the second port.
 				 */
 				if (ctx_single_port_submission(last->hw_context) ||
-				    ctx_single_port_submission(rq->hw_context)) {
-					__list_del_many(&p->requests,
-							&rq->sched.link);
+				    ctx_single_port_submission(rq->hw_context))
 					goto done;
-				}
 
 				GEM_BUG_ON(last->hw_context == rq->hw_context);
 
@@ -714,15 +747,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				GEM_BUG_ON(port_isset(port));
 			}
 
-			INIT_LIST_HEAD(&rq->sched.link);
+			list_del_init(&rq->sched.link);
+
 			__i915_request_submit(rq);
 			trace_i915_request_in(rq, port_index(port, execlists));
+
 			last = rq;
 			submit = true;
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
-		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
@@ -746,6 +780,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 */
 	execlists->queue_priority =
 		port != execlists->port ? rq_prio(last) : INT_MIN;
+	assert_priolists(execlists, execlists->queue_priority);
 
 	if (submit) {
 		port_assign(port, last);
@@ -857,16 +892,16 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	/* Flush the queued requests to the timeline list (for retiring). */
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
+		int i;
 
-		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
-			INIT_LIST_HEAD(&rq->sched.link);
+		priolist_for_each_request_consume(rq, rn, p, i) {
+			list_del_init(&rq->sched.link);
 
 			dma_fence_set_error(&rq->fence, -EIO);
 			__i915_request_submit(rq);
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
-		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
@@ -1072,8 +1107,7 @@ static void queue_request(struct intel_engine_cs *engine,
 			  struct i915_sched_node *node,
 			  int prio)
 {
-	list_add_tail(&node->link,
-		      &lookup_priolist(engine, prio)->requests);
+	list_add_tail(&node->link, lookup_priolist(engine, prio));
 }
 
 static void __update_queue(struct intel_engine_cs *engine, int prio)
@@ -1143,7 +1177,7 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 static void execlists_schedule(struct i915_request *request,
 			       const struct i915_sched_attr *attr)
 {
-	struct i915_priolist *uninitialized_var(pl);
+	struct list_head *uninitialized_var(pl);
 	struct intel_engine_cs *engine, *last;
 	struct i915_dependency *dep, *p;
 	struct i915_dependency stack;
@@ -1242,8 +1276,7 @@ static void execlists_schedule(struct i915_request *request,
 				pl = lookup_priolist(engine, prio);
 				last = engine;
 			}
-			GEM_BUG_ON(pl->priority != prio);
-			list_move_tail(&node->link, &pl->requests);
+			list_move_tail(&node->link, pl);
 		} else {
 			/*
 			 * If the request is not in the priolist queue because
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2dfa585712c2..1534de5bb852 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -190,11 +190,22 @@ enum intel_engine_id {
 };
 
 struct i915_priolist {
+	struct list_head requests[I915_PRIORITY_COUNT];
 	struct rb_node node;
-	struct list_head requests;
+	unsigned long used;
 	int priority;
 };
 
+#define priolist_for_each_request(it, plist, idx) \
+	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
+		list_for_each_entry(it, &(plist)->requests[idx], sched.link)
+
+#define priolist_for_each_request_consume(it, n, plist, idx) \
+	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
+		list_for_each_entry_safe(it, n, \
+					 &(plist)->requests[idx - 1], \
+					 sched.link)
+
 struct st_preempt_hang {
 	struct completion completion;
 	bool inject_hang;
-- 
2.19.0

* [PATCH 5/7] drm/i915: Priority boost for new clients
  2018-09-25  8:31 [PATCH 1/7] drm/i915/selftests: Smoketest preemption Chris Wilson
                   ` (2 preceding siblings ...)
  2018-09-25  8:32 ` [PATCH 4/7] drm/i915: Combine multiple internal plists into the same i915_priolist bucket Chris Wilson
@ 2018-09-25  8:32 ` Chris Wilson
  2018-09-25  8:32 ` [PATCH 6/7] drm/i915: Pull scheduling under standalone lock Chris Wilson
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Chris Wilson @ 2018-09-25  8:32 UTC (permalink / raw)
  To: intel-gfx

Taken from an idea used for FQ_CODEL, we give the first request of each
new request flow a small priority boost. These flows are likely to
correspond to short, interactive tasks and so be more latency-sensitive
than the longer free-running queues. As soon as the client has more than
one request in the queue, further requests are not boosted and it settles
down into ordinary steady-state behaviour. Such small kicks dramatically
help combat the starvation issue by allowing each client the opportunity
to run even when the system is under heavy throughput load (within the
constraints of the user-selected priority).

v2: Mark the preempted request as the start of a new flow, to prevent a
single client being continually gazumped by its peers.

Testcase: igt/benchmarks/rrul
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
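Worked example (not part of the patch): with I915_USER_PRIORITY_SHIFT now
1 and I915_PRIORITY_NEWCLIENT == BIT(0),

   user 0, steady state  -> effective (0 << 1)      == 0
   user 0, new flow      -> effective (0 << 1) | 1  == 1
   user 1, steady state  -> effective (1 << 1)      == 2

so a boosted request only jumps ahead of requests at the same user
priority, never over a higher user level; and once the client has a
request still in flight (prev not completed), the boost is not applied
to its follow-up submissions.
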
 drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++--
 drivers/gpu/drm/i915/i915_scheduler.h |  4 +++-
 drivers/gpu/drm/i915/intel_lrc.c      | 25 +++++++++++++++++++------
 3 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a492385b2089..56140ca054e8 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1127,8 +1127,20 @@ void i915_request_add(struct i915_request *request)
 	 */
 	local_bh_disable();
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
-	if (engine->schedule)
-		engine->schedule(request, &request->gem_context->sched);
+	if (engine->schedule) {
+		struct i915_sched_attr attr = request->gem_context->sched;
+
+		/*
+		 * Boost priorities to new clients (new request flows).
+		 *
+		 * Allow interactive/synchronous clients to jump ahead of
+		 * the bulk clients. (FQ_CODEL)
+		 */
+		if (!prev || i915_request_completed(prev))
+			attr.priority |= I915_PRIORITY_NEWCLIENT;
+
+		engine->schedule(request, &attr);
+	}
 	rcu_read_unlock();
 	i915_sw_fence_commit(&request->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 89d456312557..53dc7dbf88b9 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -19,12 +19,14 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
-#define I915_USER_PRIORITY_SHIFT 0
+#define I915_USER_PRIORITY_SHIFT 1
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
 
 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1)
 
+#define I915_PRIORITY_NEWCLIENT	((u8)BIT(0))
+
 struct i915_sched_attr {
 	/**
 	 * @priority: execution and service priority
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b1b3f67d1120..e882466712c6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -363,9 +363,9 @@ static void unwind_wa_tail(struct i915_request *rq)
 
 static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
-	struct i915_request *rq, *rn;
+	struct i915_request *rq, *rn, *active = NULL;
 	struct list_head *uninitialized_var(pl);
-	int last_prio = I915_PRIORITY_INVALID;
+	int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
 
 	lockdep_assert_held(&engine->timeline.lock);
 
@@ -373,19 +373,32 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 					 &engine->timeline.requests,
 					 link) {
 		if (i915_request_completed(rq))
-			return;
+			break;
 
 		__i915_request_unsubmit(rq);
 		unwind_wa_tail(rq);
 
 		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-		if (rq_prio(rq) != last_prio) {
-			last_prio = rq_prio(rq);
-			pl = lookup_priolist(engine, last_prio);
+		if (rq_prio(rq) != prio) {
+			prio = rq_prio(rq);
+			pl = lookup_priolist(engine, prio);
 		}
 		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
 		list_add(&rq->sched.link, pl);
+
+		active = rq;
+	}
+
+	/*
+	 * The active request is now effectively the start of a new client
+	 * stream, so give it the equivalent small priority bump to prevent
+	 * it being gazumped a second time by another peer.
+	 */
+	if (!(prio & I915_PRIORITY_NEWCLIENT)) {
+		prio |= I915_PRIORITY_NEWCLIENT;
+		list_move_tail(&active->sched.link,
+			       lookup_priolist(engine, prio));
 	}
 }
 
-- 
2.19.0

* [PATCH 6/7] drm/i915: Pull scheduling under standalone lock
  2018-09-25  8:31 [PATCH 1/7] drm/i915/selftests: Smoketest preemption Chris Wilson
                   ` (3 preceding siblings ...)
  2018-09-25  8:32 ` [PATCH 5/7] drm/i915: Priority boost for new clients Chris Wilson
@ 2018-09-25  8:32 ` Chris Wilson
  2018-09-25  8:32 ` [PATCH 7/7] drm/i915: Priority boost for waiting clients Chris Wilson
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Chris Wilson @ 2018-09-25  8:32 UTC (permalink / raw)
  To: intel-gfx

Currently, the backend scheduling code abuses struct_mutex in order to
have a global lock to manipulate a temporary list (without widespread
allocation) and to protect against list modifications. This is an
extraneous coupling to struct_mutex, and furthermore it cannot extend
beyond the local device.

Pull all the code that needs to be under the one true lock into
i915_scheduler.c, and make it so.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
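Note to reviewers (not part of the commit message): the "one true lock"
ends up as a single static spinlock private to i915_scheduler.c. A
condensed view of the locking after this patch (details in the diff):

   static DEFINE_SPINLOCK(schedule_lock); /* guards dep->dfs_link and the dep lists */

   i915_schedule(rq, attr):
           spin_lock(&schedule_lock);              /* build the flat dfs list of deps */
           spin_lock_irq(&engine->timeline.lock);  /* then requeue engine by engine,  */
           ...                                     /* hopping via sched_lock_engine() */
           spin_unlock_irq(&engine->timeline.lock);
           spin_unlock(&schedule_lock);

struct_mutex is no longer required for priority bumping.
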
 drivers/gpu/drm/i915/Makefile           |   1 +
 drivers/gpu/drm/i915/i915_request.c     |  85 ------
 drivers/gpu/drm/i915/i915_request.h     |   8 -
 drivers/gpu/drm/i915/i915_scheduler.c   | 377 ++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.h   |  25 ++
 drivers/gpu/drm/i915/intel_display.c    |   3 +-
 drivers/gpu/drm/i915/intel_lrc.c        | 268 +----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |   5 +-
 8 files changed, 411 insertions(+), 361 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_scheduler.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 5794f102f9b8..ef1480c14e4e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -75,6 +75,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_gemfs.o \
 	  i915_query.o \
 	  i915_request.o \
+	  i915_scheduler.o \
 	  i915_timeline.o \
 	  i915_trace_points.o \
 	  i915_vma.o \
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 56140ca054e8..d73ad490a261 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -111,91 +111,6 @@ i915_request_remove_from_client(struct i915_request *request)
 	spin_unlock(&file_priv->mm.lock);
 }
 
-static struct i915_dependency *
-i915_dependency_alloc(struct drm_i915_private *i915)
-{
-	return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
-}
-
-static void
-i915_dependency_free(struct drm_i915_private *i915,
-		     struct i915_dependency *dep)
-{
-	kmem_cache_free(i915->dependencies, dep);
-}
-
-static void
-__i915_sched_node_add_dependency(struct i915_sched_node *node,
-				 struct i915_sched_node *signal,
-				 struct i915_dependency *dep,
-				 unsigned long flags)
-{
-	INIT_LIST_HEAD(&dep->dfs_link);
-	list_add(&dep->wait_link, &signal->waiters_list);
-	list_add(&dep->signal_link, &node->signalers_list);
-	dep->signaler = signal;
-	dep->flags = flags;
-}
-
-static int
-i915_sched_node_add_dependency(struct drm_i915_private *i915,
-			       struct i915_sched_node *node,
-			       struct i915_sched_node *signal)
-{
-	struct i915_dependency *dep;
-
-	dep = i915_dependency_alloc(i915);
-	if (!dep)
-		return -ENOMEM;
-
-	__i915_sched_node_add_dependency(node, signal, dep,
-					 I915_DEPENDENCY_ALLOC);
-	return 0;
-}
-
-static void
-i915_sched_node_fini(struct drm_i915_private *i915,
-		     struct i915_sched_node *node)
-{
-	struct i915_dependency *dep, *tmp;
-
-	GEM_BUG_ON(!list_empty(&node->link));
-
-	/*
-	 * Everyone we depended upon (the fences we wait to be signaled)
-	 * should retire before us and remove themselves from our list.
-	 * However, retirement is run independently on each timeline and
-	 * so we may be called out-of-order.
-	 */
-	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
-		GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler));
-		GEM_BUG_ON(!list_empty(&dep->dfs_link));
-
-		list_del(&dep->wait_link);
-		if (dep->flags & I915_DEPENDENCY_ALLOC)
-			i915_dependency_free(i915, dep);
-	}
-
-	/* Remove ourselves from everyone who depends upon us */
-	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
-		GEM_BUG_ON(dep->signaler != node);
-		GEM_BUG_ON(!list_empty(&dep->dfs_link));
-
-		list_del(&dep->signal_link);
-		if (dep->flags & I915_DEPENDENCY_ALLOC)
-			i915_dependency_free(i915, dep);
-	}
-}
-
-static void
-i915_sched_node_init(struct i915_sched_node *node)
-{
-	INIT_LIST_HEAD(&node->signalers_list);
-	INIT_LIST_HEAD(&node->waiters_list);
-	INIT_LIST_HEAD(&node->link);
-	node->attr.priority = I915_PRIORITY_INVALID;
-}
-
 static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
 {
 	struct intel_engine_cs *engine;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 7fa94b024968..5f7361e0fca6 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -332,14 +332,6 @@ static inline bool i915_request_completed(const struct i915_request *rq)
 	return __i915_request_completed(rq, seqno);
 }
 
-static inline bool i915_sched_node_signaled(const struct i915_sched_node *node)
-{
-	const struct i915_request *rq =
-		container_of(node, const struct i915_request, sched);
-
-	return i915_request_completed(rq);
-}
-
 void i915_retire_requests(struct drm_i915_private *i915);
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
new file mode 100644
index 000000000000..7bfc82f089e1
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -0,0 +1,377 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include <linux/mutex.h>
+
+#include "i915_drv.h"
+#include "i915_request.h"
+#include "i915_scheduler.h"
+
+static DEFINE_SPINLOCK(schedule_lock);
+
+static const struct i915_request *
+node_to_request(const struct i915_sched_node *node)
+{
+	return container_of(node, const struct i915_request, sched);
+}
+
+static inline bool node_signaled(const struct i915_sched_node *node)
+{
+	return i915_request_completed(node_to_request(node));
+}
+
+void i915_sched_node_init(struct i915_sched_node *node)
+{
+	INIT_LIST_HEAD(&node->signalers_list);
+	INIT_LIST_HEAD(&node->waiters_list);
+	INIT_LIST_HEAD(&node->link);
+	node->attr.priority = I915_PRIORITY_INVALID;
+}
+
+static struct i915_dependency *
+i915_dependency_alloc(struct drm_i915_private *i915)
+{
+	return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
+}
+
+static void
+i915_dependency_free(struct drm_i915_private *i915,
+		     struct i915_dependency *dep)
+{
+	kmem_cache_free(i915->dependencies, dep);
+}
+
+bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
+				      struct i915_sched_node *signal,
+				      struct i915_dependency *dep,
+				      unsigned long flags)
+{
+	bool ret = false;
+
+	spin_lock(&schedule_lock);
+
+	if (!node_signaled(signal)) {
+		INIT_LIST_HEAD(&dep->dfs_link);
+		list_add(&dep->wait_link, &signal->waiters_list);
+		list_add(&dep->signal_link, &node->signalers_list);
+		dep->signaler = signal;
+		dep->flags = flags;
+
+		ret = true;
+	}
+
+	spin_unlock(&schedule_lock);
+
+	return ret;
+}
+
+int i915_sched_node_add_dependency(struct drm_i915_private *i915,
+				   struct i915_sched_node *node,
+				   struct i915_sched_node *signal)
+{
+	struct i915_dependency *dep;
+
+	dep = i915_dependency_alloc(i915);
+	if (!dep)
+		return -ENOMEM;
+
+	if (!__i915_sched_node_add_dependency(node, signal, dep,
+					      I915_DEPENDENCY_ALLOC))
+		i915_dependency_free(i915, dep);
+
+	return 0;
+}
+
+void i915_sched_node_fini(struct drm_i915_private *i915,
+			  struct i915_sched_node *node)
+{
+	struct i915_dependency *dep, *tmp;
+
+	GEM_BUG_ON(!list_empty(&node->link));
+
+	spin_lock(&schedule_lock);
+
+	/*
+	 * Everyone we depended upon (the fences we wait to be signaled)
+	 * should retire before us and remove themselves from our list.
+	 * However, retirement is run independently on each timeline and
+	 * so we may be called out-of-order.
+	 */
+	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
+		GEM_BUG_ON(!node_signaled(dep->signaler));
+		GEM_BUG_ON(!list_empty(&dep->dfs_link));
+
+		list_del(&dep->wait_link);
+		if (dep->flags & I915_DEPENDENCY_ALLOC)
+			i915_dependency_free(i915, dep);
+	}
+
+	/* Remove ourselves from everyone who depends upon us */
+	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
+		GEM_BUG_ON(dep->signaler != node);
+		GEM_BUG_ON(!list_empty(&dep->dfs_link));
+
+		list_del(&dep->signal_link);
+		if (dep->flags & I915_DEPENDENCY_ALLOC)
+			i915_dependency_free(i915, dep);
+	}
+
+	spin_unlock(&schedule_lock);
+}
+
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+	return rb_entry(rb, struct i915_priolist, node);
+}
+
+static void assert_priolists(struct intel_engine_execlists * const execlists,
+			     int queue_priority)
+{
+	struct rb_node *rb;
+	int last_prio, i;
+
+	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+		return;
+
+	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
+		   rb_first(&execlists->queue.rb_root));
+
+	last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
+	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
+		const struct i915_priolist *p = to_priolist(rb);
+
+		GEM_BUG_ON(p->priority >= last_prio);
+		last_prio = p->priority;
+
+		GEM_BUG_ON(!p->used);
+		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
+			if (list_empty(&p->requests[i]))
+				continue;
+
+			GEM_BUG_ON(!(p->used & BIT(i)));
+		}
+	}
+}
+
+struct list_head *
+i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct i915_priolist *p;
+	struct rb_node **parent, *rb;
+	bool first = true;
+	int idx, i;
+
+	lockdep_assert_held(&engine->timeline.lock);
+	assert_priolists(execlists, INT_MAX);
+
+	/* buckets sorted from highest [in slot 0] to lowest priority */
+	idx = I915_PRIORITY_MASK - (prio & I915_PRIORITY_MASK);
+	prio >>= I915_USER_PRIORITY_SHIFT;
+	if (unlikely(execlists->no_priolist))
+		prio = I915_PRIORITY_NORMAL;
+
+find_priolist:
+	/* most positive priority is scheduled first, equal priorities fifo */
+	rb = NULL;
+	parent = &execlists->queue.rb_root.rb_node;
+	while (*parent) {
+		rb = *parent;
+		p = to_priolist(rb);
+		if (prio > p->priority) {
+			parent = &rb->rb_left;
+		} else if (prio < p->priority) {
+			parent = &rb->rb_right;
+			first = false;
+		} else {
+			goto out;
+		}
+	}
+
+	if (prio == I915_PRIORITY_NORMAL) {
+		p = &execlists->default_priolist;
+	} else {
+		p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
+		/* Convert an allocation failure to a priority bump */
+		if (unlikely(!p)) {
+			prio = I915_PRIORITY_NORMAL; /* recurses just once */
+
+			/* To maintain ordering with all rendering, after an
+			 * allocation failure we have to disable all scheduling.
+			 * Requests will then be executed in fifo, and schedule
+			 * will ensure that dependencies are emitted in fifo.
+			 * There will be still some reordering with existing
+			 * requests, so if userspace lied about their
+			 * dependencies that reordering may be visible.
+			 */
+			execlists->no_priolist = true;
+			goto find_priolist;
+		}
+	}
+
+	p->priority = prio;
+	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
+		INIT_LIST_HEAD(&p->requests[i]);
+	rb_link_node(&p->node, rb, parent);
+	rb_insert_color_cached(&p->node, &execlists->queue, first);
+	p->used = 0;
+
+out:
+	p->used |= BIT(idx);
+	return &p->requests[idx];
+}
+
+static struct intel_engine_cs *
+sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
+{
+	struct intel_engine_cs *engine = node_to_request(node)->engine;
+
+	GEM_BUG_ON(!locked);
+
+	if (engine != locked) {
+		spin_unlock(&locked->timeline.lock);
+		spin_lock(&engine->timeline.lock);
+	}
+
+	return engine;
+}
+
+void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+{
+	struct list_head *uninitialized_var(pl);
+	struct intel_engine_cs *engine, *last;
+	struct i915_dependency *dep, *p;
+	struct i915_dependency stack;
+	const int prio = attr->priority;
+	LIST_HEAD(dfs);
+
+	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
+
+	if (i915_request_completed(rq))
+		return;
+
+	if (prio <= READ_ONCE(rq->sched.attr.priority))
+		return;
+
+	/* Needed in order to use the temporary link inside i915_dependency */
+	spin_lock(&schedule_lock);
+
+	stack.signaler = &rq->sched;
+	list_add(&stack.dfs_link, &dfs);
+
+	/*
+	 * Recursively bump all dependent priorities to match the new request.
+	 *
+	 * A naive approach would be to use recursion:
+	 * static void update_priorities(struct i915_sched_node *node, prio) {
+	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
+	 *		update_priorities(dep->signal, prio)
+	 *	queue_request(node);
+	 * }
+	 * but that may have unlimited recursion depth and so runs a very
+	 * real risk of overunning the kernel stack. Instead, we build
+	 * a flat list of all dependencies starting with the current request.
+	 * As we walk the list of dependencies, we add all of its dependencies
+	 * to the end of the list (this may include an already visited
+	 * request) and continue to walk onwards onto the new dependencies. The
+	 * end result is a topological list of requests in reverse order, the
+	 * last element in the list is the request we must execute first.
+	 */
+	list_for_each_entry(dep, &dfs, dfs_link) {
+		struct i915_sched_node *node = dep->signaler;
+
+		/*
+		 * Within an engine, there can be no cycle, but we may
+		 * refer to the same dependency chain multiple times
+		 * (redundant dependencies are not eliminated) and across
+		 * engines.
+		 */
+		list_for_each_entry(p, &node->signalers_list, signal_link) {
+			GEM_BUG_ON(p == dep); /* no cycles! */
+
+			if (node_signaled(p->signaler))
+				continue;
+
+			GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority);
+			if (prio > READ_ONCE(p->signaler->attr.priority))
+				list_move_tail(&p->dfs_link, &dfs);
+		}
+	}
+
+	/*
+	 * If we didn't need to bump any existing priorities, and we haven't
+	 * yet submitted this request (i.e. there is no potential race with
+	 * execlists_submit_request()), we can set our own priority and skip
+	 * acquiring the engine locks.
+	 */
+	if (rq->sched.attr.priority == I915_PRIORITY_INVALID) {
+		GEM_BUG_ON(!list_empty(&rq->sched.link));
+		rq->sched.attr = *attr;
+
+		if (stack.dfs_link.next == stack.dfs_link.prev)
+			goto out_unlock;
+
+		__list_del_entry(&stack.dfs_link);
+	}
+
+	last = NULL;
+	engine = rq->engine;
+	spin_lock_irq(&engine->timeline.lock);
+
+	/* Fifo and depth-first replacement ensure our deps execute before us */
+	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
+		struct i915_sched_node *node = dep->signaler;
+
+		INIT_LIST_HEAD(&dep->dfs_link);
+
+		engine = sched_lock_engine(node, engine);
+
+		/* Recheck after acquiring the engine->timeline.lock */
+		if (prio <= node->attr.priority || node_signaled(node))
+			continue;
+
+		node->attr.priority = prio;
+		if (!list_empty(&node->link)) {
+			if (last != engine) {
+				pl = i915_sched_lookup_priolist(engine, prio);
+				last = engine;
+			}
+			list_move_tail(&node->link, pl);
+		} else {
+			/*
+			 * If the request is not in the priolist queue because
+			 * it is not yet runnable, then it doesn't contribute
+			 * to our preemption decisions. On the other hand,
+			 * if the request is on the HW, it too is not in the
+			 * queue; but in that case we may still need to reorder
+			 * the inflight requests.
+			 */
+			if (!i915_sw_fence_done(&node_to_request(node)->submit))
+				continue;
+		}
+
+		if (prio <= engine->execlists.queue_priority)
+			continue;
+
+		/*
+		 * If we are already the currently executing context, don't
+		 * bother evaluating if we should preempt ourselves.
+		 */
+		if (node_to_request(node)->global_seqno &&
+		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
+				      node_to_request(node)->global_seqno))
+			continue;
+
+		/* Defer (tasklet) submission until after all of our updates. */
+		engine->execlists.queue_priority = prio;
+		tasklet_hi_schedule(&engine->execlists.tasklet);
+	}
+
+	spin_unlock_irq(&engine->timeline.lock);
+
+out_unlock:
+	spin_unlock(&schedule_lock);
+}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 53dc7dbf88b9..68d84a45ad7f 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -8,9 +8,14 @@
 #define _I915_SCHEDULER_H_
 
 #include <linux/bitops.h>
+#include <linux/kernel.h>
 
 #include <uapi/drm/i915_drm.h>
 
+struct drm_i915_private;
+struct i915_request;
+struct intel_engine_cs;
+
 enum {
 	I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
 	I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
@@ -77,4 +82,24 @@ struct i915_dependency {
 #define I915_DEPENDENCY_ALLOC BIT(0)
 };
 
+void i915_sched_node_init(struct i915_sched_node *node);
+
+bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
+				      struct i915_sched_node *signal,
+				      struct i915_dependency *dep,
+				      unsigned long flags);
+
+int i915_sched_node_add_dependency(struct drm_i915_private *i915,
+				   struct i915_sched_node *node,
+				   struct i915_sched_node *signal);
+
+void i915_sched_node_fini(struct drm_i915_private *i915,
+			  struct i915_sched_node *node);
+
+void i915_schedule(struct i915_request *request,
+		   const struct i915_sched_attr *attr);
+
+struct list_head *
+i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
+
 #endif /* _I915_SCHEDULER_H_ */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 931898013506..eefd0d431731 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -13174,13 +13174,12 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 
 	ret = intel_plane_pin_fb(to_intel_plane_state(new_state));
 
-	fb_obj_bump_render_priority(obj);
-
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 	if (ret)
 		return ret;
 
+	fb_obj_bump_render_priority(obj);
 	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
 
 	if (!new_state->fence) { /* implicit fencing */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e882466712c6..74be9a49ef9e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -259,102 +259,6 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
 	ce->lrc_desc = desc;
 }
 
-static void assert_priolists(struct intel_engine_execlists * const execlists,
-			     int queue_priority)
-{
-	struct rb_node *rb;
-	int last_prio, i;
-
-	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
-		return;
-
-	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
-		   rb_first(&execlists->queue.rb_root));
-
-	last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
-	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
-		struct i915_priolist *p = to_priolist(rb);
-
-		GEM_BUG_ON(p->priority >= last_prio);
-		last_prio = p->priority;
-
-		GEM_BUG_ON(!p->used);
-		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
-			if (list_empty(&p->requests[i]))
-				continue;
-
-			GEM_BUG_ON(!(p->used & BIT(i)));
-		}
-	}
-}
-
-static struct list_head *
-lookup_priolist(struct intel_engine_cs *engine, int prio)
-{
-	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct i915_priolist *p;
-	struct rb_node **parent, *rb;
-	bool first = true;
-	int idx, i;
-
-	assert_priolists(execlists, INT_MAX);
-
-	/* buckets sorted from highest [in slot 0] to lowest priority */
-	idx = I915_PRIORITY_MASK - (prio & I915_PRIORITY_MASK);
-	prio >>= I915_USER_PRIORITY_SHIFT;
-	if (unlikely(execlists->no_priolist))
-		prio = I915_PRIORITY_NORMAL;
-
-find_priolist:
-	/* most positive priority is scheduled first, equal priorities fifo */
-	rb = NULL;
-	parent = &execlists->queue.rb_root.rb_node;
-	while (*parent) {
-		rb = *parent;
-		p = to_priolist(rb);
-		if (prio > p->priority) {
-			parent = &rb->rb_left;
-		} else if (prio < p->priority) {
-			parent = &rb->rb_right;
-			first = false;
-		} else {
-			goto out;
-		}
-	}
-
-	if (prio == I915_PRIORITY_NORMAL) {
-		p = &execlists->default_priolist;
-	} else {
-		p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
-		/* Convert an allocation failure to a priority bump */
-		if (unlikely(!p)) {
-			prio = I915_PRIORITY_NORMAL; /* recurses just once */
-
-			/* To maintain ordering with all rendering, after an
-			 * allocation failure we have to disable all scheduling.
-			 * Requests will then be executed in fifo, and schedule
-			 * will ensure that dependencies are emitted in fifo.
-			 * There will be still some reordering with existing
-			 * requests, so if userspace lied about their
-			 * dependencies that reordering may be visible.
-			 */
-			execlists->no_priolist = true;
-			goto find_priolist;
-		}
-	}
-
-	p->priority = prio;
-	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
-		INIT_LIST_HEAD(&p->requests[i]);
-	rb_link_node(&p->node, rb, parent);
-	rb_insert_color_cached(&p->node, &execlists->queue, first);
-	p->used = 0;
-
-out:
-	p->used |= BIT(idx);
-	return &p->requests[idx];
-}
-
 static void unwind_wa_tail(struct i915_request *rq)
 {
 	rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
@@ -381,7 +285,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 		if (rq_prio(rq) != prio) {
 			prio = rq_prio(rq);
-			pl = lookup_priolist(engine, prio);
+			pl = i915_sched_lookup_priolist(engine, prio);
 		}
 		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
@@ -398,7 +302,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	if (!(prio & I915_PRIORITY_NEWCLIENT)) {
 		prio |= I915_PRIORITY_NEWCLIENT;
 		list_move_tail(&active->sched.link,
-			       lookup_priolist(engine, prio));
+			       i915_sched_lookup_priolist(engine, prio));
 	}
 }
 
@@ -793,7 +697,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 */
 	execlists->queue_priority =
 		port != execlists->port ? rq_prio(last) : INT_MIN;
-	assert_priolists(execlists, execlists->queue_priority);
 
 	if (submit) {
 		port_assign(port, last);
@@ -1120,12 +1023,7 @@ static void queue_request(struct intel_engine_cs *engine,
 			  struct i915_sched_node *node,
 			  int prio)
 {
-	list_add_tail(&node->link, lookup_priolist(engine, prio));
-}
-
-static void __update_queue(struct intel_engine_cs *engine, int prio)
-{
-	engine->execlists.queue_priority = prio;
+	list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
 }
 
 static void __submit_queue_imm(struct intel_engine_cs *engine)
@@ -1144,7 +1042,7 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
 static void submit_queue(struct intel_engine_cs *engine, int prio)
 {
 	if (prio > engine->execlists.queue_priority) {
-		__update_queue(engine, prio);
+		engine->execlists.queue_priority = prio;
 		__submit_queue_imm(engine);
 	}
 }
@@ -1167,162 +1065,6 @@ static void execlists_submit_request(struct i915_request *request)
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
-static struct i915_request *sched_to_request(struct i915_sched_node *node)
-{
-	return container_of(node, struct i915_request, sched);
-}
-
-static struct intel_engine_cs *
-sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
-{
-	struct intel_engine_cs *engine = sched_to_request(node)->engine;
-
-	GEM_BUG_ON(!locked);
-
-	if (engine != locked) {
-		spin_unlock(&locked->timeline.lock);
-		spin_lock(&engine->timeline.lock);
-	}
-
-	return engine;
-}
-
-static void execlists_schedule(struct i915_request *request,
-			       const struct i915_sched_attr *attr)
-{
-	struct list_head *uninitialized_var(pl);
-	struct intel_engine_cs *engine, *last;
-	struct i915_dependency *dep, *p;
-	struct i915_dependency stack;
-	const int prio = attr->priority;
-	LIST_HEAD(dfs);
-
-	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
-
-	if (i915_request_completed(request))
-		return;
-
-	if (prio <= READ_ONCE(request->sched.attr.priority))
-		return;
-
-	/* Need BKL in order to use the temporary link inside i915_dependency */
-	lockdep_assert_held(&request->i915->drm.struct_mutex);
-
-	stack.signaler = &request->sched;
-	list_add(&stack.dfs_link, &dfs);
-
-	/*
-	 * Recursively bump all dependent priorities to match the new request.
-	 *
-	 * A naive approach would be to use recursion:
-	 * static void update_priorities(struct i915_sched_node *node, prio) {
-	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
-	 *		update_priorities(dep->signal, prio)
-	 *	queue_request(node);
-	 * }
-	 * but that may have unlimited recursion depth and so runs a very
-	 * real risk of overunning the kernel stack. Instead, we build
-	 * a flat list of all dependencies starting with the current request.
-	 * As we walk the list of dependencies, we add all of its dependencies
-	 * to the end of the list (this may include an already visited
-	 * request) and continue to walk onwards onto the new dependencies. The
-	 * end result is a topological list of requests in reverse order, the
-	 * last element in the list is the request we must execute first.
-	 */
-	list_for_each_entry(dep, &dfs, dfs_link) {
-		struct i915_sched_node *node = dep->signaler;
-
-		/*
-		 * Within an engine, there can be no cycle, but we may
-		 * refer to the same dependency chain multiple times
-		 * (redundant dependencies are not eliminated) and across
-		 * engines.
-		 */
-		list_for_each_entry(p, &node->signalers_list, signal_link) {
-			GEM_BUG_ON(p == dep); /* no cycles! */
-
-			if (i915_sched_node_signaled(p->signaler))
-				continue;
-
-			GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority);
-			if (prio > READ_ONCE(p->signaler->attr.priority))
-				list_move_tail(&p->dfs_link, &dfs);
-		}
-	}
-
-	/*
-	 * If we didn't need to bump any existing priorities, and we haven't
-	 * yet submitted this request (i.e. there is no potential race with
-	 * execlists_submit_request()), we can set our own priority and skip
-	 * acquiring the engine locks.
-	 */
-	if (request->sched.attr.priority == I915_PRIORITY_INVALID) {
-		GEM_BUG_ON(!list_empty(&request->sched.link));
-		request->sched.attr = *attr;
-		if (stack.dfs_link.next == stack.dfs_link.prev)
-			return;
-		__list_del_entry(&stack.dfs_link);
-	}
-
-	last = NULL;
-	engine = request->engine;
-	spin_lock_irq(&engine->timeline.lock);
-
-	/* Fifo and depth-first replacement ensure our deps execute before us */
-	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
-		struct i915_sched_node *node = dep->signaler;
-
-		INIT_LIST_HEAD(&dep->dfs_link);
-
-		engine = sched_lock_engine(node, engine);
-
-		/* Recheck after acquiring the engine->timeline.lock */
-		if (prio <= node->attr.priority)
-			continue;
-
-		if (i915_sched_node_signaled(node))
-			continue;
-
-		node->attr.priority = prio;
-		if (!list_empty(&node->link)) {
-			if (last != engine) {
-				pl = lookup_priolist(engine, prio);
-				last = engine;
-			}
-			list_move_tail(&node->link, pl);
-		} else {
-			/*
-			 * If the request is not in the priolist queue because
-			 * it is not yet runnable, then it doesn't contribute
-			 * to our preemption decisions. On the other hand,
-			 * if the request is on the HW, it too is not in the
-			 * queue; but in that case we may still need to reorder
-			 * the inflight requests.
-			 */
-			if (!i915_sw_fence_done(&sched_to_request(node)->submit))
-				continue;
-		}
-
-		if (prio <= engine->execlists.queue_priority)
-			continue;
-
-		/*
-		 * If we are already the currently executing context, don't
-		 * bother evaluating if we should preempt ourselves.
-		 */
-		if (sched_to_request(node)->global_seqno &&
-		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
-				      sched_to_request(node)->global_seqno))
-			continue;
-
-		/* Defer (tasklet) submission until after all of our updates. */
-		__update_queue(engine, prio);
-		tasklet_hi_schedule(&engine->execlists.tasklet);
-	}
-
-	spin_unlock_irq(&engine->timeline.lock);
-}
-
 static void execlists_context_destroy(struct intel_context *ce)
 {
 	GEM_BUG_ON(ce->pin_count);
@@ -2360,7 +2102,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = execlists_submit_request;
 	engine->cancel_requests = execlists_cancel_requests;
-	engine->schedule = execlists_schedule;
+	engine->schedule = i915_schedule;
 	engine->execlists.tasklet.func = execlists_submission_tasklet;
 
 	engine->reset.prepare = execlists_reset_prepare;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 1534de5bb852..f6ec48a75a69 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -498,11 +498,10 @@ struct intel_engine_cs {
 	 */
 	void		(*submit_request)(struct i915_request *rq);
 
-	/* Call when the priority on a request has changed and it and its
+	/*
+	 * Call when the priority on a request has changed and it and its
 	 * dependencies may need rescheduling. Note the request itself may
 	 * not be ready to run!
-	 *
-	 * Called under the struct_mutex.
 	 */
 	void		(*schedule)(struct i915_request *request,
 				    const struct i915_sched_attr *attr);
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 7/7] drm/i915: Priority boost for waiting clients
  2018-09-25  8:31 [PATCH 1/7] drm/i915/selftests: Smoketest preemption Chris Wilson
                   ` (4 preceding siblings ...)
  2018-09-25  8:32 ` [PATCH 6/7] drm/i915: Pull scheduling under standalone lock Chris Wilson
@ 2018-09-25  8:32 ` Chris Wilson
  2018-09-25  9:04   ` [PATCH v2] " Chris Wilson
  2018-09-25  9:09 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] drm/i915/selftests: Smoketest preemption Patchwork
                   ` (7 subsequent siblings)
  13 siblings, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2018-09-25  8:32 UTC (permalink / raw)
  To: intel-gfx

Latency is in the eye of the beholder. In the case where a client stops
and waits for the gpu, give that request chain a small priority boost
(not so that it overtakes higher priority clients, to preserve the
external ordering) so that ideally the wait completes earlier.

Testcase: igt/gem_sync/switch-default
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c       |  5 +++-
 drivers/gpu/drm/i915/i915_request.c   |  2 ++
 drivers/gpu/drm/i915/i915_request.h   |  5 ++--
 drivers/gpu/drm/i915/i915_scheduler.c | 34 ++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_scheduler.h |  5 +++-
 5 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index db9688d14912..63fb886d0aff 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1748,6 +1748,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	 */
 	err = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
 				   (write_domain ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT,
 				   to_rps_client(file));
@@ -3751,7 +3752,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	start = ktime_get();
 
 	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   I915_WAIT_ALL,
 				   to_wait_timeout(args->timeout_ns),
 				   to_rps_client(file));
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d73ad490a261..abd4dacbab8e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1237,6 +1237,8 @@ long i915_request_wait(struct i915_request *rq,
 		add_wait_queue(errq, &reset);
 
 	intel_wait_init(&wait);
+	if (flags & I915_WAIT_PRIORITY)
+		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
 
 restart:
 	do {
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 5f7361e0fca6..90e9d170a0cd 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -277,8 +277,9 @@ long i915_request_wait(struct i915_request *rq,
 	__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
 #define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
+#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
+#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
+#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
 
 static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
 					    u32 seqno);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 7bfc82f089e1..fcd98bf93f31 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -239,7 +239,8 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 	return engine;
 }
 
-void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+static void __i915_schedule(struct i915_request *rq,
+			    const struct i915_sched_attr *attr)
 {
 	struct list_head *uninitialized_var(pl);
 	struct intel_engine_cs *engine, *last;
@@ -248,6 +249,8 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	const int prio = attr->priority;
 	LIST_HEAD(dfs);
 
+	/* Needed in order to use the temporary link inside i915_dependency */
+	lockdep_assert_held(&schedule_lock);
 	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
 
 	if (i915_request_completed(rq))
@@ -256,9 +259,6 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	if (prio <= READ_ONCE(rq->sched.attr.priority))
 		return;
 
-	/* Needed in order to use the temporary link inside i915_dependency */
-	spin_lock(&schedule_lock);
-
 	stack.signaler = &rq->sched;
 	list_add(&stack.dfs_link, &dfs);
 
@@ -312,7 +312,7 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 		rq->sched.attr = *attr;
 
 		if (stack.dfs_link.next == stack.dfs_link.prev)
-			goto out_unlock;
+			return;
 
 		__list_del_entry(&stack.dfs_link);
 	}
@@ -371,7 +371,29 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	}
 
 	spin_unlock_irq(&engine->timeline.lock);
+}
 
-out_unlock:
+void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+{
+	spin_lock(&schedule_lock);
+	__i915_schedule(rq, attr);
 	spin_unlock(&schedule_lock);
 }
+
+void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
+{
+	struct i915_sched_attr attr;
+
+	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
+
+	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
+		return;
+
+	spin_lock_bh(&schedule_lock);
+
+	attr = rq->sched.attr;
+	attr.priority |= bump;
+	__i915_schedule(rq, &attr);
+
+	spin_unlock_bh(&schedule_lock);
+}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 68d84a45ad7f..89c3462e6fa6 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -24,12 +24,13 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
-#define I915_USER_PRIORITY_SHIFT 1
+#define I915_USER_PRIORITY_SHIFT 2
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
 
 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1)
 
+#define I915_PRIORITY_WAIT	((u8)BIT(1))
 #define I915_PRIORITY_NEWCLIENT	((u8)BIT(0))
 
 struct i915_sched_attr {
@@ -99,6 +100,8 @@ void i915_sched_node_fini(struct drm_i915_private *i915,
 void i915_schedule(struct i915_request *request,
 		   const struct i915_sched_attr *attr);
 
+void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump);
+
 struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
 
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v2] drm/i915: Priority boost for waiting clients
  2018-09-25  8:32 ` [PATCH 7/7] drm/i915: Priority boost for waiting clients Chris Wilson
@ 2018-09-25  9:04   ` Chris Wilson
  0 siblings, 0 replies; 24+ messages in thread
From: Chris Wilson @ 2018-09-25  9:04 UTC (permalink / raw)
  To: intel-gfx

Latency is in the eye of the beholder. In the case where a client stops
and waits for the gpu, give that request chain a small priority boost
(not so that it overtakes higher priority clients, to preserve the
external ordering) so that ideally the wait completes earlier.

v2: Tvrtko recommends keeping the boost-from-user-stall as small as
possible and allowing new client flows to be preferred for interactivity
over stalls.

Testcase: igt/gem_sync/switch-default
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c       |  5 +++-
 drivers/gpu/drm/i915/i915_request.c   |  2 ++
 drivers/gpu/drm/i915/i915_request.h   |  5 ++--
 drivers/gpu/drm/i915/i915_scheduler.c | 34 ++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_scheduler.h |  7 ++++--
 5 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index db9688d14912..63fb886d0aff 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1748,6 +1748,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	 */
 	err = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
 				   (write_domain ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT,
 				   to_rps_client(file));
@@ -3751,7 +3752,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	start = ktime_get();
 
 	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   I915_WAIT_ALL,
 				   to_wait_timeout(args->timeout_ns),
 				   to_rps_client(file));
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d73ad490a261..abd4dacbab8e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1237,6 +1237,8 @@ long i915_request_wait(struct i915_request *rq,
 		add_wait_queue(errq, &reset);
 
 	intel_wait_init(&wait);
+	if (flags & I915_WAIT_PRIORITY)
+		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
 
 restart:
 	do {
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 5f7361e0fca6..90e9d170a0cd 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -277,8 +277,9 @@ long i915_request_wait(struct i915_request *rq,
 	__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
 #define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
+#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
+#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
+#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
 
 static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
 					    u32 seqno);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 7bfc82f089e1..fcd98bf93f31 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -239,7 +239,8 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 	return engine;
 }
 
-void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+static void __i915_schedule(struct i915_request *rq,
+			    const struct i915_sched_attr *attr)
 {
 	struct list_head *uninitialized_var(pl);
 	struct intel_engine_cs *engine, *last;
@@ -248,6 +249,8 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	const int prio = attr->priority;
 	LIST_HEAD(dfs);
 
+	/* Needed in order to use the temporary link inside i915_dependency */
+	lockdep_assert_held(&schedule_lock);
 	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
 
 	if (i915_request_completed(rq))
@@ -256,9 +259,6 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	if (prio <= READ_ONCE(rq->sched.attr.priority))
 		return;
 
-	/* Needed in order to use the temporary link inside i915_dependency */
-	spin_lock(&schedule_lock);
-
 	stack.signaler = &rq->sched;
 	list_add(&stack.dfs_link, &dfs);
 
@@ -312,7 +312,7 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 		rq->sched.attr = *attr;
 
 		if (stack.dfs_link.next == stack.dfs_link.prev)
-			goto out_unlock;
+			return;
 
 		__list_del_entry(&stack.dfs_link);
 	}
@@ -371,7 +371,29 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	}
 
 	spin_unlock_irq(&engine->timeline.lock);
+}
 
-out_unlock:
+void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+{
+	spin_lock(&schedule_lock);
+	__i915_schedule(rq, attr);
 	spin_unlock(&schedule_lock);
 }
+
+void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
+{
+	struct i915_sched_attr attr;
+
+	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
+
+	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
+		return;
+
+	spin_lock_bh(&schedule_lock);
+
+	attr = rq->sched.attr;
+	attr.priority |= bump;
+	__i915_schedule(rq, &attr);
+
+	spin_unlock_bh(&schedule_lock);
+}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 68d84a45ad7f..dbe9cb7ecd82 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -24,13 +24,14 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
-#define I915_USER_PRIORITY_SHIFT 1
+#define I915_USER_PRIORITY_SHIFT 2
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
 
 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1)
 
-#define I915_PRIORITY_NEWCLIENT	((u8)BIT(0))
+#define I915_PRIORITY_WAIT	((u8)BIT(0))
+#define I915_PRIORITY_NEWCLIENT	((u8)BIT(1))
 
 struct i915_sched_attr {
 	/**
@@ -99,6 +100,8 @@ void i915_sched_node_fini(struct drm_i915_private *i915,
 void i915_schedule(struct i915_request *request,
 		   const struct i915_sched_attr *attr);
 
+void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump);
+
 struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
 
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] drm/i915/selftests: Smoketest preemption
  2018-09-25  8:31 [PATCH 1/7] drm/i915/selftests: Smoketest preemption Chris Wilson
                   ` (5 preceding siblings ...)
  2018-09-25  8:32 ` [PATCH 7/7] drm/i915: Priority boost for waiting clients Chris Wilson
@ 2018-09-25  9:09 ` Patchwork
  2018-09-25  9:12 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Patchwork @ 2018-09-25  9:09 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/7] drm/i915/selftests: Smoketest preemption
URL   : https://patchwork.freedesktop.org/series/50137/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
94c8bbdc1288 drm/i915/selftests: Smoketest preemption
-:92: WARNING:LINE_SPACING: Missing a blank line after declarations
#92: FILE: drivers/gpu/drm/i915/selftests/intel_lrc.c:634:
+	enum intel_engine_id id;
+	IGT_TIMEOUT(end_time);

total: 0 errors, 1 warnings, 0 checks, 155 lines checked
db029ada9348 drm/i915/execlists: Avoid kicking priority on the current context
e77648982b5d drm/i915: Reserve some priority bits for internal use
33f522195a47 drm/i915: Combine multiple internal plists into the same i915_priolist bucket
-:166: WARNING:FUNCTION_ARGUMENTS: function definition argument 'pl' should also have an identifier name
#166: FILE: drivers/gpu/drm/i915/intel_lrc.c:367:
+	struct list_head *uninitialized_var(pl);

-:284: WARNING:FUNCTION_ARGUMENTS: function definition argument 'pl' should also have an identifier name
#284: FILE: drivers/gpu/drm/i915/intel_lrc.c:1180:
+	struct list_head *uninitialized_var(pl);

-:313: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'plist' - possible side-effects?
#313: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:199:
+#define priolist_for_each_request(it, plist, idx) \
+	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
+		list_for_each_entry(it, &(plist)->requests[idx], sched.link)

-:313: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'idx' - possible side-effects?
#313: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:199:
+#define priolist_for_each_request(it, plist, idx) \
+	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
+		list_for_each_entry(it, &(plist)->requests[idx], sched.link)

-:317: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'plist' - possible side-effects?
#317: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:203:
+#define priolist_for_each_request_consume(it, n, plist, idx) \
+	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
+		list_for_each_entry_safe(it, n, \
+					 &(plist)->requests[idx - 1], \
+					 sched.link)

-:317: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'idx' - possible side-effects?
#317: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:203:
+#define priolist_for_each_request_consume(it, n, plist, idx) \
+	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
+		list_for_each_entry_safe(it, n, \
+					 &(plist)->requests[idx - 1], \
+					 sched.link)

total: 0 errors, 2 warnings, 4 checks, 272 lines checked
dfcd4482c432 drm/i915: Priority boost for new clients
bb22baf4add9 drm/i915: Pull scheduling under standalone lock
-:146: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#146: 
new file mode 100644

-:151: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#151: FILE: drivers/gpu/drm/i915/i915_scheduler.c:1:
+/*

-:394: WARNING:FUNCTION_ARGUMENTS: function definition argument 'pl' should also have an identifier name
#394: FILE: drivers/gpu/drm/i915/i915_scheduler.c:244:
+	struct list_head *uninitialized_var(pl);

total: 0 errors, 3 warnings, 0 checks, 870 lines checked
000a8c39d4ba drm/i915: Priority boost for waiting clients

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [1/7] drm/i915/selftests: Smoketest preemption
  2018-09-25  8:31 [PATCH 1/7] drm/i915/selftests: Smoketest preemption Chris Wilson
                   ` (6 preceding siblings ...)
  2018-09-25  9:09 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] drm/i915/selftests: Smoketest preemption Patchwork
@ 2018-09-25  9:12 ` Patchwork
  2018-09-25  9:31 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Patchwork @ 2018-09-25  9:12 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/7] drm/i915/selftests: Smoketest preemption
URL   : https://patchwork.freedesktop.org/series/50137/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915/selftests: Smoketest preemption
+./include/linux/slab.h:631:13: error: undefined identifier '__builtin_mul_overflow'
+./include/linux/slab.h:631:13: warning: call with no type!

Commit: drm/i915/execlists: Avoid kicking priority on the current context
Okay!

Commit: drm/i915: Reserve some priority bits for internal use
Okay!

Commit: drm/i915: Combine multiple internal plists into the same i915_priolist bucket
Okay!

Commit: drm/i915: Priority boost for new clients
Okay!

Commit: drm/i915: Pull scheduling under standalone lock
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Priority boost for waiting clients
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/7] drm/i915/selftests: Smoketest preemption
  2018-09-25  8:31 [PATCH 1/7] drm/i915/selftests: Smoketest preemption Chris Wilson
                   ` (7 preceding siblings ...)
  2018-09-25  9:12 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-09-25  9:31 ` Patchwork
  2018-09-25  9:39 ` [PATCH 1/7] " Tvrtko Ursulin
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Patchwork @ 2018-09-25  9:31 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/7] drm/i915/selftests: Smoketest preemption
URL   : https://patchwork.freedesktop.org/series/50137/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4870 -> Patchwork_10273 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/50137/revisions/1/mbox/

== Known issues ==

  Here are the changes found in Patchwork_10273 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@gem_exec_suspend@basic-s3:
      fi-blb-e6850:       PASS -> INCOMPLETE (fdo#107718)

    igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence:
      fi-byt-clapper:     PASS -> FAIL (fdo#103191, fdo#107362)

    
    ==== Possible fixes ====

    igt@gem_mmap_gtt@basic-small-bo-tiledy:
      fi-skl-6770hq:      DMESG-WARN (fdo#105541) -> PASS

    igt@kms_frontbuffer_tracking@basic:
      fi-byt-clapper:     FAIL (fdo#103167) -> PASS

    igt@kms_pipe_crc_basic@nonblocking-crc-pipe-a:
      fi-byt-clapper:     FAIL (fdo#107362) -> PASS

    igt@kms_pipe_crc_basic@nonblocking-crc-pipe-a-frame-sequence:
      fi-byt-clapper:     FAIL (fdo#103191, fdo#107362) -> PASS

    igt@kms_psr@primary_page_flip:
      fi-cnl-u:           FAIL (fdo#107336) -> PASS

    
  fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
  fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
  fdo#105541 https://bugs.freedesktop.org/show_bug.cgi?id=105541
  fdo#107336 https://bugs.freedesktop.org/show_bug.cgi?id=107336
  fdo#107362 https://bugs.freedesktop.org/show_bug.cgi?id=107362
  fdo#107718 https://bugs.freedesktop.org/show_bug.cgi?id=107718


== Participating hosts (46 -> 43) ==

  Additional (2): fi-kbl-7560u fi-skl-6700hq 
  Missing    (5): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-hsw-4200u 


== Build changes ==

    * Linux: CI_DRM_4870 -> Patchwork_10273

  CI_DRM_4870: d948aaddee519f5948458c98ed918a8d9dd7db61 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4649: 19b0c74d20d9b53d4c82be14af0909a3b6846010 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_10273: 000a8c39d4ba8c331e96cc66ce3e279142e80d27 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

000a8c39d4ba drm/i915: Priority boost for waiting clients
bb22baf4add9 drm/i915: Pull scheduling under standalone lock
dfcd4482c432 drm/i915: Priority boost for new clients
33f522195a47 drm/i915: Combine multiple internal plists into the same i915_priolist bucket
e77648982b5d drm/i915: Reserve some priority bits for internal use
db029ada9348 drm/i915/execlists: Avoid kicking priority on the current context
94c8bbdc1288 drm/i915/selftests: Smoketest preemption

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10273/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/7] drm/i915/selftests: Smoketest preemption
  2018-09-25  8:31 [PATCH 1/7] drm/i915/selftests: Smoketest preemption Chris Wilson
                   ` (8 preceding siblings ...)
  2018-09-25  9:31 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-09-25  9:39 ` Tvrtko Ursulin
  2018-09-25 10:17   ` Chris Wilson
  2018-09-25 10:28 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] drm/i915/selftests: Smoketest preemption (rev2) Patchwork
                   ` (3 subsequent siblings)
  13 siblings, 1 reply; 24+ messages in thread
From: Tvrtko Ursulin @ 2018-09-25  9:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/09/2018 09:31, Chris Wilson wrote:
> Very light stress test to bombard the submission backends with a large
> stream with requests of randomly assigned priorities. Preemption will be
> occasionally requested, but never succeed!

Why won't it ever succeed? By design, or because the test is targeting some bug?

> 
> v2: Include a second pattern with more frequent preemption
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/selftests/intel_lrc.c | 137 +++++++++++++++++++++
>   1 file changed, 137 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index 1aea7a8f2224..3a474bb64c05 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -6,6 +6,7 @@
>   
>   #include "../i915_selftest.h"
>   #include "igt_flush_test.h"
> +#include "i915_random.h"
>   
>   #include "mock_context.h"
>   
> @@ -573,6 +574,141 @@ static int live_preempt_hang(void *arg)
>   	return err;
>   }
>   
> +static int random_range(struct rnd_state *rnd, int min, int max)
> +{
> +	return i915_prandom_u32_max_state(max - min, rnd) + min;
> +}
> +
> +static int random_priority(struct rnd_state *rnd)
> +{
> +	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
> +}
> +
> +struct preempt_smoke {
> +	struct drm_i915_private *i915;
> +	struct i915_gem_context **contexts;
> +	unsigned int ncontext;
> +	struct rnd_state prng;
> +};
> +
> +static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
> +{
> +	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
> +							  &smoke->prng)];
> +}
> +
> +static int smoke_crescendo(struct preempt_smoke *smoke)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	unsigned long count;
> +
> +	count = 0;
> +	for_each_engine(engine, smoke->i915, id) {
> +		IGT_TIMEOUT(end_time);
> +
> +		do {
> +			struct i915_gem_context *ctx = smoke_context(smoke);
> +			struct i915_request *rq;
> +
> +			ctx->sched.priority = count % I915_PRIORITY_MAX;
> +
> +			rq = i915_request_alloc(engine, ctx);
> +			if (IS_ERR(rq))
> +				return PTR_ERR(rq);
> +
> +			i915_request_add(rq);
> +			count++;
> +		} while (!__igt_timeout(end_time, NULL));
> +	}
> +
> +	pr_info("Submitted %lu crescendo requests across %d engines and %d contexts\n",
> +		count, INTEL_INFO(smoke->i915)->num_rings, smoke->ncontext);
> +	return 0;
> +}
> +
> +static int smoke_random(struct preempt_smoke *smoke)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	IGT_TIMEOUT(end_time);
> +	unsigned long count;
> +
> +	count = 0;
> +	do {
> +		for_each_engine(engine, smoke->i915, id) {
> +			struct i915_gem_context *ctx = smoke_context(smoke);
> +			struct i915_request *rq;
> +
> +			ctx->sched.priority = random_priority(&smoke->prng);
> +
> +			rq = i915_request_alloc(engine, ctx);
> +			if (IS_ERR(rq))
> +				return PTR_ERR(rq);
> +
> +			i915_request_add(rq);
> +			count++;
> +		}
> +	} while (!__igt_timeout(end_time, NULL));
> +
> +	pr_info("Submitted %lu random requests across %d engines and %d contexts\n",
> +		count, INTEL_INFO(smoke->i915)->num_rings, smoke->ncontext);
> +	return 0;
> +}

Merge smoke_crescendo and smoke_random into one function which takes a flag to
decide the priority assignment policy, since that seems like the only difference?
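
Roughly something like this, perhaps (an untested sketch that keeps the
crescendo loop shape, so it glosses over the different engine/timeout nesting
in smoke_random; SMOKE_RANDOM_PRIO is a made-up flag name):

static int smoke_submit(struct preempt_smoke *smoke, unsigned int flags)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count = 0;

	for_each_engine(engine, smoke->i915, id) {
		IGT_TIMEOUT(end_time);

		do {
			struct i915_gem_context *ctx = smoke_context(smoke);
			struct i915_request *rq;

			/* the only difference: how the priority is chosen */
			if (flags & SMOKE_RANDOM_PRIO)
				ctx->sched.priority = random_priority(&smoke->prng);
			else
				ctx->sched.priority = count % I915_PRIORITY_MAX;

			rq = i915_request_alloc(engine, ctx);
			if (IS_ERR(rq))
				return PTR_ERR(rq);

			i915_request_add(rq);
			count++;
		} while (!__igt_timeout(end_time, NULL));
	}

	pr_info("Submitted %lu requests across %d engines and %d contexts\n",
		count, INTEL_INFO(smoke->i915)->num_rings, smoke->ncontext);
	return 0;
}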

> +
> +static int live_preempt_smoke(void *arg)
> +{
> +	struct preempt_smoke smoke = {
> +		.i915 = arg,
> +		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
> +		.ncontext = 1024,
> +	};
> +	int err = -ENOMEM;
> +	int n;
> +
> +	if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915))
> +		return 0;
> +
> +	smoke.contexts = kmalloc_array(smoke.ncontext,
> +				       sizeof(*smoke.contexts),
> +				       GFP_KERNEL);
> +	if (!smoke.contexts)
> +		return -ENOMEM;
> +
> +	mutex_lock(&smoke.i915->drm.struct_mutex);
> +	intel_runtime_pm_get(smoke.i915);
> +
> +	for (n = 0; n < smoke.ncontext; n++) {
> +		smoke.contexts[n] = kernel_context(smoke.i915);
> +		if (!smoke.contexts[n])
> +			goto err_ctx;

There isn't any request emission on context creation, I think, so this path
could jump to a new label which only frees.
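
Something along these lines, perhaps (an untested sketch; err_free is a
made-up label that just unwinds without the igt_flush_test):

		if (!smoke.contexts[n])
			goto err_free;
	...
err_free:
	for (n = 0; n < smoke.ncontext && smoke.contexts[n]; n++)
		kernel_context_close(smoke.contexts[n]);
	intel_runtime_pm_put(smoke.i915);
	mutex_unlock(&smoke.i915->drm.struct_mutex);
	kfree(smoke.contexts);
	return err;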

> +	}
> +
> +	err = smoke_crescendo(&smoke);
> +	if (err)
> +		goto err_ctx;

Sync/idle/flush between subtests?
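
E.g. something like this between the two (just a sketch, reusing the helper
this test already calls on the error path):

	if (igt_flush_test(smoke.i915, I915_WAIT_LOCKED)) {
		err = -EIO;
		goto err_ctx;
	}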

> +
> +	err = smoke_random(&smoke);
> +	if (err)
> +		goto err_ctx;
> +
> +err_ctx:
> +	if (igt_flush_test(smoke.i915, I915_WAIT_LOCKED))
> +		err = -EIO;
> +
> +	for (n = 0; n < smoke.ncontext; n++) {
> +		if (!smoke.contexts[n])
> +			break;

So __GFP_ZERO or a post-clear is needed on the array. Or kcalloc.
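
E.g. (a sketch):

	smoke.contexts = kcalloc(smoke.ncontext, sizeof(*smoke.contexts),
				 GFP_KERNEL);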

> +		kernel_context_close(smoke.contexts[n]);
> +	}
> +
> +	intel_runtime_pm_put(smoke.i915);
> +	mutex_unlock(&smoke.i915->drm.struct_mutex);
> +	kfree(smoke.contexts);
> +
> +	return err;
> +}
> +
>   int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
> @@ -580,6 +716,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_preempt),
>   		SUBTEST(live_late_preempt),
>   		SUBTEST(live_preempt_hang),
> +		SUBTEST(live_preempt_smoke),
>   	};
>   
>   	if (!HAS_EXECLISTS(i915))
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 4/7] drm/i915: Combine multiple internal plists into the same i915_priolist bucket
  2018-09-25  8:32 ` [PATCH 4/7] drm/i915: Combine multiple internal plists into the same i915_priolist bucket Chris Wilson
@ 2018-09-25  9:48   ` Tvrtko Ursulin
  2018-09-27 13:05     ` Chris Wilson
  0 siblings, 1 reply; 24+ messages in thread
From: Tvrtko Ursulin @ 2018-09-25  9:48 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/09/2018 09:32, Chris Wilson wrote:
> As we are about to allow ourselves to slightly bump the user priority
> into a few different sublevels, pack those internal priority lists
> into the same i915_priolist to keep the rbtree compact and avoid having
> to allocate the default user priority even after the internal bumping.
> The downside to having a requests[] rather than a node per active list,
> is that we then have to walk over the empty higher priority lists. To
> compensate, we track the active buckets and use a small bitmap to skip
> over any inactive ones.
> 
> v2: Use MASK of internal levels to simplify our usage.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_engine_cs.c      |  6 +-
>   drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++-
>   drivers/gpu/drm/i915/intel_lrc.c            | 87 ++++++++++++++-------
>   drivers/gpu/drm/i915/intel_ringbuffer.h     | 13 ++-
>   4 files changed, 80 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 217ed3ee1cab..83f2f7774c1f 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1534,10 +1534,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   	count = 0;
>   	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
>   	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> -		struct i915_priolist *p =
> -			rb_entry(rb, typeof(*p), node);
> +		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> +		int i;
>   
> -		list_for_each_entry(rq, &p->requests, sched.link) {
> +		priolist_for_each_request(rq, p, i) {
>   			if (count++ < MAX_REQUESTS_TO_SHOW - 1)
>   				print_request(m, rq, "\t\tQ ");
>   			else
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 4874a212754c..ac862b42f6a1 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -746,30 +746,28 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>   	while ((rb = rb_first_cached(&execlists->queue))) {
>   		struct i915_priolist *p = to_priolist(rb);
>   		struct i915_request *rq, *rn;
> +		int i;
>   
> -		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
> +		priolist_for_each_request_consume(rq, rn, p, i) {
>   			if (last && rq->hw_context != last->hw_context) {
> -				if (port == last_port) {
> -					__list_del_many(&p->requests,
> -							&rq->sched.link);
> +				if (port == last_port)
>   					goto done;
> -				}
>   
>   				if (submit)
>   					port_assign(port, last);
>   				port++;
>   			}
>   
> -			INIT_LIST_HEAD(&rq->sched.link);
> +			list_del_init(&rq->sched.link);
>   
>   			__i915_request_submit(rq);
>   			trace_i915_request_in(rq, port_index(port, execlists));
> +
>   			last = rq;
>   			submit = true;
>   		}
>   
>   		rb_erase_cached(&p->node, &execlists->queue);
> -		INIT_LIST_HEAD(&p->requests);
>   		if (p->priority != I915_PRIORITY_NORMAL)
>   			kmem_cache_free(engine->i915->priorities, p);
>   	}
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index e8de250c3413..b1b3f67d1120 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -259,14 +259,49 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
>   	ce->lrc_desc = desc;
>   }
>   
> -static struct i915_priolist *
> +static void assert_priolists(struct intel_engine_execlists * const execlists,
> +			     int queue_priority)
> +{
> +	struct rb_node *rb;
> +	int last_prio, i;
> +
> +	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> +		return;
> +
> +	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
> +		   rb_first(&execlists->queue.rb_root));
> +
> +	last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
> +	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> +		struct i915_priolist *p = to_priolist(rb);
> +
> +		GEM_BUG_ON(p->priority >= last_prio);
> +		last_prio = p->priority;
> +
> +		GEM_BUG_ON(!p->used);
> +		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
> +			if (list_empty(&p->requests[i]))
> +				continue;
> +
> +			GEM_BUG_ON(!(p->used & BIT(i)));
> +		}
> +	}
> +}
> +
> +static struct list_head *
>   lookup_priolist(struct intel_engine_cs *engine, int prio)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct i915_priolist *p;
>   	struct rb_node **parent, *rb;
>   	bool first = true;
> +	int idx, i;
> +
> +	assert_priolists(execlists, INT_MAX);
>   
> +	/* buckets sorted from highest [in slot 0] to lowest priority */
> +	idx = I915_PRIORITY_MASK - (prio & I915_PRIORITY_MASK);

How about:

#define I915_INTERNAL_PRIO(p) ((p) & I915_PRIORITY_MASK)

idx = I915_PRIORITY_COUNT - 1 - I915_INTERNAL_PRIO(prio); ?

Regards,

Tvrtko

> +	prio >>= I915_USER_PRIORITY_SHIFT;
>   	if (unlikely(execlists->no_priolist))
>   		prio = I915_PRIORITY_NORMAL;
>   
> @@ -283,7 +318,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
>   			parent = &rb->rb_right;
>   			first = false;
>   		} else {
> -			return p;
> +			goto out;
>   		}
>   	}
>   
> @@ -309,11 +344,15 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
>   	}
>   
>   	p->priority = prio;
> -	INIT_LIST_HEAD(&p->requests);
> +	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
> +		INIT_LIST_HEAD(&p->requests[i]);
>   	rb_link_node(&p->node, rb, parent);
>   	rb_insert_color_cached(&p->node, &execlists->queue, first);
> +	p->used = 0;
>   
> -	return p;
> +out:
> +	p->used |= BIT(idx);
> +	return &p->requests[idx];
>   }
>   
>   static void unwind_wa_tail(struct i915_request *rq)
> @@ -325,7 +364,7 @@ static void unwind_wa_tail(struct i915_request *rq)
>   static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   {
>   	struct i915_request *rq, *rn;
> -	struct i915_priolist *uninitialized_var(p);
> +	struct list_head *uninitialized_var(pl);
>   	int last_prio = I915_PRIORITY_INVALID;
>   
>   	lockdep_assert_held(&engine->timeline.lock);
> @@ -342,12 +381,11 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
>   		if (rq_prio(rq) != last_prio) {
>   			last_prio = rq_prio(rq);
> -			p = lookup_priolist(engine, last_prio);
> +			pl = lookup_priolist(engine, last_prio);
>   		}
>   		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>   
> -		GEM_BUG_ON(p->priority != rq_prio(rq));
> -		list_add(&rq->sched.link, &p->requests);
> +		list_add(&rq->sched.link, pl);
>   	}
>   }
>   
> @@ -665,8 +703,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	while ((rb = rb_first_cached(&execlists->queue))) {
>   		struct i915_priolist *p = to_priolist(rb);
>   		struct i915_request *rq, *rn;
> +		int i;
>   
> -		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
> +		priolist_for_each_request_consume(rq, rn, p, i) {
>   			/*
>   			 * Can we combine this request with the current port?
>   			 * It has to be the same context/ringbuffer and not
> @@ -685,11 +724,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   				 * combine this request with the last, then we
>   				 * are done.
>   				 */
> -				if (port == last_port) {
> -					__list_del_many(&p->requests,
> -							&rq->sched.link);
> +				if (port == last_port)
>   					goto done;
> -				}
>   
>   				/*
>   				 * If GVT overrides us we only ever submit
> @@ -699,11 +735,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   				 * request) to the second port.
>   				 */
>   				if (ctx_single_port_submission(last->hw_context) ||
> -				    ctx_single_port_submission(rq->hw_context)) {
> -					__list_del_many(&p->requests,
> -							&rq->sched.link);
> +				    ctx_single_port_submission(rq->hw_context))
>   					goto done;
> -				}
>   
>   				GEM_BUG_ON(last->hw_context == rq->hw_context);
>   
> @@ -714,15 +747,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   				GEM_BUG_ON(port_isset(port));
>   			}
>   
> -			INIT_LIST_HEAD(&rq->sched.link);
> +			list_del_init(&rq->sched.link);
> +
>   			__i915_request_submit(rq);
>   			trace_i915_request_in(rq, port_index(port, execlists));
> +
>   			last = rq;
>   			submit = true;
>   		}
>   
>   		rb_erase_cached(&p->node, &execlists->queue);
> -		INIT_LIST_HEAD(&p->requests);
>   		if (p->priority != I915_PRIORITY_NORMAL)
>   			kmem_cache_free(engine->i915->priorities, p);
>   	}
> @@ -746,6 +780,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	 */
>   	execlists->queue_priority =
>   		port != execlists->port ? rq_prio(last) : INT_MIN;
> +	assert_priolists(execlists, execlists->queue_priority);
>   
>   	if (submit) {
>   		port_assign(port, last);
> @@ -857,16 +892,16 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>   	/* Flush the queued requests to the timeline list (for retiring). */
>   	while ((rb = rb_first_cached(&execlists->queue))) {
>   		struct i915_priolist *p = to_priolist(rb);
> +		int i;
>   
> -		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
> -			INIT_LIST_HEAD(&rq->sched.link);
> +		priolist_for_each_request_consume(rq, rn, p, i) {
> +			list_del_init(&rq->sched.link);
>   
>   			dma_fence_set_error(&rq->fence, -EIO);
>   			__i915_request_submit(rq);
>   		}
>   
>   		rb_erase_cached(&p->node, &execlists->queue);
> -		INIT_LIST_HEAD(&p->requests);
>   		if (p->priority != I915_PRIORITY_NORMAL)
>   			kmem_cache_free(engine->i915->priorities, p);
>   	}
> @@ -1072,8 +1107,7 @@ static void queue_request(struct intel_engine_cs *engine,
>   			  struct i915_sched_node *node,
>   			  int prio)
>   {
> -	list_add_tail(&node->link,
> -		      &lookup_priolist(engine, prio)->requests);
> +	list_add_tail(&node->link, lookup_priolist(engine, prio));
>   }
>   
>   static void __update_queue(struct intel_engine_cs *engine, int prio)
> @@ -1143,7 +1177,7 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
>   static void execlists_schedule(struct i915_request *request,
>   			       const struct i915_sched_attr *attr)
>   {
> -	struct i915_priolist *uninitialized_var(pl);
> +	struct list_head *uninitialized_var(pl);
>   	struct intel_engine_cs *engine, *last;
>   	struct i915_dependency *dep, *p;
>   	struct i915_dependency stack;
> @@ -1242,8 +1276,7 @@ static void execlists_schedule(struct i915_request *request,
>   				pl = lookup_priolist(engine, prio);
>   				last = engine;
>   			}
> -			GEM_BUG_ON(pl->priority != prio);
> -			list_move_tail(&node->link, &pl->requests);
> +			list_move_tail(&node->link, pl);
>   		} else {
>   			/*
>   			 * If the request is not in the priolist queue because
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 2dfa585712c2..1534de5bb852 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -190,11 +190,22 @@ enum intel_engine_id {
>   };
>   
>   struct i915_priolist {
> +	struct list_head requests[I915_PRIORITY_COUNT];
>   	struct rb_node node;
> -	struct list_head requests;
> +	unsigned long used;
>   	int priority;
>   };
>   
> +#define priolist_for_each_request(it, plist, idx) \
> +	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
> +		list_for_each_entry(it, &(plist)->requests[idx], sched.link)
> +
> +#define priolist_for_each_request_consume(it, n, plist, idx) \
> +	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
> +		list_for_each_entry_safe(it, n, \
> +					 &(plist)->requests[idx - 1], \
> +					 sched.link)
> +
>   struct st_preempt_hang {
>   	struct completion completion;
>   	bool inject_hang;
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/7] drm/i915/execlists: Avoid kicking priority on the current context
  2018-09-25  8:31 ` [PATCH 2/7] drm/i915/execlists: Avoid kicking priority on the current context Chris Wilson
@ 2018-09-25 10:14   ` Tvrtko Ursulin
  0 siblings, 0 replies; 24+ messages in thread
From: Tvrtko Ursulin @ 2018-09-25 10:14 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/09/2018 09:31, Chris Wilson wrote:
> If the request is currently on the HW (in port 0), then we do not need
> to kick the submission tasklet to evaluate whether we should be
> preempting itself in order to execute it again.
> 
> In the case that was annoying me:
> 
>     execlists_schedule: rq(18:211173).prio=0 -> 2
>     need_preempt: last(18:211174).prio=0, queue.prio=2
> 
> We are bumping the priority of the first of a pair of requests running
> in the current context. Then when evaluating preempt, we would see
> that our priority request is higher than the last executing request in
> ELSP0 and so trigger preemption, not realising that our intended request
> was already executing.
> 
> v2: As we assume state of the execlists->port[] that is only valid while
> we hold the timeline lock we have to repeat some earlier tests that on
> the validity of the node.
> v3: Wrap guc submission under the timeline.lock as is now the way of all
> things.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_guc_submission.c | 18 +++------
>   drivers/gpu/drm/i915/intel_lrc.c            | 41 +++++++++++++++------
>   2 files changed, 36 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index a81f04d46e87..4874a212754c 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -791,19 +791,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>   
>   static void guc_dequeue(struct intel_engine_cs *engine)
>   {
> -	unsigned long flags;
> -	bool submit;
> -
> -	local_irq_save(flags);
> -
> -	spin_lock(&engine->timeline.lock);
> -	submit = __guc_dequeue(engine);
> -	spin_unlock(&engine->timeline.lock);
> -
> -	if (submit)
> +	if (__guc_dequeue(engine))
>   		guc_submit(engine);
> -
> -	local_irq_restore(flags);
>   }
>   
>   static void guc_submission_tasklet(unsigned long data)
> @@ -812,6 +801,9 @@ static void guc_submission_tasklet(unsigned long data)
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct execlist_port *port = execlists->port;
>   	struct i915_request *rq;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&engine->timeline.lock, flags);
>   
>   	rq = port_request(port);
>   	while (rq && i915_request_completed(rq)) {
> @@ -835,6 +827,8 @@ static void guc_submission_tasklet(unsigned long data)
>   
>   	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
>   		guc_dequeue(engine);
> +
> +	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }
>   
>   static struct i915_request *
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 5b58c10bc600..e8de250c3413 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -356,13 +356,8 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
>   {
>   	struct intel_engine_cs *engine =
>   		container_of(execlists, typeof(*engine), execlists);
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
>   
>   	__unwind_incomplete_requests(engine);
> -
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }

Could move to header as static inline since it is a trivial wrapper now.

>   
>   static inline void
> @@ -1234,9 +1229,13 @@ static void execlists_schedule(struct i915_request *request,
>   
>   		engine = sched_lock_engine(node, engine);
>   
> +		/* Recheck after acquiring the engine->timeline.lock */
>   		if (prio <= node->attr.priority)
>   			continue;
>   
> +		if (i915_sched_node_signaled(node))
> +			continue;
> +
>   		node->attr.priority = prio;
>   		if (!list_empty(&node->link)) {
>   			if (last != engine) {
> @@ -1245,14 +1244,34 @@ static void execlists_schedule(struct i915_request *request,
>   			}
>   			GEM_BUG_ON(pl->priority != prio);
>   			list_move_tail(&node->link, &pl->requests);
> +		} else {
> +			/*
> +			 * If the request is not in the priolist queue because
> +			 * it is not yet runnable, then it doesn't contribute
> +			 * to our preemption decisions. On the other hand,
> +			 * if the request is on the HW, it too is not in the
> +			 * queue; but in that case we may still need to reorder
> +			 * the inflight requests.
> +			 */
> +			if (!i915_sw_fence_done(&sched_to_request(node)->submit))
> +				continue;
>   		}
>   
> -		if (prio > engine->execlists.queue_priority &&
> -		    i915_sw_fence_done(&sched_to_request(node)->submit)) {
> -			/* defer submission until after all of our updates */
> -			__update_queue(engine, prio);
> -			tasklet_hi_schedule(&engine->execlists.tasklet);
> -		}
> +		if (prio <= engine->execlists.queue_priority)
> +			continue;
> +
> +		/*
> +		 * If we are already the currently executing context, don't
> +		 * bother evaluating if we should preempt ourselves.
> +		 */
> +		if (sched_to_request(node)->global_seqno &&
> +		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
> +				      sched_to_request(node)->global_seqno))
> +			continue;
> +
> +		/* Defer (tasklet) submission until after all of our updates. */
> +		__update_queue(engine, prio);
> +		tasklet_hi_schedule(&engine->execlists.tasklet);
>   	}
>   
>   	spin_unlock_irq(&engine->timeline.lock);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/7] drm/i915/selftests: Smoketest preemption
  2018-09-25  9:39 ` [PATCH 1/7] " Tvrtko Ursulin
@ 2018-09-25 10:17   ` Chris Wilson
  2018-09-25 10:24     ` Tvrtko Ursulin
  0 siblings, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2018-09-25 10:17 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-25 10:39:06)
> 
> On 25/09/2018 09:31, Chris Wilson wrote:
> > Very light stress test to bombard the submission backends with a large
> > stream with requests of randomly assigned priorities. Preemption will be
> > occasionally requested, but never succeed!
> 
> Why won't it ever succeed? By design, or because the test is targeting some bug?

There's no batch, and for all but a small window for arbitration between
requests, we disallow preemption in the ring.

> > v2: Include a second pattern with more frequent preemption
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Michał Winiarski <michal.winiarski@intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/selftests/intel_lrc.c | 137 +++++++++++++++++++++
> >   1 file changed, 137 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> > index 1aea7a8f2224..3a474bb64c05 100644
> > --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> > @@ -6,6 +6,7 @@
> >   
> >   #include "../i915_selftest.h"
> >   #include "igt_flush_test.h"
> > +#include "i915_random.h"
> >   
> >   #include "mock_context.h"
> >   
> > @@ -573,6 +574,141 @@ static int live_preempt_hang(void *arg)
> >       return err;
> >   }
> >   
> > +static int random_range(struct rnd_state *rnd, int min, int max)
> > +{
> > +     return i915_prandom_u32_max_state(max - min, rnd) + min;
> > +}
> > +
> > +static int random_priority(struct rnd_state *rnd)
> > +{
> > +     return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
> > +}
> > +
> > +struct preempt_smoke {
> > +     struct drm_i915_private *i915;
> > +     struct i915_gem_context **contexts;
> > +     unsigned int ncontext;
> > +     struct rnd_state prng;
> > +};
> > +
> > +static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
> > +{
> > +     return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
> > +                                                       &smoke->prng)];
> > +}
> > +
> > +static int smoke_crescendo(struct preempt_smoke *smoke)
> > +{
> > +     struct intel_engine_cs *engine;
> > +     enum intel_engine_id id;
> > +     unsigned long count;
> > +
> > +     count = 0;
> > +     for_each_engine(engine, smoke->i915, id) {
> > +             IGT_TIMEOUT(end_time);
> > +
> > +             do {
> > +                     struct i915_gem_context *ctx = smoke_context(smoke);
> > +                     struct i915_request *rq;
> > +
> > +                     ctx->sched.priority = count % I915_PRIORITY_MAX;
> > +
> > +                     rq = i915_request_alloc(engine, ctx);
> > +                     if (IS_ERR(rq))
> > +                             return PTR_ERR(rq);
> > +
> > +                     i915_request_add(rq);
> > +                     count++;
> > +             } while (!__igt_timeout(end_time, NULL));
> > +     }
> > +
> > +     pr_info("Submitted %lu crescendo requests across %d engines and %d contexts\n",
> > +             count, INTEL_INFO(smoke->i915)->num_rings, smoke->ncontext);
> > +     return 0;
> > +}
> > +
> > +static int smoke_random(struct preempt_smoke *smoke)
> > +{
> > +     struct intel_engine_cs *engine;
> > +     enum intel_engine_id id;
> > +     IGT_TIMEOUT(end_time);
> > +     unsigned long count;
> > +
> > +     count = 0;
> > +     do {
> > +             for_each_engine(engine, smoke->i915, id) {
> > +                     struct i915_gem_context *ctx = smoke_context(smoke);
> > +                     struct i915_request *rq;
> > +
> > +                     ctx->sched.priority = random_priority(&smoke->prng);
> > +
> > +                     rq = i915_request_alloc(engine, ctx);
> > +                     if (IS_ERR(rq))
> > +                             return PTR_ERR(rq);
> > +
> > +                     i915_request_add(rq);
> > +                     count++;
> > +             }
> > +     } while (!__igt_timeout(end_time, NULL));
> > +
> > +     pr_info("Submitted %lu random requests across %d engines and %d contexts\n",
> > +             count, INTEL_INFO(smoke->i915)->num_rings, smoke->ncontext);
> > +     return 0;
> > +}
> 
> Merge smoke_crescendo and smoke_random into one which takes flags to
> decide on the priority assignment policy, since that seems like the only
> difference?

The chaining along engines from the loop construct is a big difference.
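
(As a rough side-by-side of just the loop shapes -- submit_one() is only
shorthand for the pick-a-context/assign-priority/i915_request_add() body
quoted above, not a real helper:

	/* crescendo: one timeout per engine, so each engine in turn is
	 * fed a long chain of requests with steadily rising priority */
	for_each_engine(engine, i915, id) {
		IGT_TIMEOUT(end_time);
		do {
			submit_one(engine);
		} while (!__igt_timeout(end_time, NULL));
	}

	/* random: a single timeout, engines interleaved round-robin,
	 * each request given an independent random priority */
	{
		IGT_TIMEOUT(end_time);
		do {
			for_each_engine(engine, i915, id)
				submit_one(engine);
		} while (!__igt_timeout(end_time, NULL));
	}

The first saturates one engine at a time, the second spreads the load
across all engines at once.)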

> > +static int live_preempt_smoke(void *arg)
> > +{
> > +     struct preempt_smoke smoke = {
> > +             .i915 = arg,
> > +             .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
> > +             .ncontext = 1024,
> > +     };
> > +     int err = -ENOMEM;
> > +     int n;
> > +
> > +     if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915))
> > +             return 0;
> > +
> > +     smoke.contexts = kmalloc_array(smoke.ncontext,
> > +                                    sizeof(*smoke.contexts),
> > +                                    GFP_KERNEL);
> > +     if (!smoke.contexts)
> > +             return -ENOMEM;
> > +
> > +     mutex_lock(&smoke.i915->drm.struct_mutex);
> > +     intel_runtime_pm_get(smoke.i915);
> > +
> > +     for (n = 0; n < smoke.ncontext; n++) {
> > +             smoke.contexts[n] = kernel_context(smoke.i915);
> > +             if (!smoke.contexts[n])
> > +                     goto err_ctx;
> 
> There isn't any request emission on context creation, I think, so here
> we could jump to a new label which only frees.

Sure. The flush gives peace of mind.

> > +     }
> > +
> > +     err = smoke_crescendo(&smoke);
> > +     if (err)
> > +             goto err_ctx;
> 
> Sync/idle/flush between subtests?

Doesn't make much difference since it's all about trying to get the
magic smoke to leak.

> > +     err = smoke_random(&smoke);
> > +     if (err)
> > +             goto err_ctx;
> > +
> > +err_ctx:
> > +     if (igt_flush_test(smoke.i915, I915_WAIT_LOCKED))
> > +             err = -EIO;
> > +
> > +     for (n = 0; n < smoke.ncontext; n++) {
> > +             if (!smoke.contexts[n])
> > +                     break;
> 
> So GFP_ZERO or a post-clear is needed on the array. Or kcalloc.

Oh no it isn't, as we never see the uninitialised entries: NULL
indicates the error and the point at which to abort.
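
(The pattern, reduced to a sketch -- kernel_context_close() is assumed to
be the usual pairing for kernel_context():

	for (n = 0; n < nctx; n++) {
		ctx[n] = kernel_context(i915);
		if (!ctx[n])
			break;	/* first failure leaves a NULL marker */
	}

	/* ... run the subtests ... */

	for (n = 0; n < nctx; n++) {
		if (!ctx[n])
			break;	/* stop at the same NULL; entries past it
				 * were never written and are never read */
		kernel_context_close(ctx[n]);
	}

So kmalloc_array() without __GFP_ZERO is fine here: the uninitialised
tail of the array is unreachable.)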
-Chris

* Re: [PATCH 1/7] drm/i915/selftests: Smoketest preemption
  2018-09-25 10:17   ` Chris Wilson
@ 2018-09-25 10:24     ` Tvrtko Ursulin
  0 siblings, 0 replies; 24+ messages in thread
From: Tvrtko Ursulin @ 2018-09-25 10:24 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/09/2018 11:17, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-25 10:39:06)
>>
>> On 25/09/2018 09:31, Chris Wilson wrote:
>>> Very light stress test to bombard the submission backends with a large
>>> stream with requests of randomly assigned priorities. Preemption will be
>>> occasionally requested, but never succeed!
>>
>> Why won't it ever succeed? By design or because the test is targeting some bug?
> 
> There's no batch, and for all but a small window for arbitration between
> requests, we disallow preemption in the ring.

It would be good to have this described in the commit message.

>>> v2: Include a second pattern with more frequent preemption
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Michał Winiarski <michal.winiarski@intel.com>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/selftests/intel_lrc.c | 137 +++++++++++++++++++++
>>>    1 file changed, 137 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
>>> index 1aea7a8f2224..3a474bb64c05 100644
>>> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
>>> @@ -6,6 +6,7 @@
>>>    
>>>    #include "../i915_selftest.h"
>>>    #include "igt_flush_test.h"
>>> +#include "i915_random.h"
>>>    
>>>    #include "mock_context.h"
>>>    
>>> @@ -573,6 +574,141 @@ static int live_preempt_hang(void *arg)
>>>        return err;
>>>    }
>>>    
>>> +static int random_range(struct rnd_state *rnd, int min, int max)
>>> +{
>>> +     return i915_prandom_u32_max_state(max - min, rnd) + min;
>>> +}
>>> +
>>> +static int random_priority(struct rnd_state *rnd)
>>> +{
>>> +     return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
>>> +}
>>> +
>>> +struct preempt_smoke {
>>> +     struct drm_i915_private *i915;
>>> +     struct i915_gem_context **contexts;
>>> +     unsigned int ncontext;
>>> +     struct rnd_state prng;
>>> +};
>>> +
>>> +static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
>>> +{
>>> +     return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
>>> +                                                       &smoke->prng)];
>>> +}
>>> +
>>> +static int smoke_crescendo(struct preempt_smoke *smoke)
>>> +{
>>> +     struct intel_engine_cs *engine;
>>> +     enum intel_engine_id id;
>>> +     unsigned long count;
>>> +
>>> +     count = 0;
>>> +     for_each_engine(engine, smoke->i915, id) {
>>> +             IGT_TIMEOUT(end_time);
>>> +
>>> +             do {
>>> +                     struct i915_gem_context *ctx = smoke_context(smoke);
>>> +                     struct i915_request *rq;
>>> +
>>> +                     ctx->sched.priority = count % I915_PRIORITY_MAX;
>>> +
>>> +                     rq = i915_request_alloc(engine, ctx);
>>> +                     if (IS_ERR(rq))
>>> +                             return PTR_ERR(rq);
>>> +
>>> +                     i915_request_add(rq);
>>> +                     count++;
>>> +             } while (!__igt_timeout(end_time, NULL));
>>> +     }
>>> +
>>> +     pr_info("Submitted %lu crescendo requests across %d engines and %d contexts\n",
>>> +             count, INTEL_INFO(smoke->i915)->num_rings, smoke->ncontext);
>>> +     return 0;
>>> +}
>>> +
>>> +static int smoke_random(struct preempt_smoke *smoke)
>>> +{
>>> +     struct intel_engine_cs *engine;
>>> +     enum intel_engine_id id;
>>> +     IGT_TIMEOUT(end_time);
>>> +     unsigned long count;
>>> +
>>> +     count = 0;
>>> +     do {
>>> +             for_each_engine(engine, smoke->i915, id) {
>>> +                     struct i915_gem_context *ctx = smoke_context(smoke);
>>> +                     struct i915_request *rq;
>>> +
>>> +                     ctx->sched.priority = random_priority(&smoke->prng);
>>> +
>>> +                     rq = i915_request_alloc(engine, ctx);
>>> +                     if (IS_ERR(rq))
>>> +                             return PTR_ERR(rq);
>>> +
>>> +                     i915_request_add(rq);
>>> +                     count++;
>>> +             }
>>> +     } while (!__igt_timeout(end_time, NULL));
>>> +
>>> +     pr_info("Submitted %lu random requests across %d engines and %d contexts\n",
>>> +             count, INTEL_INFO(smoke->i915)->num_rings, smoke->ncontext);
>>> +     return 0;
>>> +}
>>
>> Merge smoke_crescendo and smoke_random into one which takes flags to
>> decide on the priority assignment policy, since that seems like the only
>> difference?
> 
> The chaining along engines from the loop construct is a big difference.

True, I missed that when eyeballing it.

> 
>>> +static int live_preempt_smoke(void *arg)
>>> +{
>>> +     struct preempt_smoke smoke = {
>>> +             .i915 = arg,
>>> +             .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
>>> +             .ncontext = 1024,
>>> +     };
>>> +     int err = -ENOMEM;
>>> +     int n;
>>> +
>>> +     if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915))
>>> +             return 0;
>>> +
>>> +     smoke.contexts = kmalloc_array(smoke.ncontext,
>>> +                                    sizeof(*smoke.contexts),
>>> +                                    GFP_KERNEL);
>>> +     if (!smoke.contexts)
>>> +             return -ENOMEM;
>>> +
>>> +     mutex_lock(&smoke.i915->drm.struct_mutex);
>>> +     intel_runtime_pm_get(smoke.i915);
>>> +
>>> +     for (n = 0; n < smoke.ncontext; n++) {
>>> +             smoke.contexts[n] = kernel_context(smoke.i915);
>>> +             if (!smoke.contexts[n])
>>> +                     goto err_ctx;
>>
>> There isn't any request emission on context creation, I think, so here
>> we could jump to a new label which only frees.
> 
> Sure. The flush gives peace of mind.
> 
>>> +     }
>>> +
>>> +     err = smoke_crescendo(&smoke);
>>> +     if (err)
>>> +             goto err_ctx;
>>
>> Sync/idle/flush between subtests?
> 
> Doesn't make much difference since it's all about trying to get the
> magic smoke to leak.
> 
>>> +     err = smoke_random(&smoke);
>>> +     if (err)
>>> +             goto err_ctx;
>>> +
>>> +err_ctx:
>>> +     if (igt_flush_test(smoke.i915, I915_WAIT_LOCKED))
>>> +             err = -EIO;
>>> +
>>> +     for (n = 0; n < smoke.ncontext; n++) {
>>> +             if (!smoke.contexts[n])
>>> +                     break;
>>
>> So GFP_ZERO or a post-clear is needed on the array. Or kcalloc.
> 
> Oh no it isn't, as we never see the uninitialised entries: NULL
> indicates the error and the point at which to abort.

True, my bad.

In this case with a small update to the commit message:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] drm/i915/selftests: Smoketest preemption (rev2)
  2018-09-25  8:31 [PATCH 1/7] drm/i915/selftests: Smoketest preemption Chris Wilson
                   ` (9 preceding siblings ...)
  2018-09-25  9:39 ` [PATCH 1/7] " Tvrtko Ursulin
@ 2018-09-25 10:28 ` Patchwork
  2018-09-25 10:31 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Patchwork @ 2018-09-25 10:28 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/7] drm/i915/selftests: Smoketest preemption (rev2)
URL   : https://patchwork.freedesktop.org/series/50137/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
74896f3f3355 drm/i915/selftests: Smoketest preemption
-:92: WARNING:LINE_SPACING: Missing a blank line after declarations
#92: FILE: drivers/gpu/drm/i915/selftests/intel_lrc.c:634:
+	enum intel_engine_id id;
+	IGT_TIMEOUT(end_time);

total: 0 errors, 1 warnings, 0 checks, 155 lines checked
77aeead3e917 drm/i915/execlists: Avoid kicking priority on the current context
7662098ea41a drm/i915: Reserve some priority bits for internal use
24ee6def0a44 drm/i915: Combine multiple internal plists into the same i915_priolist bucket
-:166: WARNING:FUNCTION_ARGUMENTS: function definition argument 'pl' should also have an identifier name
#166: FILE: drivers/gpu/drm/i915/intel_lrc.c:367:
+	struct list_head *uninitialized_var(pl);

-:284: WARNING:FUNCTION_ARGUMENTS: function definition argument 'pl' should also have an identifier name
#284: FILE: drivers/gpu/drm/i915/intel_lrc.c:1180:
+	struct list_head *uninitialized_var(pl);

-:313: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'plist' - possible side-effects?
#313: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:199:
+#define priolist_for_each_request(it, plist, idx) \
+	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
+		list_for_each_entry(it, &(plist)->requests[idx], sched.link)

-:313: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'idx' - possible side-effects?
#313: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:199:
+#define priolist_for_each_request(it, plist, idx) \
+	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
+		list_for_each_entry(it, &(plist)->requests[idx], sched.link)

-:317: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'plist' - possible side-effects?
#317: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:203:
+#define priolist_for_each_request_consume(it, n, plist, idx) \
+	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
+		list_for_each_entry_safe(it, n, \
+					 &(plist)->requests[idx - 1], \
+					 sched.link)

-:317: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'idx' - possible side-effects?
#317: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:203:
+#define priolist_for_each_request_consume(it, n, plist, idx) \
+	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
+		list_for_each_entry_safe(it, n, \
+					 &(plist)->requests[idx - 1], \
+					 sched.link)

total: 0 errors, 2 warnings, 4 checks, 272 lines checked
c1b7e47bf87e drm/i915: Priority boost for new clients
6a0069950fd8 drm/i915: Pull scheduling under standalone lock
-:146: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#146: 
new file mode 100644

-:151: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#151: FILE: drivers/gpu/drm/i915/i915_scheduler.c:1:
+/*

-:394: WARNING:FUNCTION_ARGUMENTS: function definition argument 'pl' should also have an identifier name
#394: FILE: drivers/gpu/drm/i915/i915_scheduler.c:244:
+	struct list_head *uninitialized_var(pl);

total: 0 errors, 3 warnings, 0 checks, 870 lines checked
53a576f33e94 drm/i915: Priority boost for waiting clients


* ✗ Fi.CI.SPARSE: warning for series starting with [1/7] drm/i915/selftests: Smoketest preemption (rev2)
  2018-09-25  8:31 [PATCH 1/7] drm/i915/selftests: Smoketest preemption Chris Wilson
                   ` (10 preceding siblings ...)
  2018-09-25 10:28 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] drm/i915/selftests: Smoketest preemption (rev2) Patchwork
@ 2018-09-25 10:31 ` Patchwork
  2018-09-25 10:50 ` ✓ Fi.CI.BAT: success " Patchwork
  2018-09-25 12:21 ` ✓ Fi.CI.IGT: " Patchwork
  13 siblings, 0 replies; 24+ messages in thread
From: Patchwork @ 2018-09-25 10:31 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/7] drm/i915/selftests: Smoketest preemption (rev2)
URL   : https://patchwork.freedesktop.org/series/50137/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915/selftests: Smoketest preemption
+./include/linux/slab.h:631:13: error: undefined identifier '__builtin_mul_overflow'
+./include/linux/slab.h:631:13: warning: call with no type!

Commit: drm/i915/execlists: Avoid kicking priority on the current context
Okay!

Commit: drm/i915: Reserve some priority bits for internal use
Okay!

Commit: drm/i915: Combine multiple internal plists into the same i915_priolist bucket
Okay!

Commit: drm/i915: Priority boost for new clients
Okay!

Commit: drm/i915: Pull scheduling under standalone lock
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Priority boost for waiting clients
Okay!


* ✓ Fi.CI.BAT: success for series starting with [1/7] drm/i915/selftests: Smoketest preemption (rev2)
  2018-09-25  8:31 [PATCH 1/7] drm/i915/selftests: Smoketest preemption Chris Wilson
                   ` (11 preceding siblings ...)
  2018-09-25 10:31 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-09-25 10:50 ` Patchwork
  2018-09-25 12:21 ` ✓ Fi.CI.IGT: " Patchwork
  13 siblings, 0 replies; 24+ messages in thread
From: Patchwork @ 2018-09-25 10:50 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/7] drm/i915/selftests: Smoketest preemption (rev2)
URL   : https://patchwork.freedesktop.org/series/50137/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4872 -> Patchwork_10275 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/50137/revisions/2/mbox/

== Known issues ==

  Here are the changes found in Patchwork_10275 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_selftest@live_hangcheck:
      fi-kbl-7560u:       PASS -> INCOMPLETE (fdo#108044)

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
      fi-bdw-samus:       NOTRUN -> INCOMPLETE (fdo#107773)

    
    ==== Possible fixes ====

    igt@gem_exec_suspend@basic-s3:
      fi-bdw-samus:       INCOMPLETE (fdo#107773) -> PASS

    igt@kms_psr@primary_page_flip:
      fi-kbl-7560u:       FAIL (fdo#107336) -> PASS
      fi-cnl-u:           FAIL (fdo#107336) -> PASS

    
  fdo#107336 https://bugs.freedesktop.org/show_bug.cgi?id=107336
  fdo#107773 https://bugs.freedesktop.org/show_bug.cgi?id=107773
  fdo#108044 https://bugs.freedesktop.org/show_bug.cgi?id=108044


== Participating hosts (48 -> 43) ==

  Missing    (5): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-hsw-4200u 


== Build changes ==

    * Linux: CI_DRM_4872 -> Patchwork_10275

  CI_DRM_4872: 479243f75f1a764b294bd9f4bf45e9bc0b5e5ce9 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4649: 19b0c74d20d9b53d4c82be14af0909a3b6846010 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_10275: 53a576f33e94b1182d82ccdcc3956e83d547eb85 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

53a576f33e94 drm/i915: Priority boost for waiting clients
6a0069950fd8 drm/i915: Pull scheduling under standalone lock
c1b7e47bf87e drm/i915: Priority boost for new clients
24ee6def0a44 drm/i915: Combine multiple internal plists into the same i915_priolist bucket
7662098ea41a drm/i915: Reserve some priority bits for internal use
77aeead3e917 drm/i915/execlists: Avoid kicking priority on the current context
74896f3f3355 drm/i915/selftests: Smoketest preemption

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10275/issues.html

* ✓ Fi.CI.IGT: success for series starting with [1/7] drm/i915/selftests: Smoketest preemption (rev2)
  2018-09-25  8:31 [PATCH 1/7] drm/i915/selftests: Smoketest preemption Chris Wilson
                   ` (12 preceding siblings ...)
  2018-09-25 10:50 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-09-25 12:21 ` Patchwork
  13 siblings, 0 replies; 24+ messages in thread
From: Patchwork @ 2018-09-25 12:21 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/7] drm/i915/selftests: Smoketest preemption (rev2)
URL   : https://patchwork.freedesktop.org/series/50137/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4872_full -> Patchwork_10275_full =

== Summary - WARNING ==

  Minor unknown changes coming with Patchwork_10275_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_10275_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_10275_full:

  === IGT changes ===

    ==== Warnings ====

    igt@perf_pmu@rc6:
      shard-kbl:          PASS -> SKIP

    
== Known issues ==

  Here are the changes found in Patchwork_10275_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@gem_exec_schedule@pi-ringfull-bsd:
      shard-glk:          NOTRUN -> FAIL (fdo#103158)

    igt@kms_busy@extended-modeset-hang-newfb-render-b:
      shard-snb:          NOTRUN -> DMESG-WARN (fdo#107956)

    igt@kms_busy@extended-modeset-hang-newfb-with-reset-render-b:
      shard-glk:          NOTRUN -> DMESG-WARN (fdo#107956) +1

    
    ==== Possible fixes ====

    igt@gem_exec_big:
      shard-hsw:          TIMEOUT (fdo#107937) -> PASS

    igt@kms_cursor_crc@cursor-256x256-suspend:
      shard-kbl:          INCOMPLETE (fdo#103665) -> PASS

    igt@kms_flip@modeset-vs-vblank-race-interruptible:
      shard-kbl:          FAIL (fdo#103060) -> PASS

    
  fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
  fdo#103158 https://bugs.freedesktop.org/show_bug.cgi?id=103158
  fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665
  fdo#107937 https://bugs.freedesktop.org/show_bug.cgi?id=107937
  fdo#107956 https://bugs.freedesktop.org/show_bug.cgi?id=107956


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4872 -> Patchwork_10275

  CI_DRM_4872: 479243f75f1a764b294bd9f4bf45e9bc0b5e5ce9 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4649: 19b0c74d20d9b53d4c82be14af0909a3b6846010 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_10275: 53a576f33e94b1182d82ccdcc3956e83d547eb85 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10275/shards.html

* Re: [PATCH 4/7] drm/i915: Combine multiple internal plists into the same i915_priolist bucket
  2018-09-25  9:48   ` Tvrtko Ursulin
@ 2018-09-27 13:05     ` Chris Wilson
  2018-09-28 13:04       ` Tvrtko Ursulin
  0 siblings, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2018-09-27 13:05 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-25 10:48:44)
> 
> On 25/09/2018 09:32, Chris Wilson wrote:
> > As we are about to allow ourselves to slightly bump the user priority
> > into a few different sublevels, pack those internal priority lists
> > into the same i915_priolist to keep the rbtree compact and avoid having
> > to allocate the default user priority even after the internal bumping.
> > The downside to having a requests[] rather than a node per active list,
> > is that we then have to walk over the empty higher priority lists. To
> > compensate, we track the active buckets and use a small bitmap to skip
> > over any inactive ones.
> > 
> > v2: Use MASK of internal levels to simplify our usage.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/intel_engine_cs.c      |  6 +-
> >   drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++-
> >   drivers/gpu/drm/i915/intel_lrc.c            | 87 ++++++++++++++-------
> >   drivers/gpu/drm/i915/intel_ringbuffer.h     | 13 ++-
> >   4 files changed, 80 insertions(+), 38 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> > index 217ed3ee1cab..83f2f7774c1f 100644
> > --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> > @@ -1534,10 +1534,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> >       count = 0;
> >       drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
> >       for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> > -             struct i915_priolist *p =
> > -                     rb_entry(rb, typeof(*p), node);
> > +             struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> > +             int i;
> >   
> > -             list_for_each_entry(rq, &p->requests, sched.link) {
> > +             priolist_for_each_request(rq, p, i) {
> >                       if (count++ < MAX_REQUESTS_TO_SHOW - 1)
> >                               print_request(m, rq, "\t\tQ ");
> >                       else
> > diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> > index 4874a212754c..ac862b42f6a1 100644
> > --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> > @@ -746,30 +746,28 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
> >       while ((rb = rb_first_cached(&execlists->queue))) {
> >               struct i915_priolist *p = to_priolist(rb);
> >               struct i915_request *rq, *rn;
> > +             int i;
> >   
> > -             list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
> > +             priolist_for_each_request_consume(rq, rn, p, i) {
> >                       if (last && rq->hw_context != last->hw_context) {
> > -                             if (port == last_port) {
> > -                                     __list_del_many(&p->requests,
> > -                                                     &rq->sched.link);
> > +                             if (port == last_port)
> >                                       goto done;
> > -                             }
> >   
> >                               if (submit)
> >                                       port_assign(port, last);
> >                               port++;
> >                       }
> >   
> > -                     INIT_LIST_HEAD(&rq->sched.link);
> > +                     list_del_init(&rq->sched.link);
> >   
> >                       __i915_request_submit(rq);
> >                       trace_i915_request_in(rq, port_index(port, execlists));
> > +
> >                       last = rq;
> >                       submit = true;
> >               }
> >   
> >               rb_erase_cached(&p->node, &execlists->queue);
> > -             INIT_LIST_HEAD(&p->requests);
> >               if (p->priority != I915_PRIORITY_NORMAL)
> >                       kmem_cache_free(engine->i915->priorities, p);
> >       }
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index e8de250c3413..b1b3f67d1120 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -259,14 +259,49 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
> >       ce->lrc_desc = desc;
> >   }
> >   
> > -static struct i915_priolist *
> > +static void assert_priolists(struct intel_engine_execlists * const execlists,
> > +                          int queue_priority)
> > +{
> > +     struct rb_node *rb;
> > +     int last_prio, i;
> > +
> > +     if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> > +             return;
> > +
> > +     GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
> > +                rb_first(&execlists->queue.rb_root));
> > +
> > +     last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
> > +     for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> > +             struct i915_priolist *p = to_priolist(rb);
> > +
> > +             GEM_BUG_ON(p->priority >= last_prio);
> > +             last_prio = p->priority;
> > +
> > +             GEM_BUG_ON(!p->used);
> > +             for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
> > +                     if (list_empty(&p->requests[i]))
> > +                             continue;
> > +
> > +                     GEM_BUG_ON(!(p->used & BIT(i)));
> > +             }
> > +     }
> > +}
> > +
> > +static struct list_head *
> >   lookup_priolist(struct intel_engine_cs *engine, int prio)
> >   {
> >       struct intel_engine_execlists * const execlists = &engine->execlists;
> >       struct i915_priolist *p;
> >       struct rb_node **parent, *rb;
> >       bool first = true;
> > +     int idx, i;
> > +
> > +     assert_priolists(execlists, INT_MAX);
> >   
> > +     /* buckets sorted from highest [in slot 0] to lowest priority */
> > +     idx = I915_PRIORITY_MASK - (prio & I915_PRIORITY_MASK);
> 
> How about:
> 
> #define I915_INTERNAL_PRIO(p) (p & I915_PRIORITY_MASK)

Not convinced. It's the only place we use it (one use of MASK later to
assert correct less). There's two views, here the user/kernel priority
levels are not as important as what we are concerned about are the split
lists with the chunking of lowest priority bits. At the extreme we could
separate the two and make the chunks BITS_PER_LONG -- I think that's a
waste and my goal was simply to avoid kmallocs for the common case of
default user priority.

The intention is that we don't even think about the user/internal split,
and simply consider it an integrated field, with most positive
priorities executing first and most negative last.
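
(For reference, the shape being discussed, boiled down to a sketch -- the
struct is paraphrased from the patch and COUNT == MASK + 1 is assumed:

	struct i915_priolist {
		struct rb_node node;
		struct list_head requests[I915_PRIORITY_COUNT];
		unsigned long used;	/* bitmap of non-empty buckets */
		int priority;
	};

	/* slot 0 holds the highest internal sublevel, hence the reversed
	 * index when filing a request into its bucket */
	idx = I915_PRIORITY_MASK - (prio & I915_PRIORITY_MASK);
	list_add_tail(&rq->sched.link, &p->requests[idx]);
	p->used |= BIT(idx);

The bitmap is what lets the dequeue side skip the empty buckets cheaply.)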
-Chris

* Re: [PATCH 4/7] drm/i915: Combine multiple internal plists into the same i915_priolist bucket
  2018-09-27 13:05     ` Chris Wilson
@ 2018-09-28 13:04       ` Tvrtko Ursulin
  2018-09-28 13:08         ` Chris Wilson
  0 siblings, 1 reply; 24+ messages in thread
From: Tvrtko Ursulin @ 2018-09-28 13:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 27/09/2018 14:05, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-25 10:48:44)
>>
>> On 25/09/2018 09:32, Chris Wilson wrote:
>>> As we are about to allow ourselves to slightly bump the user priority
>>> into a few different sublevels, pack those internal priority lists
>>> into the same i915_priolist to keep the rbtree compact and avoid having
>>> to allocate the default user priority even after the internal bumping.
>>> The downside to having a requests[] rather than a node per active list,
>>> is that we then have to walk over the empty higher priority lists. To
>>> compensate, we track the active buckets and use a small bitmap to skip
>>> over any inactive ones.
>>>
>>> v2: Use MASK of internal levels to simplify our usage.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/intel_engine_cs.c      |  6 +-
>>>    drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++-
>>>    drivers/gpu/drm/i915/intel_lrc.c            | 87 ++++++++++++++-------
>>>    drivers/gpu/drm/i915/intel_ringbuffer.h     | 13 ++-
>>>    4 files changed, 80 insertions(+), 38 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
>>> index 217ed3ee1cab..83f2f7774c1f 100644
>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>> @@ -1534,10 +1534,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>>>        count = 0;
>>>        drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
>>>        for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
>>> -             struct i915_priolist *p =
>>> -                     rb_entry(rb, typeof(*p), node);
>>> +             struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
>>> +             int i;
>>>    
>>> -             list_for_each_entry(rq, &p->requests, sched.link) {
>>> +             priolist_for_each_request(rq, p, i) {
>>>                        if (count++ < MAX_REQUESTS_TO_SHOW - 1)
>>>                                print_request(m, rq, "\t\tQ ");
>>>                        else
>>> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
>>> index 4874a212754c..ac862b42f6a1 100644
>>> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
>>> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
>>> @@ -746,30 +746,28 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>>>        while ((rb = rb_first_cached(&execlists->queue))) {
>>>                struct i915_priolist *p = to_priolist(rb);
>>>                struct i915_request *rq, *rn;
>>> +             int i;
>>>    
>>> -             list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
>>> +             priolist_for_each_request_consume(rq, rn, p, i) {
>>>                        if (last && rq->hw_context != last->hw_context) {
>>> -                             if (port == last_port) {
>>> -                                     __list_del_many(&p->requests,
>>> -                                                     &rq->sched.link);
>>> +                             if (port == last_port)
>>>                                        goto done;
>>> -                             }
>>>    
>>>                                if (submit)
>>>                                        port_assign(port, last);
>>>                                port++;
>>>                        }
>>>    
>>> -                     INIT_LIST_HEAD(&rq->sched.link);
>>> +                     list_del_init(&rq->sched.link);
>>>    
>>>                        __i915_request_submit(rq);
>>>                        trace_i915_request_in(rq, port_index(port, execlists));
>>> +
>>>                        last = rq;
>>>                        submit = true;
>>>                }
>>>    
>>>                rb_erase_cached(&p->node, &execlists->queue);
>>> -             INIT_LIST_HEAD(&p->requests);
>>>                if (p->priority != I915_PRIORITY_NORMAL)
>>>                        kmem_cache_free(engine->i915->priorities, p);
>>>        }
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index e8de250c3413..b1b3f67d1120 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -259,14 +259,49 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
>>>        ce->lrc_desc = desc;
>>>    }
>>>    
>>> -static struct i915_priolist *
>>> +static void assert_priolists(struct intel_engine_execlists * const execlists,
>>> +                          int queue_priority)
>>> +{
>>> +     struct rb_node *rb;
>>> +     int last_prio, i;
>>> +
>>> +     if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
>>> +             return;
>>> +
>>> +     GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
>>> +                rb_first(&execlists->queue.rb_root));
>>> +
>>> +     last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
>>> +     for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
>>> +             struct i915_priolist *p = to_priolist(rb);
>>> +
>>> +             GEM_BUG_ON(p->priority >= last_prio);
>>> +             last_prio = p->priority;
>>> +
>>> +             GEM_BUG_ON(!p->used);
>>> +             for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
>>> +                     if (list_empty(&p->requests[i]))
>>> +                             continue;
>>> +
>>> +                     GEM_BUG_ON(!(p->used & BIT(i)));
>>> +             }
>>> +     }
>>> +}
>>> +
>>> +static struct list_head *
>>>    lookup_priolist(struct intel_engine_cs *engine, int prio)
>>>    {
>>>        struct intel_engine_execlists * const execlists = &engine->execlists;
>>>        struct i915_priolist *p;
>>>        struct rb_node **parent, *rb;
>>>        bool first = true;
>>> +     int idx, i;
>>> +
>>> +     assert_priolists(execlists, INT_MAX);
>>>    
>>> +     /* buckets sorted from highest [in slot 0] to lowest priority */
>>> +     idx = I915_PRIORITY_MASK - (prio & I915_PRIORITY_MASK);
>>
>> How about:
>>
>> #define I915_INTERNAL_PRIO(p) (p & I915_PRIORITY_MASK)
> 
> Not convinced. It's the only place we use it (one use of MASK later to
> assert correct less). There's two views, here the user/kernel priority
> levels are not as important as what we are concerned about are the split
> lists with the chunking of lowest priority bits. At the extreme we could
> separate the two and make the chunks BITS_PER_LONG -- I think that's a
> waste and my goal was simply to avoid kmallocs for the common case of
> default user priority.
> 
> The intention is that we don't even think about the user/internal split,
> and simply consider it an integrated field, with most positive
> priorities executing first and most negative last.

I just find MASK - INT confusing, those two data flavours do not match 
in my head.  With the local macro I clarified that we are getting an INT 
from prio, and then the bit you did not quote I suggested we do not 
subtract from the mask but from the count - 1. It is effectively the 
same thing just IMHO easier to understand while reading the code. You 
can skip the macro and decrement from the count, I'd be happy with that 
as well.
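
(Both spellings pick the same bucket as long as I915_PRIORITY_COUNT ==
I915_PRIORITY_MASK + 1, which is assumed here:

	idx = I915_PRIORITY_MASK - (prio & I915_PRIORITY_MASK);
	idx = I915_PRIORITY_COUNT - 1 - (prio & I915_PRIORITY_MASK);

The second form just makes it explicit that we are counting down from the
number of buckets rather than reusing the mask as an integer.)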

Regards,

Tvrtko



* Re: [PATCH 4/7] drm/i915: Combine multiple internal plists into the same i915_priolist bucket
  2018-09-28 13:04       ` Tvrtko Ursulin
@ 2018-09-28 13:08         ` Chris Wilson
  2018-09-28 13:25           ` Tvrtko Ursulin
  0 siblings, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2018-09-28 13:08 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-28 14:04:34)
> 
> On 27/09/2018 14:05, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-09-25 10:48:44)
> >>
> >> On 25/09/2018 09:32, Chris Wilson wrote:
> >>> As we are about to allow ourselves to slightly bump the user priority
> >>> into a few different sublevels, pack those internal priority lists
> >>> into the same i915_priolist to keep the rbtree compact and avoid having
> >>> to allocate the default user priority even after the internal bumping.
> >>> The downside to having a requests[] rather than a node per active list,
> >>> is that we then have to walk over the empty higher priority lists. To
> >>> compensate, we track the active buckets and use a small bitmap to skip
> >>> over any inactive ones.
> >>>
> >>> v2: Use MASK of internal levels to simplify our usage.
> >>>
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>> ---
> >>>    drivers/gpu/drm/i915/intel_engine_cs.c      |  6 +-
> >>>    drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++-
> >>>    drivers/gpu/drm/i915/intel_lrc.c            | 87 ++++++++++++++-------
> >>>    drivers/gpu/drm/i915/intel_ringbuffer.h     | 13 ++-
> >>>    4 files changed, 80 insertions(+), 38 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> >>> index 217ed3ee1cab..83f2f7774c1f 100644
> >>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> >>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> >>> @@ -1534,10 +1534,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> >>>        count = 0;
> >>>        drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
> >>>        for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> >>> -             struct i915_priolist *p =
> >>> -                     rb_entry(rb, typeof(*p), node);
> >>> +             struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> >>> +             int i;
> >>>    
> >>> -             list_for_each_entry(rq, &p->requests, sched.link) {
> >>> +             priolist_for_each_request(rq, p, i) {
> >>>                        if (count++ < MAX_REQUESTS_TO_SHOW - 1)
> >>>                                print_request(m, rq, "\t\tQ ");
> >>>                        else
> >>> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> >>> index 4874a212754c..ac862b42f6a1 100644
> >>> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> >>> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> >>> @@ -746,30 +746,28 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
> >>>        while ((rb = rb_first_cached(&execlists->queue))) {
> >>>                struct i915_priolist *p = to_priolist(rb);
> >>>                struct i915_request *rq, *rn;
> >>> +             int i;
> >>>    
> >>> -             list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
> >>> +             priolist_for_each_request_consume(rq, rn, p, i) {
> >>>                        if (last && rq->hw_context != last->hw_context) {
> >>> -                             if (port == last_port) {
> >>> -                                     __list_del_many(&p->requests,
> >>> -                                                     &rq->sched.link);
> >>> +                             if (port == last_port)
> >>>                                        goto done;
> >>> -                             }
> >>>    
> >>>                                if (submit)
> >>>                                        port_assign(port, last);
> >>>                                port++;
> >>>                        }
> >>>    
> >>> -                     INIT_LIST_HEAD(&rq->sched.link);
> >>> +                     list_del_init(&rq->sched.link);
> >>>    
> >>>                        __i915_request_submit(rq);
> >>>                        trace_i915_request_in(rq, port_index(port, execlists));
> >>> +
> >>>                        last = rq;
> >>>                        submit = true;
> >>>                }
> >>>    
> >>>                rb_erase_cached(&p->node, &execlists->queue);
> >>> -             INIT_LIST_HEAD(&p->requests);
> >>>                if (p->priority != I915_PRIORITY_NORMAL)
> >>>                        kmem_cache_free(engine->i915->priorities, p);
> >>>        }
> >>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >>> index e8de250c3413..b1b3f67d1120 100644
> >>> --- a/drivers/gpu/drm/i915/intel_lrc.c
> >>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> >>> @@ -259,14 +259,49 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
> >>>        ce->lrc_desc = desc;
> >>>    }
> >>>    
> >>> -static struct i915_priolist *
> >>> +static void assert_priolists(struct intel_engine_execlists * const execlists,
> >>> +                          int queue_priority)
> >>> +{
> >>> +     struct rb_node *rb;
> >>> +     int last_prio, i;
> >>> +
> >>> +     if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> >>> +             return;
> >>> +
> >>> +     GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
> >>> +                rb_first(&execlists->queue.rb_root));
> >>> +
> >>> +     last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
> >>> +     for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> >>> +             struct i915_priolist *p = to_priolist(rb);
> >>> +
> >>> +             GEM_BUG_ON(p->priority >= last_prio);
> >>> +             last_prio = p->priority;
> >>> +
> >>> +             GEM_BUG_ON(!p->used);
> >>> +             for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
> >>> +                     if (list_empty(&p->requests[i]))
> >>> +                             continue;
> >>> +
> >>> +                     GEM_BUG_ON(!(p->used & BIT(i)));
> >>> +             }
> >>> +     }
> >>> +}
> >>> +
> >>> +static struct list_head *
> >>>    lookup_priolist(struct intel_engine_cs *engine, int prio)
> >>>    {
> >>>        struct intel_engine_execlists * const execlists = &engine->execlists;
> >>>        struct i915_priolist *p;
> >>>        struct rb_node **parent, *rb;
> >>>        bool first = true;
> >>> +     int idx, i;
> >>> +
> >>> +     assert_priolists(execlists, INT_MAX);
> >>>    
> >>> +     /* buckets sorted from highest [in slot 0] to lowest priority */
> >>> +     idx = I915_PRIORITY_MASK - (prio & I915_PRIORITY_MASK);
> >>
> >> How about:
> >>
> >> #define I915_INTERNAL_PRIO(p) (p & I915_PRIORITY_MASK)
> > 
> > Not convinced. It's the only place we use it (one use of MASK later to
> > assert correct less). There's two views, here the user/kernel priority
> > levels are not as important as what we are concerned about are the split
> > lists with the chunking of lowest priority bits. At the extreme we could
> > separate the two and make the chunks BITS_PER_LONG -- I think that's a
> > waste and my goal was simply to avoid kmallocs for the common case of
> > default user priority.
> > 
> > The intention is that we don't even think about the user/internal split,
> > and simply consider it an integrated field, with most positive
> > priorities executing first and most negative last.
> 
> I just find MASK - INT confusing, those two data flavours do not match 
> in my head.  With the local macro I clarified that we are getting an INT 
> from prio, and then the bit you did not quote I suggested we do not 
> subtract from the mask but from the count - 1. It is effectively the 
> same thing just IMHO easier to understand while reading the code. You 
> can skip the macro and decrement from the count, I'd be happy with that 
> as well.

I swear that COUNT-1 was the bit you complained about last time ;)

I put the comment there to explain why we reverse the index, as leftmost
is highest priority.

Preemptive r-b for COUNT-1 - (prio & I915_PRIORITY_MASK) then?
-Chris

* Re: [PATCH 4/7] drm/i915: Combine multiple internal plists into the same i915_priolist bucket
  2018-09-28 13:08         ` Chris Wilson
@ 2018-09-28 13:25           ` Tvrtko Ursulin
  0 siblings, 0 replies; 24+ messages in thread
From: Tvrtko Ursulin @ 2018-09-28 13:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 28/09/2018 14:08, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-28 14:04:34)
>>
>> On 27/09/2018 14:05, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-09-25 10:48:44)
>>>>
>>>> On 25/09/2018 09:32, Chris Wilson wrote:
>>>>> As we are about to allow ourselves to slightly bump the user priority
>>>>> into a few different sublevels, pack those internal priority lists
>>>>> into the same i915_priolist to keep the rbtree compact and avoid having
>>>>> to allocate the default user priority even after the internal bumping.
>>>>> The downside to having a requests[] rather than a node per active list,
>>>>> is that we then have to walk over the empty higher priority lists. To
>>>>> compensate, we track the active buckets and use a small bitmap to skip
>>>>> over any inactive ones.
>>>>>
>>>>> v2: Use MASK of internal levels to simplify our usage.
>>>>>
>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> ---
>>>>>     drivers/gpu/drm/i915/intel_engine_cs.c      |  6 +-
>>>>>     drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++-
>>>>>     drivers/gpu/drm/i915/intel_lrc.c            | 87 ++++++++++++++-------
>>>>>     drivers/gpu/drm/i915/intel_ringbuffer.h     | 13 ++-
>>>>>     4 files changed, 80 insertions(+), 38 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>> index 217ed3ee1cab..83f2f7774c1f 100644
>>>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>> @@ -1534,10 +1534,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>>>>>         count = 0;
>>>>>         drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
>>>>>         for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
>>>>> -             struct i915_priolist *p =
>>>>> -                     rb_entry(rb, typeof(*p), node);
>>>>> +             struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
>>>>> +             int i;
>>>>>     
>>>>> -             list_for_each_entry(rq, &p->requests, sched.link) {
>>>>> +             priolist_for_each_request(rq, p, i) {
>>>>>                         if (count++ < MAX_REQUESTS_TO_SHOW - 1)
>>>>>                                 print_request(m, rq, "\t\tQ ");
>>>>>                         else
>>>>> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
>>>>> index 4874a212754c..ac862b42f6a1 100644
>>>>> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
>>>>> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
>>>>> @@ -746,30 +746,28 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>>>>>         while ((rb = rb_first_cached(&execlists->queue))) {
>>>>>                 struct i915_priolist *p = to_priolist(rb);
>>>>>                 struct i915_request *rq, *rn;
>>>>> +             int i;
>>>>>     
>>>>> -             list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
>>>>> +             priolist_for_each_request_consume(rq, rn, p, i) {
>>>>>                         if (last && rq->hw_context != last->hw_context) {
>>>>> -                             if (port == last_port) {
>>>>> -                                     __list_del_many(&p->requests,
>>>>> -                                                     &rq->sched.link);
>>>>> +                             if (port == last_port)
>>>>>                                         goto done;
>>>>> -                             }
>>>>>     
>>>>>                                 if (submit)
>>>>>                                         port_assign(port, last);
>>>>>                                 port++;
>>>>>                         }
>>>>>     
>>>>> -                     INIT_LIST_HEAD(&rq->sched.link);
>>>>> +                     list_del_init(&rq->sched.link);
>>>>>     
>>>>>                         __i915_request_submit(rq);
>>>>>                         trace_i915_request_in(rq, port_index(port, execlists));
>>>>> +
>>>>>                         last = rq;
>>>>>                         submit = true;
>>>>>                 }
>>>>>     
>>>>>                 rb_erase_cached(&p->node, &execlists->queue);
>>>>> -             INIT_LIST_HEAD(&p->requests);
>>>>>                 if (p->priority != I915_PRIORITY_NORMAL)
>>>>>                         kmem_cache_free(engine->i915->priorities, p);
>>>>>         }
>>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>>>> index e8de250c3413..b1b3f67d1120 100644
>>>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>>>> @@ -259,14 +259,49 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
>>>>>         ce->lrc_desc = desc;
>>>>>     }
>>>>>     
>>>>> -static struct i915_priolist *
>>>>> +static void assert_priolists(struct intel_engine_execlists * const execlists,
>>>>> +                          int queue_priority)
>>>>> +{
>>>>> +     struct rb_node *rb;
>>>>> +     int last_prio, i;
>>>>> +
>>>>> +     if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
>>>>> +             return;
>>>>> +
>>>>> +     GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
>>>>> +                rb_first(&execlists->queue.rb_root));
>>>>> +
>>>>> +     last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
>>>>> +     for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
>>>>> +             struct i915_priolist *p = to_priolist(rb);
>>>>> +
>>>>> +             GEM_BUG_ON(p->priority >= last_prio);
>>>>> +             last_prio = p->priority;
>>>>> +
>>>>> +             GEM_BUG_ON(!p->used);
>>>>> +             for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
>>>>> +                     if (list_empty(&p->requests[i]))
>>>>> +                             continue;
>>>>> +
>>>>> +                     GEM_BUG_ON(!(p->used & BIT(i)));
>>>>> +             }
>>>>> +     }
>>>>> +}
>>>>> +
>>>>> +static struct list_head *
>>>>>     lookup_priolist(struct intel_engine_cs *engine, int prio)
>>>>>     {
>>>>>         struct intel_engine_execlists * const execlists = &engine->execlists;
>>>>>         struct i915_priolist *p;
>>>>>         struct rb_node **parent, *rb;
>>>>>         bool first = true;
>>>>> +     int idx, i;
>>>>> +
>>>>> +     assert_priolists(execlists, INT_MAX);
>>>>>     
>>>>> +     /* buckets sorted from highest [in slot 0] to lowest priority */
>>>>> +     idx = I915_PRIORITY_MASK - (prio & I915_PRIORITY_MASK);
>>>>
>>>> How about:
>>>>
>>>> #define I915_INTERNAL_PRIO(p) (p & I915_PRIORITY_MASK)
>>>
>>> Not convinced. It's the only place we use it (one use of MASK later to
>>> assert correct less). There's two views, here the user/kernel priority
>>> levels are not as important as what we are concerned about are the split
>>> lists with the chunking of lowest priority bits. At the extreme we could
>>> separate the two and make the chunks BITS_PER_LONG -- I think that's a
>>> waste and my goal was simply to avoid kmallocs for the common case of
>>> default user priority.
>>>
>>> The intention is that we don't even think about the user/internal split,
>>> and simply consider it an integrated field, with most positive
>>> priorities executing first and most negative last.
>>
>> I just find MASK - INT confusing, those two data flavours do not match
>> in my head.  With the local macro I clarified that we are getting an INT
>> from prio, and then the bit you did not quote I suggested we do not
>> subtract from the mask but from the count - 1. It is effectively the
>> same thing just IMHO easier to understand while reading the code. You
>> can skip the macro and decrement from the count, I'd be happy with that
>> as well.
> 
> I swear that COUNT-1 was the bit you complained about last time ;)

Blast... I complained that count was one when it actually should be zero,
and you were also negating the mask for extra confusion... no big harm
done, I hope?

> I put the comment there to explain why we reverse the index, as leftmost
> is highest priority.
> 
> Preemptive r-b for COUNT-1 - (prio & I915_PRIORITY_MASK) then?

Yes r-b.

Regards,

Tvrtko


end of thread, other threads:[~2018-09-28 13:26 UTC | newest]

Thread overview: 24+ messages
2018-09-25  8:31 [PATCH 1/7] drm/i915/selftests: Smoketest preemption Chris Wilson
2018-09-25  8:31 ` [PATCH 2/7] drm/i915/execlists: Avoid kicking priority on the current context Chris Wilson
2018-09-25 10:14   ` Tvrtko Ursulin
2018-09-25  8:32 ` [PATCH 3/7] drm/i915: Reserve some priority bits for internal use Chris Wilson
2018-09-25  8:32 ` [PATCH 4/7] drm/i915: Combine multiple internal plists into the same i915_priolist bucket Chris Wilson
2018-09-25  9:48   ` Tvrtko Ursulin
2018-09-27 13:05     ` Chris Wilson
2018-09-28 13:04       ` Tvrtko Ursulin
2018-09-28 13:08         ` Chris Wilson
2018-09-28 13:25           ` Tvrtko Ursulin
2018-09-25  8:32 ` [PATCH 5/7] drm/i915: Priority boost for new clients Chris Wilson
2018-09-25  8:32 ` [PATCH 6/7] drm/i915: Pull scheduling under standalone lock Chris Wilson
2018-09-25  8:32 ` [PATCH 7/7] drm/i915: Priority boost for waiting clients Chris Wilson
2018-09-25  9:04   ` [PATCH v2] " Chris Wilson
2018-09-25  9:09 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] drm/i915/selftests: Smoketest preemption Patchwork
2018-09-25  9:12 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-09-25  9:31 ` ✓ Fi.CI.BAT: success " Patchwork
2018-09-25  9:39 ` [PATCH 1/7] " Tvrtko Ursulin
2018-09-25 10:17   ` Chris Wilson
2018-09-25 10:24     ` Tvrtko Ursulin
2018-09-25 10:28 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] drm/i915/selftests: Smoketest preemption (rev2) Patchwork
2018-09-25 10:31 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-09-25 10:50 ` ✓ Fi.CI.BAT: success " Patchwork
2018-09-25 12:21 ` ✓ Fi.CI.IGT: " Patchwork
