* [PATCH] drm/i915/preemption: Allow preemption between submission ports
@ 2018-02-22 14:11 Chris Wilson
2018-02-22 14:22 ` Chris Wilson
` (3 more replies)
0 siblings, 4 replies; 10+ messages in thread
From: Chris Wilson @ 2018-02-22 14:11 UTC (permalink / raw)
To: intel-gfx
Sometimes we need to boost the priority of an in-flight request, which
may lead to the situation where the second submission port then contains
a higher priority context than the first and so we need to inject a
preemption event. To do so we must always check inside
execlists_dequeue() whether there is a priority inversion between the
ports themselves as well as the head of the priority sorted queue, and we
cannot just skip dequeuing if the queue is empty.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/intel_engine_cs.c | 2 +
drivers/gpu/drm/i915/intel_guc_submission.c | 17 +--
drivers/gpu/drm/i915/intel_lrc.c | 161 ++++++++++++++++------------
drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +
4 files changed, 107 insertions(+), 78 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index c31544406974..ce7fcf55ba18 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -423,6 +423,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
+ execlists->queue_priority = INT_MIN;
execlists->queue = RB_ROOT;
execlists->first = NULL;
}
@@ -1903,6 +1904,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
spin_lock_irq(&engine->timeline->lock);
list_for_each_entry(rq, &engine->timeline->requests, link)
print_request(m, rq, "\t\tE ");
+ drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
for (rb = execlists->first; rb; rb = rb_next(rb)) {
struct i915_priolist *p =
rb_entry(rb, typeof(*p), node);
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 649113c7a3c2..586dde579903 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -75,6 +75,11 @@
*
*/
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+ return rb_entry(rb, struct i915_priolist, node);
+}
+
static inline bool is_high_priority(struct intel_guc_client *client)
{
return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
@@ -682,15 +687,12 @@ static void guc_dequeue(struct intel_engine_cs *engine)
rb = execlists->first;
GEM_BUG_ON(rb_first(&execlists->queue) != rb);
- if (!rb)
- goto unlock;
-
if (port_isset(port)) {
if (engine->i915->preempt_context) {
struct guc_preempt_work *preempt_work =
&engine->i915->guc.preempt_work[engine->id];
- if (rb_entry(rb, struct i915_priolist, node)->priority >
+ if (execlists->queue_priority >
max(port_request(port)->priotree.priority, 0)) {
execlists_set_active(execlists,
EXECLISTS_ACTIVE_PREEMPT);
@@ -706,8 +708,8 @@ static void guc_dequeue(struct intel_engine_cs *engine)
}
GEM_BUG_ON(port_isset(port));
- do {
- struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+ while (rb) {
+ struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
@@ -736,8 +738,9 @@ static void guc_dequeue(struct intel_engine_cs *engine)
INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
- } while (rb);
+ }
done:
+ execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
execlists->first = rb;
if (submit) {
port_assign(port, last);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e781c912f197..8a98da7a5c83 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -169,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state,
struct intel_engine_cs *engine,
struct intel_ring *ring);
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+ return rb_entry(rb, struct i915_priolist, node);
+}
+
+static inline int rq_prio(const struct i915_request *rq)
+{
+ return rq->priotree.priority;
+}
+
+static inline bool need_preempt(const struct intel_engine_cs *engine,
+ const struct i915_request *last,
+ int prio)
+{
+ return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+}
+
/**
* intel_lr_context_descriptor_update() - calculate & cache the descriptor
* descriptor for a pinned context
@@ -224,7 +241,7 @@ lookup_priolist(struct intel_engine_cs *engine,
parent = &execlists->queue.rb_node;
while (*parent) {
rb = *parent;
- p = rb_entry(rb, typeof(*p), node);
+ p = to_priolist(rb);
if (prio > p->priority) {
parent = &rb->rb_left;
} else if (prio < p->priority) {
@@ -264,7 +281,7 @@ lookup_priolist(struct intel_engine_cs *engine,
if (first)
execlists->first = &p->node;
- return ptr_pack_bits(p, first, 1);
+ return p;
}
static void unwind_wa_tail(struct i915_request *rq)
@@ -290,14 +307,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
__i915_request_unsubmit(rq);
unwind_wa_tail(rq);
- GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID);
- if (rq->priotree.priority != last_prio) {
- p = lookup_priolist(engine,
- &rq->priotree,
- rq->priotree.priority);
- p = ptr_mask_bits(p, 1);
-
- last_prio = rq->priotree.priority;
+ GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+ if (rq_prio(rq) != last_prio) {
+ last_prio = rq_prio(rq);
+ p = lookup_priolist(engine, &rq->priotree, last_prio);
}
list_add(&rq->priotree.link, &p->requests);
@@ -397,10 +410,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
desc = execlists_update_context(rq);
GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
- GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n",
+ GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n",
engine->name, n,
port[n].context_id, count,
- rq->global_seqno);
+ rq->global_seqno,
+ rq_prio(rq));
} else {
GEM_BUG_ON(!n);
desc = 0;
@@ -453,12 +467,17 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
GEM_TRACE("%s\n", engine->name);
for (n = execlists_num_ports(&engine->execlists); --n; )
elsp_write(0, engine->execlists.elsp);
elsp_write(ce->lrc_desc, engine->execlists.elsp);
execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
+ execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
}
static void execlists_dequeue(struct intel_engine_cs *engine)
@@ -495,8 +514,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
spin_lock_irq(&engine->timeline->lock);
rb = execlists->first;
GEM_BUG_ON(rb_first(&execlists->queue) != rb);
- if (!rb)
- goto unlock;
if (last) {
/*
@@ -519,54 +536,48 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
goto unlock;
- if (engine->i915->preempt_context &&
- rb_entry(rb, struct i915_priolist, node)->priority >
- max(last->priotree.priority, 0)) {
- /*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
- */
+ if (need_preempt(engine, last, execlists->queue_priority)) {
inject_preempt_context(engine);
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_PREEMPT);
goto unlock;
- } else {
- /*
- * In theory, we could coalesce more requests onto
- * the second port (the first port is active, with
- * no preemptions pending). However, that means we
- * then have to deal with the possible lite-restore
- * of the second port (as we submit the ELSP, there
- * may be a context-switch) but also we may complete
- * the resubmission before the context-switch. Ergo,
- * coalescing onto the second port will cause a
- * preemption event, but we cannot predict whether
- * that will affect port[0] or port[1].
- *
- * If the second port is already active, we can wait
- * until the next context-switch before contemplating
- * new requests. The GPU will be busy and we should be
- * able to resubmit the new ELSP before it idles,
- * avoiding pipeline bubbles (momentary pauses where
- * the driver is unable to keep up the supply of new
- * work).
- */
- if (port_count(&port[1]))
- goto unlock;
-
- /* WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent
- * ring:HEAD == rq:TAIL as we resubmit the
- * request. See gen8_emit_breadcrumb() for
- * where we prepare the padding after the
- * end of the request.
- */
- last->tail = last->wa_tail;
}
+
+ /*
+ * In theory, we could coalesce more requests onto
+ * the second port (the first port is active, with
+ * no preemptions pending). However, that means we
+ * then have to deal with the possible lite-restore
+ * of the second port (as we submit the ELSP, there
+ * may be a context-switch) but also we may complete
+ * the resubmission before the context-switch. Ergo,
+ * coalescing onto the second port will cause a
+ * preemption event, but we cannot predict whether
+ * that will affect port[0] or port[1].
+ *
+ * If the second port is already active, we can wait
+ * until the next context-switch before contemplating
+ * new requests. The GPU will be busy and we should be
+ * able to resubmit the new ELSP before it idles,
+ * avoiding pipeline bubbles (momentary pauses where
+ * the driver is unable to keep up the supply of new
+ * work). However, we have to double check that the
+ * priorities of the ports haven't been switch.
+ */
+ if (port_count(&port[1]))
+ goto unlock;
+
+ /*
+ * WaIdleLiteRestore:bdw,skl
+ * Apply the wa NOOPs to prevent
+ * ring:HEAD == rq:TAIL as we resubmit the
+ * request. See gen8_emit_breadcrumb() for
+ * where we prepare the padding after the
+ * end of the request.
+ */
+ last->tail = last->wa_tail;
}
- do {
- struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+ while (rb) {
+ struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
@@ -628,8 +639,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
- } while (rb);
+ }
done:
+ execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
execlists->first = rb;
if (submit)
port_assign(port, last);
@@ -690,7 +702,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
/* Flush the queued requests to the timeline list (for retiring). */
rb = execlists->first;
while (rb) {
- struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+ struct i915_priolist *p = to_priolist(rb);
list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
INIT_LIST_HEAD(&rq->priotree.link);
@@ -708,7 +720,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
/* Remaining _unready_ requests will be nop'ed when submitted */
-
+ execlists->queue_priority = INT_MIN;
execlists->queue = RB_ROOT;
execlists->first = NULL;
GEM_BUG_ON(port_isset(execlists->port));
@@ -867,10 +879,11 @@ static void execlists_submission_tasklet(unsigned long data)
GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
rq = port_unpack(port, &count);
- GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n",
+ GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
engine->name,
port->context_id, count,
- rq ? rq->global_seqno : 0);
+ rq ? rq->global_seqno : 0,
+ rq ? rq_prio(rq) : 0);
GEM_BUG_ON(count == 0);
if (--count == 0) {
GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
@@ -908,15 +921,19 @@ static void execlists_submission_tasklet(unsigned long data)
intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
}
-static void insert_request(struct intel_engine_cs *engine,
- struct i915_priotree *pt,
- int prio)
+static void queue_request(struct intel_engine_cs *engine,
+ struct i915_priotree *pt,
+ int prio)
{
- struct i915_priolist *p = lookup_priolist(engine, pt, prio);
+ list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
+}
- list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
- if (ptr_unmask_bits(p, 1))
+static void submit_queue(struct intel_engine_cs *engine, int prio)
+{
+ if (prio > engine->execlists.queue_priority) {
+ engine->execlists.queue_priority = prio;
tasklet_hi_schedule(&engine->execlists.tasklet);
+ }
}
static void execlists_submit_request(struct i915_request *request)
@@ -927,7 +944,8 @@ static void execlists_submit_request(struct i915_request *request)
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->timeline->lock, flags);
- insert_request(engine, &request->priotree, request->priotree.priority);
+ queue_request(engine, &request->priotree, rq_prio(request));
+ submit_queue(engine, rq_prio(request));
GEM_BUG_ON(!engine->execlists.first);
GEM_BUG_ON(list_empty(&request->priotree.link));
@@ -983,7 +1001,7 @@ static void execlists_schedule(struct i915_request *request, int prio)
* static void update_priorities(struct i915_priotree *pt, prio) {
* list_for_each_entry(dep, &pt->signalers_list, signal_link)
* update_priorities(dep->signal, prio)
- * insert_request(pt);
+ * queue_request(pt);
* }
* but that may have unlimited recursion depth and so runs a very
* real risk of overunning the kernel stack. Instead, we build
@@ -1046,8 +1064,9 @@ static void execlists_schedule(struct i915_request *request, int prio)
pt->priority = prio;
if (!list_empty(&pt->link)) {
__list_del_entry(&pt->link);
- insert_request(engine, pt, prio);
+ queue_request(engine, pt, prio);
}
+ submit_queue(engine, prio);
}
spin_unlock_irq(&engine->timeline->lock);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a9b83bf7e837..c4e9022b34e3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -257,6 +257,11 @@ struct intel_engine_execlists {
*/
unsigned int port_mask;
+ /**
+ * @queue_priority: Highest pending priority.
+ */
+ int queue_priority;
+
/**
* @queue: queue of requests, in priority lists
*/
--
2.16.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH] drm/i915/preemption: Allow preemption between submission ports
2018-02-22 14:11 [PATCH] drm/i915/preemption: Allow preemption between submission ports Chris Wilson
@ 2018-02-22 14:22 ` Chris Wilson
2018-02-22 14:36 ` Chris Wilson
` (2 more replies)
2018-02-22 14:59 ` ✓ Fi.CI.BAT: success for drm/i915/preemption: Allow preemption between submission ports (rev2) Patchwork
` (2 subsequent siblings)
3 siblings, 3 replies; 10+ messages in thread
From: Chris Wilson @ 2018-02-22 14:22 UTC (permalink / raw)
To: intel-gfx
Sometimes we need to boost the priority of an in-flight request, which
may lead to the situation where the second submission port then contains
a higher priority context than the first and so we need to inject a
preemption event. To do so we must always check inside
execlists_dequeue() whether there is a priority inversion between the
ports themselves as well as the head of the priority sorted queue, and we
cannot just skip dequeuing if the queue is empty.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
Rebase for conflicts
-Chris
---
drivers/gpu/drm/i915/intel_engine_cs.c | 2 +
drivers/gpu/drm/i915/intel_guc_submission.c | 17 +--
drivers/gpu/drm/i915/intel_lrc.c | 161 ++++++++++++++++------------
drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +
4 files changed, 107 insertions(+), 78 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index c31544406974..ce7fcf55ba18 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -423,6 +423,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
+ execlists->queue_priority = INT_MIN;
execlists->queue = RB_ROOT;
execlists->first = NULL;
}
@@ -1903,6 +1904,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
spin_lock_irq(&engine->timeline->lock);
list_for_each_entry(rq, &engine->timeline->requests, link)
print_request(m, rq, "\t\tE ");
+ drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
for (rb = execlists->first; rb; rb = rb_next(rb)) {
struct i915_priolist *p =
rb_entry(rb, typeof(*p), node);
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 649113c7a3c2..586dde579903 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -75,6 +75,11 @@
*
*/
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+ return rb_entry(rb, struct i915_priolist, node);
+}
+
static inline bool is_high_priority(struct intel_guc_client *client)
{
return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
@@ -682,15 +687,12 @@ static void guc_dequeue(struct intel_engine_cs *engine)
rb = execlists->first;
GEM_BUG_ON(rb_first(&execlists->queue) != rb);
- if (!rb)
- goto unlock;
-
if (port_isset(port)) {
if (engine->i915->preempt_context) {
struct guc_preempt_work *preempt_work =
&engine->i915->guc.preempt_work[engine->id];
- if (rb_entry(rb, struct i915_priolist, node)->priority >
+ if (execlists->queue_priority >
max(port_request(port)->priotree.priority, 0)) {
execlists_set_active(execlists,
EXECLISTS_ACTIVE_PREEMPT);
@@ -706,8 +708,8 @@ static void guc_dequeue(struct intel_engine_cs *engine)
}
GEM_BUG_ON(port_isset(port));
- do {
- struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+ while (rb) {
+ struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
@@ -736,8 +738,9 @@ static void guc_dequeue(struct intel_engine_cs *engine)
INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
- } while (rb);
+ }
done:
+ execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
execlists->first = rb;
if (submit) {
port_assign(port, last);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 964885b5d7cb..4bc72fbaf793 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -169,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state,
struct intel_engine_cs *engine,
struct intel_ring *ring);
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+ return rb_entry(rb, struct i915_priolist, node);
+}
+
+static inline int rq_prio(const struct i915_request *rq)
+{
+ return rq->priotree.priority;
+}
+
+static inline bool need_preempt(const struct intel_engine_cs *engine,
+ const struct i915_request *last,
+ int prio)
+{
+ return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+}
+
/**
* intel_lr_context_descriptor_update() - calculate & cache the descriptor
* descriptor for a pinned context
@@ -224,7 +241,7 @@ lookup_priolist(struct intel_engine_cs *engine,
parent = &execlists->queue.rb_node;
while (*parent) {
rb = *parent;
- p = rb_entry(rb, typeof(*p), node);
+ p = to_priolist(rb);
if (prio > p->priority) {
parent = &rb->rb_left;
} else if (prio < p->priority) {
@@ -264,7 +281,7 @@ lookup_priolist(struct intel_engine_cs *engine,
if (first)
execlists->first = &p->node;
- return ptr_pack_bits(p, first, 1);
+ return p;
}
static void unwind_wa_tail(struct i915_request *rq)
@@ -290,14 +307,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
__i915_request_unsubmit(rq);
unwind_wa_tail(rq);
- GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID);
- if (rq->priotree.priority != last_prio) {
- p = lookup_priolist(engine,
- &rq->priotree,
- rq->priotree.priority);
- p = ptr_mask_bits(p, 1);
-
- last_prio = rq->priotree.priority;
+ GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+ if (rq_prio(rq) != last_prio) {
+ last_prio = rq_prio(rq);
+ p = lookup_priolist(engine, &rq->priotree, last_prio);
}
list_add(&rq->priotree.link, &p->requests);
@@ -397,10 +410,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
desc = execlists_update_context(rq);
GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
- GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n",
+ GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n",
engine->name, n,
port[n].context_id, count,
- rq->global_seqno);
+ rq->global_seqno,
+ rq_prio(rq));
} else {
GEM_BUG_ON(!n);
desc = 0;
@@ -453,12 +467,17 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
GEM_TRACE("%s\n", engine->name);
for (n = execlists_num_ports(&engine->execlists); --n; )
elsp_write(0, engine->execlists.elsp);
elsp_write(ce->lrc_desc, engine->execlists.elsp);
execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
+ execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
}
static void execlists_dequeue(struct intel_engine_cs *engine)
@@ -495,8 +514,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
spin_lock_irq(&engine->timeline->lock);
rb = execlists->first;
GEM_BUG_ON(rb_first(&execlists->queue) != rb);
- if (!rb)
- goto unlock;
if (last) {
/*
@@ -519,54 +536,48 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
goto unlock;
- if (engine->i915->preempt_context &&
- rb_entry(rb, struct i915_priolist, node)->priority >
- max(last->priotree.priority, 0)) {
- /*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
- */
+ if (need_preempt(engine, last, execlists->queue_priority)) {
inject_preempt_context(engine);
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_PREEMPT);
goto unlock;
- } else {
- /*
- * In theory, we could coalesce more requests onto
- * the second port (the first port is active, with
- * no preemptions pending). However, that means we
- * then have to deal with the possible lite-restore
- * of the second port (as we submit the ELSP, there
- * may be a context-switch) but also we may complete
- * the resubmission before the context-switch. Ergo,
- * coalescing onto the second port will cause a
- * preemption event, but we cannot predict whether
- * that will affect port[0] or port[1].
- *
- * If the second port is already active, we can wait
- * until the next context-switch before contemplating
- * new requests. The GPU will be busy and we should be
- * able to resubmit the new ELSP before it idles,
- * avoiding pipeline bubbles (momentary pauses where
- * the driver is unable to keep up the supply of new
- * work).
- */
- if (port_count(&port[1]))
- goto unlock;
-
- /* WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent
- * ring:HEAD == rq:TAIL as we resubmit the
- * request. See gen8_emit_breadcrumb() for
- * where we prepare the padding after the
- * end of the request.
- */
- last->tail = last->wa_tail;
}
+
+ /*
+ * In theory, we could coalesce more requests onto
+ * the second port (the first port is active, with
+ * no preemptions pending). However, that means we
+ * then have to deal with the possible lite-restore
+ * of the second port (as we submit the ELSP, there
+ * may be a context-switch) but also we may complete
+ * the resubmission before the context-switch. Ergo,
+ * coalescing onto the second port will cause a
+ * preemption event, but we cannot predict whether
+ * that will affect port[0] or port[1].
+ *
+ * If the second port is already active, we can wait
+ * until the next context-switch before contemplating
+ * new requests. The GPU will be busy and we should be
+ * able to resubmit the new ELSP before it idles,
+ * avoiding pipeline bubbles (momentary pauses where
+ * the driver is unable to keep up the supply of new
+ * work). However, we have to double check that the
+ * priorities of the ports haven't been switch.
+ */
+ if (port_count(&port[1]))
+ goto unlock;
+
+ /*
+ * WaIdleLiteRestore:bdw,skl
+ * Apply the wa NOOPs to prevent
+ * ring:HEAD == rq:TAIL as we resubmit the
+ * request. See gen8_emit_breadcrumb() for
+ * where we prepare the padding after the
+ * end of the request.
+ */
+ last->tail = last->wa_tail;
}
- do {
- struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+ while (rb) {
+ struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
@@ -628,8 +639,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
- } while (rb);
+ }
done:
+ execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
execlists->first = rb;
if (submit)
port_assign(port, last);
@@ -690,7 +702,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
/* Flush the queued requests to the timeline list (for retiring). */
rb = execlists->first;
while (rb) {
- struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+ struct i915_priolist *p = to_priolist(rb);
list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
INIT_LIST_HEAD(&rq->priotree.link);
@@ -708,7 +720,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
/* Remaining _unready_ requests will be nop'ed when submitted */
-
+ execlists->queue_priority = INT_MIN;
execlists->queue = RB_ROOT;
execlists->first = NULL;
GEM_BUG_ON(port_isset(execlists->port));
@@ -864,10 +876,11 @@ static void execlists_submission_tasklet(unsigned long data)
EXECLISTS_ACTIVE_USER));
rq = port_unpack(port, &count);
- GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n",
+ GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
engine->name,
port->context_id, count,
- rq ? rq->global_seqno : 0);
+ rq ? rq->global_seqno : 0,
+ rq ? rq_prio(rq) : 0);
/* Check the context/desc id for this event matches */
GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
@@ -912,15 +925,19 @@ static void execlists_submission_tasklet(unsigned long data)
intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
}
-static void insert_request(struct intel_engine_cs *engine,
- struct i915_priotree *pt,
- int prio)
+static void queue_request(struct intel_engine_cs *engine,
+ struct i915_priotree *pt,
+ int prio)
{
- struct i915_priolist *p = lookup_priolist(engine, pt, prio);
+ list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
+}
- list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
- if (ptr_unmask_bits(p, 1))
+static void submit_queue(struct intel_engine_cs *engine, int prio)
+{
+ if (prio > engine->execlists.queue_priority) {
+ engine->execlists.queue_priority = prio;
tasklet_hi_schedule(&engine->execlists.tasklet);
+ }
}
static void execlists_submit_request(struct i915_request *request)
@@ -931,7 +948,8 @@ static void execlists_submit_request(struct i915_request *request)
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->timeline->lock, flags);
- insert_request(engine, &request->priotree, request->priotree.priority);
+ queue_request(engine, &request->priotree, rq_prio(request));
+ submit_queue(engine, rq_prio(request));
GEM_BUG_ON(!engine->execlists.first);
GEM_BUG_ON(list_empty(&request->priotree.link));
@@ -987,7 +1005,7 @@ static void execlists_schedule(struct i915_request *request, int prio)
* static void update_priorities(struct i915_priotree *pt, prio) {
* list_for_each_entry(dep, &pt->signalers_list, signal_link)
* update_priorities(dep->signal, prio)
- * insert_request(pt);
+ * queue_request(pt);
* }
* but that may have unlimited recursion depth and so runs a very
* real risk of overunning the kernel stack. Instead, we build
@@ -1050,8 +1068,9 @@ static void execlists_schedule(struct i915_request *request, int prio)
pt->priority = prio;
if (!list_empty(&pt->link)) {
__list_del_entry(&pt->link);
- insert_request(engine, pt, prio);
+ queue_request(engine, pt, prio);
}
+ submit_queue(engine, prio);
}
spin_unlock_irq(&engine->timeline->lock);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a9b83bf7e837..c4e9022b34e3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -257,6 +257,11 @@ struct intel_engine_execlists {
*/
unsigned int port_mask;
+ /**
+ * @queue_priority: Highest pending priority.
+ */
+ int queue_priority;
+
/**
* @queue: queue of requests, in priority lists
*/
--
2.16.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/i915/preemption: Allow preemption between submission ports
2018-02-22 14:22 ` Chris Wilson
@ 2018-02-22 14:36 ` Chris Wilson
2018-02-23 14:06 ` Mika Kuoppala
2018-02-23 15:18 ` Mika Kuoppala
2 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2018-02-22 14:36 UTC (permalink / raw)
To: intel-gfx
Quoting Chris Wilson (2018-02-22 14:22:29)
> Sometimes we need to boost the priority of an in-flight request, which
> may lead to the situation where the second submission port then contains
> a higher priority context than the first and so we need to inject a
> preemption event. To do so we must always check inside
> execlists_dequeue() whether there is a priority inversion between the
> ports themselves as well as the head of the priority sorted queue, and we
> cannot just skip dequeuing if the queue is empty.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Note this probably merits a Fixes tag. Outside of igt causing priority
inversion between GPU hogs, is it a big enough problem to deserve
sending to stable@?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* ✓ Fi.CI.BAT: success for drm/i915/preemption: Allow preemption between submission ports (rev2)
2018-02-22 14:11 [PATCH] drm/i915/preemption: Allow preemption between submission ports Chris Wilson
2018-02-22 14:22 ` Chris Wilson
@ 2018-02-22 14:59 ` Patchwork
2018-02-22 19:48 ` ✓ Fi.CI.IGT: " Patchwork
2018-02-23 13:35 ` [PATCH] drm/i915/preemption: Allow preemption between submission ports Chris Wilson
3 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2018-02-22 14:59 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: drm/i915/preemption: Allow preemption between submission ports (rev2)
URL : https://patchwork.freedesktop.org/series/38774/
State : success
== Summary ==
Series 38774v2 drm/i915/preemption: Allow preemption between submission ports
https://patchwork.freedesktop.org/api/1.0/series/38774/revisions/2/mbox/
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-c:
pass -> DMESG-WARN (fi-skl-6700k2) fdo#104108
fdo#104108 https://bugs.freedesktop.org/show_bug.cgi?id=104108
fi-bdw-5557u total:288 pass:267 dwarn:0 dfail:0 fail:0 skip:21 time:421s
fi-bdw-gvtdvm total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:424s
fi-blb-e6850 total:288 pass:223 dwarn:1 dfail:0 fail:0 skip:64 time:374s
fi-bsw-n3050 total:288 pass:242 dwarn:0 dfail:0 fail:0 skip:46 time:479s
fi-bwr-2160 total:288 pass:183 dwarn:0 dfail:0 fail:0 skip:105 time:286s
fi-bxt-j4205 total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:479s
fi-byt-j1900 total:288 pass:253 dwarn:0 dfail:0 fail:0 skip:35 time:465s
fi-byt-n2820 total:288 pass:249 dwarn:0 dfail:0 fail:0 skip:39 time:456s
fi-cfl-s2 total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:564s
fi-elk-e7500 total:288 pass:229 dwarn:0 dfail:0 fail:0 skip:59 time:414s
fi-gdg-551 total:288 pass:179 dwarn:0 dfail:0 fail:1 skip:108 time:288s
fi-glk-1 total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:503s
fi-hsw-4770 total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:386s
fi-ilk-650 total:288 pass:227 dwarn:0 dfail:0 fail:1 skip:60 time:406s
fi-ivb-3520m total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:453s
fi-ivb-3770 total:288 pass:255 dwarn:0 dfail:0 fail:0 skip:33 time:413s
fi-kbl-7500u total:288 pass:263 dwarn:1 dfail:0 fail:0 skip:24 time:450s
fi-kbl-7560u total:288 pass:269 dwarn:0 dfail:0 fail:0 skip:19 time:495s
fi-kbl-7567u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:454s
fi-kbl-r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:494s
fi-pnv-d510 total:288 pass:222 dwarn:1 dfail:0 fail:0 skip:65 time:589s
fi-skl-6260u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:429s
fi-skl-6600u total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:504s
fi-skl-6700hq total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:519s
fi-skl-6700k2 total:288 pass:263 dwarn:1 dfail:0 fail:0 skip:24 time:482s
fi-skl-6770hq total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:472s
fi-skl-guc total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:406s
fi-skl-gvtdvm total:288 pass:265 dwarn:0 dfail:0 fail:0 skip:23 time:431s
fi-snb-2520m total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:523s
fi-snb-2600 total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:404s
Blacklisted hosts:
fi-cfl-8700k total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:390s
fi-bxt-dsi failed to collect. IGT log at Patchwork_8127/fi-bxt-dsi/run0.log
77655eb53ee4b22bff8505da1d8141b93d5b864b drm-tip: 2018y-02m-22d-14h-14m-34s UTC integration manifest
376cc66b8d72 drm/i915/preemption: Allow preemption between submission ports
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8127/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* ✓ Fi.CI.IGT: success for drm/i915/preemption: Allow preemption between submission ports (rev2)
2018-02-22 14:11 [PATCH] drm/i915/preemption: Allow preemption between submission ports Chris Wilson
2018-02-22 14:22 ` Chris Wilson
2018-02-22 14:59 ` ✓ Fi.CI.BAT: success for drm/i915/preemption: Allow preemption between submission ports (rev2) Patchwork
@ 2018-02-22 19:48 ` Patchwork
2018-02-23 13:35 ` [PATCH] drm/i915/preemption: Allow preemption between submission ports Chris Wilson
3 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2018-02-22 19:48 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: drm/i915/preemption: Allow preemption between submission ports (rev2)
URL : https://patchwork.freedesktop.org/series/38774/
State : success
== Summary ==
Test gem_eio:
Subgroup in-flight-contexts:
incomplete -> PASS (shard-apl) fdo#104945
Test kms_cursor_crc:
Subgroup cursor-64x64-suspend:
pass -> SKIP (shard-snb) fdo#102365
Test kms_chv_cursor_fail:
Subgroup pipe-b-128x128-right-edge:
dmesg-warn -> PASS (shard-snb)
Subgroup pipe-b-64x64-bottom-edge:
dmesg-warn -> PASS (shard-snb) fdo#105185
Test perf:
Subgroup enable-disable:
pass -> FAIL (shard-apl) fdo#103715
Subgroup oa-exponents:
incomplete -> PASS (shard-apl) fdo#102254
Test kms_flip:
Subgroup plain-flip-fb-recreate-interruptible:
pass -> FAIL (shard-hsw) fdo#100368 +1
Test kms_frontbuffer_tracking:
Subgroup fbc-shrfb-scaledprimary:
pass -> DMESG-FAIL (shard-apl) fdo#103167 +2
Test kms_plane_lowres:
Subgroup pipe-b-tiling-y:
pass -> FAIL (shard-apl) fdo#103166
fdo#104945 https://bugs.freedesktop.org/show_bug.cgi?id=104945
fdo#102365 https://bugs.freedesktop.org/show_bug.cgi?id=102365
fdo#105185 https://bugs.freedesktop.org/show_bug.cgi?id=105185
fdo#103715 https://bugs.freedesktop.org/show_bug.cgi?id=103715
fdo#102254 https://bugs.freedesktop.org/show_bug.cgi?id=102254
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
fdo#103166 https://bugs.freedesktop.org/show_bug.cgi?id=103166
shard-apl total:3465 pass:1816 dwarn:1 dfail:1 fail:16 skip:1631 time:12444s
shard-hsw total:3465 pass:1767 dwarn:1 dfail:0 fail:3 skip:1693 time:11711s
shard-snb total:3465 pass:1356 dwarn:1 dfail:0 fail:2 skip:2106 time:6489s
Blacklisted hosts:
shard-kbl total:3448 pass:1950 dwarn:1 dfail:0 fail:14 skip:1482 time:9117s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8127/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/i915/preemption: Allow preemption between submission ports
2018-02-22 14:11 [PATCH] drm/i915/preemption: Allow preemption between submission ports Chris Wilson
` (2 preceding siblings ...)
2018-02-22 19:48 ` ✓ Fi.CI.IGT: " Patchwork
@ 2018-02-23 13:35 ` Chris Wilson
3 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2018-02-23 13:35 UTC (permalink / raw)
To: intel-gfx
Quoting Chris Wilson (2018-02-22 14:11:54)
> Sometimes we need to boost the priority of an in-flight request, which
> may lead to the situation where the second submission port then contains
> a higher priority context than the first and so we need to inject a
> preemption event. To do so we must always check inside
> execlists_dequeue() whether there is a priority inversion between the
> ports themselves as well as the head of the priority sorted queue, and we
> cannot just skip dequeuing if the queue is empty.
Michał noted this doesn't extend past 2-port submission as simply as I
thought it might. Nevertheless it does solve the problem we have today
of priority inversion within ELSP[2]. Extending the submission model as
a whole to more ports is left as an exercise to the reader. :|
-Chris
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> drivers/gpu/drm/i915/intel_engine_cs.c | 2 +
> drivers/gpu/drm/i915/intel_guc_submission.c | 17 +--
> drivers/gpu/drm/i915/intel_lrc.c | 161 ++++++++++++++++------------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +
> 4 files changed, 107 insertions(+), 78 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index c31544406974..ce7fcf55ba18 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -423,6 +423,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
> BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
> GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
>
> + execlists->queue_priority = INT_MIN;
> execlists->queue = RB_ROOT;
> execlists->first = NULL;
> }
> @@ -1903,6 +1904,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> spin_lock_irq(&engine->timeline->lock);
> list_for_each_entry(rq, &engine->timeline->requests, link)
> print_request(m, rq, " E ");
> + drm_printf(m, " Queue priority: %d\n", execlists->queue_priority);
> for (rb = execlists->first; rb; rb = rb_next(rb)) {
> struct i915_priolist *p =
> rb_entry(rb, typeof(*p), node);
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 649113c7a3c2..586dde579903 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -75,6 +75,11 @@
> *
> */
>
> +static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +{
> + return rb_entry(rb, struct i915_priolist, node);
> +}
> +
> static inline bool is_high_priority(struct intel_guc_client *client)
> {
> return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
> @@ -682,15 +687,12 @@ static void guc_dequeue(struct intel_engine_cs *engine)
> rb = execlists->first;
> GEM_BUG_ON(rb_first(&execlists->queue) != rb);
>
> - if (!rb)
> - goto unlock;
> -
> if (port_isset(port)) {
> if (engine->i915->preempt_context) {
> struct guc_preempt_work *preempt_work =
> &engine->i915->guc.preempt_work[engine->id];
>
> - if (rb_entry(rb, struct i915_priolist, node)->priority >
> + if (execlists->queue_priority >
> max(port_request(port)->priotree.priority, 0)) {
> execlists_set_active(execlists,
> EXECLISTS_ACTIVE_PREEMPT);
> @@ -706,8 +708,8 @@ static void guc_dequeue(struct intel_engine_cs *engine)
> }
> GEM_BUG_ON(port_isset(port));
>
> - do {
> - struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> + while (rb) {
> + struct i915_priolist *p = to_priolist(rb);
> struct i915_request *rq, *rn;
>
> list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
> @@ -736,8 +738,9 @@ static void guc_dequeue(struct intel_engine_cs *engine)
> INIT_LIST_HEAD(&p->requests);
> if (p->priority != I915_PRIORITY_NORMAL)
> kmem_cache_free(engine->i915->priorities, p);
> - } while (rb);
> + }
> done:
> + execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
> execlists->first = rb;
> if (submit) {
> port_assign(port, last);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index e781c912f197..8a98da7a5c83 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -169,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state,
> struct intel_engine_cs *engine,
> struct intel_ring *ring);
>
> +static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +{
> + return rb_entry(rb, struct i915_priolist, node);
> +}
> +
> +static inline int rq_prio(const struct i915_request *rq)
> +{
> + return rq->priotree.priority;
> +}
> +
> +static inline bool need_preempt(const struct intel_engine_cs *engine,
> + const struct i915_request *last,
> + int prio)
> +{
> + return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
> +}
> +
> /**
> * intel_lr_context_descriptor_update() - calculate & cache the descriptor
> * descriptor for a pinned context
> @@ -224,7 +241,7 @@ lookup_priolist(struct intel_engine_cs *engine,
> parent = &execlists->queue.rb_node;
> while (*parent) {
> rb = *parent;
> - p = rb_entry(rb, typeof(*p), node);
> + p = to_priolist(rb);
> if (prio > p->priority) {
> parent = &rb->rb_left;
> } else if (prio < p->priority) {
> @@ -264,7 +281,7 @@ lookup_priolist(struct intel_engine_cs *engine,
> if (first)
> execlists->first = &p->node;
>
> - return ptr_pack_bits(p, first, 1);
> + return p;
> }
>
> static void unwind_wa_tail(struct i915_request *rq)
> @@ -290,14 +307,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
> __i915_request_unsubmit(rq);
> unwind_wa_tail(rq);
>
> - GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID);
> - if (rq->priotree.priority != last_prio) {
> - p = lookup_priolist(engine,
> - &rq->priotree,
> - rq->priotree.priority);
> - p = ptr_mask_bits(p, 1);
> -
> - last_prio = rq->priotree.priority;
> + GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> + if (rq_prio(rq) != last_prio) {
> + last_prio = rq_prio(rq);
> + p = lookup_priolist(engine, &rq->priotree, last_prio);
> }
>
> list_add(&rq->priotree.link, &p->requests);
> @@ -397,10 +410,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
> desc = execlists_update_context(rq);
> GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
>
> - GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n",
> + GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n",
> engine->name, n,
> port[n].context_id, count,
> - rq->global_seqno);
> + rq->global_seqno,
> + rq_prio(rq));
> } else {
> GEM_BUG_ON(!n);
> desc = 0;
> @@ -453,12 +467,17 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
> _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>
> + /*
> + * Switch to our empty preempt context so
> + * the state of the GPU is known (idle).
> + */
> GEM_TRACE("%s\n", engine->name);
> for (n = execlists_num_ports(&engine->execlists); --n; )
> elsp_write(0, engine->execlists.elsp);
>
> elsp_write(ce->lrc_desc, engine->execlists.elsp);
> execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
> + execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
> }
>
> static void execlists_dequeue(struct intel_engine_cs *engine)
> @@ -495,8 +514,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> spin_lock_irq(&engine->timeline->lock);
> rb = execlists->first;
> GEM_BUG_ON(rb_first(&execlists->queue) != rb);
> - if (!rb)
> - goto unlock;
>
> if (last) {
> /*
> @@ -519,54 +536,48 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
> goto unlock;
>
> - if (engine->i915->preempt_context &&
> - rb_entry(rb, struct i915_priolist, node)->priority >
> - max(last->priotree.priority, 0)) {
> - /*
> - * Switch to our empty preempt context so
> - * the state of the GPU is known (idle).
> - */
> + if (need_preempt(engine, last, execlists->queue_priority)) {
> inject_preempt_context(engine);
> - execlists_set_active(execlists,
> - EXECLISTS_ACTIVE_PREEMPT);
> goto unlock;
> - } else {
> - /*
> - * In theory, we could coalesce more requests onto
> - * the second port (the first port is active, with
> - * no preemptions pending). However, that means we
> - * then have to deal with the possible lite-restore
> - * of the second port (as we submit the ELSP, there
> - * may be a context-switch) but also we may complete
> - * the resubmission before the context-switch. Ergo,
> - * coalescing onto the second port will cause a
> - * preemption event, but we cannot predict whether
> - * that will affect port[0] or port[1].
> - *
> - * If the second port is already active, we can wait
> - * until the next context-switch before contemplating
> - * new requests. The GPU will be busy and we should be
> - * able to resubmit the new ELSP before it idles,
> - * avoiding pipeline bubbles (momentary pauses where
> - * the driver is unable to keep up the supply of new
> - * work).
> - */
> - if (port_count(&port[1]))
> - goto unlock;
> -
> - /* WaIdleLiteRestore:bdw,skl
> - * Apply the wa NOOPs to prevent
> - * ring:HEAD == rq:TAIL as we resubmit the
> - * request. See gen8_emit_breadcrumb() for
> - * where we prepare the padding after the
> - * end of the request.
> - */
> - last->tail = last->wa_tail;
> }
> +
> + /*
> + * In theory, we could coalesce more requests onto
> + * the second port (the first port is active, with
> + * no preemptions pending). However, that means we
> + * then have to deal with the possible lite-restore
> + * of the second port (as we submit the ELSP, there
> + * may be a context-switch) but also we may complete
> + * the resubmission before the context-switch. Ergo,
> + * coalescing onto the second port will cause a
> + * preemption event, but we cannot predict whether
> + * that will affect port[0] or port[1].
> + *
> + * If the second port is already active, we can wait
> + * until the next context-switch before contemplating
> + * new requests. The GPU will be busy and we should be
> + * able to resubmit the new ELSP before it idles,
> + * avoiding pipeline bubbles (momentary pauses where
> + * the driver is unable to keep up the supply of new
> + * work). However, we have to double check that the
> + * priorities of the ports haven't been switch.
> + */
> + if (port_count(&port[1]))
> + goto unlock;
> +
> + /*
> + * WaIdleLiteRestore:bdw,skl
> + * Apply the wa NOOPs to prevent
> + * ring:HEAD == rq:TAIL as we resubmit the
> + * request. See gen8_emit_breadcrumb() for
> + * where we prepare the padding after the
> + * end of the request.
> + */
> + last->tail = last->wa_tail;
> }
>
> - do {
> - struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> + while (rb) {
> + struct i915_priolist *p = to_priolist(rb);
> struct i915_request *rq, *rn;
>
> list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
> @@ -628,8 +639,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> INIT_LIST_HEAD(&p->requests);
> if (p->priority != I915_PRIORITY_NORMAL)
> kmem_cache_free(engine->i915->priorities, p);
> - } while (rb);
> + }
> done:
> + execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
> execlists->first = rb;
> if (submit)
> port_assign(port, last);
> @@ -690,7 +702,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
> /* Flush the queued requests to the timeline list (for retiring). */
> rb = execlists->first;
> while (rb) {
> - struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> + struct i915_priolist *p = to_priolist(rb);
>
> list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
> INIT_LIST_HEAD(&rq->priotree.link);
> @@ -708,7 +720,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>
> /* Remaining _unready_ requests will be nop'ed when submitted */
>
> -
> + execlists->queue_priority = INT_MIN;
> execlists->queue = RB_ROOT;
> execlists->first = NULL;
> GEM_BUG_ON(port_isset(execlists->port));
> @@ -867,10 +879,11 @@ static void execlists_submission_tasklet(unsigned long data)
> GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
>
> rq = port_unpack(port, &count);
> - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n",
> + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
> engine->name,
> port->context_id, count,
> - rq ? rq->global_seqno : 0);
> + rq ? rq->global_seqno : 0,
> + rq ? rq_prio(rq) : 0);
> GEM_BUG_ON(count == 0);
> if (--count == 0) {
> GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> @@ -908,15 +921,19 @@ static void execlists_submission_tasklet(unsigned long data)
> intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
> }
>
> -static void insert_request(struct intel_engine_cs *engine,
> - struct i915_priotree *pt,
> - int prio)
> +static void queue_request(struct intel_engine_cs *engine,
> + struct i915_priotree *pt,
> + int prio)
> {
> - struct i915_priolist *p = lookup_priolist(engine, pt, prio);
> + list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
> +}
>
> - list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
> - if (ptr_unmask_bits(p, 1))
> +static void submit_queue(struct intel_engine_cs *engine, int prio)
> +{
> + if (prio > engine->execlists.queue_priority) {
> + engine->execlists.queue_priority = prio;
> tasklet_hi_schedule(&engine->execlists.tasklet);
> + }
> }
>
> static void execlists_submit_request(struct i915_request *request)
> @@ -927,7 +944,8 @@ static void execlists_submit_request(struct i915_request *request)
> /* Will be called from irq-context when using foreign fences. */
> spin_lock_irqsave(&engine->timeline->lock, flags);
>
> - insert_request(engine, &request->priotree, request->priotree.priority);
> + queue_request(engine, &request->priotree, rq_prio(request));
> + submit_queue(engine, rq_prio(request));
>
> GEM_BUG_ON(!engine->execlists.first);
> GEM_BUG_ON(list_empty(&request->priotree.link));
> @@ -983,7 +1001,7 @@ static void execlists_schedule(struct i915_request *request, int prio)
> * static void update_priorities(struct i915_priotree *pt, prio) {
> * list_for_each_entry(dep, &pt->signalers_list, signal_link)
> * update_priorities(dep->signal, prio)
> - * insert_request(pt);
> + * queue_request(pt);
> * }
> * but that may have unlimited recursion depth and so runs a very
> * real risk of overunning the kernel stack. Instead, we build
> @@ -1046,8 +1064,9 @@ static void execlists_schedule(struct i915_request *request, int prio)
> pt->priority = prio;
> if (!list_empty(&pt->link)) {
> __list_del_entry(&pt->link);
> - insert_request(engine, pt, prio);
> + queue_request(engine, pt, prio);
> }
> + submit_queue(engine, prio);
> }
>
> spin_unlock_irq(&engine->timeline->lock);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index a9b83bf7e837..c4e9022b34e3 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -257,6 +257,11 @@ struct intel_engine_execlists {
> */
> unsigned int port_mask;
>
> + /**
> + * @queue_priority: Highest pending priority.
> + */
> + int queue_priority;
> +
> /**
> * @queue: queue of requests, in priority lists
> */
> --
> 2.16.1
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/i915/preemption: Allow preemption between submission ports
2018-02-22 14:22 ` Chris Wilson
2018-02-22 14:36 ` Chris Wilson
@ 2018-02-23 14:06 ` Mika Kuoppala
2018-02-23 14:23 ` Chris Wilson
2018-02-23 15:18 ` Mika Kuoppala
2 siblings, 1 reply; 10+ messages in thread
From: Mika Kuoppala @ 2018-02-23 14:06 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
Chris Wilson <chris@chris-wilson.co.uk> writes:
> Sometimes we need to boost the priority of an in-flight request, which
> may lead to the situation where the second submission port then contains
> a higher priority context than the first and so we need to inject a
> preemption event. To do so we must always check inside
> execlists_dequeue() whether there is a priority inversion between the
> ports themselves as well as the head of the priority sorted queue, and we
> cannot just skip dequeuing if the queue is empty.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> Rebase for conflicts
> -Chris
> ---
> drivers/gpu/drm/i915/intel_engine_cs.c | 2 +
> drivers/gpu/drm/i915/intel_guc_submission.c | 17 +--
> drivers/gpu/drm/i915/intel_lrc.c | 161 ++++++++++++++++------------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +
> 4 files changed, 107 insertions(+), 78 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index c31544406974..ce7fcf55ba18 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -423,6 +423,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
> BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
> GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
>
> + execlists->queue_priority = INT_MIN;
> execlists->queue = RB_ROOT;
> execlists->first = NULL;
> }
> @@ -1903,6 +1904,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> spin_lock_irq(&engine->timeline->lock);
> list_for_each_entry(rq, &engine->timeline->requests, link)
> print_request(m, rq, "\t\tE ");
> + drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
> for (rb = execlists->first; rb; rb = rb_next(rb)) {
> struct i915_priolist *p =
> rb_entry(rb, typeof(*p), node);
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 649113c7a3c2..586dde579903 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -75,6 +75,11 @@
> *
> */
>
> +static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +{
> + return rb_entry(rb, struct i915_priolist, node);
> +}
> +
> static inline bool is_high_priority(struct intel_guc_client *client)
> {
> return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
> @@ -682,15 +687,12 @@ static void guc_dequeue(struct intel_engine_cs *engine)
> rb = execlists->first;
> GEM_BUG_ON(rb_first(&execlists->queue) != rb);
>
> - if (!rb)
> - goto unlock;
> -
> if (port_isset(port)) {
> if (engine->i915->preempt_context) {
> struct guc_preempt_work *preempt_work =
> &engine->i915->guc.preempt_work[engine->id];
>
> - if (rb_entry(rb, struct i915_priolist, node)->priority >
> + if (execlists->queue_priority >
> max(port_request(port)->priotree.priority, 0)) {
> execlists_set_active(execlists,
> EXECLISTS_ACTIVE_PREEMPT);
This is the priority inversion part? We preempt and clear the ports
to rearrange if the last port has a higher priority request?
> @@ -706,8 +708,8 @@ static void guc_dequeue(struct intel_engine_cs *engine)
> }
> GEM_BUG_ON(port_isset(port));
>
> - do {
> - struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> + while (rb) {
> + struct i915_priolist *p = to_priolist(rb);
> struct i915_request *rq, *rn;
>
> list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
> @@ -736,8 +738,9 @@ static void guc_dequeue(struct intel_engine_cs *engine)
> INIT_LIST_HEAD(&p->requests);
> if (p->priority != I915_PRIORITY_NORMAL)
> kmem_cache_free(engine->i915->priorities, p);
> - } while (rb);
> + }
> done:
> + execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
> execlists->first = rb;
> if (submit) {
> port_assign(port, last);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 964885b5d7cb..4bc72fbaf793 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -169,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state,
> struct intel_engine_cs *engine,
> struct intel_ring *ring);
>
> +static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +{
> + return rb_entry(rb, struct i915_priolist, node);
> +}
> +
> +static inline int rq_prio(const struct i915_request *rq)
> +{
> + return rq->priotree.priority;
> +}
> +
> +static inline bool need_preempt(const struct intel_engine_cs *engine,
> + const struct i915_request *last,
> + int prio)
> +{
> + return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
> +}
> +
> /**
> * intel_lr_context_descriptor_update() - calculate & cache the descriptor
> * descriptor for a pinned context
> @@ -224,7 +241,7 @@ lookup_priolist(struct intel_engine_cs *engine,
> parent = &execlists->queue.rb_node;
> while (*parent) {
> rb = *parent;
> - p = rb_entry(rb, typeof(*p), node);
> + p = to_priolist(rb);
> if (prio > p->priority) {
> parent = &rb->rb_left;
> } else if (prio < p->priority) {
> @@ -264,7 +281,7 @@ lookup_priolist(struct intel_engine_cs *engine,
> if (first)
> execlists->first = &p->node;
>
> - return ptr_pack_bits(p, first, 1);
> + return p;
Hmm there is no need for decode first as we always resubmit
the queue depending on the prio?
> }
>
> static void unwind_wa_tail(struct i915_request *rq)
> @@ -290,14 +307,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
> __i915_request_unsubmit(rq);
> unwind_wa_tail(rq);
>
> - GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID);
> - if (rq->priotree.priority != last_prio) {
> - p = lookup_priolist(engine,
> - &rq->priotree,
> - rq->priotree.priority);
> - p = ptr_mask_bits(p, 1);
> -
> - last_prio = rq->priotree.priority;
> + GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> + if (rq_prio(rq) != last_prio) {
> + last_prio = rq_prio(rq);
> + p = lookup_priolist(engine, &rq->priotree, last_prio);
> }
>
> list_add(&rq->priotree.link, &p->requests);
> @@ -397,10 +410,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
> desc = execlists_update_context(rq);
> GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
>
> - GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n",
> + GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n",
> engine->name, n,
> port[n].context_id, count,
> - rq->global_seqno);
> + rq->global_seqno,
> + rq_prio(rq));
> } else {
> GEM_BUG_ON(!n);
> desc = 0;
> @@ -453,12 +467,17 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
> _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>
> + /*
> + * Switch to our empty preempt context so
> + * the state of the GPU is known (idle).
> + */
> GEM_TRACE("%s\n", engine->name);
> for (n = execlists_num_ports(&engine->execlists); --n; )
> elsp_write(0, engine->execlists.elsp);
>
> elsp_write(ce->lrc_desc, engine->execlists.elsp);
> execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
> + execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
Surely a better place. Now looking at this would it be more prudent to
move both the clear and set right before the last elsp write?
Well I guess it really doesn't matter as we hold the timeline lock.
-Mika
> }
>
> static void execlists_dequeue(struct intel_engine_cs *engine)
> @@ -495,8 +514,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> spin_lock_irq(&engine->timeline->lock);
> rb = execlists->first;
> GEM_BUG_ON(rb_first(&execlists->queue) != rb);
> - if (!rb)
> - goto unlock;
>
> if (last) {
> /*
> @@ -519,54 +536,48 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
> goto unlock;
>
> - if (engine->i915->preempt_context &&
> - rb_entry(rb, struct i915_priolist, node)->priority >
> - max(last->priotree.priority, 0)) {
> - /*
> - * Switch to our empty preempt context so
> - * the state of the GPU is known (idle).
> - */
> + if (need_preempt(engine, last, execlists->queue_priority)) {
> inject_preempt_context(engine);
> - execlists_set_active(execlists,
> - EXECLISTS_ACTIVE_PREEMPT);
> goto unlock;
> - } else {
> - /*
> - * In theory, we could coalesce more requests onto
> - * the second port (the first port is active, with
> - * no preemptions pending). However, that means we
> - * then have to deal with the possible lite-restore
> - * of the second port (as we submit the ELSP, there
> - * may be a context-switch) but also we may complete
> - * the resubmission before the context-switch. Ergo,
> - * coalescing onto the second port will cause a
> - * preemption event, but we cannot predict whether
> - * that will affect port[0] or port[1].
> - *
> - * If the second port is already active, we can wait
> - * until the next context-switch before contemplating
> - * new requests. The GPU will be busy and we should be
> - * able to resubmit the new ELSP before it idles,
> - * avoiding pipeline bubbles (momentary pauses where
> - * the driver is unable to keep up the supply of new
> - * work).
> - */
> - if (port_count(&port[1]))
> - goto unlock;
> -
> - /* WaIdleLiteRestore:bdw,skl
> - * Apply the wa NOOPs to prevent
> - * ring:HEAD == rq:TAIL as we resubmit the
> - * request. See gen8_emit_breadcrumb() for
> - * where we prepare the padding after the
> - * end of the request.
> - */
> - last->tail = last->wa_tail;
> }
> +
> + /*
> + * In theory, we could coalesce more requests onto
> + * the second port (the first port is active, with
> + * no preemptions pending). However, that means we
> + * then have to deal with the possible lite-restore
> + * of the second port (as we submit the ELSP, there
> + * may be a context-switch) but also we may complete
> + * the resubmission before the context-switch. Ergo,
> + * coalescing onto the second port will cause a
> + * preemption event, but we cannot predict whether
> + * that will affect port[0] or port[1].
> + *
> + * If the second port is already active, we can wait
> + * until the next context-switch before contemplating
> + * new requests. The GPU will be busy and we should be
> + * able to resubmit the new ELSP before it idles,
> + * avoiding pipeline bubbles (momentary pauses where
> + * the driver is unable to keep up the supply of new
> + * work). However, we have to double check that the
> + * priorities of the ports haven't been switch.
> + */
> + if (port_count(&port[1]))
> + goto unlock;
> +
> + /*
> + * WaIdleLiteRestore:bdw,skl
> + * Apply the wa NOOPs to prevent
> + * ring:HEAD == rq:TAIL as we resubmit the
> + * request. See gen8_emit_breadcrumb() for
> + * where we prepare the padding after the
> + * end of the request.
> + */
> + last->tail = last->wa_tail;
> }
>
> - do {
> - struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> + while (rb) {
> + struct i915_priolist *p = to_priolist(rb);
> struct i915_request *rq, *rn;
>
> list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
> @@ -628,8 +639,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> INIT_LIST_HEAD(&p->requests);
> if (p->priority != I915_PRIORITY_NORMAL)
> kmem_cache_free(engine->i915->priorities, p);
> - } while (rb);
> + }
> done:
> + execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
> execlists->first = rb;
> if (submit)
> port_assign(port, last);
> @@ -690,7 +702,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
> /* Flush the queued requests to the timeline list (for retiring). */
> rb = execlists->first;
> while (rb) {
> - struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> + struct i915_priolist *p = to_priolist(rb);
>
> list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
> INIT_LIST_HEAD(&rq->priotree.link);
> @@ -708,7 +720,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>
> /* Remaining _unready_ requests will be nop'ed when submitted */
>
> -
> + execlists->queue_priority = INT_MIN;
> execlists->queue = RB_ROOT;
> execlists->first = NULL;
> GEM_BUG_ON(port_isset(execlists->port));
> @@ -864,10 +876,11 @@ static void execlists_submission_tasklet(unsigned long data)
> EXECLISTS_ACTIVE_USER));
>
> rq = port_unpack(port, &count);
> - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n",
> + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
> engine->name,
> port->context_id, count,
> - rq ? rq->global_seqno : 0);
> + rq ? rq->global_seqno : 0,
> + rq ? rq_prio(rq) : 0);
>
> /* Check the context/desc id for this event matches */
> GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> @@ -912,15 +925,19 @@ static void execlists_submission_tasklet(unsigned long data)
> intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
> }
>
> -static void insert_request(struct intel_engine_cs *engine,
> - struct i915_priotree *pt,
> - int prio)
> +static void queue_request(struct intel_engine_cs *engine,
> + struct i915_priotree *pt,
> + int prio)
> {
> - struct i915_priolist *p = lookup_priolist(engine, pt, prio);
> + list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
> +}
>
> - list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
> - if (ptr_unmask_bits(p, 1))
> +static void submit_queue(struct intel_engine_cs *engine, int prio)
> +{
> + if (prio > engine->execlists.queue_priority) {
> + engine->execlists.queue_priority = prio;
> tasklet_hi_schedule(&engine->execlists.tasklet);
> + }
> }
>
> static void execlists_submit_request(struct i915_request *request)
> @@ -931,7 +948,8 @@ static void execlists_submit_request(struct i915_request *request)
> /* Will be called from irq-context when using foreign fences. */
> spin_lock_irqsave(&engine->timeline->lock, flags);
>
> - insert_request(engine, &request->priotree, request->priotree.priority);
> + queue_request(engine, &request->priotree, rq_prio(request));
> + submit_queue(engine, rq_prio(request));
>
> GEM_BUG_ON(!engine->execlists.first);
> GEM_BUG_ON(list_empty(&request->priotree.link));
> @@ -987,7 +1005,7 @@ static void execlists_schedule(struct i915_request *request, int prio)
> * static void update_priorities(struct i915_priotree *pt, prio) {
> * list_for_each_entry(dep, &pt->signalers_list, signal_link)
> * update_priorities(dep->signal, prio)
> - * insert_request(pt);
> + * queue_request(pt);
> * }
> * but that may have unlimited recursion depth and so runs a very
> * real risk of overunning the kernel stack. Instead, we build
> @@ -1050,8 +1068,9 @@ static void execlists_schedule(struct i915_request *request, int prio)
> pt->priority = prio;
> if (!list_empty(&pt->link)) {
> __list_del_entry(&pt->link);
> - insert_request(engine, pt, prio);
> + queue_request(engine, pt, prio);
> }
> + submit_queue(engine, prio);
> }
>
> spin_unlock_irq(&engine->timeline->lock);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index a9b83bf7e837..c4e9022b34e3 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -257,6 +257,11 @@ struct intel_engine_execlists {
> */
> unsigned int port_mask;
>
> + /**
> + * @queue_priority: Highest pending priority.
> + */
> + int queue_priority;
> +
> /**
> * @queue: queue of requests, in priority lists
> */
> --
> 2.16.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/i915/preemption: Allow preemption between submission ports
2018-02-23 14:06 ` Mika Kuoppala
@ 2018-02-23 14:23 ` Chris Wilson
0 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2018-02-23 14:23 UTC (permalink / raw)
To: Mika Kuoppala, intel-gfx
Quoting Mika Kuoppala (2018-02-23 14:06:06)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> > static inline bool is_high_priority(struct intel_guc_client *client)
> > {
> > return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
> > @@ -682,15 +687,12 @@ static void guc_dequeue(struct intel_engine_cs *engine)
> > rb = execlists->first;
> > GEM_BUG_ON(rb_first(&execlists->queue) != rb);
> >
> > - if (!rb)
> > - goto unlock;
> > -
> > if (port_isset(port)) {
> > if (engine->i915->preempt_context) {
> > struct guc_preempt_work *preempt_work =
> > &engine->i915->guc.preempt_work[engine->id];
> >
> > - if (rb_entry(rb, struct i915_priolist, node)->priority >
> > + if (execlists->queue_priority >
> > max(port_request(port)->priotree.priority, 0)) {
> > execlists_set_active(execlists,
> > EXECLISTS_ACTIVE_PREEMPT);
>
> This is the priority inversion part? We preempt and clear the ports
> to rearrange if the last port has a higher priority request?
Yes, along with a strong kick from
> > @@ -1050,8 +1068,9 @@ static void execlists_schedule(struct i915_request *request, int prio)
> > pt->priority = prio;
> > if (!list_empty(&pt->link)) {
> > __list_del_entry(&pt->link);
> > - insert_request(engine, pt, prio);
> > + queue_request(engine, pt, prio);
> > }
> > + submit_queue(engine, prio);
So that we re-evaluate ELSP if the active prio change.
> > @@ -264,7 +281,7 @@ lookup_priolist(struct intel_engine_cs *engine,
> > if (first)
> > execlists->first = &p->node;
> >
> > - return ptr_pack_bits(p, first, 1);
> > + return p;
>
> Hmm there is no need for decode first as we always resubmit
> the queue depending on the prio?
Right, the first bit is now checked against queue_priority instead. If
we are higher priority than the queue, we must rerun the tasklet.
Whereas before we knew we only had to do it if we inserted into the
start of the queue.
> > @@ -453,12 +467,17 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
> > _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> > CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
> >
> > + /*
> > + * Switch to our empty preempt context so
> > + * the state of the GPU is known (idle).
> > + */
> > GEM_TRACE("%s\n", engine->name);
> > for (n = execlists_num_ports(&engine->execlists); --n; )
> > elsp_write(0, engine->execlists.elsp);
> >
> > elsp_write(ce->lrc_desc, engine->execlists.elsp);
> > execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
> > + execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>
> Surely a better place. Now looking at this would it be more prudent to
> move both the clear and set right before the last elsp write?
>
> Well I guess it really doesn't matter as we hold the timeline lock.
And we only process ELSP from a single cpu, so it's all sequential,
right.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/i915/preemption: Allow preemption between submission ports
2018-02-22 14:22 ` Chris Wilson
2018-02-22 14:36 ` Chris Wilson
2018-02-23 14:06 ` Mika Kuoppala
@ 2018-02-23 15:18 ` Mika Kuoppala
2018-02-23 16:41 ` Chris Wilson
2 siblings, 1 reply; 10+ messages in thread
From: Mika Kuoppala @ 2018-02-23 15:18 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
Chris Wilson <chris@chris-wilson.co.uk> writes:
> Sometimes we need to boost the priority of an in-flight request, which
> may lead to the situation where the second submission port then contains
> a higher priority context than the first and so we need to inject a
> preemption event. To do so we must always check inside
> execlists_dequeue() whether there is a priority inversion between the
> ports themselves as well as the head of the priority sorted queue, and we
> cannot just skip dequeuing if the queue is empty.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
> Rebase for conflicts
> -Chris
> ---
> drivers/gpu/drm/i915/intel_engine_cs.c | 2 +
> drivers/gpu/drm/i915/intel_guc_submission.c | 17 +--
> drivers/gpu/drm/i915/intel_lrc.c | 161 ++++++++++++++++------------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +
> 4 files changed, 107 insertions(+), 78 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index c31544406974..ce7fcf55ba18 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -423,6 +423,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
> BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
> GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
>
> + execlists->queue_priority = INT_MIN;
> execlists->queue = RB_ROOT;
> execlists->first = NULL;
> }
> @@ -1903,6 +1904,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> spin_lock_irq(&engine->timeline->lock);
> list_for_each_entry(rq, &engine->timeline->requests, link)
> print_request(m, rq, "\t\tE ");
> + drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
> for (rb = execlists->first; rb; rb = rb_next(rb)) {
> struct i915_priolist *p =
> rb_entry(rb, typeof(*p), node);
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 649113c7a3c2..586dde579903 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -75,6 +75,11 @@
> *
> */
>
> +static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +{
> + return rb_entry(rb, struct i915_priolist, node);
> +}
> +
> static inline bool is_high_priority(struct intel_guc_client *client)
> {
> return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
> @@ -682,15 +687,12 @@ static void guc_dequeue(struct intel_engine_cs *engine)
> rb = execlists->first;
> GEM_BUG_ON(rb_first(&execlists->queue) != rb);
>
> - if (!rb)
> - goto unlock;
> -
> if (port_isset(port)) {
> if (engine->i915->preempt_context) {
> struct guc_preempt_work *preempt_work =
> &engine->i915->guc.preempt_work[engine->id];
>
> - if (rb_entry(rb, struct i915_priolist, node)->priority >
> + if (execlists->queue_priority >
> max(port_request(port)->priotree.priority, 0)) {
> execlists_set_active(execlists,
> EXECLISTS_ACTIVE_PREEMPT);
> @@ -706,8 +708,8 @@ static void guc_dequeue(struct intel_engine_cs *engine)
> }
> GEM_BUG_ON(port_isset(port));
>
> - do {
> - struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> + while (rb) {
> + struct i915_priolist *p = to_priolist(rb);
> struct i915_request *rq, *rn;
>
> list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
> @@ -736,8 +738,9 @@ static void guc_dequeue(struct intel_engine_cs *engine)
> INIT_LIST_HEAD(&p->requests);
> if (p->priority != I915_PRIORITY_NORMAL)
> kmem_cache_free(engine->i915->priorities, p);
> - } while (rb);
> + }
> done:
> + execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
> execlists->first = rb;
> if (submit) {
> port_assign(port, last);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 964885b5d7cb..4bc72fbaf793 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -169,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state,
> struct intel_engine_cs *engine,
> struct intel_ring *ring);
>
> +static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +{
> + return rb_entry(rb, struct i915_priolist, node);
> +}
> +
> +static inline int rq_prio(const struct i915_request *rq)
> +{
> + return rq->priotree.priority;
> +}
> +
> +static inline bool need_preempt(const struct intel_engine_cs *engine,
> + const struct i915_request *last,
> + int prio)
> +{
> + return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
> +}
> +
> /**
> * intel_lr_context_descriptor_update() - calculate & cache the descriptor
> * descriptor for a pinned context
> @@ -224,7 +241,7 @@ lookup_priolist(struct intel_engine_cs *engine,
> parent = &execlists->queue.rb_node;
> while (*parent) {
> rb = *parent;
> - p = rb_entry(rb, typeof(*p), node);
> + p = to_priolist(rb);
> if (prio > p->priority) {
> parent = &rb->rb_left;
> } else if (prio < p->priority) {
> @@ -264,7 +281,7 @@ lookup_priolist(struct intel_engine_cs *engine,
> if (first)
> execlists->first = &p->node;
>
> - return ptr_pack_bits(p, first, 1);
> + return p;
> }
>
> static void unwind_wa_tail(struct i915_request *rq)
> @@ -290,14 +307,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
> __i915_request_unsubmit(rq);
> unwind_wa_tail(rq);
>
> - GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID);
> - if (rq->priotree.priority != last_prio) {
> - p = lookup_priolist(engine,
> - &rq->priotree,
> - rq->priotree.priority);
> - p = ptr_mask_bits(p, 1);
> -
> - last_prio = rq->priotree.priority;
> + GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> + if (rq_prio(rq) != last_prio) {
> + last_prio = rq_prio(rq);
> + p = lookup_priolist(engine, &rq->priotree, last_prio);
> }
>
> list_add(&rq->priotree.link, &p->requests);
> @@ -397,10 +410,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
> desc = execlists_update_context(rq);
> GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
>
> - GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n",
> + GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n",
> engine->name, n,
> port[n].context_id, count,
> - rq->global_seqno);
> + rq->global_seqno,
> + rq_prio(rq));
> } else {
> GEM_BUG_ON(!n);
> desc = 0;
> @@ -453,12 +467,17 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
> _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>
> + /*
> + * Switch to our empty preempt context so
> + * the state of the GPU is known (idle).
> + */
> GEM_TRACE("%s\n", engine->name);
> for (n = execlists_num_ports(&engine->execlists); --n; )
> elsp_write(0, engine->execlists.elsp);
>
> elsp_write(ce->lrc_desc, engine->execlists.elsp);
> execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
> + execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
> }
>
> static void execlists_dequeue(struct intel_engine_cs *engine)
> @@ -495,8 +514,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> spin_lock_irq(&engine->timeline->lock);
> rb = execlists->first;
> GEM_BUG_ON(rb_first(&execlists->queue) != rb);
> - if (!rb)
> - goto unlock;
>
> if (last) {
> /*
> @@ -519,54 +536,48 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
> goto unlock;
>
> - if (engine->i915->preempt_context &&
> - rb_entry(rb, struct i915_priolist, node)->priority >
> - max(last->priotree.priority, 0)) {
> - /*
> - * Switch to our empty preempt context so
> - * the state of the GPU is known (idle).
> - */
> + if (need_preempt(engine, last, execlists->queue_priority)) {
> inject_preempt_context(engine);
> - execlists_set_active(execlists,
> - EXECLISTS_ACTIVE_PREEMPT);
> goto unlock;
> - } else {
> - /*
> - * In theory, we could coalesce more requests onto
> - * the second port (the first port is active, with
> - * no preemptions pending). However, that means we
> - * then have to deal with the possible lite-restore
> - * of the second port (as we submit the ELSP, there
> - * may be a context-switch) but also we may complete
> - * the resubmission before the context-switch. Ergo,
> - * coalescing onto the second port will cause a
> - * preemption event, but we cannot predict whether
> - * that will affect port[0] or port[1].
> - *
> - * If the second port is already active, we can wait
> - * until the next context-switch before contemplating
> - * new requests. The GPU will be busy and we should be
> - * able to resubmit the new ELSP before it idles,
> - * avoiding pipeline bubbles (momentary pauses where
> - * the driver is unable to keep up the supply of new
> - * work).
> - */
> - if (port_count(&port[1]))
> - goto unlock;
> -
> - /* WaIdleLiteRestore:bdw,skl
> - * Apply the wa NOOPs to prevent
> - * ring:HEAD == rq:TAIL as we resubmit the
> - * request. See gen8_emit_breadcrumb() for
> - * where we prepare the padding after the
> - * end of the request.
> - */
> - last->tail = last->wa_tail;
> }
> +
> + /*
> + * In theory, we could coalesce more requests onto
> + * the second port (the first port is active, with
> + * no preemptions pending). However, that means we
> + * then have to deal with the possible lite-restore
> + * of the second port (as we submit the ELSP, there
> + * may be a context-switch) but also we may complete
> + * the resubmission before the context-switch. Ergo,
> + * coalescing onto the second port will cause a
> + * preemption event, but we cannot predict whether
> + * that will affect port[0] or port[1].
> + *
> + * If the second port is already active, we can wait
> + * until the next context-switch before contemplating
> + * new requests. The GPU will be busy and we should be
> + * able to resubmit the new ELSP before it idles,
> + * avoiding pipeline bubbles (momentary pauses where
> + * the driver is unable to keep up the supply of new
> + * work). However, we have to double check that the
> + * priorities of the ports haven't been switch.
> + */
> + if (port_count(&port[1]))
> + goto unlock;
> +
> + /*
> + * WaIdleLiteRestore:bdw,skl
> + * Apply the wa NOOPs to prevent
> + * ring:HEAD == rq:TAIL as we resubmit the
> + * request. See gen8_emit_breadcrumb() for
> + * where we prepare the padding after the
> + * end of the request.
> + */
> + last->tail = last->wa_tail;
> }
>
> - do {
> - struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> + while (rb) {
> + struct i915_priolist *p = to_priolist(rb);
> struct i915_request *rq, *rn;
>
> list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
> @@ -628,8 +639,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> INIT_LIST_HEAD(&p->requests);
> if (p->priority != I915_PRIORITY_NORMAL)
> kmem_cache_free(engine->i915->priorities, p);
> - } while (rb);
> + }
> done:
> + execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
> execlists->first = rb;
> if (submit)
> port_assign(port, last);
> @@ -690,7 +702,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
> /* Flush the queued requests to the timeline list (for retiring). */
> rb = execlists->first;
> while (rb) {
> - struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> + struct i915_priolist *p = to_priolist(rb);
>
> list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
> INIT_LIST_HEAD(&rq->priotree.link);
> @@ -708,7 +720,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>
> /* Remaining _unready_ requests will be nop'ed when submitted */
>
> -
> + execlists->queue_priority = INT_MIN;
> execlists->queue = RB_ROOT;
> execlists->first = NULL;
> GEM_BUG_ON(port_isset(execlists->port));
> @@ -864,10 +876,11 @@ static void execlists_submission_tasklet(unsigned long data)
> EXECLISTS_ACTIVE_USER));
>
> rq = port_unpack(port, &count);
> - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n",
> + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
> engine->name,
> port->context_id, count,
> - rq ? rq->global_seqno : 0);
> + rq ? rq->global_seqno : 0,
> + rq ? rq_prio(rq) : 0);
>
> /* Check the context/desc id for this event matches */
> GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> @@ -912,15 +925,19 @@ static void execlists_submission_tasklet(unsigned long data)
> intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
> }
>
> -static void insert_request(struct intel_engine_cs *engine,
> - struct i915_priotree *pt,
> - int prio)
> +static void queue_request(struct intel_engine_cs *engine,
> + struct i915_priotree *pt,
> + int prio)
> {
> - struct i915_priolist *p = lookup_priolist(engine, pt, prio);
> + list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
> +}
>
> - list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
> - if (ptr_unmask_bits(p, 1))
> +static void submit_queue(struct intel_engine_cs *engine, int prio)
> +{
> + if (prio > engine->execlists.queue_priority) {
> + engine->execlists.queue_priority = prio;
> tasklet_hi_schedule(&engine->execlists.tasklet);
> + }
> }
>
> static void execlists_submit_request(struct i915_request *request)
> @@ -931,7 +948,8 @@ static void execlists_submit_request(struct i915_request *request)
> /* Will be called from irq-context when using foreign fences. */
> spin_lock_irqsave(&engine->timeline->lock, flags);
>
> - insert_request(engine, &request->priotree, request->priotree.priority);
> + queue_request(engine, &request->priotree, rq_prio(request));
> + submit_queue(engine, rq_prio(request));
>
> GEM_BUG_ON(!engine->execlists.first);
> GEM_BUG_ON(list_empty(&request->priotree.link));
> @@ -987,7 +1005,7 @@ static void execlists_schedule(struct i915_request *request, int prio)
> * static void update_priorities(struct i915_priotree *pt, prio) {
> * list_for_each_entry(dep, &pt->signalers_list, signal_link)
> * update_priorities(dep->signal, prio)
> - * insert_request(pt);
> + * queue_request(pt);
> * }
> * but that may have unlimited recursion depth and so runs a very
> * real risk of overunning the kernel stack. Instead, we build
> @@ -1050,8 +1068,9 @@ static void execlists_schedule(struct i915_request *request, int prio)
> pt->priority = prio;
> if (!list_empty(&pt->link)) {
> __list_del_entry(&pt->link);
> - insert_request(engine, pt, prio);
> + queue_request(engine, pt, prio);
> }
> + submit_queue(engine, prio);
> }
>
> spin_unlock_irq(&engine->timeline->lock);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index a9b83bf7e837..c4e9022b34e3 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -257,6 +257,11 @@ struct intel_engine_execlists {
> */
> unsigned int port_mask;
>
> + /**
> + * @queue_priority: Highest pending priority.
> + */
> + int queue_priority;
> +
> /**
> * @queue: queue of requests, in priority lists
> */
> --
> 2.16.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/i915/preemption: Allow preemption between submission ports
2018-02-23 15:18 ` Mika Kuoppala
@ 2018-02-23 16:41 ` Chris Wilson
0 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2018-02-23 16:41 UTC (permalink / raw)
To: Mika Kuoppala, intel-gfx
Quoting Mika Kuoppala (2018-02-23 15:18:17)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
>
> > Sometimes we need to boost the priority of an in-flight request, which
> > may lead to the situation where the second submission port then contains
> > a higher priority context than the first and so we need to inject a
> > preemption event. To do so we must always check inside
> > execlists_dequeue() whether there is a priority inversion between the
> > ports themselves as well as the head of the priority sorted queue, and we
> > cannot just skip dequeuing if the queue is empty.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Michał Winiarski <michal.winiarski@intel.com>
> > Cc: Michel Thierry <michel.thierry@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Added detail to @queue_priority and pushed. Thanks for the review!
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2018-02-23 16:42 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-22 14:11 [PATCH] drm/i915/preemption: Allow preemption between submission ports Chris Wilson
2018-02-22 14:22 ` Chris Wilson
2018-02-22 14:36 ` Chris Wilson
2018-02-23 14:06 ` Mika Kuoppala
2018-02-23 14:23 ` Chris Wilson
2018-02-23 15:18 ` Mika Kuoppala
2018-02-23 16:41 ` Chris Wilson
2018-02-22 14:59 ` ✓ Fi.CI.BAT: success for drm/i915/preemption: Allow preemption between submission ports (rev2) Patchwork
2018-02-22 19:48 ` ✓ Fi.CI.IGT: " Patchwork
2018-02-23 13:35 ` [PATCH] drm/i915/preemption: Allow preemption between submission ports Chris Wilson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.