* [RFC 1/4] drm/i915/execlists: Move insert_request()
@ 2017-07-17  8:42 Chris Wilson
  2017-07-17  8:42 ` [RFC 2/4] drm/i915/execlists: Keep request->priority for its lifetime Chris Wilson
                   ` (4 more replies)
  0 siblings, 5 replies; 12+ messages in thread
From: Chris Wilson @ 2017-07-17  8:42 UTC (permalink / raw)
  To: intel-gfx

Move insert_request() earlier to avoid a forward declaration in a later
patch.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 128 +++++++++++++++++++--------------------
 1 file changed, 64 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ad61d1998fb7..1b2f0e3d383a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -292,6 +292,70 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 	return ctx->engine[engine->id].lrc_desc;
 }
 
+static bool
+insert_request(struct intel_engine_cs *engine,
+	       struct i915_priotree *pt,
+	       int prio)
+{
+	struct i915_priolist *p;
+	struct rb_node **parent, *rb;
+	bool first = true;
+
+	if (unlikely(engine->no_priolist))
+		prio = I915_PRIORITY_NORMAL;
+
+find_priolist:
+	/* most positive priority is scheduled first, equal priorities fifo */
+	rb = NULL;
+	parent = &engine->execlist_queue.rb_node;
+	while (*parent) {
+		rb = *parent;
+		p = rb_entry(rb, typeof(*p), node);
+		if (prio > p->priority) {
+			parent = &rb->rb_left;
+		} else if (prio < p->priority) {
+			parent = &rb->rb_right;
+			first = false;
+		} else {
+			list_add_tail(&pt->link, &p->requests);
+			return false;
+		}
+	}
+
+	if (prio == I915_PRIORITY_NORMAL) {
+		p = &engine->default_priolist;
+	} else {
+		p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
+		/* Convert an allocation failure to a priority bump */
+		if (unlikely(!p)) {
+			prio = I915_PRIORITY_NORMAL; /* recurses just once */
+
+			/* To maintain ordering with all rendering, after an
+			 * allocation failure we have to disable all scheduling.
+			 * Requests will then be executed in fifo, and schedule
+			 * will ensure that dependencies are emitted in fifo.
+			 * There will be still some reordering with existing
+			 * requests, so if userspace lied about their
+			 * dependencies that reordering may be visible.
+			 */
+			engine->no_priolist = true;
+			goto find_priolist;
+		}
+	}
+
+	p->priority = prio;
+	rb_link_node(&p->node, rb, parent);
+	rb_insert_color(&p->node, &engine->execlist_queue);
+
+	INIT_LIST_HEAD(&p->requests);
+	list_add_tail(&pt->link, &p->requests);
+
+	if (first)
+		engine->execlist_first = &p->node;
+
+	return first;
+}
+
 static inline void
 execlists_context_status_change(struct drm_i915_gem_request *rq,
 				unsigned long status)
@@ -649,70 +713,6 @@ static void intel_lrc_irq_handler(unsigned long data)
 	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
 }
 
-static bool
-insert_request(struct intel_engine_cs *engine,
-	       struct i915_priotree *pt,
-	       int prio)
-{
-	struct i915_priolist *p;
-	struct rb_node **parent, *rb;
-	bool first = true;
-
-	if (unlikely(engine->no_priolist))
-		prio = I915_PRIORITY_NORMAL;
-
-find_priolist:
-	/* most positive priority is scheduled first, equal priorities fifo */
-	rb = NULL;
-	parent = &engine->execlist_queue.rb_node;
-	while (*parent) {
-		rb = *parent;
-		p = rb_entry(rb, typeof(*p), node);
-		if (prio > p->priority) {
-			parent = &rb->rb_left;
-		} else if (prio < p->priority) {
-			parent = &rb->rb_right;
-			first = false;
-		} else {
-			list_add_tail(&pt->link, &p->requests);
-			return false;
-		}
-	}
-
-	if (prio == I915_PRIORITY_NORMAL) {
-		p = &engine->default_priolist;
-	} else {
-		p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
-		/* Convert an allocation failure to a priority bump */
-		if (unlikely(!p)) {
-			prio = I915_PRIORITY_NORMAL; /* recurses just once */
-
-			/* To maintain ordering with all rendering, after an
-			 * allocation failure we have to disable all scheduling.
-			 * Requests will then be executed in fifo, and schedule
-			 * will ensure that dependencies are emitted in fifo.
-			 * There will be still some reordering with existing
-			 * requests, so if userspace lied about their
-			 * dependencies that reordering may be visible.
-			 */
-			engine->no_priolist = true;
-			goto find_priolist;
-		}
-	}
-
-	p->priority = prio;
-	rb_link_node(&p->node, rb, parent);
-	rb_insert_color(&p->node, &engine->execlist_queue);
-
-	INIT_LIST_HEAD(&p->requests);
-	list_add_tail(&pt->link, &p->requests);
-
-	if (first)
-		engine->execlist_first = &p->node;
-
-	return first;
-}
-
 static void execlists_submit_request(struct drm_i915_gem_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
-- 
2.13.2


* [RFC 2/4] drm/i915/execlists: Keep request->priority for its lifetime
  2017-07-17  8:42 [RFC 1/4] drm/i915/execlists: Move insert_request() Chris Wilson
@ 2017-07-17  8:42 ` Chris Wilson
  2017-07-19 14:25   ` Michał Winiarski
  2017-07-17  8:42 ` [RFC 3/4] drm/i915/execlists: Split insert_request() Chris Wilson
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2017-07-17  8:42 UTC (permalink / raw)
  To: intel-gfx

With preemption, we will want to "unsubmit" a request, taking it back
from the hw and returning it to the priority sorted execution list. In
order to know where to insert it into that list, we need to remember
its adjusted priority (which may change even while it is executing).
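
As an illustrative sketch (not part of this patch), keeping the
priority alive means a later unsubmit can simply feed the request back
through the sorted insert; unsubmit_request() here is a hypothetical
helper name:

	static void unsubmit_request(struct intel_engine_cs *engine,
				     struct drm_i915_gem_request *rq)
	{
		/* Take the request back from the hw... */
		__i915_gem_request_unsubmit(rq);

		/* ...and reinsert it at the priority we remembered,
		 * which may have been adjusted while it was executing.
		 */
		insert_request(engine, &rq->priotree,
			       rq->priotree.priority);
	}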

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michal Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1b2f0e3d383a..8ab0c4b76c98 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -552,8 +552,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			}
 
 			INIT_LIST_HEAD(&rq->priotree.link);
-			rq->priotree.priority = INT_MAX;
-
 			__i915_gem_request_submit(rq);
 			trace_i915_gem_request_in(rq, port_index(port, engine));
 			last = rq;
@@ -687,6 +685,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 
 				trace_i915_gem_request_out(rq);
+				rq->priotree.priority = INT_MAX;
 				i915_gem_request_put(rq);
 
 				port[0] = port[1];
-- 
2.13.2


* [RFC 3/4] drm/i915/execlists: Split insert_request()
  2017-07-17  8:42 [RFC 1/4] drm/i915/execlists: Move insert_request() Chris Wilson
  2017-07-17  8:42 ` [RFC 2/4] drm/i915/execlists: Keep request->priority for its lifetime Chris Wilson
@ 2017-07-17  8:42 ` Chris Wilson
  2017-07-19 14:23   ` Michał Winiarski
  2017-07-17  8:42 ` [RFC 4/4] drm/i915/execlists: Preemption! Chris Wilson
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2017-07-17  8:42 UTC (permalink / raw)
  To: intel-gfx

In the next patch we will want to reinsert a request not at the end of
the priority queue, but at the front. Here we split insert_request()
into two: the first function retrieves the priority list (for later
reuse by unsubmit), and a wrapper function inserts at the end of that
list and schedules the tasklet if we were first.
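
The "first" flag is returned packed into the low bit of the
i915_priolist pointer via the ptr_pack_bits()/ptr_mask_bits()/
ptr_unmask_bits() helpers used in the diff. A minimal sketch of that
pattern with generic stand-in names (it assumes the pointer is at
least 2-byte aligned, so bit 0 is free to carry a flag):

	static inline void *pack_first(struct i915_priolist *p, bool first)
	{
		return (void *)((unsigned long)p | first);
	}

	static inline struct i915_priolist *unpack_ptr(void *packed)
	{
		return (void *)((unsigned long)packed & ~1ul);
	}

	static inline bool unpack_first(const void *packed)
	{
		return (unsigned long)packed & 1;
	}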

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8ab0c4b76c98..d227480b3a26 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -292,10 +292,10 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 	return ctx->engine[engine->id].lrc_desc;
 }
 
-static bool
-insert_request(struct intel_engine_cs *engine,
-	       struct i915_priotree *pt,
-	       int prio)
+static struct i915_priolist *
+lookup_priolist(struct intel_engine_cs *engine,
+		struct i915_priotree *pt,
+		int prio)
 {
 	struct i915_priolist *p;
 	struct rb_node **parent, *rb;
@@ -317,8 +317,7 @@ insert_request(struct intel_engine_cs *engine,
 			parent = &rb->rb_right;
 			first = false;
 		} else {
-			list_add_tail(&pt->link, &p->requests);
-			return false;
+			return p;
 		}
 	}
 
@@ -344,16 +343,14 @@ insert_request(struct intel_engine_cs *engine,
 	}
 
 	p->priority = prio;
+	INIT_LIST_HEAD(&p->requests);
 	rb_link_node(&p->node, rb, parent);
 	rb_insert_color(&p->node, &engine->execlist_queue);
 
-	INIT_LIST_HEAD(&p->requests);
-	list_add_tail(&pt->link, &p->requests);
-
 	if (first)
 		engine->execlist_first = &p->node;
 
-	return first;
+	return ptr_pack_bits(p, first, 1);
 }
 
 static inline void
@@ -712,6 +709,17 @@ static void intel_lrc_irq_handler(unsigned long data)
 	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
 }
 
+static void insert_request(struct intel_engine_cs *engine,
+			   struct i915_priotree *pt,
+			   int prio)
+{
+	struct i915_priolist *p = lookup_priolist(engine, pt, prio);
+
+	list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
+	if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(engine))
+		tasklet_hi_schedule(&engine->irq_tasklet);
+}
+
 static void execlists_submit_request(struct drm_i915_gem_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
@@ -720,12 +728,7 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 	/* Will be called from irq-context when using foreign fences. */
 	spin_lock_irqsave(&engine->timeline->lock, flags);
 
-	if (insert_request(engine,
-			   &request->priotree,
-			   request->priotree.priority)) {
-		if (execlists_elsp_ready(engine))
-			tasklet_hi_schedule(&engine->irq_tasklet);
-	}
+	insert_request(engine, &request->priotree, request->priotree.priority);
 
 	GEM_BUG_ON(!engine->execlist_first);
 	GEM_BUG_ON(list_empty(&request->priotree.link));
-- 
2.13.2


* [RFC 4/4] drm/i915/execlists: Preemption!
  2017-07-17  8:42 [RFC 1/4] drm/i915/execlists: Move insert_request() Chris Wilson
  2017-07-17  8:42 ` [RFC 2/4] drm/i915/execlists: Keep request->priority for its lifetime Chris Wilson
  2017-07-17  8:42 ` [RFC 3/4] drm/i915/execlists: Split insert_request() Chris Wilson
@ 2017-07-17  8:42 ` Chris Wilson
  2017-07-17  9:46   ` Chris Wilson
  2017-07-17  9:00 ` ✓ Fi.CI.BAT: success for series starting with [RFC,1/4] drm/i915/execlists: Move insert_request() Patchwork
  2017-07-19 14:26 ` [RFC 1/4] " Michał Winiarski
  4 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2017-07-17  8:42 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ben Widawsky, Mika Kuoppala

When we write to ELSP, it triggers a context preemption at the earliest
arbitration point (3DPRIMITIVE, some PIPECONTROLs, a few other
operations and the explicit MI_ARB_CHECK). If this is to the same
context, it triggers a LITE_RESTORE where the RING_TAIL is merely
updated (used currently to chain requests from the same context
together, avoiding bubbles). However, if it is to a different context,
a full context-switch is performed: the hardware starts to execute the
new context, saving the image of the old one for later execution.
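
In CSB terms (as handled in the interrupt handler in the diff below),
the two outcomes can be told apart roughly as in this sketch, using the
status bits that appear in the patch:

	if (status & GEN8_CTX_STATUS_PREEMPTED &&
	    !(status & GEN8_CTX_STATUS_LITE_RESTORE)) {
		/* full switch: the old context was kicked off the hw
		 * before completing; its requests must be resubmitted
		 */
	} else {
		/* a lite restore (same context, new RING_TAIL) or an
		 * ordinary completion event
		 */
	}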

Previously we avoided preemption by only submitting a new context when
the old was idle. But now we wish to embrace it, and if the new request
has a higher priority than the currently executing request, we write to
the ELSP regardless, thus triggering preemption. In the context-switch
interrupt handler, we therefore need to check whether the old context
was completed or whether we just switched to the new context
preemptively. In the dequeue function (responsible for deciding who
executes next), we need to take note of when we will cause a preemption
and move all the preempted requests back onto the execution list. After
that we can proceed as normal.

The current heuristic for deciding when to preempt is that the new
request must be of higher priority than the currently executing
request, and must have a privileged priority greater than 0.
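
Condensed, the check in the dequeue path of this patch amounts to the
following (illustrative sketch only):

	static bool should_preempt(const struct i915_priolist *p,
				   const struct drm_i915_gem_request *last)
	{
		/* Preempt only if the queued priority beats both the
		 * running request and the privileged threshold of 0.
		 */
		return p->priority > max(last->priotree.priority, 0);
	}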

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 146 ++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/intel_ringbuffer.h |   2 +-
 2 files changed, 110 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d227480b3a26..fe037bb9644c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -397,9 +397,9 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 	return ce->lrc_desc;
 }
 
-static void execlists_submit_ports(struct intel_engine_cs *engine)
+static void execlists_submit_ports(struct intel_engine_cs *engine,
+				   struct execlist_port *port)
 {
-	struct execlist_port *port = engine->execlist_port;
 	u32 __iomem *elsp =
 		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
 	unsigned int n;
@@ -456,24 +456,21 @@ static void port_assign(struct execlist_port *port,
 	port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
 }
 
+static void unwind_wa_tail(struct drm_i915_gem_request *rq)
+{
+	rq->tail = intel_ring_wrap(rq->ring,
+				   rq->wa_tail - WA_TAIL_DWORDS*sizeof(u32));
+	assert_ring_tail_valid(rq->ring, rq->tail);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *last;
-	struct execlist_port *port = engine->execlist_port;
+	struct execlist_port *ports = engine->execlist_port;
+	struct execlist_port *port = ports;
+	struct drm_i915_gem_request *last = port_request(port);
 	struct rb_node *rb;
 	bool submit = false;
-
-	last = port_request(port);
-	if (last)
-		/* WaIdleLiteRestore:bdw,skl
-		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
-		 * as we resubmit the request. See gen8_emit_breadcrumb()
-		 * for where we prepare the padding after the end of the
-		 * request.
-		 */
-		last->tail = last->wa_tail;
-
-	GEM_BUG_ON(port_isset(&port[1]));
+	bool once = last;
 
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -503,6 +500,49 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;
 
+		if (once) {
+			if (port_count(&port[0]) > 1)
+				goto done;
+
+			if (p->priority > max(last->priotree.priority, 0)) {
+				list_for_each_entry_safe_reverse(rq, rn,
+								 &engine->timeline->requests,
+								 link) {
+					struct i915_priolist *p;
+
+					if (i915_gem_request_completed(rq))
+						break;
+
+					__i915_gem_request_unsubmit(rq);
+					unwind_wa_tail(rq);
+
+					p = lookup_priolist(engine,
+							    &rq->priotree,
+							    rq->priotree.priority);
+					list_add(&rq->priotree.link,
+						 &ptr_mask_bits(p, 1)->requests);
+				}
+
+				ports = engine->execlist_preempt;
+				port = ports;
+				last = NULL;
+			} else {
+				/* WaIdleLiteRestore:bdw,skl
+				 * Apply the wa NOOPs to prevent
+				 * ring:HEAD == req:TAIL as we resubmit the
+				 * request. See gen8_emit_breadcrumb() for
+				 * where we prepare the padding after the
+				 * end of the request.
+				 */
+				last->tail = last->wa_tail;
+
+				if (port_count(&port[1]))
+					goto done;
+			}
+
+			once = false;
+		}
+
 		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
 			/*
 			 * Can we combine this request with the current port?
@@ -521,7 +561,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * combine this request with the last, then we
 				 * are done.
 				 */
-				if (port != engine->execlist_port) {
+				if (port != ports) {
 					__list_del_many(&p->requests,
 							&rq->priotree.link);
 					goto done;
@@ -568,14 +608,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	spin_unlock_irq(&engine->timeline->lock);
 
 	if (submit)
-		execlists_submit_ports(engine);
+		execlists_submit_ports(engine, ports);
 }
 
-static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
+static void switch_to_preempt(struct intel_engine_cs *engine)
 {
-	const struct execlist_port *port = engine->execlist_port;
-
-	return port_count(&port[0]) + port_count(&port[1]) < 2;
+	memcpy(engine->execlist_port,
+	       engine->execlist_preempt,
+	       sizeof(engine->execlist_preempt));
+	memset(engine->execlist_preempt, 0,
+	       sizeof(engine->execlist_preempt));
 }
 
 /*
@@ -585,7 +627,7 @@ static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
 static void intel_lrc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
-	struct execlist_port *port = engine->execlist_port;
+	struct execlist_port * const port = engine->execlist_port;
 	struct drm_i915_private *dev_priv = engine->i915;
 
 	/* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -674,6 +716,24 @@ static void intel_lrc_irq_handler(unsigned long data)
 			/* Check the context/desc id for this event matches */
 			GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
 
+			if (status & GEN8_CTX_STATUS_PREEMPTED &&
+			    !(status & GEN8_CTX_STATUS_LITE_RESTORE)) {
+				int i;
+
+				GEM_BUG_ON(!port_isset(port));
+				GEM_BUG_ON(!port_isset(engine->execlist_preempt));
+				for (i = 0; i < ARRAY_SIZE(engine->execlist_port); i++) {
+					if (!port_isset(&port[i]))
+						break;
+
+					rq = port_request(&port[i]);
+					i915_gem_request_put(rq);
+				}
+
+				switch_to_preempt(engine);
+				continue;
+			}
+
 			rq = port_unpack(port, &count);
 			GEM_BUG_ON(count == 0);
 			if (--count == 0) {
@@ -691,6 +751,10 @@ static void intel_lrc_irq_handler(unsigned long data)
 				port_set(port, port_pack(rq, count));
 			}
 
+			if (!port_isset(port) &&
+			    port_isset(engine->execlist_preempt))
+				switch_to_preempt(engine);
+
 			/* After the final element, the hw should be idle */
 			GEM_BUG_ON(port_count(port) == 0 &&
 				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
@@ -703,7 +767,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 		}
 	}
 
-	if (execlists_elsp_ready(engine))
+	if (!port_isset(engine->execlist_preempt))
 		execlists_dequeue(engine);
 
 	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
@@ -716,7 +780,7 @@ static void insert_request(struct intel_engine_cs *engine,
 	struct i915_priolist *p = lookup_priolist(engine, pt, prio);
 
 	list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
-	if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(engine))
+	if (ptr_unmask_bits(p, 1) && !port_isset(engine->execlist_preempt))
 		tasklet_hi_schedule(&engine->irq_tasklet);
 }
 
@@ -837,8 +901,6 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 	}
 
 	spin_unlock_irq(&engine->timeline->lock);
-
-	/* XXX Do we need to preempt to make room for us and our deps? */
 }
 
 static struct intel_ring *
@@ -1053,6 +1115,9 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 				       i915_ggtt_offset(engine->scratch) +
 				       2 * CACHELINE_BYTES);
 
+	/* WaDisableCtxRestoreArbitration:bdw,chv */
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
 	/* Pad to end of cacheline */
 	while ((unsigned long)batch % CACHELINE_BYTES)
 		*batch++ = MI_NOOP;
@@ -1077,8 +1142,6 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
  */
 static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
 {
-	/* WaDisableCtxRestoreArbitration:bdw,chv */
-	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 	*batch++ = MI_BATCH_BUFFER_END;
 
 	return batch;
@@ -1086,6 +1149,8 @@ static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
 
 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 {
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
 
@@ -1131,6 +1196,8 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 		*batch++ = 0;
 	}
 
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
 	/* Pad to end of cacheline */
 	while ((unsigned long)batch % CACHELINE_BYTES)
 		*batch++ = MI_NOOP;
@@ -1271,6 +1338,8 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 	engine->csb_head = -1;
 
+	GEM_BUG_ON(port_isset(engine->execlist_preempt));
+
 	submit = false;
 	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
 		if (!port_isset(&port[n]))
@@ -1286,7 +1355,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	}
 
 	if (submit && !i915.enable_guc_submission)
-		execlists_submit_ports(engine);
+		execlists_submit_ports(engine, port);
 
 	return 0;
 }
@@ -1340,6 +1409,10 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	 * guessing the missed context-switch events by looking at what
 	 * requests were completed.
 	 */
+
+	if (port_isset(engine->execlist_preempt))
+		switch_to_preempt(engine); /* XXX fubar XXX */
+
 	if (!request) {
 		for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
 			i915_gem_request_put(port_request(&port[n]));
@@ -1388,10 +1461,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	intel_ring_update_space(request->ring);
 
 	/* Reset WaIdleLiteRestore:bdw,skl as well */
-	request->tail =
-		intel_ring_wrap(request->ring,
-				request->wa_tail - WA_TAIL_DWORDS*sizeof(u32));
-	assert_ring_tail_valid(request->ring, request->tail);
+	unwind_wa_tail(request);
 }
 
 static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
@@ -1450,13 +1520,15 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
 	/* FIXME(BDW): Address space and security selectors. */
 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8)) |
 		(flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
 	*cs++ = lower_32_bits(offset);
 	*cs++ = upper_32_bits(offset);
-	*cs++ = MI_NOOP;
+
 	intel_ring_advance(req, cs);
 
 	return 0;
@@ -1585,8 +1657,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
  */
 static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs)
 {
-	*cs++ = MI_NOOP;
-	*cs++ = MI_NOOP;
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	*cs++ = MI_ARB_CHECK; /* preemption point *between* requests */
 	request->wa_tail = intel_ring_offset(request, cs);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a182da7eb9a9..109d64daf5d9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -387,7 +387,7 @@ struct intel_engine_cs {
 #define port_isset(p) ((p)->request_count)
 #define port_index(p, e) ((p) - (e)->execlist_port)
 		GEM_DEBUG_DECL(u32 context_id);
-	} execlist_port[2];
+	} execlist_port[2], execlist_preempt[2];
 	struct rb_root execlist_queue;
 	struct rb_node *execlist_first;
 	unsigned int fw_domains;
-- 
2.13.2


* ✓ Fi.CI.BAT: success for series starting with [RFC,1/4] drm/i915/execlists: Move insert_request()
  2017-07-17  8:42 [RFC 1/4] drm/i915/execlists: Move insert_request() Chris Wilson
                   ` (2 preceding siblings ...)
  2017-07-17  8:42 ` [RFC 4/4] drm/i915/execlists: Preemption! Chris Wilson
@ 2017-07-17  9:00 ` Patchwork
  2017-07-19 14:26 ` [RFC 1/4] " Michał Winiarski
  4 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2017-07-17  9:00 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [RFC,1/4] drm/i915/execlists: Move insert_request()
URL   : https://patchwork.freedesktop.org/series/27382/
State : success

== Summary ==

Series 27382v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/27382/revisions/1/mbox/

Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                pass       -> DMESG-WARN (fi-byt-j1900) fdo#101705 +1

fdo#101705 https://bugs.freedesktop.org/show_bug.cgi?id=101705

fi-bdw-5557u     total:279  pass:268  dwarn:0   dfail:0   fail:0   skip:11  time:450s
fi-bdw-gvtdvm    total:279  pass:265  dwarn:0   dfail:0   fail:0   skip:14  time:430s
fi-blb-e6850     total:279  pass:224  dwarn:1   dfail:0   fail:0   skip:54  time:355s
fi-bsw-n3050     total:279  pass:243  dwarn:0   dfail:0   fail:0   skip:36  time:537s
fi-bxt-j4205     total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:510s
fi-byt-j1900     total:279  pass:254  dwarn:1   dfail:0   fail:0   skip:24  time:488s
fi-byt-n2820     total:279  pass:251  dwarn:0   dfail:0   fail:0   skip:28  time:484s
fi-glk-2a        total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:595s
fi-hsw-4770      total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:435s
fi-hsw-4770r     total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:418s
fi-ilk-650       total:279  pass:229  dwarn:0   dfail:0   fail:0   skip:50  time:424s
fi-ivb-3520m     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:487s
fi-ivb-3770      total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:476s
fi-kbl-7500u     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:465s
fi-kbl-7560u     total:279  pass:268  dwarn:1   dfail:0   fail:0   skip:10  time:573s
fi-kbl-r         total:279  pass:260  dwarn:1   dfail:0   fail:0   skip:18  time:581s
fi-pnv-d510      total:279  pass:223  dwarn:1   dfail:0   fail:0   skip:55  time:574s
fi-skl-6260u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:457s
fi-skl-6700hq    total:279  pass:262  dwarn:0   dfail:0   fail:0   skip:17  time:589s
fi-skl-6700k     total:279  pass:257  dwarn:4   dfail:0   fail:0   skip:18  time:466s
fi-skl-6770hq    total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:483s
fi-skl-gvtdvm    total:279  pass:266  dwarn:0   dfail:0   fail:0   skip:13  time:435s
fi-skl-x1585l    total:279  pass:268  dwarn:0   dfail:0   fail:0   skip:11  time:478s
fi-snb-2520m     total:279  pass:251  dwarn:0   dfail:0   fail:0   skip:28  time:543s
fi-snb-2600      total:279  pass:250  dwarn:0   dfail:0   fail:0   skip:29  time:408s

0ea81ff8a340aa1f2bc863f4c8e631542e47c98e drm-tip: 2017y-07m-17d-07h-18m-34s UTC integration manifest
79664bf drm/i915/execlists: Split insert_request()
94f9ee5 drm/i915/execlists: Keep request->priority for its lifetime
b207421 drm/i915/execlists: Move insert_request()

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_5205/

* Re: [RFC 4/4] drm/i915/execlists: Preemption!
  2017-07-17  8:42 ` [RFC 4/4] drm/i915/execlists: Preemption! Chris Wilson
@ 2017-07-17  9:46   ` Chris Wilson
  2017-07-17 12:58     ` Chris Wilson
  0 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2017-07-17  9:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ben Widawsky, Mika Kuoppala

Quoting Chris Wilson (2017-07-17 09:42:35)
> @@ -503,6 +500,49 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>                 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
>                 struct drm_i915_gem_request *rq, *rn;
>  
> +               if (once) {
> +                       if (port_count(&port[0]) > 1)
> +                               goto done;
> +
> +                       if (p->priority > max(last->priotree.priority, 0)) {
> +                               list_for_each_entry_safe_reverse(rq, rn,
> +                                                                &engine->timeline->requests,
> +                                                                link) {
> +                                       struct i915_priolist *p;
> +
> +                                       if (i915_gem_request_completed(rq))
> +                                               break;
> +
> +                                       __i915_gem_request_unsubmit(rq);
> +                                       unwind_wa_tail(rq);

Fwiw, this is the flaw in this approach. Just as we decide to move the
request back to the execution queue, it may complete. If we then try to
re-execute it, its ring->tail will be less than RING_HEAD, telling the hw
to execute everything after it again.
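
To make the hazard concrete (hypothetical offsets; intel_ring_wrap()
just masks the offset into the ring):

	/* Ring of 4096 bytes; the hw has already advanced to
	 * RING_HEAD == 0xc00 when unwind_wa_tail() rewinds the
	 * now-completed request's tail to 0x800. Resubmitting with
	 * TAIL < HEAD makes the hw run from 0xc00, wrap, and replay
	 * everything up to 0x800 again.
	 */
	static inline u32 ring_wrap(u32 size, u32 offset)
	{
		return offset & (size - 1); /* size is a power of two */
	}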

Michal's approach was to use a preemptive switch to a dummy context and
then, once he knew the hw wasn't executing any of the other requests,
unsubmit them and recompute the desired order. I've yet to see
another solution for a cheaper barrier between hw/sw, as otherwise we
must deliberately insert a stall to do preemption. :|
-Chris

* Re: [RFC 4/4] drm/i915/execlists: Preemption!
  2017-07-17  9:46   ` Chris Wilson
@ 2017-07-17 12:58     ` Chris Wilson
  0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2017-07-17 12:58 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ben Widawsky, Mika Kuoppala

Quoting Chris Wilson (2017-07-17 10:46:23)
> Quoting Chris Wilson (2017-07-17 09:42:35)
> > @@ -503,6 +500,49 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >                 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> >                 struct drm_i915_gem_request *rq, *rn;
> >  
> > +               if (once) {
> > +                       if (port_count(&port[0]) > 1)
> > +                               goto done;
> > +
> > +                       if (p->priority > max(last->priotree.priority, 0)) {
> > +                               list_for_each_entry_safe_reverse(rq, rn,
> > +                                                                &engine->timeline->requests,
> > +                                                                link) {
> > +                                       struct i915_priolist *p;
> > +
> > +                                       if (i915_gem_request_completed(rq))
> > +                                               break;
> > +
> > +                                       __i915_gem_request_unsubmit(rq);
> > +                                       unwind_wa_tail(rq);
> 
> Fwiw, this is the flaw in this approach. Just as we decide to move the
> request back to the execution queue, it may complete. If we then try to
> re-execute it, its ring->tail will be less than RING_HEAD, telling the hw
> to execute everything after it again.
> 
> Michal's approach was to use a preemptive switch to a dummy context and
> then, once he knew the hw wasn't executing any of the other requests,
> unsubmit them and recompute the desired order. I've yet to see
> another solution for a cheaper barrier between hw/sw, as otherwise we
> must deliberately insert a stall to do preemption. :|

A first thought for closing the race is to ensure that the gpu doesn't
advance across a breadcrumb while we are processing the list:

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c712d01f92ab..18a08c701604 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -412,7 +412,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   MI_SEMAPHORE_TARGET(engine)  ((engine)<<15)
 #define MI_SEMAPHORE_WAIT      MI_INSTR(0x1c, 2) /* GEN8+ */
 #define   MI_SEMAPHORE_POLL            (1<<15)
+#define   MI_SEMAPHORE_SAD_GT_SDD      (0<<12)
 #define   MI_SEMAPHORE_SAD_GTE_SDD     (1<<12)
+#define   MI_SEMAPHORE_SAD_LT_SDD      (2<<12)
+#define   MI_SEMAPHORE_SAD_LTE_SDD     (3<<12)
+#define   MI_SEMAPHORE_SAD_EQ_SDD      (4<<12)
+#define   MI_SEMAPHORE_SAD_NEQ_SDD     (5<<12)
 #define MI_STORE_DWORD_IMM     MI_INSTR(0x20, 1)
 #define MI_STORE_DWORD_IMM_GEN4        MI_INSTR(0x20, 2)
 #define   MI_MEM_VIRTUAL       (1 << 22) /* 945,g33,965 */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fe037bb9644c..c84e485cb323 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -505,6 +505,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                goto done;
 
                        if (p->priority > max(last->priotree.priority, 0)) {
+                               u32 *gpu_sema = &engine->status_page.page_addr[I915_GEM_HWS_INDEX + 2];
+                               /* Suspend updates from the gpu */
+                               WRITE_ONCE(*gpu_sema, 1);
+                               readl(engine->i915->regs +
+                                     i915_mmio_reg_offset(RING_ACTHD(engine->mmio_base)));
+
                                list_for_each_entry_safe_reverse(rq, rn,
                                                                 &engine->timeline->requests,
                                                                 link) {
@@ -613,11 +619,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 static void switch_to_preempt(struct intel_engine_cs *engine)
 {
+       u32 *gpu_sema = &engine->status_page.page_addr[I915_GEM_HWS_INDEX + 2];
+
        memcpy(engine->execlist_port,
               engine->execlist_preempt,
               sizeof(engine->execlist_preempt));
        memset(engine->execlist_preempt, 0,
               sizeof(engine->execlist_preempt));
+
+       /* Restart updates from the gpu */
+       WRITE_ONCE(*gpu_sema, 0);
 }
 
 /*
@@ -1667,6 +1678,14 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
        /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
        BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
 
+       *cs++ = MI_SEMAPHORE_WAIT |
+               MI_SEMAPHORE_POLL |
+               MI_SEMAPHORE_GLOBAL_GTT |
+               MI_SEMAPHORE_SAD_EQ_SDD;
+       *cs++ = 0; /* continue if zero (preempt == 0) */
+       *cs++ = intel_hws_seqno_address(request->engine) + 8;
+       *cs++ = 0;
+
        *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
        *cs++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT;
        *cs++ = 0;
@@ -1679,7 +1698,7 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
        gen8_emit_wa_tail(request, cs);
 }
 
-static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
+static const int gen8_emit_breadcrumb_sz = 10 + WA_TAIL_DWORDS;
 
 static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
                                        u32 *cs)
@@ -1687,6 +1706,14 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
        /* We're using qword write, seqno should be aligned to 8 bytes. */
        BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
 
+       *cs++ = MI_SEMAPHORE_WAIT |
+               MI_SEMAPHORE_POLL |
+               MI_SEMAPHORE_GLOBAL_GTT |
+               MI_SEMAPHORE_SAD_EQ_SDD;
+       *cs++ = 0; /* continue if zero */
+       *cs++ = intel_hws_seqno_address(request->engine) + 8;
+       *cs++ = 0;
+
        /* w/a for post sync ops following a GPGPU operation we
         * need a prior CS_STALL, which is emitted by the flush
         * following the batch.
@@ -1707,7 +1734,7 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
        gen8_emit_wa_tail(request, cs);
 }
 
-static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;
+static const int gen8_emit_breadcrumb_render_sz = 12 + WA_TAIL_DWORDS;
 
 static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
 {

This cuts the race down to a concurrent memory op between cpu/gpu. The
mmio read there is to hand-wave that away by delaying just long enough
for the GPU write to complete. Though even a no-op MI_SEMAPHORE_WAIT
has a just-measurable impact on no-op batches.
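
Condensed, the cpu side of the handshake in the diff above is:

	u32 *gpu_sema = &engine->status_page.page_addr[I915_GEM_HWS_INDEX + 2];

	/* Raise the semaphore: the MI_SEMAPHORE_WAIT emitted before
	 * each breadcrumb now blocks, so no request can complete while
	 * we unwind and reorder the execution lists.
	 */
	WRITE_ONCE(*gpu_sema, 1);

	/* ... unsubmit / recompute order / resubmit ... */

	/* Release the gpu once the new order is in the ports. */
	WRITE_ONCE(*gpu_sema, 0);
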
-Chris

* Re: [RFC 3/4] drm/i915/execlists: Split insert_request()
  2017-07-17  8:42 ` [RFC 3/4] drm/i915/execlists: Split insert_request() Chris Wilson
@ 2017-07-19 14:23   ` Michał Winiarski
  2017-07-19 14:37     ` Chris Wilson
  0 siblings, 1 reply; 12+ messages in thread
From: Michał Winiarski @ 2017-07-19 14:23 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Mon, Jul 17, 2017 at 09:42:34AM +0100, Chris Wilson wrote:
> In the next patch we will want to reinsert a request not at the end of
> the priority queue, but at the front. Here we split insert_request()
> into two: the first function retrieves the priority list (for later
> reuse by unsubmit), and a wrapper function inserts at the end of that
> list and schedules the tasklet if we were first.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 35 +++++++++++++++++++----------------
>  1 file changed, 19 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 8ab0c4b76c98..d227480b3a26 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -292,10 +292,10 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
>  	return ctx->engine[engine->id].lrc_desc;
>  }
>  
> -static bool
> -insert_request(struct intel_engine_cs *engine,
> -	       struct i915_priotree *pt,
> -	       int prio)
> +static struct i915_priolist *
> +lookup_priolist(struct intel_engine_cs *engine,
> +		struct i915_priotree *pt,
> +		int prio)
>  {
>  	struct i915_priolist *p;
>  	struct rb_node **parent, *rb;
> @@ -317,8 +317,7 @@ insert_request(struct intel_engine_cs *engine,
>  			parent = &rb->rb_right;
>  			first = false;
>  		} else {
> -			list_add_tail(&pt->link, &p->requests);
> -			return false;
> +			return p;
>  		}
>  	}
>  
> @@ -344,16 +343,14 @@ insert_request(struct intel_engine_cs *engine,
>  	}
>  
>  	p->priority = prio;
> +	INIT_LIST_HEAD(&p->requests);
>  	rb_link_node(&p->node, rb, parent);
>  	rb_insert_color(&p->node, &engine->execlist_queue);
>  
> -	INIT_LIST_HEAD(&p->requests);
> -	list_add_tail(&pt->link, &p->requests);
> -
>  	if (first)
>  		engine->execlist_first = &p->node;
>  
> -	return first;
> +	return ptr_pack_bits(p, first, 1);

Matches what I have in my tree, except for "first" being hidden in the
pointer. Can we #define the bit where we're keeping the "first" status?
That way we can immediately see what's going on in the callers.
Or at least add a comment...

With that:

Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>

While this is an RFC, I think 1-3 make the code clearer and could be
pushed as-is (perhaps this one with a slightly amended commit message,
s/next patch/future/).

-Michał

>  }
>  
>  static inline void
> @@ -712,6 +709,17 @@ static void intel_lrc_irq_handler(unsigned long data)
>  	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
>  }
>  
> +static void insert_request(struct intel_engine_cs *engine,
> +			   struct i915_priotree *pt,
> +			   int prio)
> +{
> +	struct i915_priolist *p = lookup_priolist(engine, pt, prio);
> +
> +	list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
> +	if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(engine))
> +		tasklet_hi_schedule(&engine->irq_tasklet);
> +}
> +
>  static void execlists_submit_request(struct drm_i915_gem_request *request)
>  {
>  	struct intel_engine_cs *engine = request->engine;
> @@ -720,12 +728,7 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
>  	/* Will be called from irq-context when using foreign fences. */
>  	spin_lock_irqsave(&engine->timeline->lock, flags);
>  
> -	if (insert_request(engine,
> -			   &request->priotree,
> -			   request->priotree.priority)) {
> -		if (execlists_elsp_ready(engine))
> -			tasklet_hi_schedule(&engine->irq_tasklet);
> -	}
> +	insert_request(engine, &request->priotree, request->priotree.priority);
>  
>  	GEM_BUG_ON(!engine->execlist_first);
>  	GEM_BUG_ON(list_empty(&request->priotree.link));
> -- 
> 2.13.2
> 

* Re: [RFC 2/4] drm/i915/execlists: Keep request->priority for its lifetime
  2017-07-17  8:42 ` [RFC 2/4] drm/i915/execlists: Keep request->priority for its lifetime Chris Wilson
@ 2017-07-19 14:25   ` Michał Winiarski
  2017-07-19 14:33     ` Chris Wilson
  0 siblings, 1 reply; 12+ messages in thread
From: Michał Winiarski @ 2017-07-19 14:25 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Mon, Jul 17, 2017 at 09:42:33AM +0100, Chris Wilson wrote:
> With preemption, we will want to "unsubmit" a request, taking it back
> from the hw and returning it to the priority sorted execution list. In
> order to know where to insert it into that list, we need to remember
> its adjusted priority (which may change even while it is executing).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michal Winiarski <michal.winiarski@intel.com>

We should also change GuC dequeue/irq_handler.

With that:

Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>

-Michał

> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 1b2f0e3d383a..8ab0c4b76c98 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -552,8 +552,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  			}
>  
>  			INIT_LIST_HEAD(&rq->priotree.link);
> -			rq->priotree.priority = INT_MAX;
> -
>  			__i915_gem_request_submit(rq);
>  			trace_i915_gem_request_in(rq, port_index(port, engine));
>  			last = rq;
> @@ -687,6 +685,7 @@ static void intel_lrc_irq_handler(unsigned long data)
>  				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
>  
>  				trace_i915_gem_request_out(rq);
> +				rq->priotree.priority = INT_MAX;
>  				i915_gem_request_put(rq);
>  
>  				port[0] = port[1];
> -- 
> 2.13.2
> 

* Re: [RFC 1/4] drm/i915/execlists: Move insert_request()
  2017-07-17  8:42 [RFC 1/4] drm/i915/execlists: Move insert_request() Chris Wilson
                   ` (3 preceding siblings ...)
  2017-07-17  9:00 ` ✓ Fi.CI.BAT: success for series starting with [RFC,1/4] drm/i915/execlists: Move insert_request() Patchwork
@ 2017-07-19 14:26 ` Michał Winiarski
  4 siblings, 0 replies; 12+ messages in thread
From: Michał Winiarski @ 2017-07-19 14:26 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Mon, Jul 17, 2017 at 09:42:32AM +0100, Chris Wilson wrote:
> Move insert_request() earlier to avoid a forward declaration in a later
> patch.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>

-Michał

> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 128 +++++++++++++++++++--------------------
>  1 file changed, 64 insertions(+), 64 deletions(-)

* Re: [RFC 2/4] drm/i915/execlists: Keep request->priority for its lifetime
  2017-07-19 14:25   ` Michał Winiarski
@ 2017-07-19 14:33     ` Chris Wilson
  0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2017-07-19 14:33 UTC (permalink / raw)
  To: Michał Winiarski; +Cc: intel-gfx

Quoting Michał Winiarski (2017-07-19 15:25:51)
> On Mon, Jul 17, 2017 at 09:42:33AM +0100, Chris Wilson wrote:
> > With preemption, we will want to "unsubmit" a request, taking it back
> > from the hw and returning it to the priority sorted execution list. In
> > order to know where to insert it into that list, we need to remember
> > its adjusted priority (which may change even while it is executing).
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Michal Winiarski <michal.winiarski@intel.com>
> 
> We should also change GuC dequeue/irq_handler.

I wasn't sure that was necessary, as the current shortcut of sealing
the priority once submitted still applies to guc dequeue, since it
currently doesn't unsubmit. (There wasn't much point in penalising the
guc until it was ready.)
-Chris

* Re: [RFC 3/4] drm/i915/execlists: Split insert_request()
  2017-07-19 14:23   ` Michał Winiarski
@ 2017-07-19 14:37     ` Chris Wilson
  0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2017-07-19 14:37 UTC (permalink / raw)
  To: Michał Winiarski; +Cc: intel-gfx

Quoting Michał Winiarski (2017-07-19 15:23:47)
> On Mon, Jul 17, 2017 at 09:42:34AM +0100, Chris Wilson wrote:
> Matches what I have in my tree, except for "first" being hidden in the
> pointer. Can we #define the bit where we're keeping the "first" status?
> That way we can immediately see what's going on in the callers.

At least give me a name for that bit!

#define PRIOLIST_FLAGS_MASK 0x1
#define PRIOLIST_FIRST 0x1

Never going to fit in 80cols! :(
-Chris
