All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts
@ 2018-06-27 21:07 Chris Wilson
  2018-06-27 21:07 ` [PATCH 2/9] drm/i915/execlists: Pull submit after dequeue under timeline lock Chris Wilson
                   ` (14 more replies)
  0 siblings, 15 replies; 34+ messages in thread
From: Chris Wilson @ 2018-06-27 21:07 UTC (permalink / raw)
  To: intel-gfx

We do not need to do a posting read of our uncached mmio write to
re-enable the master interrupt lines after handling an interrupt, so
don't. This saves us a slow UC read before we can process the interrupt,
most noticeable in execlists where any stalls imposes extra latency on
GPU command execution.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 46aaef5c1851..0bd8b4df1bff 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2131,7 +2131,6 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
 
 		I915_WRITE(VLV_IER, ier);
 		I915_WRITE(VLV_MASTER_IER, MASTER_INTERRUPT_ENABLE);
-		POSTING_READ(VLV_MASTER_IER);
 
 		if (gt_iir)
 			snb_gt_irq_handler(dev_priv, gt_iir);
@@ -2216,7 +2215,6 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
 
 		I915_WRITE(VLV_IER, ier);
 		I915_WRITE(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
-		POSTING_READ(GEN8_MASTER_IRQ);
 
 		gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
 
@@ -2548,7 +2546,6 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg)
 	/* disable master interrupt before clearing iir  */
 	de_ier = I915_READ(DEIER);
 	I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL);
-	POSTING_READ(DEIER);
 
 	/* Disable south interrupts. We'll only write to SDEIIR once, so further
 	 * interrupts will will be stored on its back queue, and then we'll be
@@ -2558,7 +2555,6 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg)
 	if (!HAS_PCH_NOP(dev_priv)) {
 		sde_ier = I915_READ(SDEIER);
 		I915_WRITE(SDEIER, 0);
-		POSTING_READ(SDEIER);
 	}
 
 	/* Find, clear, then process each source of interrupt */
@@ -2593,11 +2589,8 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg)
 	}
 
 	I915_WRITE(DEIER, de_ier);
-	POSTING_READ(DEIER);
-	if (!HAS_PCH_NOP(dev_priv)) {
+	if (!HAS_PCH_NOP(dev_priv))
 		I915_WRITE(SDEIER, sde_ier);
-		POSTING_READ(SDEIER);
-	}
 
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
 	enable_rpm_wakeref_asserts(dev_priv);
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 2/9] drm/i915/execlists: Pull submit after dequeue under timeline lock
  2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
@ 2018-06-27 21:07 ` Chris Wilson
  2018-06-27 21:07 ` [PATCH 3/9] drm/i915/execlists: Pull CSB reset under the timeline.lock Chris Wilson
                   ` (13 subsequent siblings)
  14 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2018-06-27 21:07 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will begin processing the CSB from inside the
submission path (underneath an irqsoff section, and even from inside
interrupt handlers). This means that updating the execlists->port[] will
no longer be serialised by the tasklet but needs to be locked by the
engine->timeline.lock instead. Pull dequeue and submit under the same
lock for protection. (An alternate future plan is to keep the in/out
arrays separate for concurrent processing and reduced lock coverage.)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 2b21a6596360..009db92b67d7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -567,7 +567,7 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
-static bool __execlists_dequeue(struct intel_engine_cs *engine)
+static void __execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
@@ -622,11 +622,11 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 		 * the HW to indicate that it has had a chance to respond.
 		 */
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
-			return false;
+			return;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
 			inject_preempt_context(engine);
-			return false;
+			return;
 		}
 
 		/*
@@ -651,7 +651,7 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 		 * priorities of the ports haven't been switch.
 		 */
 		if (port_count(&port[1]))
-			return false;
+			return;
 
 		/*
 		 * WaIdleLiteRestore:bdw,skl
@@ -751,8 +751,10 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 		port != execlists->port ? rq_prio(last) : INT_MIN;
 
 	execlists->first = rb;
-	if (submit)
+	if (submit) {
 		port_assign(port, last);
+		execlists_submit_ports(engine);
+	}
 
 	/* We must always keep the beast fed if we have work piled up */
 	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
@@ -761,24 +763,19 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 	if (last)
 		execlists_user_begin(execlists, execlists->port);
 
-	return submit;
+	/* If the engine is now idle, so should be the flag; and vice versa. */
+	GEM_BUG_ON(execlists_is_active(&engine->execlists,
+				       EXECLISTS_ACTIVE_USER) ==
+		   !port_isset(engine->execlists.port));
 }
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct intel_engine_execlists * const execlists = &engine->execlists;
 	unsigned long flags;
-	bool submit;
 
 	spin_lock_irqsave(&engine->timeline.lock, flags);
-	submit = __execlists_dequeue(engine);
+	__execlists_dequeue(engine);
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
-	if (submit)
-		execlists_submit_ports(engine);
-
-	GEM_BUG_ON(port_isset(execlists->port) &&
-		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
 }
 
 void
@@ -1161,11 +1158,6 @@ static void execlists_submission_tasklet(unsigned long data)
 
 	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
 		execlists_dequeue(engine);
-
-	/* If the engine is now idle, so should be the flag; and vice versa. */
-	GEM_BUG_ON(execlists_is_active(&engine->execlists,
-				       EXECLISTS_ACTIVE_USER) ==
-		   !port_isset(engine->execlists.port));
 }
 
 static void queue_request(struct intel_engine_cs *engine,
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 3/9] drm/i915/execlists: Pull CSB reset under the timeline.lock
  2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
  2018-06-27 21:07 ` [PATCH 2/9] drm/i915/execlists: Pull submit after dequeue under timeline lock Chris Wilson
@ 2018-06-27 21:07 ` Chris Wilson
  2018-06-27 21:07 ` [PATCH 4/9] drm/i915/execlists: Process one CSB update at a time Chris Wilson
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2018-06-27 21:07 UTC (permalink / raw)
  To: intel-gfx

In the following patch, we will process the CSB events under the
timeline.lock and not serialised by the tasklet. This also means that we
will need to protect access to common variables such as
execlists->csb_head with the timeline.lock during reset.

v2: Move sync_irq to avoid deadlocks between taking timeline.lock from
our interrupt handler.
v3: Kill off the synchronize_hardirq as it raises more questions than
answered; now we use the timeline.lock entirely for CSB serialisation
between the irq and elsewhere, we don't need to be so heavy handed with
flushing
v4: Treat request cancellation (wedging after failed reset) similarly

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 009db92b67d7..4b31e8f42aeb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -871,7 +871,6 @@ static void reset_irq(struct intel_engine_cs *engine)
 {
 	/* Mark all CS interrupts as complete */
 	smp_store_mb(engine->execlists.active, 0);
-	synchronize_hardirq(engine->i915->drm.irq);
 
 	clear_gtiir(engine);
 
@@ -908,14 +907,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	 * submission's irq state, we also wish to remind ourselves that
 	 * it is irq state.)
 	 */
-	local_irq_save(flags);
+	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	/* Cancel the requests on the HW and clear the ELSP tracker. */
 	execlists_cancel_port_requests(execlists);
 	reset_irq(engine);
 
-	spin_lock(&engine->timeline.lock);
-
 	/* Mark all executing requests as skipped. */
 	list_for_each_entry(rq, &engine->timeline.requests, link) {
 		GEM_BUG_ON(!rq->global_seqno);
@@ -949,9 +946,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	execlists->first = NULL;
 	GEM_BUG_ON(port_isset(execlists->port));
 
-	spin_unlock(&engine->timeline.lock);
-
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static void process_csb(struct intel_engine_cs *engine)
@@ -1969,8 +1964,7 @@ static void execlists_reset(struct intel_engine_cs *engine,
 		  engine->name, request ? request->global_seqno : 0,
 		  intel_engine_get_seqno(engine));
 
-	/* See execlists_cancel_requests() for the irq/spinlock split. */
-	local_irq_save(flags);
+	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	/*
 	 * Catch up with any missed context-switch interrupts.
@@ -1985,14 +1979,12 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	reset_irq(engine);
 
 	/* Push back any incomplete requests for replay after the reset. */
-	spin_lock(&engine->timeline.lock);
 	__unwind_incomplete_requests(engine);
-	spin_unlock(&engine->timeline.lock);
 
 	/* Following the reset, we need to reload the CSB read/write pointers */
 	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
 
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 
 	/*
 	 * If the request was innocent, we leave the request in the ELSP
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 4/9] drm/i915/execlists: Process one CSB update at a time
  2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
  2018-06-27 21:07 ` [PATCH 2/9] drm/i915/execlists: Pull submit after dequeue under timeline lock Chris Wilson
  2018-06-27 21:07 ` [PATCH 3/9] drm/i915/execlists: Pull CSB reset under the timeline.lock Chris Wilson
@ 2018-06-27 21:07 ` Chris Wilson
  2018-06-27 21:07 ` [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers Chris Wilson
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2018-06-27 21:07 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will process the CSB events directly from the
submission path, rather than only after a CS interrupt. Hence, we will
no longer have the need for a loop until the has-interrupt bit is clear,
and in the meantime can remove that small optimisation.

v2: Tvrtko pointed out it was safer to unconditionally kick the tasklet
after each irq, when assuming that the tasklet is called for each irq.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c  |   7 +-
 drivers/gpu/drm/i915/intel_lrc.c | 278 +++++++++++++++----------------
 2 files changed, 141 insertions(+), 144 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 0bd8b4df1bff..3702992f9f75 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1473,9 +1473,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 	bool tasklet = false;
 
 	if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
-		if (READ_ONCE(engine->execlists.active))
-			tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
-						    &engine->irq_posted);
+		if (READ_ONCE(engine->execlists.active)) {
+			set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+			tasklet = true;
+		}
 	}
 
 	if (iir & GT_RENDER_USER_INTERRUPT) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4b31e8f42aeb..91656eb2f2db 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -954,166 +954,162 @@ static void process_csb(struct intel_engine_cs *engine)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
 	struct drm_i915_private *i915 = engine->i915;
+
+	/* The HWSP contains a (cacheable) mirror of the CSB */
+	const u32 *buf =
+		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+	unsigned int head, tail;
 	bool fw = false;
 
-	do {
-		/* The HWSP contains a (cacheable) mirror of the CSB */
-		const u32 *buf =
-			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
-		unsigned int head, tail;
-
-		/* Clear before reading to catch new interrupts */
-		clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-		smp_mb__after_atomic();
-
-		if (unlikely(execlists->csb_use_mmio)) {
-			if (!fw) {
-				intel_uncore_forcewake_get(i915, execlists->fw_domains);
-				fw = true;
-			}
+	/* Clear before reading to catch new interrupts */
+	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+	smp_mb__after_atomic();
 
-			buf = (u32 * __force)
-				(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+	if (unlikely(execlists->csb_use_mmio)) {
+		intel_uncore_forcewake_get(i915, execlists->fw_domains);
+		fw = true;
 
-			head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-			tail = GEN8_CSB_WRITE_PTR(head);
-			head = GEN8_CSB_READ_PTR(head);
-			execlists->csb_head = head;
-		} else {
-			const int write_idx =
-				intel_hws_csb_write_index(i915) -
-				I915_HWS_CSB_BUF0_INDEX;
+		buf = (u32 * __force)
+			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
 
-			head = execlists->csb_head;
-			tail = READ_ONCE(buf[write_idx]);
-			rmb(); /* Hopefully paired with a wmb() in HW */
-		}
-		GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
-			  engine->name,
-			  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
-			  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+		head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+		tail = GEN8_CSB_WRITE_PTR(head);
+		head = GEN8_CSB_READ_PTR(head);
+		execlists->csb_head = head;
+	} else {
+		const int write_idx =
+			intel_hws_csb_write_index(i915) -
+			I915_HWS_CSB_BUF0_INDEX;
 
-		while (head != tail) {
-			struct i915_request *rq;
-			unsigned int status;
-			unsigned int count;
+		head = execlists->csb_head;
+		tail = READ_ONCE(buf[write_idx]);
+		rmb(); /* Hopefully paired with a wmb() in HW */
+	}
+	GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
+		  engine->name,
+		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
+		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
 
-			if (++head == GEN8_CSB_ENTRIES)
-				head = 0;
+	while (head != tail) {
+		struct i915_request *rq;
+		unsigned int status;
+		unsigned int count;
 
-			/*
-			 * We are flying near dragons again.
-			 *
-			 * We hold a reference to the request in execlist_port[]
-			 * but no more than that. We are operating in softirq
-			 * context and so cannot hold any mutex or sleep. That
-			 * prevents us stopping the requests we are processing
-			 * in port[] from being retired simultaneously (the
-			 * breadcrumb will be complete before we see the
-			 * context-switch). As we only hold the reference to the
-			 * request, any pointer chasing underneath the request
-			 * is subject to a potential use-after-free. Thus we
-			 * store all of the bookkeeping within port[] as
-			 * required, and avoid using unguarded pointers beneath
-			 * request itself. The same applies to the atomic
-			 * status notifier.
-			 */
+		if (++head == GEN8_CSB_ENTRIES)
+			head = 0;
 
-			status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
-			GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
-				  engine->name, head,
-				  status, buf[2*head + 1],
-				  execlists->active);
-
-			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-				      GEN8_CTX_STATUS_PREEMPTED))
-				execlists_set_active(execlists,
-						     EXECLISTS_ACTIVE_HWACK);
-			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-				execlists_clear_active(execlists,
-						       EXECLISTS_ACTIVE_HWACK);
-
-			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-				continue;
+		/*
+		 * We are flying near dragons again.
+		 *
+		 * We hold a reference to the request in execlist_port[]
+		 * but no more than that. We are operating in softirq
+		 * context and so cannot hold any mutex or sleep. That
+		 * prevents us stopping the requests we are processing
+		 * in port[] from being retired simultaneously (the
+		 * breadcrumb will be complete before we see the
+		 * context-switch). As we only hold the reference to the
+		 * request, any pointer chasing underneath the request
+		 * is subject to a potential use-after-free. Thus we
+		 * store all of the bookkeeping within port[] as
+		 * required, and avoid using unguarded pointers beneath
+		 * request itself. The same applies to the atomic
+		 * status notifier.
+		 */
 
-			/* We should never get a COMPLETED | IDLE_ACTIVE! */
-			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+		status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
+		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+			  engine->name, head,
+			  status, buf[2*head + 1],
+			  execlists->active);
+
+		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+			      GEN8_CTX_STATUS_PREEMPTED))
+			execlists_set_active(execlists,
+					     EXECLISTS_ACTIVE_HWACK);
+		if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+			execlists_clear_active(execlists,
+					       EXECLISTS_ACTIVE_HWACK);
+
+		if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+			continue;
 
-			if (status & GEN8_CTX_STATUS_COMPLETE &&
-			    buf[2*head + 1] == execlists->preempt_complete_status) {
-				GEM_TRACE("%s preempt-idle\n", engine->name);
-				complete_preempt_context(execlists);
-				continue;
-			}
+		/* We should never get a COMPLETED | IDLE_ACTIVE! */
+		GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
 
-			if (status & GEN8_CTX_STATUS_PREEMPTED &&
-			    execlists_is_active(execlists,
-						EXECLISTS_ACTIVE_PREEMPT))
-				continue;
+		if (status & GEN8_CTX_STATUS_COMPLETE &&
+		    buf[2*head + 1] == execlists->preempt_complete_status) {
+			GEM_TRACE("%s preempt-idle\n", engine->name);
+			complete_preempt_context(execlists);
+			continue;
+		}
 
-			GEM_BUG_ON(!execlists_is_active(execlists,
-							EXECLISTS_ACTIVE_USER));
+		if (status & GEN8_CTX_STATUS_PREEMPTED &&
+		    execlists_is_active(execlists,
+					EXECLISTS_ACTIVE_PREEMPT))
+			continue;
 
-			rq = port_unpack(port, &count);
-			GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
-				  engine->name,
-				  port->context_id, count,
-				  rq ? rq->global_seqno : 0,
-				  rq ? rq->fence.context : 0,
-				  rq ? rq->fence.seqno : 0,
-				  intel_engine_get_seqno(engine),
-				  rq ? rq_prio(rq) : 0);
+		GEM_BUG_ON(!execlists_is_active(execlists,
+						EXECLISTS_ACTIVE_USER));
 
-			/* Check the context/desc id for this event matches */
-			GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+		rq = port_unpack(port, &count);
+		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
+			  engine->name,
+			  port->context_id, count,
+			  rq ? rq->global_seqno : 0,
+			  rq ? rq->fence.context : 0,
+			  rq ? rq->fence.seqno : 0,
+			  intel_engine_get_seqno(engine),
+			  rq ? rq_prio(rq) : 0);
+
+		/* Check the context/desc id for this event matches */
+		GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+
+		GEM_BUG_ON(count == 0);
+		if (--count == 0) {
+			/*
+			 * On the final event corresponding to the
+			 * submission of this context, we expect either
+			 * an element-switch event or a completion
+			 * event (and on completion, the active-idle
+			 * marker). No more preemptions, lite-restore
+			 * or otherwise.
+			 */
+			GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+			GEM_BUG_ON(port_isset(&port[1]) &&
+				   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
+			GEM_BUG_ON(!port_isset(&port[1]) &&
+				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 
-			GEM_BUG_ON(count == 0);
-			if (--count == 0) {
-				/*
-				 * On the final event corresponding to the
-				 * submission of this context, we expect either
-				 * an element-switch event or a completion
-				 * event (and on completion, the active-idle
-				 * marker). No more preemptions, lite-restore
-				 * or otherwise.
-				 */
-				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-				GEM_BUG_ON(port_isset(&port[1]) &&
-					   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
-				GEM_BUG_ON(!port_isset(&port[1]) &&
-					   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+			/*
+			 * We rely on the hardware being strongly
+			 * ordered, that the breadcrumb write is
+			 * coherent (visible from the CPU) before the
+			 * user interrupt and CSB is processed.
+			 */
+			GEM_BUG_ON(!i915_request_completed(rq));
 
-				/*
-				 * We rely on the hardware being strongly
-				 * ordered, that the breadcrumb write is
-				 * coherent (visible from the CPU) before the
-				 * user interrupt and CSB is processed.
-				 */
-				GEM_BUG_ON(!i915_request_completed(rq));
-
-				execlists_context_schedule_out(rq,
-							       INTEL_CONTEXT_SCHEDULE_OUT);
-				i915_request_put(rq);
-
-				GEM_TRACE("%s completed ctx=%d\n",
-					  engine->name, port->context_id);
-
-				port = execlists_port_complete(execlists, port);
-				if (port_isset(port))
-					execlists_user_begin(execlists, port);
-				else
-					execlists_user_end(execlists);
-			} else {
-				port_set(port, port_pack(rq, count));
-			}
-		}
+			execlists_context_schedule_out(rq,
+						       INTEL_CONTEXT_SCHEDULE_OUT);
+			i915_request_put(rq);
 
-		if (head != execlists->csb_head) {
-			execlists->csb_head = head;
-			writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
-			       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+			GEM_TRACE("%s completed ctx=%d\n",
+				  engine->name, port->context_id);
+
+			port = execlists_port_complete(execlists, port);
+			if (port_isset(port))
+				execlists_user_begin(execlists, port);
+			else
+				execlists_user_end(execlists);
+		} else {
+			port_set(port, port_pack(rq, count));
 		}
-	} while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
+	}
+
+	if (head != execlists->csb_head) {
+		execlists->csb_head = head;
+		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+		       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+	}
 
 	if (unlikely(fw))
 		intel_uncore_forcewake_put(i915, execlists->fw_domains);
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers
  2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
                   ` (2 preceding siblings ...)
  2018-06-27 21:07 ` [PATCH 4/9] drm/i915/execlists: Process one CSB update at a time Chris Wilson
@ 2018-06-27 21:07 ` Chris Wilson
  2018-06-28 10:02   ` Tvrtko Ursulin
  2018-06-28 11:13   ` [PATCH v2] " Chris Wilson
  2018-06-27 21:07 ` [PATCH 6/9] drm/i915/execlists: Reset CSB write pointer after reset Chris Wilson
                   ` (10 subsequent siblings)
  14 siblings, 2 replies; 34+ messages in thread
From: Chris Wilson @ 2018-06-27 21:07 UTC (permalink / raw)
  To: intel-gfx

Following the removal of the last workarounds, the only CSB mmio access
is for the old vGPU interface. The mmio registers presented by vGPU do
not require forcewake and can be treated as ordinary volatile memory,
i.e. they behave just like the HWSP access just at a different location.
We can reduce the CSB access to a set of read/write/buffer pointers and
treat the various paths identically and not worry about forcewake.
(Forcewake is nightmare for worstcase latency, and we want to process
this all with irqsoff -- no latency allowed!)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c  |  12 ---
 drivers/gpu/drm/i915/intel_lrc.c        | 116 ++++++++++--------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  23 +++--
 3 files changed, 65 insertions(+), 86 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index d3264bd6e9dc..7209c22798e6 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -25,7 +25,6 @@
 #include <drm/drm_print.h>
 
 #include "i915_drv.h"
-#include "i915_vgpu.h"
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
 
@@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
 	i915_gem_batch_pool_init(&engine->batch_pool, engine);
 }
 
-static bool csb_force_mmio(struct drm_i915_private *i915)
-{
-	/* Older GVT emulation depends upon intercepting CSB mmio */
-	if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
-		return true;
-
-	return false;
-}
-
 static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 
-	execlists->csb_use_mmio = csb_force_mmio(engine->i915);
-
 	execlists->port_mask = 1;
 	BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 91656eb2f2db..368a8c74d11d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -137,6 +137,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
+#include "i915_vgpu.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_workarounds.h"
@@ -953,44 +954,23 @@ static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
-	struct drm_i915_private *i915 = engine->i915;
-
-	/* The HWSP contains a (cacheable) mirror of the CSB */
-	const u32 *buf =
-		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
-	unsigned int head, tail;
-	bool fw = false;
+	const u32 * const buf = execlists->csb_status;
+	u8 head, tail;
 
 	/* Clear before reading to catch new interrupts */
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 	smp_mb__after_atomic();
 
-	if (unlikely(execlists->csb_use_mmio)) {
-		intel_uncore_forcewake_get(i915, execlists->fw_domains);
-		fw = true;
-
-		buf = (u32 * __force)
-			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+	/* Note that csb_write, csb_status may be either in HWSP or mmio */
+	head = execlists->csb_head;
+	tail = READ_ONCE(*execlists->csb_write);
+	GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
+	if (unlikely(head == tail))
+		return;
 
-		head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-		tail = GEN8_CSB_WRITE_PTR(head);
-		head = GEN8_CSB_READ_PTR(head);
-		execlists->csb_head = head;
-	} else {
-		const int write_idx =
-			intel_hws_csb_write_index(i915) -
-			I915_HWS_CSB_BUF0_INDEX;
+	rmb(); /* Hopefully paired with a wmb() in HW */
 
-		head = execlists->csb_head;
-		tail = READ_ONCE(buf[write_idx]);
-		rmb(); /* Hopefully paired with a wmb() in HW */
-	}
-	GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
-		  engine->name,
-		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
-		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
-
-	while (head != tail) {
+	do {
 		struct i915_request *rq;
 		unsigned int status;
 		unsigned int count;
@@ -1016,12 +996,12 @@ static void process_csb(struct intel_engine_cs *engine)
 		 * status notifier.
 		 */
 
-		status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
 		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
 			  engine->name, head,
-			  status, buf[2*head + 1],
+			  buf[2 * head + 0], buf[2 * head + 1],
 			  execlists->active);
 
+		status = buf[2 * head];
 		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
 			      GEN8_CTX_STATUS_PREEMPTED))
 			execlists_set_active(execlists,
@@ -1103,16 +1083,11 @@ static void process_csb(struct intel_engine_cs *engine)
 		} else {
 			port_set(port, port_pack(rq, count));
 		}
-	}
+	} while (head != tail);
 
-	if (head != execlists->csb_head) {
-		execlists->csb_head = head;
-		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
-		       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-	}
-
-	if (unlikely(fw))
-		intel_uncore_forcewake_put(i915, execlists->fw_domains);
+	writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+	       execlists->csb_read);
+	execlists->csb_head = head;
 }
 
 /*
@@ -2430,28 +2405,11 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 static void
 logical_ring_setup(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-	enum forcewake_domains fw_domains;
-
 	intel_engine_setup_common(engine);
 
 	/* Intentionally left blank. */
 	engine->buffer = NULL;
 
-	fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
-						    RING_ELSP(engine),
-						    FW_REG_WRITE);
-
-	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
-						     RING_CONTEXT_STATUS_PTR(engine),
-						     FW_REG_READ | FW_REG_WRITE);
-
-	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
-						     RING_CONTEXT_STATUS_BUF_BASE(engine),
-						     FW_REG_READ);
-
-	engine->execlists.fw_domains = fw_domains;
-
 	tasklet_init(&engine->execlists.tasklet,
 		     execlists_submission_tasklet, (unsigned long)engine);
 
@@ -2459,34 +2417,56 @@ logical_ring_setup(struct intel_engine_cs *engine)
 	logical_ring_default_irqs(engine);
 }
 
+static bool csb_force_mmio(struct drm_i915_private *i915)
+{
+	/* Older GVT emulation depends upon intercepting CSB mmio */
+	return intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915);
+}
+
 static int logical_ring_init(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_engine_execlists * const execlists = &engine->execlists;
 	int ret;
 
 	ret = intel_engine_init_common(engine);
 	if (ret)
 		goto error;
 
-	if (HAS_LOGICAL_RING_ELSQ(engine->i915)) {
-		engine->execlists.submit_reg = engine->i915->regs +
+	if (HAS_LOGICAL_RING_ELSQ(i915)) {
+		execlists->submit_reg = i915->regs +
 			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine));
-		engine->execlists.ctrl_reg = engine->i915->regs +
+		execlists->ctrl_reg = i915->regs +
 			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine));
 	} else {
-		engine->execlists.submit_reg = engine->i915->regs +
+		execlists->submit_reg = i915->regs +
 			i915_mmio_reg_offset(RING_ELSP(engine));
 	}
 
-	engine->execlists.preempt_complete_status = ~0u;
-	if (engine->i915->preempt_context) {
+	execlists->preempt_complete_status = ~0u;
+	if (i915->preempt_context) {
 		struct intel_context *ce =
-			to_intel_context(engine->i915->preempt_context, engine);
+			to_intel_context(i915->preempt_context, engine);
 
-		engine->execlists.preempt_complete_status =
+		execlists->preempt_complete_status =
 			upper_32_bits(ce->lrc_desc);
 	}
 
-	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
+	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
+	execlists->csb_read =
+		i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
+	if (csb_force_mmio(i915)) {
+		execlists->csb_status = (u32 __force *)
+			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+
+		execlists->csb_write = (u32 __force *)execlists->csb_read;
+	} else {
+		execlists->csb_status =
+			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+
+		execlists->csb_write =
+			&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
+	}
 
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a0bc7a8222b4..5b92c5f03e1d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -300,19 +300,30 @@ struct intel_engine_execlists {
 	struct rb_node *first;
 
 	/**
-	 * @fw_domains: forcewake domains for irq tasklet
+	 * @csb_head: context status buffer head
 	 */
-	unsigned int fw_domains;
+	unsigned int csb_head;
 
 	/**
-	 * @csb_head: context status buffer head
+	 * @csb_read: control register for Context Switch buffer
+	 *
+	 * Note this register is always in mmio.
 	 */
-	unsigned int csb_head;
+	u32 __iomem *csb_read;
 
 	/**
-	 * @csb_use_mmio: access csb through mmio, instead of hwsp
+	 * @csb_write: control register for Context Switch buffer
+	 *
+	 * Note this register may be either mmio or HWSP shadow.
+	 */
+	u32 *csb_write;
+
+	/**
+	 * @csb_status: status array for Context Switch buffer
+	 *
+	 * Note these register may be either mmio or HWSP shadow.
 	 */
-	bool csb_use_mmio;
+	u32 *csb_status;
 
 	/**
 	 * @preempt_complete_status: expected CSB upon completing preemption
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 6/9] drm/i915/execlists: Reset CSB write pointer after reset
  2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
                   ` (3 preceding siblings ...)
  2018-06-27 21:07 ` [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers Chris Wilson
@ 2018-06-27 21:07 ` Chris Wilson
  2018-06-28 10:06   ` Tvrtko Ursulin
                     ` (2 more replies)
  2018-06-27 21:07 ` [PATCH 7/9] drm/i915/execlists: Stop storing the CSB read pointer in the mmio register Chris Wilson
                   ` (9 subsequent siblings)
  14 siblings, 3 replies; 34+ messages in thread
From: Chris Wilson @ 2018-06-27 21:07 UTC (permalink / raw)
  To: intel-gfx

On HW reset, the HW clears the write pointer (to 0). But since it also
writes its first CSB entry to slot 0, we need to reset the write pointer
back to the element before (so the first entry we read is 0).

This is required for the next patch, where we trust the CSB completely!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 368a8c74d11d..8b111a268697 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -884,6 +884,21 @@ static void reset_irq(struct intel_engine_cs *engine)
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 }
 
+static void reset_csb_pointers(struct intel_engine_execlists *execlists)
+{
+	/*
+	 * After a reset, the HW starts writing into CSB entry [0]. We
+	 * therefore have to set our HEAD pointer back one entry so that
+	 * the *first* entry we check is entry 0. To complicate this further,
+	 * as we don't wait for the first interrupt after reset, we have to
+	 * fake the HW write to point back to the last entry so that our
+	 * inline comparison of our cached head position against the last HW
+	 * write works even before the first interrupt.
+	 */
+	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
+	WRITE_ONCE(*execlists->csb_write, (GEN8_CSB_ENTRIES - 1) | 0xff << 16);
+}
+
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1953,7 +1968,7 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	__unwind_incomplete_requests(engine);
 
 	/* Following the reset, we need to reload the CSB read/write pointers */
-	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
+	reset_csb_pointers(&engine->execlists);
 
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 
@@ -2452,7 +2467,6 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 			upper_32_bits(ce->lrc_desc);
 	}
 
-	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
 	execlists->csb_read =
 		i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
 	if (csb_force_mmio(i915)) {
@@ -2467,6 +2481,7 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 		execlists->csb_write =
 			&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
 	}
+	reset_csb_pointers(execlists);
 
 	return 0;
 
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 7/9] drm/i915/execlists: Stop storing the CSB read pointer in the mmio register
  2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
                   ` (4 preceding siblings ...)
  2018-06-27 21:07 ` [PATCH 6/9] drm/i915/execlists: Reset CSB write pointer after reset Chris Wilson
@ 2018-06-27 21:07 ` Chris Wilson
  2018-06-28 10:07   ` Tvrtko Ursulin
  2018-06-27 21:07 ` [PATCH 8/9] drm/i915/execlists: Trust the CSB Chris Wilson
                   ` (8 subsequent siblings)
  14 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2018-06-27 21:07 UTC (permalink / raw)
  To: intel-gfx

As we now never read back our current head position from the CSB
pointers register, and the HW itself doesn't use it to prevent
overwriting unread CSB entries, we do not need to keep updating the
register. As it turns out this register is not listed as being shadowed,
and so requires forcewake -- but we haven't been taking forcewake around
it so the writes has probably been regularly dropped. Fortuitously, we
only read the value after a reset where it did not matter, and zero was
the right answer (well, close enough).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8b111a268697..a6268103663f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1100,8 +1100,6 @@ static void process_csb(struct intel_engine_cs *engine)
 		}
 	} while (head != tail);
 
-	writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
-	       execlists->csb_read);
 	execlists->csb_head = head;
 }
 
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 8/9] drm/i915/execlists: Trust the CSB
  2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
                   ` (5 preceding siblings ...)
  2018-06-27 21:07 ` [PATCH 7/9] drm/i915/execlists: Stop storing the CSB read pointer in the mmio register Chris Wilson
@ 2018-06-27 21:07 ` Chris Wilson
  2018-06-28 11:29   ` Tvrtko Ursulin
  2018-06-27 21:07 ` [PATCH 9/9] drm/i915/execlists: Direct submission of new requests (avoid tasklet/ksoftirqd) Chris Wilson
                   ` (7 subsequent siblings)
  14 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2018-06-27 21:07 UTC (permalink / raw)
  To: intel-gfx

Now that we use the CSB stored in the CPU friendly HWSP, we do not need
to track interrupts for when the mmio CSB registers are valid and can
just check where we read up to last from the cached HWSP. This means we
can forgo the atomic bit tracking from interrupt, and in the next patch
it means we can check the CSB at any time.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c         | 11 ++-----
 drivers/gpu/drm/i915/intel_engine_cs.c  |  8 ++----
 drivers/gpu/drm/i915/intel_lrc.c        | 38 ++++++-------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
 4 files changed, 14 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 3702992f9f75..44fb11ca3cab 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1469,15 +1469,10 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 static void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 {
-	struct intel_engine_execlists * const execlists = &engine->execlists;
 	bool tasklet = false;
 
-	if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
-		if (READ_ONCE(engine->execlists.active)) {
-			set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-			tasklet = true;
-		}
-	}
+	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
+		tasklet = true;
 
 	if (iir & GT_RENDER_USER_INTERRUPT) {
 		notify_ring(engine);
@@ -1485,7 +1480,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 	}
 
 	if (tasklet)
-		tasklet_hi_schedule(&execlists->tasklet);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
 static void gen8_gt_irq_ack(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 7209c22798e6..ace93958689e 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1353,12 +1353,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 		ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
 		read = GEN8_CSB_READ_PTR(ptr);
 		write = GEN8_CSB_WRITE_PTR(ptr);
-		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n",
+		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], tasklet queued? %s (%s)\n",
 			   read, execlists->csb_head,
 			   write,
 			   intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
-			   yesno(test_bit(ENGINE_IRQ_EXECLIST,
-					  &engine->irq_posted)),
 			   yesno(test_bit(TASKLET_STATE_SCHED,
 					  &engine->execlists.tasklet.state)),
 			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
@@ -1570,11 +1568,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	spin_unlock(&b->rb_lock);
 	local_irq_restore(flags);
 
-	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
+	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
 		   engine->irq_posted,
 		   yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
-				  &engine->irq_posted)),
-		   yesno(test_bit(ENGINE_IRQ_EXECLIST,
 				  &engine->irq_posted)));
 
 	drm_printf(m, "HWSP:\n");
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a6268103663f..87dd8ee117c8 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -874,14 +874,6 @@ static void reset_irq(struct intel_engine_cs *engine)
 	smp_store_mb(engine->execlists.active, 0);
 
 	clear_gtiir(engine);
-
-	/*
-	 * The port is checked prior to scheduling a tasklet, but
-	 * just in case we have suspended the tasklet to do the
-	 * wedging make sure that when it wakes, it decides there
-	 * is no work to do by clearing the irq_posted bit.
-	 */
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 }
 
 static void reset_csb_pointers(struct intel_engine_execlists *execlists)
@@ -972,10 +964,6 @@ static void process_csb(struct intel_engine_cs *engine)
 	const u32 * const buf = execlists->csb_status;
 	u8 head, tail;
 
-	/* Clear before reading to catch new interrupts */
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-	smp_mb__after_atomic();
-
 	/* Note that csb_write, csb_status may be either in HWSP or mmio */
 	head = execlists->csb_head;
 	tail = READ_ONCE(*execlists->csb_write);
@@ -1111,11 +1099,10 @@ static void execlists_submission_tasklet(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 
-	GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n",
+	GEM_TRACE("%s awake?=%d, active=%x\n",
 		  engine->name,
 		  engine->i915->gt.awake,
-		  engine->execlists.active,
-		  test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
+		  engine->execlists.active);
 
 	/*
 	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -1127,14 +1114,7 @@ static void execlists_submission_tasklet(unsigned long data)
 	 */
 	GEM_BUG_ON(!engine->i915->gt.awake);
 
-	/*
-	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
-	 * imposing the cost of a locked atomic transaction when submitting a
-	 * new request (outside of the context-switch interrupt).
-	 */
-	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
-		process_csb(engine);
-
+	process_csb(engine);
 	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
 		execlists_dequeue(engine);
 }
@@ -1881,6 +1861,7 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_request *request, *active;
+	unsigned long flags;
 
 	GEM_TRACE("%s\n", engine->name);
 
@@ -1902,8 +1883,9 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
 	 * and avoid blaming an innocent request if the stall was due to the
 	 * preemption itself.
 	 */
-	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
-		process_csb(engine);
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+
+	process_csb(engine);
 
 	/*
 	 * The last active request can then be no later than the last request
@@ -1913,15 +1895,12 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
 	active = NULL;
 	request = port_request(execlists->port);
 	if (request) {
-		unsigned long flags;
-
 		/*
 		 * Prevent the breadcrumb from advancing before we decide
 		 * which request is currently active.
 		 */
 		intel_engine_stop_cs(engine);
 
-		spin_lock_irqsave(&engine->timeline.lock, flags);
 		list_for_each_entry_from_reverse(request,
 						 &engine->timeline.requests,
 						 link) {
@@ -1931,9 +1910,10 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
 
 			active = request;
 		}
-		spin_unlock_irqrestore(&engine->timeline.lock, flags);
 	}
 
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
 	return active;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 5b92c5f03e1d..381c243bfc6f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -359,7 +359,6 @@ struct intel_engine_cs {
 	atomic_t irq_count;
 	unsigned long irq_posted;
 #define ENGINE_IRQ_BREADCRUMB 0
-#define ENGINE_IRQ_EXECLIST 1
 
 	/* Rather than have every client wait upon all user interrupts,
 	 * with the herd waking after every interrupt and each doing the
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 9/9] drm/i915/execlists: Direct submission of new requests (avoid tasklet/ksoftirqd)
  2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
                   ` (6 preceding siblings ...)
  2018-06-27 21:07 ` [PATCH 8/9] drm/i915/execlists: Trust the CSB Chris Wilson
@ 2018-06-27 21:07 ` Chris Wilson
  2018-06-27 21:56 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts Patchwork
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2018-06-27 21:07 UTC (permalink / raw)
  To: intel-gfx

Back in commit 27af5eea54d1 ("drm/i915: Move execlists irq handler to a
bottom half"), we came to the conclusion that running our CSB processing
and ELSP submission from inside the irq handler was a bad idea. A really
bad idea as we could impose nearly 1s latency on other users of the
system, on average! Deferring our work to a tasklet allowed us to do the
processing with irqs enabled, reducing the impact to an average of about
50us.

We have since eradicated the use of forcewaked mmio from inside the CSB
processing and ELSP submission, bringing the impact down to around 5us
(on Kabylake); an order of magnitude better than our measurements 2
years ago on Broadwell and only about 2x worse on average than the
gem_syslatency on an unladen system.

In this iteration of the tasklet-vs-direct submission debate, we seek a
compromise where by we submit new requests immediately to the HW but
defer processing the CS interrupt onto a tasklet. We gain the advantage
of low-latency and ksoftirqd avoidance when waking up the HW, while
avoiding the system-wide starvation of our CS irq-storms.

Comparing the impact on the maximum latency observed (that is the time
stolen from an RT process) over a 120s interval, repeated several times
(using gem_syslatency, similar to RT's cyclictest) while the system is
fully laden with i915 nops, we see that direct submission an actually
improve the worse case.

Maximum latency in microseconds of a third party RT thread
(gem_syslatency -t 120 -f 2)
  x Always using tasklets (a couple of >1000us outliers removed)
  + Only using tasklets from CS irq, direct submission of requests
+------------------------------------------------------------------------+
|          +                                                             |
|          +                                                             |
|          +                                                             |
|          +       +                                                     |
|          + +     +                                                     |
|       +  + +     +  x     x     x                                      |
|      +++ + +     +  x  x  x  x  x  x                                   |
|      +++ + ++  + +  *x x  x  x  x  x                                   |
|      +++ + ++  + *  *x x  *  x  x  x                                   |
|    + +++ + ++  * * +*xxx  *  x  x  xx                                  |
|    * +++ + ++++* *x+**xx+ *  x  x xxxx x                               |
|   **x++++*++**+*x*x****x+ * +x xx xxxx x          x                    |
|x* ******+***************++*+***xxxxxx* xx*x     xxx +                x+|
|             |__________MA___________|                                  |
|      |______M__A________|                                              |
+------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x 118            91           186           124     125.28814     16.279137
+ 120            92           187           109     112.00833     13.458617
Difference at 95.0% confidence
	-13.2798 +/- 3.79219
	-10.5994% +/- 3.02677%
	(Student's t, pooled s = 14.9237)

However the mean latency is adversely affected:

Mean latency in microseconds of a third party RT thread
(gem_syslatency -t 120 -f 1)
  x Always using tasklets
  + Only using tasklets from CS irq, direct submission of requests
+------------------------------------------------------------------------+
|           xxxxxx                                        +   ++         |
|           xxxxxx                                        +   ++         |
|           xxxxxx                                      + +++ ++         |
|           xxxxxxx                                     +++++ ++         |
|           xxxxxxx                                     +++++ ++         |
|           xxxxxxx                                     +++++ +++        |
|           xxxxxxx                                   + ++++++++++       |
|           xxxxxxxx                                 ++ ++++++++++       |
|           xxxxxxxx                                 ++ ++++++++++       |
|          xxxxxxxxxx                                +++++++++++++++     |
|         xxxxxxxxxxx    x                           +++++++++++++++     |
|x       xxxxxxxxxxxxx   x           +            + ++++++++++++++++++  +|
|           |__A__|                                                      |
|                                                      |____A___|        |
+------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x 120         3.506         3.727         3.631     3.6321417    0.02773109
+ 120         3.834         4.149         4.039     4.0375167   0.041221676
Difference at 95.0% confidence
	0.405375 +/- 0.00888913
	11.1608% +/- 0.244735%
	(Student's t, pooled s = 0.03513)

However, since the mean latency corresponds to the amount of irqsoff
processing we have to do for a CS interrupt, we only need to speed that
up to benefit not just system latency but our own throughput.

v2: Remember to defer submissions when under reset.
v4: Only use direct submission for new requests
v5: Be aware that with mixing direct tasklet evaluation and deferred
tasklets, we may end up idling before running the deferred tasklet.
v6: Remove the redudant likely() from tasklet_is_enabled(), restrict the
annotation to reset_in_progress().

Testcase: igt/gem_exec_latency/*rthog*
References: 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half")
Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.h        |  5 ++
 drivers/gpu/drm/i915/intel_engine_cs.c |  1 +
 drivers/gpu/drm/i915/intel_lrc.c       | 93 +++++++++++++++++---------
 3 files changed, 67 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 261da577829a..e46592956872 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -88,4 +88,9 @@ static inline void __tasklet_enable_sync_once(struct tasklet_struct *t)
 		tasklet_kill(t);
 }
 
+static inline bool __tasklet_is_enabled(const struct tasklet_struct *t)
+{
+	return !atomic_read(&t->count);
+}
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index ace93958689e..9bbdee24e254 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1646,6 +1646,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 unlock:
 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
 	tasklet_enable(&execlists->tasklet);
+	tasklet_hi_schedule(&execlists->tasklet);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 87dd8ee117c8..2c3f64c9a2f1 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -563,12 +563,14 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
 
 	execlists_cancel_port_requests(execlists);
-	execlists_unwind_incomplete_requests(execlists);
+	__unwind_incomplete_requests(container_of(execlists,
+						  struct intel_engine_cs,
+						  execlists));
 
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
-static void __execlists_dequeue(struct intel_engine_cs *engine)
+static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
@@ -578,9 +580,8 @@ static void __execlists_dequeue(struct intel_engine_cs *engine)
 	struct rb_node *rb;
 	bool submit = false;
 
-	lockdep_assert_held(&engine->timeline.lock);
-
-	/* Hardware submission is through 2 ports. Conceptually each port
+	/*
+	 * Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
 	 * static for a context, and unique to each, so we only execute
 	 * requests belonging to a single context from each ring. RING_HEAD
@@ -770,15 +771,6 @@ static void __execlists_dequeue(struct intel_engine_cs *engine)
 		   !port_isset(engine->execlists.port));
 }
 
-static void execlists_dequeue(struct intel_engine_cs *engine)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&engine->timeline.lock, flags);
-	__execlists_dequeue(engine);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
-}
-
 void
 execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 {
@@ -957,6 +949,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
+static inline bool
+reset_in_progress(const struct intel_engine_execlists *execlists)
+{
+	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
+}
+
 static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1091,18 +1089,9 @@ static void process_csb(struct intel_engine_cs *engine)
 	execlists->csb_head = head;
 }
 
-/*
- * Check the unread Context Status Buffers and manage the submission of new
- * contexts to the ELSP accordingly.
- */
-static void execlists_submission_tasklet(unsigned long data)
+static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
-	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
-
-	GEM_TRACE("%s awake?=%d, active=%x\n",
-		  engine->name,
-		  engine->i915->gt.awake,
-		  engine->execlists.active);
+	lockdep_assert_held(&engine->timeline.lock);
 
 	/*
 	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -1119,6 +1108,28 @@ static void execlists_submission_tasklet(unsigned long data)
 		execlists_dequeue(engine);
 }
 
+/*
+ * Check the unread Context Status Buffers and manage the submission of new
+ * contexts to the ELSP accordingly.
+ */
+static void execlists_submission_tasklet(unsigned long data)
+{
+	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
+	unsigned long flags;
+
+	GEM_TRACE("%s awake?=%d, active=%x\n",
+		  engine->name,
+		  engine->i915->gt.awake,
+		  engine->execlists.active);
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+
+	if (engine->i915->gt.awake) /* we may be delayed until after we idle! */
+		__execlists_submission_tasklet(engine);
+
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+}
+
 static void queue_request(struct intel_engine_cs *engine,
 			  struct i915_sched_node *node,
 			  int prio)
@@ -1127,16 +1138,30 @@ static void queue_request(struct intel_engine_cs *engine,
 		      &lookup_priolist(engine, prio)->requests);
 }
 
-static void __submit_queue(struct intel_engine_cs *engine, int prio)
+static void __update_queue(struct intel_engine_cs *engine, int prio)
 {
 	engine->execlists.queue_priority = prio;
-	tasklet_hi_schedule(&engine->execlists.tasklet);
+}
+
+static void __submit_queue_imm(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+
+	if (reset_in_progress(execlists))
+		return; /* defer until we restart the engine following reset */
+
+	if (execlists->tasklet.func == execlists_submission_tasklet)
+		__execlists_submission_tasklet(engine);
+	else
+		tasklet_hi_schedule(&execlists->tasklet);
 }
 
 static void submit_queue(struct intel_engine_cs *engine, int prio)
 {
-	if (prio > engine->execlists.queue_priority)
-		__submit_queue(engine, prio);
+	if (prio > engine->execlists.queue_priority) {
+		__update_queue(engine, prio);
+		__submit_queue_imm(engine);
+	}
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1148,11 +1173,12 @@ static void execlists_submit_request(struct i915_request *request)
 	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	queue_request(engine, &request->sched, rq_prio(request));
-	submit_queue(engine, rq_prio(request));
 
 	GEM_BUG_ON(!engine->execlists.first);
 	GEM_BUG_ON(list_empty(&request->sched.link));
 
+	submit_queue(engine, rq_prio(request));
+
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
@@ -1279,8 +1305,11 @@ static void execlists_schedule(struct i915_request *request,
 		}
 
 		if (prio > engine->execlists.queue_priority &&
-		    i915_sw_fence_done(&sched_to_request(node)->submit))
-			__submit_queue(engine, prio);
+		    i915_sw_fence_done(&sched_to_request(node)->submit)) {
+			/* defer submission until after all of our updates */
+			__update_queue(engine, prio);
+			tasklet_hi_schedule(&engine->execlists.tasklet);
+		}
 	}
 
 	spin_unlock_irq(&engine->timeline.lock);
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts
  2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
                   ` (7 preceding siblings ...)
  2018-06-27 21:07 ` [PATCH 9/9] drm/i915/execlists: Direct submission of new requests (avoid tasklet/ksoftirqd) Chris Wilson
@ 2018-06-27 21:56 ` Patchwork
  2018-06-27 22:13 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2018-06-27 21:56 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts
URL   : https://patchwork.freedesktop.org/series/45531/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
e95bd2fda39a drm/i915: Drop posting reads to flush master interrupts
79dff3d56d42 drm/i915/execlists: Pull submit after dequeue under timeline lock
03ce62d8f412 drm/i915/execlists: Pull CSB reset under the timeline.lock
bf1d86422d87 drm/i915/execlists: Process one CSB update at a time
-:68: WARNING:MEMORY_BARRIER: memory barrier without comment
#68: FILE: drivers/gpu/drm/i915/intel_lrc.c:966:
+	smp_mb__after_atomic();

-:114: WARNING:LONG_LINE: line over 100 characters
#114: FILE: drivers/gpu/drm/i915/intel_lrc.c:990:
+		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",

-:115: WARNING:LONG_LINE: line over 100 characters
#115: FILE: drivers/gpu/drm/i915/intel_lrc.c:991:
+		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");

-:183: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#183: FILE: drivers/gpu/drm/i915/intel_lrc.c:1022:
+			  status, buf[2*head + 1],
 			               ^

-:211: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#211: FILE: drivers/gpu/drm/i915/intel_lrc.c:1040:
+		    buf[2*head + 1] == execlists->preempt_complete_status) {
 		         ^

total: 0 errors, 3 warnings, 2 checks, 316 lines checked
adcb871cf5c4 drm/i915/execlists: Unify CSB access pointers
a395c426e0ef drm/i915/execlists: Reset CSB write pointer after reset
f9d017e3025c drm/i915/execlists: Stop storing the CSB read pointer in the mmio register
c0c2e4c7254a drm/i915/execlists: Trust the CSB
191d5967cda2 drm/i915/execlists: Direct submission of new requests (avoid tasklet/ksoftirqd)
-:104: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#104: 
References: 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half")

-:104: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half")'
#104: 
References: 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half")

total: 1 errors, 1 warnings, 0 checks, 178 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts
  2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
                   ` (8 preceding siblings ...)
  2018-06-27 21:56 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts Patchwork
@ 2018-06-27 22:13 ` Patchwork
  2018-06-28  3:46 ` ✓ Fi.CI.IGT: " Patchwork
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2018-06-27 22:13 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts
URL   : https://patchwork.freedesktop.org/series/45531/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4392 -> Patchwork_9455 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/45531/revisions/1/mbox/

== Known issues ==

  Here are the changes found in Patchwork_9455 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@prime_vgem@basic-fence-flip:
      fi-byt-n2820:       PASS -> FAIL (fdo#104008)

    
  fdo#104008 https://bugs.freedesktop.org/show_bug.cgi?id=104008


== Participating hosts (45 -> 40) ==

  Missing    (5): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-hsw-4200u 


== Build changes ==

    * Linux: CI_DRM_4392 -> Patchwork_9455

  CI_DRM_4392: fa0510c4f4d6e85cba3068f08a83cb56b9fdf668 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4530: 0e98bf69f146eb72fe3a7c3b19a049b5786f0ca3 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9455: 191d5967cda2a65c0193982493b349395823dba4 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

191d5967cda2 drm/i915/execlists: Direct submission of new requests (avoid tasklet/ksoftirqd)
c0c2e4c7254a drm/i915/execlists: Trust the CSB
f9d017e3025c drm/i915/execlists: Stop storing the CSB read pointer in the mmio register
a395c426e0ef drm/i915/execlists: Reset CSB write pointer after reset
adcb871cf5c4 drm/i915/execlists: Unify CSB access pointers
bf1d86422d87 drm/i915/execlists: Process one CSB update at a time
03ce62d8f412 drm/i915/execlists: Pull CSB reset under the timeline.lock
79dff3d56d42 drm/i915/execlists: Pull submit after dequeue under timeline lock
e95bd2fda39a drm/i915: Drop posting reads to flush master interrupts

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9455/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* ✓ Fi.CI.IGT: success for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts
  2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
                   ` (9 preceding siblings ...)
  2018-06-27 22:13 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-06-28  3:46 ` Patchwork
  2018-06-28 10:41 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev2) Patchwork
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2018-06-28  3:46 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts
URL   : https://patchwork.freedesktop.org/series/45531/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4392_full -> Patchwork_9455_full =

== Summary - WARNING ==

  Minor unknown changes coming with Patchwork_9455_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9455_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9455_full:

  === IGT changes ===

    ==== Warnings ====

    igt@gem_exec_schedule@deep-render:
      shard-kbl:          PASS -> SKIP

    
== Known issues ==

  Here are the changes found in Patchwork_9455_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_selftest@live_gtt:
      shard-glk:          NOTRUN -> INCOMPLETE (fdo#103359, k.org#198133)

    igt@drv_selftest@mock_scatterlist:
      shard-glk:          NOTRUN -> DMESG-WARN (fdo#103667)

    igt@gem_ctx_isolation@rcs0-s3:
      shard-kbl:          PASS -> INCOMPLETE (fdo#103665) +1

    igt@gem_exec_schedule@pi-ringfull-bsd:
      shard-glk:          NOTRUN -> FAIL (fdo#103158)

    igt@kms_flip@2x-plain-flip-fb-recreate:
      shard-glk:          PASS -> FAIL (fdo#100368)

    igt@prime_vgem@coherency-gtt:
      {shard-glk9}:       NOTRUN -> FAIL (fdo#100587)

    
    ==== Possible fixes ====

    igt@drv_selftest@live_hangcheck:
      shard-apl:          DMESG-FAIL (fdo#106947, fdo#106560) -> PASS

    igt@gem_exec_big:
      shard-hsw:          INCOMPLETE (fdo#103540) -> PASS

    igt@kms_atomic_transition@1x-modeset-transitions-nonblocking-fencing:
      shard-glk:          FAIL (fdo#105703) -> PASS

    igt@kms_flip@flip-vs-expired-vblank-interruptible:
      shard-hsw:          FAIL (fdo#105363, fdo#102887) -> PASS

    igt@kms_flip@plain-flip-fb-recreate:
      shard-glk:          FAIL (fdo#100368) -> PASS

    igt@kms_setmode@basic:
      shard-apl:          FAIL (fdo#99912) -> PASS
      shard-hsw:          FAIL (fdo#99912) -> PASS

    
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#100587 https://bugs.freedesktop.org/show_bug.cgi?id=100587
  fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
  fdo#103158 https://bugs.freedesktop.org/show_bug.cgi?id=103158
  fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359
  fdo#103540 https://bugs.freedesktop.org/show_bug.cgi?id=103540
  fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665
  fdo#103667 https://bugs.freedesktop.org/show_bug.cgi?id=103667
  fdo#105363 https://bugs.freedesktop.org/show_bug.cgi?id=105363
  fdo#105703 https://bugs.freedesktop.org/show_bug.cgi?id=105703
  fdo#106560 https://bugs.freedesktop.org/show_bug.cgi?id=106560
  fdo#106947 https://bugs.freedesktop.org/show_bug.cgi?id=106947
  fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912
  k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133


== Participating hosts (6 -> 6) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4392 -> Patchwork_9455

  CI_DRM_4392: fa0510c4f4d6e85cba3068f08a83cb56b9fdf668 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4530: 0e98bf69f146eb72fe3a7c3b19a049b5786f0ca3 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9455: 191d5967cda2a65c0193982493b349395823dba4 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9455/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers
  2018-06-27 21:07 ` [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers Chris Wilson
@ 2018-06-28 10:02   ` Tvrtko Ursulin
  2018-06-28 10:10     ` Chris Wilson
  2018-06-28 11:13   ` [PATCH v2] " Chris Wilson
  1 sibling, 1 reply; 34+ messages in thread
From: Tvrtko Ursulin @ 2018-06-28 10:02 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 27/06/2018 22:07, Chris Wilson wrote:
> Following the removal of the last workarounds, the only CSB mmio access
> is for the old vGPU interface. The mmio registers presented by vGPU do
> not require forcewake and can be treated as ordinary volatile memory,
> i.e. they behave just like the HWSP access just at a different location.
> We can reduce the CSB access to a set of read/write/buffer pointers and
> treat the various paths identically and not worry about forcewake.
> (Forcewake is nightmare for worstcase latency, and we want to process
> this all with irqsoff -- no latency allowed!)
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_engine_cs.c  |  12 ---
>   drivers/gpu/drm/i915/intel_lrc.c        | 116 ++++++++++--------------
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  23 +++--
>   3 files changed, 65 insertions(+), 86 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index d3264bd6e9dc..7209c22798e6 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -25,7 +25,6 @@
>   #include <drm/drm_print.h>
>   
>   #include "i915_drv.h"
> -#include "i915_vgpu.h"
>   #include "intel_ringbuffer.h"
>   #include "intel_lrc.h"
>   
> @@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
>   	i915_gem_batch_pool_init(&engine->batch_pool, engine);
>   }
>   
> -static bool csb_force_mmio(struct drm_i915_private *i915)
> -{
> -	/* Older GVT emulation depends upon intercepting CSB mmio */
> -	if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
> -		return true;
> -
> -	return false;
> -}
> -
>   static void intel_engine_init_execlist(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   
> -	execlists->csb_use_mmio = csb_force_mmio(engine->i915);
> -
>   	execlists->port_mask = 1;
>   	BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
>   	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 91656eb2f2db..368a8c74d11d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -137,6 +137,7 @@
>   #include <drm/i915_drm.h>
>   #include "i915_drv.h"
>   #include "i915_gem_render_state.h"
> +#include "i915_vgpu.h"
>   #include "intel_lrc_reg.h"
>   #include "intel_mocs.h"
>   #include "intel_workarounds.h"
> @@ -953,44 +954,23 @@ static void process_csb(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct execlist_port *port = execlists->port;
> -	struct drm_i915_private *i915 = engine->i915;
> -
> -	/* The HWSP contains a (cacheable) mirror of the CSB */
> -	const u32 *buf =
> -		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> -	unsigned int head, tail;
> -	bool fw = false;
> +	const u32 * const buf = execlists->csb_status;
> +	u8 head, tail;
>   
>   	/* Clear before reading to catch new interrupts */
>   	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>   	smp_mb__after_atomic();
>   
> -	if (unlikely(execlists->csb_use_mmio)) {
> -		intel_uncore_forcewake_get(i915, execlists->fw_domains);
> -		fw = true;
> -
> -		buf = (u32 * __force)
> -			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> +	/* Note that csb_write, csb_status may be either in HWSP or mmio */
> +	head = execlists->csb_head;
> +	tail = READ_ONCE(*execlists->csb_write);

Under GVT when this is emulated mmio I think you need to mask and shift 
it with GEN8_CSB_WRITE_PTR.

> +	GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
> +	if (unlikely(head == tail))
> +		return;
>   
> -		head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> -		tail = GEN8_CSB_WRITE_PTR(head);
> -		head = GEN8_CSB_READ_PTR(head);
> -		execlists->csb_head = head;
> -	} else {
> -		const int write_idx =
> -			intel_hws_csb_write_index(i915) -
> -			I915_HWS_CSB_BUF0_INDEX;
> +	rmb(); /* Hopefully paired with a wmb() in HW */
>   
> -		head = execlists->csb_head;
> -		tail = READ_ONCE(buf[write_idx]);
> -		rmb(); /* Hopefully paired with a wmb() in HW */
> -	}
> -	GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> -		  engine->name,
> -		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> -		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> -
> -	while (head != tail) {
> +	do {

Why convert while to do-while? Early unlikely return above handles the 
void process_csb calls? Would the same effect happen if you put unlikely 
in the while (head != tail) condition and would be simpler?

>   		struct i915_request *rq;
>   		unsigned int status;
>   		unsigned int count;
> @@ -1016,12 +996,12 @@ static void process_csb(struct intel_engine_cs *engine)
>   		 * status notifier.
>   		 */
>   
> -		status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
>   		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
>   			  engine->name, head,
> -			  status, buf[2*head + 1],
> +			  buf[2 * head + 0], buf[2 * head + 1],
>   			  execlists->active);
>   
> +		status = buf[2 * head];

Why not leave before GEM_TRACE above, as is, and then diff is smaller?

>   		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>   			      GEN8_CTX_STATUS_PREEMPTED))
>   			execlists_set_active(execlists,
> @@ -1103,16 +1083,11 @@ static void process_csb(struct intel_engine_cs *engine)
>   		} else {
>   			port_set(port, port_pack(rq, count));
>   		}
> -	}
> +	} while (head != tail);
>   
> -	if (head != execlists->csb_head) {
> -		execlists->csb_head = head;
> -		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> -		       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> -	}
> -
> -	if (unlikely(fw))
> -		intel_uncore_forcewake_put(i915, execlists->fw_domains);
> +	writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> +	       execlists->csb_read);
> +	execlists->csb_head = head;

I guess early return helps to avoid this, but since it is going away in 
a following patch maybe it is not worth it.

>   }
>   
>   /*
> @@ -2430,28 +2405,11 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
>   static void
>   logical_ring_setup(struct intel_engine_cs *engine)
>   {
> -	struct drm_i915_private *dev_priv = engine->i915;
> -	enum forcewake_domains fw_domains;
> -
>   	intel_engine_setup_common(engine);
>   
>   	/* Intentionally left blank. */
>   	engine->buffer = NULL;
>   
> -	fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
> -						    RING_ELSP(engine),
> -						    FW_REG_WRITE);
> -
> -	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
> -						     RING_CONTEXT_STATUS_PTR(engine),
> -						     FW_REG_READ | FW_REG_WRITE);
> -
> -	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
> -						     RING_CONTEXT_STATUS_BUF_BASE(engine),
> -						     FW_REG_READ);
> -
> -	engine->execlists.fw_domains = fw_domains;
> -
>   	tasklet_init(&engine->execlists.tasklet,
>   		     execlists_submission_tasklet, (unsigned long)engine);
>   
> @@ -2459,34 +2417,56 @@ logical_ring_setup(struct intel_engine_cs *engine)
>   	logical_ring_default_irqs(engine);
>   }
>   
> +static bool csb_force_mmio(struct drm_i915_private *i915)
> +{
> +	/* Older GVT emulation depends upon intercepting CSB mmio */
> +	return intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915);
> +}
> +
>   static int logical_ring_init(struct intel_engine_cs *engine)
>   {
> +	struct drm_i915_private *i915 = engine->i915;
> +	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	int ret;
>   
>   	ret = intel_engine_init_common(engine);
>   	if (ret)
>   		goto error;
>   
> -	if (HAS_LOGICAL_RING_ELSQ(engine->i915)) {
> -		engine->execlists.submit_reg = engine->i915->regs +
> +	if (HAS_LOGICAL_RING_ELSQ(i915)) {
> +		execlists->submit_reg = i915->regs +
>   			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine));
> -		engine->execlists.ctrl_reg = engine->i915->regs +
> +		execlists->ctrl_reg = i915->regs +
>   			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine));
>   	} else {
> -		engine->execlists.submit_reg = engine->i915->regs +
> +		execlists->submit_reg = i915->regs +
>   			i915_mmio_reg_offset(RING_ELSP(engine));
>   	}
>   
> -	engine->execlists.preempt_complete_status = ~0u;
> -	if (engine->i915->preempt_context) {
> +	execlists->preempt_complete_status = ~0u;
> +	if (i915->preempt_context) {
>   		struct intel_context *ce =
> -			to_intel_context(engine->i915->preempt_context, engine);
> +			to_intel_context(i915->preempt_context, engine);
>   
> -		engine->execlists.preempt_complete_status =
> +		execlists->preempt_complete_status =
>   			upper_32_bits(ce->lrc_desc);
>   	}
>   
> -	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
> +	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
> +	execlists->csb_read =
> +		i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
> +	if (csb_force_mmio(i915)) {
> +		execlists->csb_status = (u32 __force *)
> +			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> +
> +		execlists->csb_write = (u32 __force *)execlists->csb_read;
> +	} else {
> +		execlists->csb_status =
> +			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> +
> +		execlists->csb_write =
> +			&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
> +	}
>   
>   	return 0;
>   
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index a0bc7a8222b4..5b92c5f03e1d 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -300,19 +300,30 @@ struct intel_engine_execlists {
>   	struct rb_node *first;
>   
>   	/**
> -	 * @fw_domains: forcewake domains for irq tasklet
> +	 * @csb_head: context status buffer head
>   	 */
> -	unsigned int fw_domains;
> +	unsigned int csb_head;
>   
>   	/**
> -	 * @csb_head: context status buffer head
> +	 * @csb_read: control register for Context Switch buffer
> +	 *
> +	 * Note this register is always in mmio.
>   	 */
> -	unsigned int csb_head;
> +	u32 __iomem *csb_read;
>   
>   	/**
> -	 * @csb_use_mmio: access csb through mmio, instead of hwsp
> +	 * @csb_write: control register for Context Switch buffer
> +	 *
> +	 * Note this register may be either mmio or HWSP shadow.
> +	 */
> +	u32 *csb_write;
> +
> +	/**
> +	 * @csb_status: status array for Context Switch buffer
> +	 *
> +	 * Note these register may be either mmio or HWSP shadow.
>   	 */
> -	bool csb_use_mmio;
> +	u32 *csb_status;
>   
>   	/**
>   	 * @preempt_complete_status: expected CSB upon completing preemption
> 

Looks otherwise ok.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 6/9] drm/i915/execlists: Reset CSB write pointer after reset
  2018-06-27 21:07 ` [PATCH 6/9] drm/i915/execlists: Reset CSB write pointer after reset Chris Wilson
@ 2018-06-28 10:06   ` Tvrtko Ursulin
  2018-06-28 10:17     ` Chris Wilson
  2018-06-28 10:22   ` [PATCH v2] " Chris Wilson
  2018-06-28 11:59   ` [PATCH v3] " Chris Wilson
  2 siblings, 1 reply; 34+ messages in thread
From: Tvrtko Ursulin @ 2018-06-28 10:06 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 27/06/2018 22:07, Chris Wilson wrote:
> On HW reset, the HW clears the write pointer (to 0). But since it also
> writes its first CSB entry to slot 0, we need to reset the write pointer
> back to the element before (so the first entry we read is 0).
> 
> This is required for the next patch, where we trust the CSB completely!
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 19 +++++++++++++++++--
>   1 file changed, 17 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 368a8c74d11d..8b111a268697 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -884,6 +884,21 @@ static void reset_irq(struct intel_engine_cs *engine)
>   	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>   }
>   
> +static void reset_csb_pointers(struct intel_engine_execlists *execlists)
> +{
> +	/*
> +	 * After a reset, the HW starts writing into CSB entry [0]. We
> +	 * therefore have to set our HEAD pointer back one entry so that
> +	 * the *first* entry we check is entry 0. To complicate this further,
> +	 * as we don't wait for the first interrupt after reset, we have to
> +	 * fake the HW write to point back to the last entry so that our
> +	 * inline comparison of our cached head position against the last HW
> +	 * write works even before the first interrupt.
> +	 */
> +	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
> +	WRITE_ONCE(*execlists->csb_write, (GEN8_CSB_ENTRIES - 1) | 0xff << 16);

Use _MASKED_FIELD and GEN8_CSB_WRITE_PTR_MASK?

> +}
> +
>   static void execlists_cancel_requests(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -1953,7 +1968,7 @@ static void execlists_reset(struct intel_engine_cs *engine,
>   	__unwind_incomplete_requests(engine);
>   
>   	/* Following the reset, we need to reload the CSB read/write pointers */
> -	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
> +	reset_csb_pointers(&engine->execlists);
>   
>   	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   
> @@ -2452,7 +2467,6 @@ static int logical_ring_init(struct intel_engine_cs *engine)
>   			upper_32_bits(ce->lrc_desc);
>   	}
>   
> -	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
>   	execlists->csb_read =
>   		i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
>   	if (csb_force_mmio(i915)) {
> @@ -2467,6 +2481,7 @@ static int logical_ring_init(struct intel_engine_cs *engine)
>   		execlists->csb_write =
>   			&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
>   	}
> +	reset_csb_pointers(execlists);
>   
>   	return 0;
>   
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 7/9] drm/i915/execlists: Stop storing the CSB read pointer in the mmio register
  2018-06-27 21:07 ` [PATCH 7/9] drm/i915/execlists: Stop storing the CSB read pointer in the mmio register Chris Wilson
@ 2018-06-28 10:07   ` Tvrtko Ursulin
  0 siblings, 0 replies; 34+ messages in thread
From: Tvrtko Ursulin @ 2018-06-28 10:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 27/06/2018 22:07, Chris Wilson wrote:
> As we now never read back our current head position from the CSB
> pointers register, and the HW itself doesn't use it to prevent
> overwriting unread CSB entries, we do not need to keep updating the
> register. As it turns out this register is not listed as being shadowed,
> and so requires forcewake -- but we haven't been taking forcewake around
> it so the writes has probably been regularly dropped. Fortuitously, we
> only read the value after a reset where it did not matter, and zero was
> the right answer (well, close enough).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 2 --
>   1 file changed, 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 8b111a268697..a6268103663f 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1100,8 +1100,6 @@ static void process_csb(struct intel_engine_cs *engine)
>   		}
>   	} while (head != tail);
>   
> -	writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> -	       execlists->csb_read);
>   	execlists->csb_head = head;
>   }
>   
> 

Proof is in the pudding. :)

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers
  2018-06-28 10:02   ` Tvrtko Ursulin
@ 2018-06-28 10:10     ` Chris Wilson
  2018-06-28 10:58       ` Tvrtko Ursulin
  0 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2018-06-28 10:10 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-06-28 11:02:17)
> 
> On 27/06/2018 22:07, Chris Wilson wrote:
> > Following the removal of the last workarounds, the only CSB mmio access
> > is for the old vGPU interface. The mmio registers presented by vGPU do
> > not require forcewake and can be treated as ordinary volatile memory,
> > i.e. they behave just like the HWSP access just at a different location.
> > We can reduce the CSB access to a set of read/write/buffer pointers and
> > treat the various paths identically and not worry about forcewake.
> > (Forcewake is nightmare for worstcase latency, and we want to process
> > this all with irqsoff -- no latency allowed!)
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/intel_engine_cs.c  |  12 ---
> >   drivers/gpu/drm/i915/intel_lrc.c        | 116 ++++++++++--------------
> >   drivers/gpu/drm/i915/intel_ringbuffer.h |  23 +++--
> >   3 files changed, 65 insertions(+), 86 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> > index d3264bd6e9dc..7209c22798e6 100644
> > --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> > @@ -25,7 +25,6 @@
> >   #include <drm/drm_print.h>
> >   
> >   #include "i915_drv.h"
> > -#include "i915_vgpu.h"
> >   #include "intel_ringbuffer.h"
> >   #include "intel_lrc.h"
> >   
> > @@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
> >       i915_gem_batch_pool_init(&engine->batch_pool, engine);
> >   }
> >   
> > -static bool csb_force_mmio(struct drm_i915_private *i915)
> > -{
> > -     /* Older GVT emulation depends upon intercepting CSB mmio */
> > -     if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
> > -             return true;
> > -
> > -     return false;
> > -}
> > -
> >   static void intel_engine_init_execlist(struct intel_engine_cs *engine)
> >   {
> >       struct intel_engine_execlists * const execlists = &engine->execlists;
> >   
> > -     execlists->csb_use_mmio = csb_force_mmio(engine->i915);
> > -
> >       execlists->port_mask = 1;
> >       BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
> >       GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index 91656eb2f2db..368a8c74d11d 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -137,6 +137,7 @@
> >   #include <drm/i915_drm.h>
> >   #include "i915_drv.h"
> >   #include "i915_gem_render_state.h"
> > +#include "i915_vgpu.h"
> >   #include "intel_lrc_reg.h"
> >   #include "intel_mocs.h"
> >   #include "intel_workarounds.h"
> > @@ -953,44 +954,23 @@ static void process_csb(struct intel_engine_cs *engine)
> >   {
> >       struct intel_engine_execlists * const execlists = &engine->execlists;
> >       struct execlist_port *port = execlists->port;
> > -     struct drm_i915_private *i915 = engine->i915;
> > -
> > -     /* The HWSP contains a (cacheable) mirror of the CSB */
> > -     const u32 *buf =
> > -             &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> > -     unsigned int head, tail;
> > -     bool fw = false;
> > +     const u32 * const buf = execlists->csb_status;
> > +     u8 head, tail;
> >   
> >       /* Clear before reading to catch new interrupts */
> >       clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> >       smp_mb__after_atomic();
> >   
> > -     if (unlikely(execlists->csb_use_mmio)) {
> > -             intel_uncore_forcewake_get(i915, execlists->fw_domains);
> > -             fw = true;
> > -
> > -             buf = (u32 * __force)
> > -                     (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> > +     /* Note that csb_write, csb_status may be either in HWSP or mmio */
> > +     head = execlists->csb_head;
> > +     tail = READ_ONCE(*execlists->csb_write);
> 
> Under GVT when this is emulated mmio I think you need to mask and shift 
> it with GEN8_CSB_WRITE_PTR.

Shift is 0, mask is applied by u8.

> > +     GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
> > +     if (unlikely(head == tail))
> > +             return;
> >   
> > -             head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > -             tail = GEN8_CSB_WRITE_PTR(head);
> > -             head = GEN8_CSB_READ_PTR(head);
> > -             execlists->csb_head = head;
> > -     } else {
> > -             const int write_idx =
> > -                     intel_hws_csb_write_index(i915) -
> > -                     I915_HWS_CSB_BUF0_INDEX;
> > +     rmb(); /* Hopefully paired with a wmb() in HW */
> >   
> > -             head = execlists->csb_head;
> > -             tail = READ_ONCE(buf[write_idx]);
> > -             rmb(); /* Hopefully paired with a wmb() in HW */
> > -     }
> > -     GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> > -               engine->name,
> > -               head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> > -               tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> > -
> > -     while (head != tail) {
> > +     do {
> 
> Why convert while to do-while? Early unlikely return above handles the 
> void process_csb calls? Would the same effect happen if you put unlikely 
> in the while (head != tail) condition and would be simpler?

The earlier return to circumvent the lfence.

> >               struct i915_request *rq;
> >               unsigned int status;
> >               unsigned int count;
> > @@ -1016,12 +996,12 @@ static void process_csb(struct intel_engine_cs *engine)
> >                * status notifier.
> >                */
> >   
> > -             status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> >               GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> >                         engine->name, head,
> > -                       status, buf[2*head + 1],
> > +                       buf[2 * head + 0], buf[2 * head + 1],
> >                         execlists->active);
> >   
> > +             status = buf[2 * head];
> 
> Why not leave before GEM_TRACE above, as is, and then diff is smaller?

You made me correct the GEM_TRACE!

Looking at it I still prefer 2 buf[] vs the mixed local/buf[].

> >               if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> >                             GEN8_CTX_STATUS_PREEMPTED))
> >                       execlists_set_active(execlists,
> > @@ -1103,16 +1083,11 @@ static void process_csb(struct intel_engine_cs *engine)
> >               } else {
> >                       port_set(port, port_pack(rq, count));
> >               }
> > -     }
> > +     } while (head != tail);
> >   
> > -     if (head != execlists->csb_head) {
> > -             execlists->csb_head = head;
> > -             writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> > -                    i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > -     }
> > -
> > -     if (unlikely(fw))
> > -             intel_uncore_forcewake_put(i915, execlists->fw_domains);
> > +     writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> > +            execlists->csb_read);
> > +     execlists->csb_head = head;
> 
> I guess early return helps to avoid this, but since it is going away in 
> a following patch maybe it is not worth it.

lfence!
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 6/9] drm/i915/execlists: Reset CSB write pointer after reset
  2018-06-28 10:06   ` Tvrtko Ursulin
@ 2018-06-28 10:17     ` Chris Wilson
  2018-06-28 11:02       ` Tvrtko Ursulin
  0 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2018-06-28 10:17 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-06-28 11:06:25)
> 
> On 27/06/2018 22:07, Chris Wilson wrote:
> > On HW reset, the HW clears the write pointer (to 0). But since it also
> > writes its first CSB entry to slot 0, we need to reset the write pointer
> > back to the element before (so the first entry we read is 0).
> > 
> > This is required for the next patch, where we trust the CSB completely!
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/intel_lrc.c | 19 +++++++++++++++++--
> >   1 file changed, 17 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index 368a8c74d11d..8b111a268697 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -884,6 +884,21 @@ static void reset_irq(struct intel_engine_cs *engine)
> >       clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> >   }
> >   
> > +static void reset_csb_pointers(struct intel_engine_execlists *execlists)
> > +{
> > +     /*
> > +      * After a reset, the HW starts writing into CSB entry [0]. We
> > +      * therefore have to set our HEAD pointer back one entry so that
> > +      * the *first* entry we check is entry 0. To complicate this further,
> > +      * as we don't wait for the first interrupt after reset, we have to
> > +      * fake the HW write to point back to the last entry so that our
> > +      * inline comparison of our cached head position against the last HW
> > +      * write works even before the first interrupt.
> > +      */
> > +     execlists->csb_head = GEN8_CSB_ENTRIES - 1;
> > +     WRITE_ONCE(*execlists->csb_write, (GEN8_CSB_ENTRIES - 1) | 0xff << 16);
> 
> Use _MASKED_FIELD and GEN8_CSB_WRITE_PTR_MASK?

Hah, there goes my attempt to kill off unused magic.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v2] drm/i915/execlists: Reset CSB write pointer after reset
  2018-06-27 21:07 ` [PATCH 6/9] drm/i915/execlists: Reset CSB write pointer after reset Chris Wilson
  2018-06-28 10:06   ` Tvrtko Ursulin
@ 2018-06-28 10:22   ` Chris Wilson
  2018-06-28 11:59   ` [PATCH v3] " Chris Wilson
  2 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2018-06-28 10:22 UTC (permalink / raw)
  To: intel-gfx

On HW reset, the HW clears the write pointer (to 0). But since it also
writes its first CSB entry to slot 0, we need to reset the write pointer
back to the element before (so the first entry we read is 0).

This is required for the next patch, where we trust the CSB completely!

v2: Use _MASKED_FIELD

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 368a8c74d11d..2e712a99425f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -884,6 +884,22 @@ static void reset_irq(struct intel_engine_cs *engine)
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 }
 
+static void reset_csb_pointers(struct intel_engine_execlists *execlists)
+{
+	/*
+	 * After a reset, the HW starts writing into CSB entry [0]. We
+	 * therefore have to set our HEAD pointer back one entry so that
+	 * the *first* entry we check is entry 0. To complicate this further,
+	 * as we don't wait for the first interrupt after reset, we have to
+	 * fake the HW write to point back to the last entry so that our
+	 * inline comparison of our cached head position against the last HW
+	 * write works even before the first interrupt.
+	 */
+	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
+	WRITE_ONCE(*execlists->csb_write,
+		   _MASKED_FIELD(0xff, GEN8_CSB_ENTRIES - 1));
+}
+
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1953,7 +1969,7 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	__unwind_incomplete_requests(engine);
 
 	/* Following the reset, we need to reload the CSB read/write pointers */
-	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
+	reset_csb_pointers(&engine->execlists);
 
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 
@@ -2452,7 +2468,6 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 			upper_32_bits(ce->lrc_desc);
 	}
 
-	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
 	execlists->csb_read =
 		i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
 	if (csb_force_mmio(i915)) {
@@ -2467,6 +2482,7 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 		execlists->csb_write =
 			&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
 	}
+	reset_csb_pointers(execlists);
 
 	return 0;
 
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev2)
  2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
                   ` (10 preceding siblings ...)
  2018-06-28  3:46 ` ✓ Fi.CI.IGT: " Patchwork
@ 2018-06-28 10:41 ` Patchwork
  2018-06-28 11:00 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2018-06-28 10:41 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev2)
URL   : https://patchwork.freedesktop.org/series/45531/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
6d78246fb880 drm/i915: Drop posting reads to flush master interrupts
a029fbce709f drm/i915/execlists: Pull submit after dequeue under timeline lock
eede2b30ffb9 drm/i915/execlists: Pull CSB reset under the timeline.lock
92843704c6d6 drm/i915/execlists: Process one CSB update at a time
-:68: WARNING:MEMORY_BARRIER: memory barrier without comment
#68: FILE: drivers/gpu/drm/i915/intel_lrc.c:966:
+	smp_mb__after_atomic();

-:114: WARNING:LONG_LINE: line over 100 characters
#114: FILE: drivers/gpu/drm/i915/intel_lrc.c:990:
+		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",

-:115: WARNING:LONG_LINE: line over 100 characters
#115: FILE: drivers/gpu/drm/i915/intel_lrc.c:991:
+		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");

-:183: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#183: FILE: drivers/gpu/drm/i915/intel_lrc.c:1022:
+			  status, buf[2*head + 1],
 			               ^

-:211: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#211: FILE: drivers/gpu/drm/i915/intel_lrc.c:1040:
+		    buf[2*head + 1] == execlists->preempt_complete_status) {
 		         ^

total: 0 errors, 3 warnings, 2 checks, 316 lines checked
c4c5eb343fb5 drm/i915/execlists: Unify CSB access pointers
a7894f5a9301 drm/i915/execlists: Reset CSB write pointer after reset
cd3ce098db8d drm/i915/execlists: Stop storing the CSB read pointer in the mmio register
657a10b942d4 drm/i915/execlists: Trust the CSB
81f84fa1a393 drm/i915/execlists: Direct submission of new requests (avoid tasklet/ksoftirqd)
-:104: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#104: 
References: 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half")

-:104: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half")'
#104: 
References: 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half")

total: 1 errors, 1 warnings, 0 checks, 178 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers
  2018-06-28 10:10     ` Chris Wilson
@ 2018-06-28 10:58       ` Tvrtko Ursulin
  2018-06-28 11:05         ` Chris Wilson
  0 siblings, 1 reply; 34+ messages in thread
From: Tvrtko Ursulin @ 2018-06-28 10:58 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 28/06/2018 11:10, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-28 11:02:17)
>>
>> On 27/06/2018 22:07, Chris Wilson wrote:
>>> Following the removal of the last workarounds, the only CSB mmio access
>>> is for the old vGPU interface. The mmio registers presented by vGPU do
>>> not require forcewake and can be treated as ordinary volatile memory,
>>> i.e. they behave just like the HWSP access just at a different location.
>>> We can reduce the CSB access to a set of read/write/buffer pointers and
>>> treat the various paths identically and not worry about forcewake.
>>> (Forcewake is nightmare for worstcase latency, and we want to process
>>> this all with irqsoff -- no latency allowed!)
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    drivers/gpu/drm/i915/intel_engine_cs.c  |  12 ---
>>>    drivers/gpu/drm/i915/intel_lrc.c        | 116 ++++++++++--------------
>>>    drivers/gpu/drm/i915/intel_ringbuffer.h |  23 +++--
>>>    3 files changed, 65 insertions(+), 86 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
>>> index d3264bd6e9dc..7209c22798e6 100644
>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>> @@ -25,7 +25,6 @@
>>>    #include <drm/drm_print.h>
>>>    
>>>    #include "i915_drv.h"
>>> -#include "i915_vgpu.h"
>>>    #include "intel_ringbuffer.h"
>>>    #include "intel_lrc.h"
>>>    
>>> @@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
>>>        i915_gem_batch_pool_init(&engine->batch_pool, engine);
>>>    }
>>>    
>>> -static bool csb_force_mmio(struct drm_i915_private *i915)
>>> -{
>>> -     /* Older GVT emulation depends upon intercepting CSB mmio */
>>> -     if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
>>> -             return true;
>>> -
>>> -     return false;
>>> -}
>>> -
>>>    static void intel_engine_init_execlist(struct intel_engine_cs *engine)
>>>    {
>>>        struct intel_engine_execlists * const execlists = &engine->execlists;
>>>    
>>> -     execlists->csb_use_mmio = csb_force_mmio(engine->i915);
>>> -
>>>        execlists->port_mask = 1;
>>>        BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
>>>        GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 91656eb2f2db..368a8c74d11d 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -137,6 +137,7 @@
>>>    #include <drm/i915_drm.h>
>>>    #include "i915_drv.h"
>>>    #include "i915_gem_render_state.h"
>>> +#include "i915_vgpu.h"
>>>    #include "intel_lrc_reg.h"
>>>    #include "intel_mocs.h"
>>>    #include "intel_workarounds.h"
>>> @@ -953,44 +954,23 @@ static void process_csb(struct intel_engine_cs *engine)
>>>    {
>>>        struct intel_engine_execlists * const execlists = &engine->execlists;
>>>        struct execlist_port *port = execlists->port;
>>> -     struct drm_i915_private *i915 = engine->i915;
>>> -
>>> -     /* The HWSP contains a (cacheable) mirror of the CSB */
>>> -     const u32 *buf =
>>> -             &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
>>> -     unsigned int head, tail;
>>> -     bool fw = false;
>>> +     const u32 * const buf = execlists->csb_status;
>>> +     u8 head, tail;
>>>    
>>>        /* Clear before reading to catch new interrupts */
>>>        clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>>>        smp_mb__after_atomic();
>>>    
>>> -     if (unlikely(execlists->csb_use_mmio)) {
>>> -             intel_uncore_forcewake_get(i915, execlists->fw_domains);
>>> -             fw = true;
>>> -
>>> -             buf = (u32 * __force)
>>> -                     (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
>>> +     /* Note that csb_write, csb_status may be either in HWSP or mmio */
>>> +     head = execlists->csb_head;
>>> +     tail = READ_ONCE(*execlists->csb_write);
>>
>> Under GVT when this is emulated mmio I think you need to mask and shift
>> it with GEN8_CSB_WRITE_PTR.
> 
> Shift is 0, mask is applied by u8.

Evil. :) Put a comment please.

>>> +     GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
>>> +     if (unlikely(head == tail))
>>> +             return;
>>>    
>>> -             head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
>>> -             tail = GEN8_CSB_WRITE_PTR(head);
>>> -             head = GEN8_CSB_READ_PTR(head);
>>> -             execlists->csb_head = head;
>>> -     } else {
>>> -             const int write_idx =
>>> -                     intel_hws_csb_write_index(i915) -
>>> -                     I915_HWS_CSB_BUF0_INDEX;
>>> +     rmb(); /* Hopefully paired with a wmb() in HW */
>>>    
>>> -             head = execlists->csb_head;
>>> -             tail = READ_ONCE(buf[write_idx]);
>>> -             rmb(); /* Hopefully paired with a wmb() in HW */
>>> -     }
>>> -     GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
>>> -               engine->name,
>>> -               head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
>>> -               tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
>>> -
>>> -     while (head != tail) {
>>> +     do {
>>
>> Why convert while to do-while? Early unlikely return above handles the
>> void process_csb calls? Would the same effect happen if you put unlikely
>> in the while (head != tail) condition and would be simpler?
> 
> The earlier return to circumvent the lfence.

Oh that one.. so why it is safe to compare head and tail before the rmb 
since we need the rmb afterwards?

> 
>>>                struct i915_request *rq;
>>>                unsigned int status;
>>>                unsigned int count;
>>> @@ -1016,12 +996,12 @@ static void process_csb(struct intel_engine_cs *engine)
>>>                 * status notifier.
>>>                 */
>>>    
>>> -             status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
>>>                GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
>>>                          engine->name, head,
>>> -                       status, buf[2*head + 1],
>>> +                       buf[2 * head + 0], buf[2 * head + 1],
>>>                          execlists->active);
>>>    
>>> +             status = buf[2 * head];
>>
>> Why not leave before GEM_TRACE above, as is, and then diff is smaller?
> 
> You made me correct the GEM_TRACE!

Hm?

> Looking at it I still prefer 2 buf[] vs the mixed local/buf[].

Not touching lines which don't need touching trumps in my book. But OK.

Regards,

Tvrtko

> 
>>>                if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>>                              GEN8_CTX_STATUS_PREEMPTED))
>>>                        execlists_set_active(execlists,
>>> @@ -1103,16 +1083,11 @@ static void process_csb(struct intel_engine_cs *engine)
>>>                } else {
>>>                        port_set(port, port_pack(rq, count));
>>>                }
>>> -     }
>>> +     } while (head != tail);
>>>    
>>> -     if (head != execlists->csb_head) {
>>> -             execlists->csb_head = head;
>>> -             writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
>>> -                    i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
>>> -     }
>>> -
>>> -     if (unlikely(fw))
>>> -             intel_uncore_forcewake_put(i915, execlists->fw_domains);
>>> +     writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
>>> +            execlists->csb_read);
>>> +     execlists->csb_head = head;
>>
>> I guess early return helps to avoid this, but since it is going away in
>> a following patch maybe it is not worth it.
> 
> lfence!
> -Chris
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev2)
  2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
                   ` (11 preceding siblings ...)
  2018-06-28 10:41 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev2) Patchwork
@ 2018-06-28 11:00 ` Patchwork
  2018-06-28 11:20 ` ✗ Fi.CI.BAT: failure for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev3) Patchwork
  2018-06-28 12:09 ` ✗ Fi.CI.BAT: failure for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev4) Patchwork
  14 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2018-06-28 11:00 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev2)
URL   : https://patchwork.freedesktop.org/series/45531/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4396 -> Patchwork_9463 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/45531/revisions/2/mbox/

== Known issues ==

  Here are the changes found in Patchwork_9463 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@kms_flip@basic-flip-vs-wf_vblank:
      fi-glk-dsi:         PASS -> FAIL (fdo#100368)

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
      fi-ivb-3520m:       PASS -> FAIL (fdo#103375) +2

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#103375 https://bugs.freedesktop.org/show_bug.cgi?id=103375


== Participating hosts (43 -> 39) ==

  Missing    (4): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-hsw-4200u 


== Build changes ==

    * Linux: CI_DRM_4396 -> Patchwork_9463

  CI_DRM_4396: dd5f49f9686f412baa426502d417ac74a37fc77e @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4530: 0e98bf69f146eb72fe3a7c3b19a049b5786f0ca3 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9463: 81f84fa1a3932d0f1b36cb10d41fc8a9e6836241 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

81f84fa1a393 drm/i915/execlists: Direct submission of new requests (avoid tasklet/ksoftirqd)
657a10b942d4 drm/i915/execlists: Trust the CSB
cd3ce098db8d drm/i915/execlists: Stop storing the CSB read pointer in the mmio register
a7894f5a9301 drm/i915/execlists: Reset CSB write pointer after reset
c4c5eb343fb5 drm/i915/execlists: Unify CSB access pointers
92843704c6d6 drm/i915/execlists: Process one CSB update at a time
eede2b30ffb9 drm/i915/execlists: Pull CSB reset under the timeline.lock
a029fbce709f drm/i915/execlists: Pull submit after dequeue under timeline lock
6d78246fb880 drm/i915: Drop posting reads to flush master interrupts

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9463/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 6/9] drm/i915/execlists: Reset CSB write pointer after reset
  2018-06-28 10:17     ` Chris Wilson
@ 2018-06-28 11:02       ` Tvrtko Ursulin
  2018-06-28 11:30         ` Chris Wilson
  0 siblings, 1 reply; 34+ messages in thread
From: Tvrtko Ursulin @ 2018-06-28 11:02 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 28/06/2018 11:17, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-28 11:06:25)
>>
>> On 27/06/2018 22:07, Chris Wilson wrote:
>>> On HW reset, the HW clears the write pointer (to 0). But since it also
>>> writes its first CSB entry to slot 0, we need to reset the write pointer
>>> back to the element before (so the first entry we read is 0).
>>>
>>> This is required for the next patch, where we trust the CSB completely!
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/intel_lrc.c | 19 +++++++++++++++++--
>>>    1 file changed, 17 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 368a8c74d11d..8b111a268697 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -884,6 +884,21 @@ static void reset_irq(struct intel_engine_cs *engine)
>>>        clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>>>    }
>>>    
>>> +static void reset_csb_pointers(struct intel_engine_execlists *execlists)
>>> +{
>>> +     /*
>>> +      * After a reset, the HW starts writing into CSB entry [0]. We
>>> +      * therefore have to set our HEAD pointer back one entry so that
>>> +      * the *first* entry we check is entry 0. To complicate this further,
>>> +      * as we don't wait for the first interrupt after reset, we have to
>>> +      * fake the HW write to point back to the last entry so that our
>>> +      * inline comparison of our cached head position against the last HW
>>> +      * write works even before the first interrupt.
>>> +      */
>>> +     execlists->csb_head = GEN8_CSB_ENTRIES - 1;
>>> +     WRITE_ONCE(*execlists->csb_write, (GEN8_CSB_ENTRIES - 1) | 0xff << 16);
>>
>> Use _MASKED_FIELD and GEN8_CSB_WRITE_PTR_MASK?
> 
> Hah, there goes my attempt to kill off unused magic.

At least _MASKED_FIELD makes it clearer.

But the u8 trick is still evil since here you even explicitly do a fake 
masked write on hwsp. Ugly and evil. How about storing 
execlists->csb_write_default at init time and applying it here?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers
  2018-06-28 10:58       ` Tvrtko Ursulin
@ 2018-06-28 11:05         ` Chris Wilson
  0 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2018-06-28 11:05 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-06-28 11:58:26)
> On 28/06/2018 11:10, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-06-28 11:02:17)
> >>
> >> On 27/06/2018 22:07, Chris Wilson wrote:
> >>> +     GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
> >>> +     if (unlikely(head == tail))
> >>> +             return;
> >>>    
> >>> -             head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> >>> -             tail = GEN8_CSB_WRITE_PTR(head);
> >>> -             head = GEN8_CSB_READ_PTR(head);
> >>> -             execlists->csb_head = head;
> >>> -     } else {
> >>> -             const int write_idx =
> >>> -                     intel_hws_csb_write_index(i915) -
> >>> -                     I915_HWS_CSB_BUF0_INDEX;
> >>> +     rmb(); /* Hopefully paired with a wmb() in HW */
> >>>    
> >>> -             head = execlists->csb_head;
> >>> -             tail = READ_ONCE(buf[write_idx]);
> >>> -             rmb(); /* Hopefully paired with a wmb() in HW */
> >>> -     }
> >>> -     GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> >>> -               engine->name,
> >>> -               head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> >>> -               tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> >>> -
> >>> -     while (head != tail) {
> >>> +     do {
> >>
> >> Why convert while to do-while? Early unlikely return above handles the
> >> void process_csb calls? Would the same effect happen if you put unlikely
> >> in the while (head != tail) condition and would be simpler?
> > 
> > The earlier return to circumvent the lfence.
> 
> Oh that one.. so why it is safe to compare head and tail before the rmb 
> since we need the rmb afterwards?

There's a direct data dependency in the control flow of using the
volatile read from HWSP, but later on there is no data dependencies
between the write pointer we've "already" used and the rest of the CSB[]
we want to pull from HWSP. So while it seems unlikely, without that
lfence those later reads could be performed before the test (but the
test itself can't be performed before the read!). And sadly we can't get
away with our older read_barrier(), which for a long time I thought was
sufficient to order the read of the write pointer before the reads of
CSB[].
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v2] drm/i915/execlists: Unify CSB access pointers
  2018-06-27 21:07 ` [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers Chris Wilson
  2018-06-28 10:02   ` Tvrtko Ursulin
@ 2018-06-28 11:13   ` Chris Wilson
  2018-06-28 11:21     ` Tvrtko Ursulin
  1 sibling, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2018-06-28 11:13 UTC (permalink / raw)
  To: intel-gfx

Following the removal of the last workarounds, the only CSB mmio access
is for the old vGPU interface. The mmio registers presented by vGPU do
not require forcewake and can be treated as ordinary volatile memory,
i.e. they behave just like the HWSP access just at a different location.
We can reduce the CSB access to a set of read/write/buffer pointers and
treat the various paths identically and not worry about forcewake.
(Forcewake is nightmare for worstcase latency, and we want to process
this all with irqsoff -- no latency allowed!)

v2: Comments, comments, comments. Well, 2 bonus comments.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_engine_cs.c  |  12 ---
 drivers/gpu/drm/i915/intel_lrc.c        | 133 ++++++++++++------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  23 ++--
 3 files changed, 82 insertions(+), 86 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index d3264bd6e9dc..7209c22798e6 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -25,7 +25,6 @@
 #include <drm/drm_print.h>
 
 #include "i915_drv.h"
-#include "i915_vgpu.h"
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
 
@@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
 	i915_gem_batch_pool_init(&engine->batch_pool, engine);
 }
 
-static bool csb_force_mmio(struct drm_i915_private *i915)
-{
-	/* Older GVT emulation depends upon intercepting CSB mmio */
-	if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
-		return true;
-
-	return false;
-}
-
 static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 
-	execlists->csb_use_mmio = csb_force_mmio(engine->i915);
-
 	execlists->port_mask = 1;
 	BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 91656eb2f2db..8531a5b6f6ff 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -137,6 +137,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
+#include "i915_vgpu.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_workarounds.h"
@@ -953,44 +954,40 @@ static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
-	struct drm_i915_private *i915 = engine->i915;
-
-	/* The HWSP contains a (cacheable) mirror of the CSB */
-	const u32 *buf =
-		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
-	unsigned int head, tail;
-	bool fw = false;
+	const u32 * const buf = execlists->csb_status;
+	u8 head, tail;
 
 	/* Clear before reading to catch new interrupts */
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 	smp_mb__after_atomic();
 
-	if (unlikely(execlists->csb_use_mmio)) {
-		intel_uncore_forcewake_get(i915, execlists->fw_domains);
-		fw = true;
-
-		buf = (u32 * __force)
-			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
-
-		head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-		tail = GEN8_CSB_WRITE_PTR(head);
-		head = GEN8_CSB_READ_PTR(head);
-		execlists->csb_head = head;
-	} else {
-		const int write_idx =
-			intel_hws_csb_write_index(i915) -
-			I915_HWS_CSB_BUF0_INDEX;
+	/*
+	 * Note that csb_write, csb_status may be either in HWSP or mmio.
+	 * When reading from the csb_write mmio register, we have to be
+	 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
+	 * the low 4bits. As it happens we know the next 4bits are always
+	 * zero and so we can simply masked off the low u8 of the register
+	 * and treat it identically to reading from the HWSP (without having
+	 * to use explicit shifting and masking, and probably bifurcating
+	 * the code to handle the legacy mmio read).
+	 */
+	head = execlists->csb_head;
+	tail = READ_ONCE(*execlists->csb_write);
+	GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
+	if (unlikely(head == tail))
+		return;
 
-		head = execlists->csb_head;
-		tail = READ_ONCE(buf[write_idx]);
-		rmb(); /* Hopefully paired with a wmb() in HW */
-	}
-	GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
-		  engine->name,
-		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
-		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+	/*
+	 * Hopefully paired with a wmb() in HW!
+	 *
+	 * We must complete the read of the write pointer before any reads
+	 * from the CSB, so that we do not see stale values. Without an rmb
+	 * (lfence) the HW may speculatively perform the CSB[] reads *before*
+	 * we perform the READ_ONCE(*csb_write).
+	 */
+	rmb();
 
-	while (head != tail) {
+	do {
 		struct i915_request *rq;
 		unsigned int status;
 		unsigned int count;
@@ -1016,12 +1013,12 @@ static void process_csb(struct intel_engine_cs *engine)
 		 * status notifier.
 		 */
 
-		status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
 		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
 			  engine->name, head,
-			  status, buf[2*head + 1],
+			  buf[2 * head + 0], buf[2 * head + 1],
 			  execlists->active);
 
+		status = buf[2 * head];
 		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
 			      GEN8_CTX_STATUS_PREEMPTED))
 			execlists_set_active(execlists,
@@ -1103,16 +1100,11 @@ static void process_csb(struct intel_engine_cs *engine)
 		} else {
 			port_set(port, port_pack(rq, count));
 		}
-	}
-
-	if (head != execlists->csb_head) {
-		execlists->csb_head = head;
-		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
-		       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-	}
+	} while (head != tail);
 
-	if (unlikely(fw))
-		intel_uncore_forcewake_put(i915, execlists->fw_domains);
+	writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+	       execlists->csb_read);
+	execlists->csb_head = head;
 }
 
 /*
@@ -2430,28 +2422,11 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 static void
 logical_ring_setup(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-	enum forcewake_domains fw_domains;
-
 	intel_engine_setup_common(engine);
 
 	/* Intentionally left blank. */
 	engine->buffer = NULL;
 
-	fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
-						    RING_ELSP(engine),
-						    FW_REG_WRITE);
-
-	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
-						     RING_CONTEXT_STATUS_PTR(engine),
-						     FW_REG_READ | FW_REG_WRITE);
-
-	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
-						     RING_CONTEXT_STATUS_BUF_BASE(engine),
-						     FW_REG_READ);
-
-	engine->execlists.fw_domains = fw_domains;
-
 	tasklet_init(&engine->execlists.tasklet,
 		     execlists_submission_tasklet, (unsigned long)engine);
 
@@ -2459,34 +2434,56 @@ logical_ring_setup(struct intel_engine_cs *engine)
 	logical_ring_default_irqs(engine);
 }
 
+static bool csb_force_mmio(struct drm_i915_private *i915)
+{
+	/* Older GVT emulation depends upon intercepting CSB mmio */
+	return intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915);
+}
+
 static int logical_ring_init(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_engine_execlists * const execlists = &engine->execlists;
 	int ret;
 
 	ret = intel_engine_init_common(engine);
 	if (ret)
 		goto error;
 
-	if (HAS_LOGICAL_RING_ELSQ(engine->i915)) {
-		engine->execlists.submit_reg = engine->i915->regs +
+	if (HAS_LOGICAL_RING_ELSQ(i915)) {
+		execlists->submit_reg = i915->regs +
 			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine));
-		engine->execlists.ctrl_reg = engine->i915->regs +
+		execlists->ctrl_reg = i915->regs +
 			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine));
 	} else {
-		engine->execlists.submit_reg = engine->i915->regs +
+		execlists->submit_reg = i915->regs +
 			i915_mmio_reg_offset(RING_ELSP(engine));
 	}
 
-	engine->execlists.preempt_complete_status = ~0u;
-	if (engine->i915->preempt_context) {
+	execlists->preempt_complete_status = ~0u;
+	if (i915->preempt_context) {
 		struct intel_context *ce =
-			to_intel_context(engine->i915->preempt_context, engine);
+			to_intel_context(i915->preempt_context, engine);
 
-		engine->execlists.preempt_complete_status =
+		execlists->preempt_complete_status =
 			upper_32_bits(ce->lrc_desc);
 	}
 
-	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
+	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
+	execlists->csb_read =
+		i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
+	if (csb_force_mmio(i915)) {
+		execlists->csb_status = (u32 __force *)
+			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+
+		execlists->csb_write = (u32 __force *)execlists->csb_read;
+	} else {
+		execlists->csb_status =
+			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+
+		execlists->csb_write =
+			&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
+	}
 
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 78f01a35823a..970dbb3c9812 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -300,19 +300,30 @@ struct intel_engine_execlists {
 	struct rb_node *first;
 
 	/**
-	 * @fw_domains: forcewake domains for irq tasklet
+	 * @csb_head: context status buffer head
 	 */
-	unsigned int fw_domains;
+	unsigned int csb_head;
 
 	/**
-	 * @csb_head: context status buffer head
+	 * @csb_read: control register for Context Switch buffer
+	 *
+	 * Note this register is always in mmio.
 	 */
-	unsigned int csb_head;
+	u32 __iomem *csb_read;
 
 	/**
-	 * @csb_use_mmio: access csb through mmio, instead of hwsp
+	 * @csb_write: control register for Context Switch buffer
+	 *
+	 * Note this register may be either mmio or HWSP shadow.
+	 */
+	u32 *csb_write;
+
+	/**
+	 * @csb_status: status array for Context Switch buffer
+	 *
+	 * Note these register may be either mmio or HWSP shadow.
 	 */
-	bool csb_use_mmio;
+	u32 *csb_status;
 
 	/**
 	 * @preempt_complete_status: expected CSB upon completing preemption
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev3)
  2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
                   ` (12 preceding siblings ...)
  2018-06-28 11:00 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-06-28 11:20 ` Patchwork
  2018-06-28 12:09 ` ✗ Fi.CI.BAT: failure for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev4) Patchwork
  14 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2018-06-28 11:20 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev3)
URL   : https://patchwork.freedesktop.org/series/45531/
State : failure

== Summary ==

Applying: drm/i915: Drop posting reads to flush master interrupts
Applying: drm/i915/execlists: Pull submit after dequeue under timeline lock
Applying: drm/i915/execlists: Pull CSB reset under the timeline.lock
Applying: drm/i915/execlists: Process one CSB update at a time
Applying: drm/i915/execlists: Unify CSB access pointers
Applying: drm/i915/execlists: Reset CSB write pointer after reset
Applying: drm/i915/execlists: Stop storing the CSB read pointer in the mmio register
Applying: drm/i915/execlists: Trust the CSB
error: sha1 information is lacking or useless (drivers/gpu/drm/i915/i915_irq.c).
error: could not build fake ancestor
Patch failed at 0008 drm/i915/execlists: Trust the CSB
Use 'git am --show-current-patch' to see the failed patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v2] drm/i915/execlists: Unify CSB access pointers
  2018-06-28 11:13   ` [PATCH v2] " Chris Wilson
@ 2018-06-28 11:21     ` Tvrtko Ursulin
  0 siblings, 0 replies; 34+ messages in thread
From: Tvrtko Ursulin @ 2018-06-28 11:21 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 28/06/2018 12:13, Chris Wilson wrote:
> Following the removal of the last workarounds, the only CSB mmio access
> is for the old vGPU interface. The mmio registers presented by vGPU do
> not require forcewake and can be treated as ordinary volatile memory,
> i.e. they behave just like the HWSP access just at a different location.
> We can reduce the CSB access to a set of read/write/buffer pointers and
> treat the various paths identically and not worry about forcewake.
> (Forcewake is nightmare for worstcase latency, and we want to process
> this all with irqsoff -- no latency allowed!)
> 
> v2: Comments, comments, comments. Well, 2 bonus comments.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_engine_cs.c  |  12 ---
>   drivers/gpu/drm/i915/intel_lrc.c        | 133 ++++++++++++------------
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  23 ++--
>   3 files changed, 82 insertions(+), 86 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index d3264bd6e9dc..7209c22798e6 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -25,7 +25,6 @@
>   #include <drm/drm_print.h>
>   
>   #include "i915_drv.h"
> -#include "i915_vgpu.h"
>   #include "intel_ringbuffer.h"
>   #include "intel_lrc.h"
>   
> @@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
>   	i915_gem_batch_pool_init(&engine->batch_pool, engine);
>   }
>   
> -static bool csb_force_mmio(struct drm_i915_private *i915)
> -{
> -	/* Older GVT emulation depends upon intercepting CSB mmio */
> -	if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
> -		return true;
> -
> -	return false;
> -}
> -
>   static void intel_engine_init_execlist(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   
> -	execlists->csb_use_mmio = csb_force_mmio(engine->i915);
> -
>   	execlists->port_mask = 1;
>   	BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
>   	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 91656eb2f2db..8531a5b6f6ff 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -137,6 +137,7 @@
>   #include <drm/i915_drm.h>
>   #include "i915_drv.h"
>   #include "i915_gem_render_state.h"
> +#include "i915_vgpu.h"
>   #include "intel_lrc_reg.h"
>   #include "intel_mocs.h"
>   #include "intel_workarounds.h"
> @@ -953,44 +954,40 @@ static void process_csb(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct execlist_port *port = execlists->port;
> -	struct drm_i915_private *i915 = engine->i915;
> -
> -	/* The HWSP contains a (cacheable) mirror of the CSB */
> -	const u32 *buf =
> -		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> -	unsigned int head, tail;
> -	bool fw = false;
> +	const u32 * const buf = execlists->csb_status;
> +	u8 head, tail;
>   
>   	/* Clear before reading to catch new interrupts */
>   	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>   	smp_mb__after_atomic();
>   
> -	if (unlikely(execlists->csb_use_mmio)) {
> -		intel_uncore_forcewake_get(i915, execlists->fw_domains);
> -		fw = true;
> -
> -		buf = (u32 * __force)
> -			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> -
> -		head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> -		tail = GEN8_CSB_WRITE_PTR(head);
> -		head = GEN8_CSB_READ_PTR(head);
> -		execlists->csb_head = head;
> -	} else {
> -		const int write_idx =
> -			intel_hws_csb_write_index(i915) -
> -			I915_HWS_CSB_BUF0_INDEX;
> +	/*
> +	 * Note that csb_write, csb_status may be either in HWSP or mmio.
> +	 * When reading from the csb_write mmio register, we have to be
> +	 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
> +	 * the low 4bits. As it happens we know the next 4bits are always
> +	 * zero and so we can simply masked off the low u8 of the register
> +	 * and treat it identically to reading from the HWSP (without having
> +	 * to use explicit shifting and masking, and probably bifurcating
> +	 * the code to handle the legacy mmio read).
> +	 */
> +	head = execlists->csb_head;
> +	tail = READ_ONCE(*execlists->csb_write);
> +	GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
> +	if (unlikely(head == tail))
> +		return;
>   
> -		head = execlists->csb_head;
> -		tail = READ_ONCE(buf[write_idx]);
> -		rmb(); /* Hopefully paired with a wmb() in HW */
> -	}
> -	GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> -		  engine->name,
> -		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> -		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> +	/*
> +	 * Hopefully paired with a wmb() in HW!
> +	 *
> +	 * We must complete the read of the write pointer before any reads
> +	 * from the CSB, so that we do not see stale values. Without an rmb
> +	 * (lfence) the HW may speculatively perform the CSB[] reads *before*
> +	 * we perform the READ_ONCE(*csb_write).
> +	 */
> +	rmb();
>   
> -	while (head != tail) {
> +	do {
>   		struct i915_request *rq;
>   		unsigned int status;
>   		unsigned int count;
> @@ -1016,12 +1013,12 @@ static void process_csb(struct intel_engine_cs *engine)
>   		 * status notifier.
>   		 */
>   
> -		status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
>   		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
>   			  engine->name, head,
> -			  status, buf[2*head + 1],
> +			  buf[2 * head + 0], buf[2 * head + 1],
>   			  execlists->active);
>   
> +		status = buf[2 * head];
>   		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>   			      GEN8_CTX_STATUS_PREEMPTED))
>   			execlists_set_active(execlists,
> @@ -1103,16 +1100,11 @@ static void process_csb(struct intel_engine_cs *engine)
>   		} else {
>   			port_set(port, port_pack(rq, count));
>   		}
> -	}
> -
> -	if (head != execlists->csb_head) {
> -		execlists->csb_head = head;
> -		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> -		       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> -	}
> +	} while (head != tail);
>   
> -	if (unlikely(fw))
> -		intel_uncore_forcewake_put(i915, execlists->fw_domains);
> +	writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> +	       execlists->csb_read);
> +	execlists->csb_head = head;
>   }
>   
>   /*
> @@ -2430,28 +2422,11 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
>   static void
>   logical_ring_setup(struct intel_engine_cs *engine)
>   {
> -	struct drm_i915_private *dev_priv = engine->i915;
> -	enum forcewake_domains fw_domains;
> -
>   	intel_engine_setup_common(engine);
>   
>   	/* Intentionally left blank. */
>   	engine->buffer = NULL;
>   
> -	fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
> -						    RING_ELSP(engine),
> -						    FW_REG_WRITE);
> -
> -	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
> -						     RING_CONTEXT_STATUS_PTR(engine),
> -						     FW_REG_READ | FW_REG_WRITE);
> -
> -	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
> -						     RING_CONTEXT_STATUS_BUF_BASE(engine),
> -						     FW_REG_READ);
> -
> -	engine->execlists.fw_domains = fw_domains;
> -
>   	tasklet_init(&engine->execlists.tasklet,
>   		     execlists_submission_tasklet, (unsigned long)engine);
>   
> @@ -2459,34 +2434,56 @@ logical_ring_setup(struct intel_engine_cs *engine)
>   	logical_ring_default_irqs(engine);
>   }
>   
> +static bool csb_force_mmio(struct drm_i915_private *i915)
> +{
> +	/* Older GVT emulation depends upon intercepting CSB mmio */
> +	return intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915);
> +}
> +
>   static int logical_ring_init(struct intel_engine_cs *engine)
>   {
> +	struct drm_i915_private *i915 = engine->i915;
> +	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	int ret;
>   
>   	ret = intel_engine_init_common(engine);
>   	if (ret)
>   		goto error;
>   
> -	if (HAS_LOGICAL_RING_ELSQ(engine->i915)) {
> -		engine->execlists.submit_reg = engine->i915->regs +
> +	if (HAS_LOGICAL_RING_ELSQ(i915)) {
> +		execlists->submit_reg = i915->regs +
>   			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine));
> -		engine->execlists.ctrl_reg = engine->i915->regs +
> +		execlists->ctrl_reg = i915->regs +
>   			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine));
>   	} else {
> -		engine->execlists.submit_reg = engine->i915->regs +
> +		execlists->submit_reg = i915->regs +
>   			i915_mmio_reg_offset(RING_ELSP(engine));
>   	}
>   
> -	engine->execlists.preempt_complete_status = ~0u;
> -	if (engine->i915->preempt_context) {
> +	execlists->preempt_complete_status = ~0u;
> +	if (i915->preempt_context) {
>   		struct intel_context *ce =
> -			to_intel_context(engine->i915->preempt_context, engine);
> +			to_intel_context(i915->preempt_context, engine);
>   
> -		engine->execlists.preempt_complete_status =
> +		execlists->preempt_complete_status =
>   			upper_32_bits(ce->lrc_desc);
>   	}
>   
> -	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
> +	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
> +	execlists->csb_read =
> +		i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
> +	if (csb_force_mmio(i915)) {
> +		execlists->csb_status = (u32 __force *)
> +			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> +
> +		execlists->csb_write = (u32 __force *)execlists->csb_read;
> +	} else {
> +		execlists->csb_status =
> +			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> +
> +		execlists->csb_write =
> +			&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
> +	}
>   
>   	return 0;
>   
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 78f01a35823a..970dbb3c9812 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -300,19 +300,30 @@ struct intel_engine_execlists {
>   	struct rb_node *first;
>   
>   	/**
> -	 * @fw_domains: forcewake domains for irq tasklet
> +	 * @csb_head: context status buffer head
>   	 */
> -	unsigned int fw_domains;
> +	unsigned int csb_head;
>   
>   	/**
> -	 * @csb_head: context status buffer head
> +	 * @csb_read: control register for Context Switch buffer
> +	 *
> +	 * Note this register is always in mmio.
>   	 */
> -	unsigned int csb_head;
> +	u32 __iomem *csb_read;
>   
>   	/**
> -	 * @csb_use_mmio: access csb through mmio, instead of hwsp
> +	 * @csb_write: control register for Context Switch buffer
> +	 *
> +	 * Note this register may be either mmio or HWSP shadow.
> +	 */
> +	u32 *csb_write;
> +
> +	/**
> +	 * @csb_status: status array for Context Switch buffer
> +	 *
> +	 * Note these register may be either mmio or HWSP shadow.
>   	 */
> -	bool csb_use_mmio;
> +	u32 *csb_status;
>   
>   	/**
>   	 * @preempt_complete_status: expected CSB upon completing preemption
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 8/9] drm/i915/execlists: Trust the CSB
  2018-06-27 21:07 ` [PATCH 8/9] drm/i915/execlists: Trust the CSB Chris Wilson
@ 2018-06-28 11:29   ` Tvrtko Ursulin
  2018-06-28 12:03     ` Chris Wilson
  0 siblings, 1 reply; 34+ messages in thread
From: Tvrtko Ursulin @ 2018-06-28 11:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 27/06/2018 22:07, Chris Wilson wrote:
> Now that we use the CSB stored in the CPU friendly HWSP, we do not need
> to track interrupts for when the mmio CSB registers are valid and can
> just check where we read up to last from the cached HWSP. This means we
> can forgo the atomic bit tracking from interrupt, and in the next patch
> it means we can check the CSB at any time.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_irq.c         | 11 ++-----
>   drivers/gpu/drm/i915/intel_engine_cs.c  |  8 ++----
>   drivers/gpu/drm/i915/intel_lrc.c        | 38 ++++++-------------------
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
>   4 files changed, 14 insertions(+), 44 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 3702992f9f75..44fb11ca3cab 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1469,15 +1469,10 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
>   static void
>   gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
>   {
> -	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	bool tasklet = false;
>   
> -	if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
> -		if (READ_ONCE(engine->execlists.active)) {
> -			set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> -			tasklet = true;
> -		}
> -	}
> +	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
> +		tasklet = true;
>   
>   	if (iir & GT_RENDER_USER_INTERRUPT) {
>   		notify_ring(engine);
> @@ -1485,7 +1480,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
>   	}
>   
>   	if (tasklet)
> -		tasklet_hi_schedule(&execlists->tasklet);
> +		tasklet_hi_schedule(&engine->execlists.tasklet);
>   }
>   
>   static void gen8_gt_irq_ack(struct drm_i915_private *i915,
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 7209c22798e6..ace93958689e 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1353,12 +1353,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
>   		ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
>   		read = GEN8_CSB_READ_PTR(ptr);
>   		write = GEN8_CSB_WRITE_PTR(ptr);
> -		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n",
> +		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], tasklet queued? %s (%s)\n",
>   			   read, execlists->csb_head,
>   			   write,
>   			   intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
> -			   yesno(test_bit(ENGINE_IRQ_EXECLIST,
> -					  &engine->irq_posted)),
>   			   yesno(test_bit(TASKLET_STATE_SCHED,
>   					  &engine->execlists.tasklet.state)),
>   			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
> @@ -1570,11 +1568,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   	spin_unlock(&b->rb_lock);
>   	local_irq_restore(flags);
>   
> -	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
> +	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
>   		   engine->irq_posted,
>   		   yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
> -				  &engine->irq_posted)),
> -		   yesno(test_bit(ENGINE_IRQ_EXECLIST,
>   				  &engine->irq_posted)));
>   
>   	drm_printf(m, "HWSP:\n");
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index a6268103663f..87dd8ee117c8 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -874,14 +874,6 @@ static void reset_irq(struct intel_engine_cs *engine)
>   	smp_store_mb(engine->execlists.active, 0);
>   
>   	clear_gtiir(engine);
> -
> -	/*
> -	 * The port is checked prior to scheduling a tasklet, but
> -	 * just in case we have suspended the tasklet to do the
> -	 * wedging make sure that when it wakes, it decides there
> -	 * is no work to do by clearing the irq_posted bit.
> -	 */
> -	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>   }
>   
>   static void reset_csb_pointers(struct intel_engine_execlists *execlists)
> @@ -972,10 +964,6 @@ static void process_csb(struct intel_engine_cs *engine)
>   	const u32 * const buf = execlists->csb_status;
>   	u8 head, tail;
>   
> -	/* Clear before reading to catch new interrupts */
> -	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> -	smp_mb__after_atomic();
> -
>   	/* Note that csb_write, csb_status may be either in HWSP or mmio */
>   	head = execlists->csb_head;
>   	tail = READ_ONCE(*execlists->csb_write);
> @@ -1111,11 +1099,10 @@ static void execlists_submission_tasklet(unsigned long data)
>   {
>   	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>   
> -	GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n",
> +	GEM_TRACE("%s awake?=%d, active=%x\n",
>   		  engine->name,
>   		  engine->i915->gt.awake,
> -		  engine->execlists.active,
> -		  test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
> +		  engine->execlists.active);
>   
>   	/*
>   	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
> @@ -1127,14 +1114,7 @@ static void execlists_submission_tasklet(unsigned long data)
>   	 */
>   	GEM_BUG_ON(!engine->i915->gt.awake);
>   
> -	/*
> -	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
> -	 * imposing the cost of a locked atomic transaction when submitting a
> -	 * new request (outside of the context-switch interrupt).
> -	 */
> -	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> -		process_csb(engine);
> -
> +	process_csb(engine);
>   	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
>   		execlists_dequeue(engine);
>   }
> @@ -1881,6 +1861,7 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct i915_request *request, *active;
> +	unsigned long flags;
>   
>   	GEM_TRACE("%s\n", engine->name);
>   
> @@ -1902,8 +1883,9 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
>   	 * and avoid blaming an innocent request if the stall was due to the
>   	 * preemption itself.
>   	 */
> -	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> -		process_csb(engine);
> +	spin_lock_irqsave(&engine->timeline.lock, flags);
> +
> +	process_csb(engine);

I think taking the lock over process_csb belongs in the following patch.

>   
>   	/*
>   	 * The last active request can then be no later than the last request
> @@ -1913,15 +1895,12 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
>   	active = NULL;
>   	request = port_request(execlists->port);
>   	if (request) {
> -		unsigned long flags;
> -
>   		/*
>   		 * Prevent the breadcrumb from advancing before we decide
>   		 * which request is currently active.
>   		 */
>   		intel_engine_stop_cs(engine);
>   
> -		spin_lock_irqsave(&engine->timeline.lock, flags);
>   		list_for_each_entry_from_reverse(request,
>   						 &engine->timeline.requests,
>   						 link) {
> @@ -1931,9 +1910,10 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
>   
>   			active = request;
>   		}
> -		spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   	}
>   
> +	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +
>   	return active;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 5b92c5f03e1d..381c243bfc6f 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -359,7 +359,6 @@ struct intel_engine_cs {
>   	atomic_t irq_count;
>   	unsigned long irq_posted;
>   #define ENGINE_IRQ_BREADCRUMB 0
> -#define ENGINE_IRQ_EXECLIST 1
>   
>   	/* Rather than have every client wait upon all user interrupts,
>   	 * with the herd waking after every interrupt and each doing the
> 

Otherwise looks OK.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 6/9] drm/i915/execlists: Reset CSB write pointer after reset
  2018-06-28 11:02       ` Tvrtko Ursulin
@ 2018-06-28 11:30         ` Chris Wilson
  2018-06-28 11:37           ` Tvrtko Ursulin
  0 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2018-06-28 11:30 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-06-28 12:02:30)
> 
> On 28/06/2018 11:17, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-06-28 11:06:25)
> >>
> >> On 27/06/2018 22:07, Chris Wilson wrote:
> >>> On HW reset, the HW clears the write pointer (to 0). But since it also
> >>> writes its first CSB entry to slot 0, we need to reset the write pointer
> >>> back to the element before (so the first entry we read is 0).
> >>>
> >>> This is required for the next patch, where we trust the CSB completely!
> >>>
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>> ---
> >>>    drivers/gpu/drm/i915/intel_lrc.c | 19 +++++++++++++++++--
> >>>    1 file changed, 17 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >>> index 368a8c74d11d..8b111a268697 100644
> >>> --- a/drivers/gpu/drm/i915/intel_lrc.c
> >>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> >>> @@ -884,6 +884,21 @@ static void reset_irq(struct intel_engine_cs *engine)
> >>>        clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> >>>    }
> >>>    
> >>> +static void reset_csb_pointers(struct intel_engine_execlists *execlists)
> >>> +{
> >>> +     /*
> >>> +      * After a reset, the HW starts writing into CSB entry [0]. We
> >>> +      * therefore have to set our HEAD pointer back one entry so that
> >>> +      * the *first* entry we check is entry 0. To complicate this further,
> >>> +      * as we don't wait for the first interrupt after reset, we have to
> >>> +      * fake the HW write to point back to the last entry so that our
> >>> +      * inline comparison of our cached head position against the last HW
> >>> +      * write works even before the first interrupt.
> >>> +      */
> >>> +     execlists->csb_head = GEN8_CSB_ENTRIES - 1;
> >>> +     WRITE_ONCE(*execlists->csb_write, (GEN8_CSB_ENTRIES - 1) | 0xff << 16);
> >>
> >> Use _MASKED_FIELD and GEN8_CSB_WRITE_PTR_MASK?
> > 
> > Hah, there goes my attempt to kill off unused magic.
> 
> At least _MASKED_FIELD makes it clearer.
> 
> But the u8 trick is still evil since here you even explicitly do a fake 
> masked write on hwsp. Ugly and evil. How about storing 
> execlists->csb_write_default at init time and applying it here?

Honestly, I thought it made sense next to the READ_ONCE(*csb_write).

Could I just convince you with another comment pleading guilty to the
atrocities?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 6/9] drm/i915/execlists: Reset CSB write pointer after reset
  2018-06-28 11:30         ` Chris Wilson
@ 2018-06-28 11:37           ` Tvrtko Ursulin
  0 siblings, 0 replies; 34+ messages in thread
From: Tvrtko Ursulin @ 2018-06-28 11:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 28/06/2018 12:30, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-28 12:02:30)
>>
>> On 28/06/2018 11:17, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-06-28 11:06:25)
>>>>
>>>> On 27/06/2018 22:07, Chris Wilson wrote:
>>>>> On HW reset, the HW clears the write pointer (to 0). But since it also
>>>>> writes its first CSB entry to slot 0, we need to reset the write pointer
>>>>> back to the element before (so the first entry we read is 0).
>>>>>
>>>>> This is required for the next patch, where we trust the CSB completely!
>>>>>
>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> ---
>>>>>     drivers/gpu/drm/i915/intel_lrc.c | 19 +++++++++++++++++--
>>>>>     1 file changed, 17 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>>>> index 368a8c74d11d..8b111a268697 100644
>>>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>>>> @@ -884,6 +884,21 @@ static void reset_irq(struct intel_engine_cs *engine)
>>>>>         clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>>>>>     }
>>>>>     
>>>>> +static void reset_csb_pointers(struct intel_engine_execlists *execlists)
>>>>> +{
>>>>> +     /*
>>>>> +      * After a reset, the HW starts writing into CSB entry [0]. We
>>>>> +      * therefore have to set our HEAD pointer back one entry so that
>>>>> +      * the *first* entry we check is entry 0. To complicate this further,
>>>>> +      * as we don't wait for the first interrupt after reset, we have to
>>>>> +      * fake the HW write to point back to the last entry so that our
>>>>> +      * inline comparison of our cached head position against the last HW
>>>>> +      * write works even before the first interrupt.
>>>>> +      */
>>>>> +     execlists->csb_head = GEN8_CSB_ENTRIES - 1;
>>>>> +     WRITE_ONCE(*execlists->csb_write, (GEN8_CSB_ENTRIES - 1) | 0xff << 16);
>>>>
>>>> Use _MASKED_FIELD and GEN8_CSB_WRITE_PTR_MASK?
>>>
>>> Hah, there goes my attempt to kill off unused magic.
>>
>> At least _MASKED_FIELD makes it clearer.
>>
>> But the u8 trick is still evil since here you even explicitly do a fake
>> masked write on hwsp. Ugly and evil. How about storing
>> execlists->csb_write_default at init time and applying it here?
> 
> Honestly, I thought it made sense next to the READ_ONCE(*csb_write).
> 
> Could I just convince you with another comment pleading guilty to the
> atrocities?

I'd prefer we did not write random stuff into hwsp. Like if one day 
someone is debugging something, they could see either 0xff00?? or 0x?? 
in there depending on the timings. Then would have to waste time 
figuring out what's happening.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v3] drm/i915/execlists: Reset CSB write pointer after reset
  2018-06-27 21:07 ` [PATCH 6/9] drm/i915/execlists: Reset CSB write pointer after reset Chris Wilson
  2018-06-28 10:06   ` Tvrtko Ursulin
  2018-06-28 10:22   ` [PATCH v2] " Chris Wilson
@ 2018-06-28 11:59   ` Chris Wilson
  2018-06-28 12:15     ` Tvrtko Ursulin
  2 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2018-06-28 11:59 UTC (permalink / raw)
  To: intel-gfx

On HW reset, the HW clears the write pointer (to 0). But since it also
writes its first CSB entry to slot 0, we need to reset the write pointer
back to the element before (so the first entry we read is 0).

This is required for the next patch, where we trust the CSB completely!

v2: Use _MASKED_FIELD
v3: Store the reset value, so that we differentiate between mmio/hwsp
transparently and without pretense.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 23 +++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |  9 +++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8531a5b6f6ff..49bf5048043c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -884,6 +884,21 @@ static void reset_irq(struct intel_engine_cs *engine)
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 }
 
+static void reset_csb_pointers(struct intel_engine_execlists *execlists)
+{
+	/*
+	 * After a reset, the HW starts writing into CSB entry [0]. We
+	 * therefore have to set our HEAD pointer back one entry so that
+	 * the *first* entry we check is entry 0. To complicate this further,
+	 * as we don't wait for the first interrupt after reset, we have to
+	 * fake the HW write to point back to the last entry so that our
+	 * inline comparison of our cached head position against the last HW
+	 * write works even before the first interrupt.
+	 */
+	execlists->csb_head = execlists->csb_write_reset & 0xff;
+	WRITE_ONCE(*execlists->csb_write, execlists->csb_write_reset);
+}
+
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1970,7 +1985,7 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	__unwind_incomplete_requests(engine);
 
 	/* Following the reset, we need to reload the CSB read/write pointers */
-	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
+	reset_csb_pointers(&engine->execlists);
 
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 
@@ -2469,7 +2484,6 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 			upper_32_bits(ce->lrc_desc);
 	}
 
-	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
 	execlists->csb_read =
 		i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
 	if (csb_force_mmio(i915)) {
@@ -2477,13 +2491,18 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
 
 		execlists->csb_write = (u32 __force *)execlists->csb_read;
+		execlists->csb_write_reset =
+			_MASKED_FIELD(GEN8_CSB_WRITE_PTR_MASK,
+				      GEN8_CSB_ENTRIES - 1);
 	} else {
 		execlists->csb_status =
 			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
 
 		execlists->csb_write =
 			&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
+		execlists->csb_write_reset = GEN8_CSB_ENTRIES - 1;
 	}
+	reset_csb_pointers(execlists);
 
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 970dbb3c9812..6ec7c019b5d9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -304,6 +304,15 @@ struct intel_engine_execlists {
 	 */
 	unsigned int csb_head;
 
+	/**
+	 * @csb_write_reset: reset value for CSB write pointer
+	 *
+	 * As the CSB write pointer maybe either in HWSP or as a field
+	 * inside an mmio register, we want to reprogram it slightly
+	 * differently to avoid later confusion.
+	 */
+	u32 csb_write_reset;
+
 	/**
 	 * @csb_read: control register for Context Switch buffer
 	 *
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [PATCH 8/9] drm/i915/execlists: Trust the CSB
  2018-06-28 11:29   ` Tvrtko Ursulin
@ 2018-06-28 12:03     ` Chris Wilson
  0 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2018-06-28 12:03 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-06-28 12:29:41)
> 
> On 27/06/2018 22:07, Chris Wilson wrote:
> > @@ -1881,6 +1861,7 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
> >   {
> >       struct intel_engine_execlists * const execlists = &engine->execlists;
> >       struct i915_request *request, *active;
> > +     unsigned long flags;
> >   
> >       GEM_TRACE("%s\n", engine->name);
> >   
> > @@ -1902,8 +1883,9 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
> >        * and avoid blaming an innocent request if the stall was due to the
> >        * preemption itself.
> >        */
> > -     if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> > -             process_csb(engine);
> > +     spin_lock_irqsave(&engine->timeline.lock, flags);
> > +
> > +     process_csb(engine);
> 
> I think taking the lock over process_csb belongs in the following patch.

Splitter!
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev4)
  2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
                   ` (13 preceding siblings ...)
  2018-06-28 11:20 ` ✗ Fi.CI.BAT: failure for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev3) Patchwork
@ 2018-06-28 12:09 ` Patchwork
  14 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2018-06-28 12:09 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev4)
URL   : https://patchwork.freedesktop.org/series/45531/
State : failure

== Summary ==

Applying: drm/i915: Drop posting reads to flush master interrupts
Applying: drm/i915/execlists: Pull submit after dequeue under timeline lock
Applying: drm/i915/execlists: Pull CSB reset under the timeline.lock
Applying: drm/i915/execlists: Process one CSB update at a time
Applying: drm/i915/execlists: Unify CSB access pointers
Applying: drm/i915/execlists: Reset CSB write pointer after reset
Applying: drm/i915/execlists: Stop storing the CSB read pointer in the mmio register
Applying: drm/i915/execlists: Trust the CSB
error: sha1 information is lacking or useless (drivers/gpu/drm/i915/i915_irq.c).
error: could not build fake ancestor
Patch failed at 0008 drm/i915/execlists: Trust the CSB
Use 'git am --show-current-patch' to see the failed patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v3] drm/i915/execlists: Reset CSB write pointer after reset
  2018-06-28 11:59   ` [PATCH v3] " Chris Wilson
@ 2018-06-28 12:15     ` Tvrtko Ursulin
  2018-06-28 12:19       ` Chris Wilson
  0 siblings, 1 reply; 34+ messages in thread
From: Tvrtko Ursulin @ 2018-06-28 12:15 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 28/06/2018 12:59, Chris Wilson wrote:
> On HW reset, the HW clears the write pointer (to 0). But since it also
> writes its first CSB entry to slot 0, we need to reset the write pointer
> back to the element before (so the first entry we read is 0).
> 
> This is required for the next patch, where we trust the CSB completely!
> 
> v2: Use _MASKED_FIELD
> v3: Store the reset value, so that we differentiate between mmio/hwsp
> transparently and without pretense.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c        | 23 +++++++++++++++++++++--
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  9 +++++++++
>   2 files changed, 30 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 8531a5b6f6ff..49bf5048043c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -884,6 +884,21 @@ static void reset_irq(struct intel_engine_cs *engine)
>   	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>   }
>   
> +static void reset_csb_pointers(struct intel_engine_execlists *execlists)
> +{
> +	/*
> +	 * After a reset, the HW starts writing into CSB entry [0]. We
> +	 * therefore have to set our HEAD pointer back one entry so that
> +	 * the *first* entry we check is entry 0. To complicate this further,
> +	 * as we don't wait for the first interrupt after reset, we have to
> +	 * fake the HW write to point back to the last entry so that our
> +	 * inline comparison of our cached head position against the last HW
> +	 * write works even before the first interrupt.
> +	 */
> +	execlists->csb_head = execlists->csb_write_reset & 0xff;

Idea for avoiding the & 0xff and applying the trick throughout - make 
csb_head u8. :)

> +	WRITE_ONCE(*execlists->csb_write, execlists->csb_write_reset);
> +}
> +
>   static void execlists_cancel_requests(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -1970,7 +1985,7 @@ static void execlists_reset(struct intel_engine_cs *engine,
>   	__unwind_incomplete_requests(engine);
>   
>   	/* Following the reset, we need to reload the CSB read/write pointers */
> -	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
> +	reset_csb_pointers(&engine->execlists);
>   
>   	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   
> @@ -2469,7 +2484,6 @@ static int logical_ring_init(struct intel_engine_cs *engine)
>   			upper_32_bits(ce->lrc_desc);
>   	}
>   
> -	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
>   	execlists->csb_read =
>   		i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
>   	if (csb_force_mmio(i915)) {
> @@ -2477,13 +2491,18 @@ static int logical_ring_init(struct intel_engine_cs *engine)
>   			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
>   
>   		execlists->csb_write = (u32 __force *)execlists->csb_read;
> +		execlists->csb_write_reset =
> +			_MASKED_FIELD(GEN8_CSB_WRITE_PTR_MASK,
> +				      GEN8_CSB_ENTRIES - 1);
>   	} else {
>   		execlists->csb_status =
>   			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
>   
>   		execlists->csb_write =
>   			&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
> +		execlists->csb_write_reset = GEN8_CSB_ENTRIES - 1;
>   	}
> +	reset_csb_pointers(execlists);
>   
>   	return 0;
>   
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 970dbb3c9812..6ec7c019b5d9 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -304,6 +304,15 @@ struct intel_engine_execlists {
>   	 */
>   	unsigned int csb_head;
>   
> +	/**
> +	 * @csb_write_reset: reset value for CSB write pointer
> +	 *
> +	 * As the CSB write pointer maybe either in HWSP or as a field
> +	 * inside an mmio register, we want to reprogram it slightly
> +	 * differently to avoid later confusion.
> +	 */
> +	u32 csb_write_reset;
> +
>   	/**
>   	 * @csb_read: control register for Context Switch buffer
>   	 *
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v3] drm/i915/execlists: Reset CSB write pointer after reset
  2018-06-28 12:15     ` Tvrtko Ursulin
@ 2018-06-28 12:19       ` Chris Wilson
  0 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2018-06-28 12:19 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-06-28 13:15:07)
> 
> On 28/06/2018 12:59, Chris Wilson wrote:
> > On HW reset, the HW clears the write pointer (to 0). But since it also
> > writes its first CSB entry to slot 0, we need to reset the write pointer
> > back to the element before (so the first entry we read is 0).
> > 
> > This is required for the next patch, where we trust the CSB completely!
> > 
> > v2: Use _MASKED_FIELD
> > v3: Store the reset value, so that we differentiate between mmio/hwsp
> > transparently and without pretense.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/intel_lrc.c        | 23 +++++++++++++++++++++--
> >   drivers/gpu/drm/i915/intel_ringbuffer.h |  9 +++++++++
> >   2 files changed, 30 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index 8531a5b6f6ff..49bf5048043c 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -884,6 +884,21 @@ static void reset_irq(struct intel_engine_cs *engine)
> >       clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> >   }
> >   
> > +static void reset_csb_pointers(struct intel_engine_execlists *execlists)
> > +{
> > +     /*
> > +      * After a reset, the HW starts writing into CSB entry [0]. We
> > +      * therefore have to set our HEAD pointer back one entry so that
> > +      * the *first* entry we check is entry 0. To complicate this further,
> > +      * as we don't wait for the first interrupt after reset, we have to
> > +      * fake the HW write to point back to the last entry so that our
> > +      * inline comparison of our cached head position against the last HW
> > +      * write works even before the first interrupt.
> > +      */
> > +     execlists->csb_head = execlists->csb_write_reset & 0xff;
> 
> Idea for avoiding the & 0xff and applying the trick throughout - make 
> csb_head u8. :)

Just leaves intel_engine_execlists full of holes. It did close my mind.
They may just spontaneously rearrange themselves...
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2018-06-28 12:19 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
2018-06-27 21:07 ` [PATCH 2/9] drm/i915/execlists: Pull submit after dequeue under timeline lock Chris Wilson
2018-06-27 21:07 ` [PATCH 3/9] drm/i915/execlists: Pull CSB reset under the timeline.lock Chris Wilson
2018-06-27 21:07 ` [PATCH 4/9] drm/i915/execlists: Process one CSB update at a time Chris Wilson
2018-06-27 21:07 ` [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers Chris Wilson
2018-06-28 10:02   ` Tvrtko Ursulin
2018-06-28 10:10     ` Chris Wilson
2018-06-28 10:58       ` Tvrtko Ursulin
2018-06-28 11:05         ` Chris Wilson
2018-06-28 11:13   ` [PATCH v2] " Chris Wilson
2018-06-28 11:21     ` Tvrtko Ursulin
2018-06-27 21:07 ` [PATCH 6/9] drm/i915/execlists: Reset CSB write pointer after reset Chris Wilson
2018-06-28 10:06   ` Tvrtko Ursulin
2018-06-28 10:17     ` Chris Wilson
2018-06-28 11:02       ` Tvrtko Ursulin
2018-06-28 11:30         ` Chris Wilson
2018-06-28 11:37           ` Tvrtko Ursulin
2018-06-28 10:22   ` [PATCH v2] " Chris Wilson
2018-06-28 11:59   ` [PATCH v3] " Chris Wilson
2018-06-28 12:15     ` Tvrtko Ursulin
2018-06-28 12:19       ` Chris Wilson
2018-06-27 21:07 ` [PATCH 7/9] drm/i915/execlists: Stop storing the CSB read pointer in the mmio register Chris Wilson
2018-06-28 10:07   ` Tvrtko Ursulin
2018-06-27 21:07 ` [PATCH 8/9] drm/i915/execlists: Trust the CSB Chris Wilson
2018-06-28 11:29   ` Tvrtko Ursulin
2018-06-28 12:03     ` Chris Wilson
2018-06-27 21:07 ` [PATCH 9/9] drm/i915/execlists: Direct submission of new requests (avoid tasklet/ksoftirqd) Chris Wilson
2018-06-27 21:56 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts Patchwork
2018-06-27 22:13 ` ✓ Fi.CI.BAT: success " Patchwork
2018-06-28  3:46 ` ✓ Fi.CI.IGT: " Patchwork
2018-06-28 10:41 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev2) Patchwork
2018-06-28 11:00 ` ✓ Fi.CI.BAT: success " Patchwork
2018-06-28 11:20 ` ✗ Fi.CI.BAT: failure for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev3) Patchwork
2018-06-28 12:09 ` ✗ Fi.CI.BAT: failure for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev4) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.