All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] drm/i915: Remove redundant trailing request flush
@ 2018-12-28 17:16 Chris Wilson
  2018-12-28 17:16 ` [PATCH 2/6] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs Chris Wilson
                   ` (8 more replies)
  0 siblings, 9 replies; 22+ messages in thread
From: Chris Wilson @ 2018-12-28 17:16 UTC (permalink / raw)
  To: intel-gfx

Now that we perform the request flushing inline with emitting the
breadcrumb, we can remove the now redundant manual flush. And we can
also remove the infrastructure that remained only for its purpose.

v2: emit_breadcrumb_sz is in dwords, but rq->reserved_space is in bytes

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c          | 14 +++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.c      | 16 ----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h      | 10 ----------
 drivers/gpu/drm/i915/selftests/mock_engine.c |  2 --
 4 files changed, 7 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 6cedcfea33b5..ea4620f2ac9e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -521,10 +521,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 
 	reserve_gt(i915);
 
-	ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST);
-	if (ret)
-		goto err_unreserve;
-
 	/* Move our oldest request to the slab-cache (if not in use!) */
 	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
 	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
@@ -616,9 +612,13 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * i915_request_add() call can't fail. Note that the reserve may need
 	 * to be redone if the request is not actually submitted straight
 	 * away, e.g. because a GPU scheduler has deferred it.
+	 *
+	 * Note that due to how we add reserved_space to intel_ring_begin()
+	 * we need to double our request to ensure that if we need to wrap
+	 * around inside i915_request_add() there is sufficient space at
+	 * the beginning of the ring as well.
 	 */
-	rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
-	GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz);
+	rq->reserved_space = 2 * engine->emit_breadcrumb_sz * sizeof(u32);
 
 	/*
 	 * Record the position of the start of the request so that
@@ -860,8 +860,8 @@ void i915_request_add(struct i915_request *request)
 	 * should already have been reserved in the ring buffer. Let the ring
 	 * know that it is time to use that space up.
 	 */
+	GEM_BUG_ON(request->reserved_space > request->ring->space);
 	request->reserved_space = 0;
-	engine->emit_flush(request, EMIT_FLUSH);
 
 	/*
 	 * Record the position of the start of the breadcrumb so that
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index fc1e29305951..d773f7dd32a9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1904,22 +1904,6 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
 	return 0;
 }
 
-int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes)
-{
-	GEM_BUG_ON(bytes > ring->effective_size);
-	if (unlikely(bytes > ring->effective_size - ring->emit))
-		bytes += ring->size - ring->emit;
-
-	if (unlikely(bytes > ring->space)) {
-		int ret = wait_for_space(ring, bytes);
-		if (unlikely(ret))
-			return ret;
-	}
-
-	GEM_BUG_ON(ring->space < bytes);
-	return 0;
-}
-
 u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
 {
 	struct intel_ring *ring = rq->ring;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 32606d795af3..99e2cb75d29a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -754,7 +754,6 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
 
 int __must_check intel_ring_cacheline_align(struct i915_request *rq);
 
-int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
 u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
 
 static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
@@ -895,15 +894,6 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
 
-/*
- * Arbitrary size for largest possible 'add request' sequence. The code paths
- * are complex and variable. Empirical measurement shows that the worst case
- * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However,
- * we need to allocate double the largest single packet within that emission
- * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW).
- */
-#define MIN_SPACE_FOR_ADD_REQUEST 336
-
 static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 {
 	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index d0c44c18db42..50e1a0b1af7e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -148,8 +148,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 	const unsigned long sz = PAGE_SIZE / 2;
 	struct mock_ring *ring;
 
-	BUILD_BUG_ON(MIN_SPACE_FOR_ADD_REQUEST > sz);
-
 	ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
 	if (!ring)
 		return NULL;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/6] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs
  2018-12-28 17:16 [PATCH 1/6] drm/i915: Remove redundant trailing request flush Chris Wilson
@ 2018-12-28 17:16 ` Chris Wilson
  2018-12-31 10:28   ` Tvrtko Ursulin
  2018-12-28 17:16 ` [PATCH 3/6] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs Chris Wilson
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 22+ messages in thread
From: Chris Wilson @ 2018-12-28 17:16 UTC (permalink / raw)
  To: intel-gfx

Having transitioned to using PIPECONTROL to combine the flush with the
breadcrumb write using their post-sync functions, assume that this will
resolve the serialisation with the subsequent MI_USER_INTERRUPT. That is
when inspecting the breadcrumb after an interrupt we can rely on the write
being posted (i.e. the HWSP will be coherent).

Testing using gem_sync shows that the PIPECONTROL + CS stall does
serialise the command streamer sufficiently that the breadcrumb lands
before the MI_USER_INTERRUPT. The same is not true for MI_FLUSH_DW.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d773f7dd32a9..1b9264883a8d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2218,13 +2218,11 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
 		engine->emit_flush = gen7_render_ring_flush;
 		engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb;
 		engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz;
-		engine->irq_seqno_barrier = gen6_seqno_barrier;
 	} else if (IS_GEN(dev_priv, 6)) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->emit_flush = gen6_render_ring_flush;
 		engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb;
 		engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz;
-		engine->irq_seqno_barrier = gen6_seqno_barrier;
 	} else if (IS_GEN(dev_priv, 5)) {
 		engine->emit_flush = gen4_render_ring_flush;
 	} else {
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 3/6] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs
  2018-12-28 17:16 [PATCH 1/6] drm/i915: Remove redundant trailing request flush Chris Wilson
  2018-12-28 17:16 ` [PATCH 2/6] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs Chris Wilson
@ 2018-12-28 17:16 ` Chris Wilson
  2018-12-31 10:31   ` Tvrtko Ursulin
  2018-12-28 17:16 ` [PATCH 4/6] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7 Chris Wilson
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 22+ messages in thread
From: Chris Wilson @ 2018-12-28 17:16 UTC (permalink / raw)
  To: intel-gfx

The MI_FLUSH_DW does appear coherent with the following
MI_USER_INTERRUPT, but only on Sandybridge. Ivybridge requires a heavier
hammer, but on Sandybridge we can stop requiring the irq_seqno barrier.

Testcase: igt/gem_sync
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 1b9264883a8d..2fb3a364c390 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2260,7 +2260,8 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
 
 		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
 		engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
-		engine->irq_seqno_barrier = gen6_seqno_barrier;
+		if (!IS_GEN(dev_priv, 6))
+			engine->irq_seqno_barrier = gen6_seqno_barrier;
 	} else {
 		engine->emit_flush = bsd_ring_flush;
 		if (IS_GEN(dev_priv, 5))
@@ -2285,7 +2286,8 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
 
 	engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
 	engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
-	engine->irq_seqno_barrier = gen6_seqno_barrier;
+	if (!IS_GEN(dev_priv, 6))
+		engine->irq_seqno_barrier = gen6_seqno_barrier;
 
 	return intel_init_ring_buffer(engine);
 }
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 4/6] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7
  2018-12-28 17:16 [PATCH 1/6] drm/i915: Remove redundant trailing request flush Chris Wilson
  2018-12-28 17:16 ` [PATCH 2/6] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs Chris Wilson
  2018-12-28 17:16 ` [PATCH 3/6] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs Chris Wilson
@ 2018-12-28 17:16 ` Chris Wilson
  2018-12-31 10:43   ` Tvrtko Ursulin
  2018-12-28 17:16 ` [PATCH 5/6] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5 Chris Wilson
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 22+ messages in thread
From: Chris Wilson @ 2018-12-28 17:16 UTC (permalink / raw)
  To: intel-gfx

The irq_seqno_barrier is a tradeoff between doing work on every request
(on the GPU) and doing work after every interrupt (on the CPU). We
presume we have many more requests than interrupts! However, the current
w/a for Ivybridge is an implicit delay that currently fails sporadically
and consistently if we move the w/a into the irq handler itself. This
makes the CPU barrier untenable for upcoming interrupt handler changes
and so we need to replace it with a delay on the GPU before we send the
MI_USER_INTERRUPT. As it turns out that delay is 32x MI_STORE_DWORD_IMM,
or about 0.6us per request! Quite nasty, but the lesser of two evils
looking to the future.

Testcase: igt/gem_sync
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 80 ++++++++++++++-----------
 1 file changed, 44 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2fb3a364c390..dd996103d495 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -443,6 +443,34 @@ static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 }
 static const int gen6_xcs_emit_breadcrumb_sz = 4;
 
+#define GEN7_XCS_WA 32
+static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	int i;
+
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
+	*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
+	*cs++ = rq->global_seqno;
+
+	for (i = 0; i < GEN7_XCS_WA; i++) {
+		*cs++ = MI_STORE_DWORD_INDEX;
+		*cs++ = I915_GEM_HWS_INDEX_ADDR;
+		*cs++ = rq->global_seqno;
+	}
+
+	*cs++ = MI_FLUSH_DW;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+}
+static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;
+#undef GEN7_XCS_WA
+
 static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
 {
 	/*
@@ -874,31 +902,6 @@ gen5_seqno_barrier(struct intel_engine_cs *engine)
 	usleep_range(125, 250);
 }
 
-static void
-gen6_seqno_barrier(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	/* Workaround to force correct ordering between irq and seqno writes on
-	 * ivb (and maybe also on snb) by reading from a CS register (like
-	 * ACTHD) before reading the status page.
-	 *
-	 * Note that this effectively stalls the read by the time it takes to
-	 * do a memory transaction, which more or less ensures that the write
-	 * from the GPU has sufficient time to invalidate the CPU cacheline.
-	 * Alternatively we could delay the interrupt from the CS ring to give
-	 * the write time to land, but that would incur a delay after every
-	 * batch i.e. much more frequent than a delay when waiting for the
-	 * interrupt (with the same net latency).
-	 *
-	 * Also note that to prevent whole machine hangs on gen7, we have to
-	 * take the spinlock to guard against concurrent cacheline access.
-	 */
-	spin_lock_irq(&dev_priv->uncore.lock);
-	POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
-	spin_unlock_irq(&dev_priv->uncore.lock);
-}
-
 static void
 gen5_irq_enable(struct intel_engine_cs *engine)
 {
@@ -2258,10 +2261,13 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
 		engine->emit_flush = gen6_bsd_ring_flush;
 		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
 
-		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
-		engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
-		if (!IS_GEN(dev_priv, 6))
-			engine->irq_seqno_barrier = gen6_seqno_barrier;
+		if (IS_GEN(dev_priv, 6)) {
+			engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
+			engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
+		} else {
+			engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
+			engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
+		}
 	} else {
 		engine->emit_flush = bsd_ring_flush;
 		if (IS_GEN(dev_priv, 5))
@@ -2284,10 +2290,13 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
 	engine->emit_flush = gen6_ring_flush;
 	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
 
-	engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
-	engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
-	if (!IS_GEN(dev_priv, 6))
-		engine->irq_seqno_barrier = gen6_seqno_barrier;
+	if (IS_GEN(dev_priv, 6)) {
+		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
+		engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
+	} else {
+		engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
+		engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
+	}
 
 	return intel_init_ring_buffer(engine);
 }
@@ -2305,9 +2314,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
 	engine->irq_enable = hsw_vebox_irq_enable;
 	engine->irq_disable = hsw_vebox_irq_disable;
 
-	engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
-	engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
-	engine->irq_seqno_barrier = gen6_seqno_barrier;
+	engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
+	engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
 
 	return intel_init_ring_buffer(engine);
 }
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 5/6] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
  2018-12-28 17:16 [PATCH 1/6] drm/i915: Remove redundant trailing request flush Chris Wilson
                   ` (2 preceding siblings ...)
  2018-12-28 17:16 ` [PATCH 4/6] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7 Chris Wilson
@ 2018-12-28 17:16 ` Chris Wilson
  2018-12-31 10:49   ` Tvrtko Ursulin
  2018-12-28 17:16 ` [PATCH 6/6] drm/i915: Drop unused engine->irq_seqno_barrier w/a Chris Wilson
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 22+ messages in thread
From: Chris Wilson @ 2018-12-28 17:16 UTC (permalink / raw)
  To: intel-gfx

The irq_seqno_barrier is a tradeoff between doing work on every request
(on the GPU) and doing work after every interrupt (on the CPU). We
presume we have many more requests than interrupts! However, for
Ironlake, the workaround is a pretty hideous usleep() and so even though
it was found we need to repeat the MI_STORE_DWORD_IMM 8 times, doing so
is preferable to requiring a usleep!

The additional MI_STORE_DWORD_IMMs also have the side-effect of flushing
MI operations from userspace which are not caught by MI_FLUSH!

Testcase: igt/gem_sync
Testcase: igt/gem_exec_whisper
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 40 ++++++++++++++-----------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index dd996103d495..13ac01b67ead 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -881,26 +881,29 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
 }
-
 static const int i9xx_emit_breadcrumb_sz = 6;
 
-static void
-gen5_seqno_barrier(struct intel_engine_cs *engine)
+#define GEN5_WA_STORES 8 /* must be at least 1! */
+static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
-	/* MI_STORE are internally buffered by the GPU and not flushed
-	 * either by MI_FLUSH or SyncFlush or any other combination of
-	 * MI commands.
-	 *
-	 * "Only the submission of the store operation is guaranteed.
-	 * The write result will be complete (coherent) some time later
-	 * (this is practically a finite period but there is no guaranteed
-	 * latency)."
-	 *
-	 * Empirically, we observe that we need a delay of at least 75us to
-	 * be sure that the seqno write is visible by the CPU.
-	 */
-	usleep_range(125, 250);
+	int i;
+
+	*cs++ = MI_FLUSH;
+
+	BUILD_BUG_ON(GEN5_WA_STORES < 1);
+	for (i = 0; i < GEN5_WA_STORES; i++) {
+		*cs++ = MI_STORE_DWORD_INDEX;
+		*cs++ = I915_GEM_HWS_INDEX_ADDR;
+		*cs++ = rq->global_seqno;
+	}
+
+	*cs++ = MI_USER_INTERRUPT;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
 }
+static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2;
+#undef GEN5_WA_STORES
 
 static void
 gen5_irq_enable(struct intel_engine_cs *engine)
@@ -2148,7 +2151,6 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
 	} else if (INTEL_GEN(dev_priv) >= 5) {
 		engine->irq_enable = gen5_irq_enable;
 		engine->irq_disable = gen5_irq_disable;
-		engine->irq_seqno_barrier = gen5_seqno_barrier;
 	} else if (INTEL_GEN(dev_priv) >= 3) {
 		engine->irq_enable = i9xx_irq_enable;
 		engine->irq_disable = i9xx_irq_disable;
@@ -2191,6 +2193,10 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 
 	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
 	engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
+	if (IS_GEN(dev_priv, 5)) {
+		engine->emit_breadcrumb = gen5_emit_breadcrumb;
+		engine->emit_breadcrumb_sz = gen5_emit_breadcrumb_sz;
+	}
 
 	engine->set_default_submission = i9xx_set_default_submission;
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 6/6] drm/i915: Drop unused engine->irq_seqno_barrier w/a
  2018-12-28 17:16 [PATCH 1/6] drm/i915: Remove redundant trailing request flush Chris Wilson
                   ` (3 preceding siblings ...)
  2018-12-28 17:16 ` [PATCH 5/6] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5 Chris Wilson
@ 2018-12-28 17:16 ` Chris Wilson
  2018-12-31 11:35   ` Tvrtko Ursulin
  2018-12-28 17:48 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/6] drm/i915: Remove redundant trailing request flush Patchwork
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 22+ messages in thread
From: Chris Wilson @ 2018-12-28 17:16 UTC (permalink / raw)
  To: intel-gfx

Now that we have eliminated the CPU-side irq_seqno_barrier by moving the
delays on the GPU before emitting the MI_USER_INTERRUPT, we can remove
the engine->irq_seqno_barrier infrastructure. Though intentionally
slowing down the GPU is nasty, so is the code we can now remove!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h          | 84 ------------------------
 drivers/gpu/drm/i915/i915_gem.c          |  7 --
 drivers/gpu/drm/i915/i915_irq.c          |  7 --
 drivers/gpu/drm/i915/i915_request.c      |  8 +--
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 26 --------
 drivers/gpu/drm/i915/intel_engine_cs.c   |  6 --
 drivers/gpu/drm/i915/intel_hangcheck.c   | 10 ---
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  4 --
 drivers/gpu/drm/i915/intel_ringbuffer.h  | 10 ---
 9 files changed, 1 insertion(+), 161 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 287f06b9e95a..01a3d77fed41 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3586,90 +3586,6 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
 	}
 }
 
-static inline bool
-__i915_request_irq_complete(const struct i915_request *rq)
-{
-	struct intel_engine_cs *engine = rq->engine;
-	u32 seqno;
-
-	/* Note that the engine may have wrapped around the seqno, and
-	 * so our request->global_seqno will be ahead of the hardware,
-	 * even though it completed the request before wrapping. We catch
-	 * this by kicking all the waiters before resetting the seqno
-	 * in hardware, and also signal the fence.
-	 */
-	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
-		return true;
-
-	/* The request was dequeued before we were awoken. We check after
-	 * inspecting the hw to confirm that this was the same request
-	 * that generated the HWS update. The memory barriers within
-	 * the request execution are sufficient to ensure that a check
-	 * after reading the value from hw matches this request.
-	 */
-	seqno = i915_request_global_seqno(rq);
-	if (!seqno)
-		return false;
-
-	/* Before we do the heavier coherent read of the seqno,
-	 * check the value (hopefully) in the CPU cacheline.
-	 */
-	if (__i915_request_completed(rq, seqno))
-		return true;
-
-	/* Ensure our read of the seqno is coherent so that we
-	 * do not "miss an interrupt" (i.e. if this is the last
-	 * request and the seqno write from the GPU is not visible
-	 * by the time the interrupt fires, we will see that the
-	 * request is incomplete and go back to sleep awaiting
-	 * another interrupt that will never come.)
-	 *
-	 * Strictly, we only need to do this once after an interrupt,
-	 * but it is easier and safer to do it every time the waiter
-	 * is woken.
-	 */
-	if (engine->irq_seqno_barrier &&
-	    test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) {
-		struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-		/* The ordering of irq_posted versus applying the barrier
-		 * is crucial. The clearing of the current irq_posted must
-		 * be visible before we perform the barrier operation,
-		 * such that if a subsequent interrupt arrives, irq_posted
-		 * is reasserted and our task rewoken (which causes us to
-		 * do another __i915_request_irq_complete() immediately
-		 * and reapply the barrier). Conversely, if the clear
-		 * occurs after the barrier, then an interrupt that arrived
-		 * whilst we waited on the barrier would not trigger a
-		 * barrier on the next pass, and the read may not see the
-		 * seqno update.
-		 */
-		engine->irq_seqno_barrier(engine);
-
-		/* If we consume the irq, but we are no longer the bottom-half,
-		 * the real bottom-half may not have serialised their own
-		 * seqno check with the irq-barrier (i.e. may have inspected
-		 * the seqno before we believe it coherent since they see
-		 * irq_posted == false but we are still running).
-		 */
-		spin_lock_irq(&b->irq_lock);
-		if (b->irq_wait && b->irq_wait->tsk != current)
-			/* Note that if the bottom-half is changed as we
-			 * are sending the wake-up, the new bottom-half will
-			 * be woken by whomever made the change. We only have
-			 * to worry about when we steal the irq-posted for
-			 * ourself.
-			 */
-			wake_up_process(b->irq_wait->tsk);
-		spin_unlock_irq(&b->irq_lock);
-
-		if (__i915_request_completed(rq, seqno))
-			return true;
-	}
-
-	return false;
-}
-
 void i915_memcpy_init_early(struct drm_i915_private *dev_priv);
 bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f4254e012620..7ea87c7a0e8c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3230,13 +3230,6 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine,
 			   struct i915_request *request,
 			   bool stalled)
 {
-	/*
-	 * Make sure this write is visible before we re-enable the interrupt
-	 * handlers on another CPU, as tasklet_enable() resolves to just
-	 * a compiler barrier which is insufficient for our purpose here.
-	 */
-	smp_store_mb(engine->irq_posted, 0);
-
 	if (request)
 		request = i915_gem_reset_request(engine, request, stalled);
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 0c7fc9890891..fbb094ecf6c9 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1189,13 +1189,6 @@ static void notify_ring(struct intel_engine_cs *engine)
 				rq = i915_request_get(waiter);
 
 			tsk = wait->tsk;
-		} else {
-			if (engine->irq_seqno_barrier &&
-			    i915_seqno_passed(seqno, wait->seqno - 1)) {
-				set_bit(ENGINE_IRQ_BREADCRUMB,
-					&engine->irq_posted);
-				tsk = wait->tsk;
-			}
 		}
 
 		engine->breadcrumbs.irq_count++;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index ea4620f2ac9e..1e158eb8cb97 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1179,13 +1179,7 @@ long i915_request_wait(struct i915_request *rq,
 		set_current_state(state);
 
 wakeup:
-		/*
-		 * Carefully check if the request is complete, giving time
-		 * for the seqno to be visible following the interrupt.
-		 * We also have to check in case we are kicked by the GPU
-		 * reset in order to drop the struct_mutex.
-		 */
-		if (__i915_request_irq_complete(rq))
+		if (i915_request_completed(rq))
 			break;
 
 		/*
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 447c5256f63a..4ed7105d7ff5 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -166,12 +166,6 @@ static void irq_enable(struct intel_engine_cs *engine)
 	 */
 	GEM_BUG_ON(!intel_irqs_enabled(engine->i915));
 
-	/* Enabling the IRQ may miss the generation of the interrupt, but
-	 * we still need to force the barrier before reading the seqno,
-	 * just in case.
-	 */
-	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
-
 	/* Caller disables interrupts */
 	if (engine->irq_enable) {
 		spin_lock(&engine->i915->irq_lock);
@@ -683,16 +677,6 @@ static int intel_breadcrumbs_signaler(void *arg)
 		}
 
 		if (unlikely(do_schedule)) {
-			/* Before we sleep, check for a missed seqno */
-			if (current->state & TASK_NORMAL &&
-			    !list_empty(&b->signals) &&
-			    engine->irq_seqno_barrier &&
-			    test_and_clear_bit(ENGINE_IRQ_BREADCRUMB,
-					       &engine->irq_posted)) {
-				engine->irq_seqno_barrier(engine);
-				intel_engine_wakeup(engine);
-			}
-
 sleep:
 			if (kthread_should_park())
 				kthread_parkme();
@@ -859,16 +843,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
 	else
 		irq_disable(engine);
 
-	/*
-	 * We set the IRQ_BREADCRUMB bit when we enable the irq presuming the
-	 * GPU is active and may have already executed the MI_USER_INTERRUPT
-	 * before the CPU is ready to receive. However, the engine is currently
-	 * idle (we haven't started it yet), there is no possibility for a
-	 * missed interrupt as we enabled the irq and so we can clear the
-	 * immediate wakeup (until a real interrupt arrives for the waiter).
-	 */
-	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
-
 	spin_unlock_irqrestore(&b->irq_lock, flags);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 1462bb49f420..189a934a63e9 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -457,7 +457,6 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
 void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 {
 	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
-	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
 	/* After manually advancing the seqno, fake the interrupt in case
 	 * there are any waiters for that seqno.
@@ -1536,11 +1535,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	spin_unlock(&b->rb_lock);
 	local_irq_restore(flags);
 
-	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
-		   engine->irq_posted,
-		   yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
-				  &engine->irq_posted)));
-
 	drm_printf(m, "HWSP:\n");
 	hexdump(m, engine->status_page.page_addr, PAGE_SIZE);
 
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index c3f929f59424..51e9efec5116 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -120,16 +120,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 static void hangcheck_load_sample(struct intel_engine_cs *engine,
 				  struct intel_engine_hangcheck *hc)
 {
-	/* We don't strictly need an irq-barrier here, as we are not
-	 * serving an interrupt request, be paranoid in case the
-	 * barrier has side-effects (such as preventing a broken
-	 * cacheline snoop) and so be sure that we can see the seqno
-	 * advance. If the seqno should stick, due to a stale
-	 * cacheline, we would erroneously declare the GPU hung.
-	 */
-	if (engine->irq_seqno_barrier)
-		engine->irq_seqno_barrier(engine);
-
 	hc->acthd = intel_engine_get_active_head(engine);
 	hc->seqno = intel_engine_get_seqno(engine);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 13ac01b67ead..f8d3090ed193 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -711,10 +711,6 @@ static int init_ring_common(struct intel_engine_cs *engine)
 static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
 {
 	intel_engine_stop_cs(engine);
-
-	if (engine->irq_seqno_barrier)
-		engine->irq_seqno_barrier(engine);
-
 	return i915_gem_find_active_request(engine);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 99e2cb75d29a..91ef00d34e91 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -365,9 +365,6 @@ struct intel_engine_cs {
 	struct drm_i915_gem_object *default_state;
 	void *pinned_default_state;
 
-	unsigned long irq_posted;
-#define ENGINE_IRQ_BREADCRUMB 0
-
 	/* Rather than have every client wait upon all user interrupts,
 	 * with the herd waking after every interrupt and each doing the
 	 * heavyweight seqno dance, we delegate the task (of being the
@@ -501,13 +498,6 @@ struct intel_engine_cs {
 	 */
 	void		(*cancel_requests)(struct intel_engine_cs *engine);
 
-	/* Some chipsets are not quite as coherent as advertised and need
-	 * an expensive kick to force a true read of the up-to-date seqno.
-	 * However, the up-to-date seqno is not always required and the last
-	 * seen value is good enough. Note that the seqno will always be
-	 * monotonic, even if not coherent.
-	 */
-	void		(*irq_seqno_barrier)(struct intel_engine_cs *engine);
 	void		(*cleanup)(struct intel_engine_cs *engine);
 
 	struct intel_engine_execlists execlists;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/6] drm/i915: Remove redundant trailing request flush
  2018-12-28 17:16 [PATCH 1/6] drm/i915: Remove redundant trailing request flush Chris Wilson
                   ` (4 preceding siblings ...)
  2018-12-28 17:16 ` [PATCH 6/6] drm/i915: Drop unused engine->irq_seqno_barrier w/a Chris Wilson
@ 2018-12-28 17:48 ` Patchwork
  2018-12-28 18:18 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 22+ messages in thread
From: Patchwork @ 2018-12-28 17:48 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/6] drm/i915: Remove redundant trailing request flush
URL   : https://patchwork.freedesktop.org/series/54529/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
899ae50fee37 drm/i915: Remove redundant trailing request flush
3a98ba1f5685 drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs
ec3ad64760c5 drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs
f9c7e2696e1d drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7
-:54: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#54: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:471:
+}
+static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;

total: 0 errors, 0 warnings, 1 checks, 110 lines checked
61b16bd6633f drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
-:66: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#66: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:905:
 }
+static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2;

total: 0 errors, 0 warnings, 1 checks, 62 lines checked
64c715390115 drm/i915: Drop unused engine->irq_seqno_barrier w/a

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/6] drm/i915: Remove redundant trailing request flush
  2018-12-28 17:16 [PATCH 1/6] drm/i915: Remove redundant trailing request flush Chris Wilson
                   ` (5 preceding siblings ...)
  2018-12-28 17:48 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/6] drm/i915: Remove redundant trailing request flush Patchwork
@ 2018-12-28 18:18 ` Patchwork
  2018-12-31 15:38   ` Chris Wilson
  2018-12-28 22:00 ` ✓ Fi.CI.IGT: " Patchwork
  2018-12-31 10:25 ` [PATCH 1/6] " Tvrtko Ursulin
  8 siblings, 1 reply; 22+ messages in thread
From: Patchwork @ 2018-12-28 18:18 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/6] drm/i915: Remove redundant trailing request flush
URL   : https://patchwork.freedesktop.org/series/54529/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5349 -> Patchwork_11167
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/54529/revisions/1/mbox/

Known issues
------------

  Here are the changes found in Patchwork_11167 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_module_load@reload:
    - fi-blb-e6850:       PASS -> INCOMPLETE [fdo#107718]

  * igt@kms_flip@basic-flip-vs-modeset:
    - fi-skl-6700hq:      PASS -> DMESG-WARN [fdo#105998] +1

  * {igt@runner@aborted}:
    - fi-icl-y:           NOTRUN -> FAIL [fdo#108915]

  
#### Possible fixes ####

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7500u:       FAIL [fdo#108767] -> PASS

  * igt@prime_vgem@basic-fence-flip:
    - fi-kbl-7567u:       FAIL [fdo#104008] -> PASS

  
#### Warnings ####

  * igt@i915_selftest@live_contexts:
    - fi-icl-u3:          INCOMPLETE [fdo#108315] -> DMESG-FAIL [fdo#108569]

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#104008]: https://bugs.freedesktop.org/show_bug.cgi?id=104008
  [fdo#105998]: https://bugs.freedesktop.org/show_bug.cgi?id=105998
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#108315]: https://bugs.freedesktop.org/show_bug.cgi?id=108315
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#108767]: https://bugs.freedesktop.org/show_bug.cgi?id=108767
  [fdo#108915]: https://bugs.freedesktop.org/show_bug.cgi?id=108915


Participating hosts (46 -> 40)
------------------------------

  Additional (1): fi-icl-y 
  Missing    (7): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-bdw-samus 


Build changes
-------------

    * Linux: CI_DRM_5349 -> Patchwork_11167

  CI_DRM_5349: 28f592191f34b55d93b2e8e94f2815be0f6c7598 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4754: a176905d46d072300ba57f29ac2b98a0228e0e2d @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_11167: 64c715390115f12628672ee9427044f9308623ee @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

64c715390115 drm/i915: Drop unused engine->irq_seqno_barrier w/a
61b16bd6633f drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
f9c7e2696e1d drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7
ec3ad64760c5 drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs
3a98ba1f5685 drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs
899ae50fee37 drm/i915: Remove redundant trailing request flush

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_11167/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* ✓ Fi.CI.IGT: success for series starting with [1/6] drm/i915: Remove redundant trailing request flush
  2018-12-28 17:16 [PATCH 1/6] drm/i915: Remove redundant trailing request flush Chris Wilson
                   ` (6 preceding siblings ...)
  2018-12-28 18:18 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-12-28 22:00 ` Patchwork
  2018-12-31 10:25 ` [PATCH 1/6] " Tvrtko Ursulin
  8 siblings, 0 replies; 22+ messages in thread
From: Patchwork @ 2018-12-28 22:00 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/6] drm/i915: Remove redundant trailing request flush
URL   : https://patchwork.freedesktop.org/series/54529/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5349_full -> Patchwork_11167_full
====================================================

Summary
-------

  **WARNING**

  Minor unknown changes coming with Patchwork_11167_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_11167_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_11167_full:

### IGT changes ###

#### Warnings ####

  * igt@tools_test@tools_test:
    - shard-glk:          PASS -> SKIP

  
Known issues
------------

  Here are the changes found in Patchwork_11167_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_userptr_blits@readonly-unsync:
    - shard-glk:          PASS -> TIMEOUT [fdo#108887]

  * igt@kms_cursor_crc@cursor-128x128-offscreen:
    - shard-skl:          PASS -> FAIL [fdo#103232] +1

  * igt@kms_cursor_crc@cursor-256x256-suspend:
    - shard-apl:          PASS -> FAIL [fdo#103191] / [fdo#103232]

  * igt@kms_cursor_crc@cursor-64x64-random:
    - shard-iclb:         NOTRUN -> FAIL [fdo#103232]

  * igt@kms_cursor_crc@cursor-64x64-suspend:
    - shard-glk:          PASS -> FAIL [fdo#103232]

  * igt@kms_cursor_legacy@cursor-vs-flip-atomic-transitions:
    - shard-hsw:          PASS -> FAIL [fdo#103355]

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-fullscreen:
    - shard-glk:          PASS -> FAIL [fdo#103167]

  * igt@kms_frontbuffer_tracking@fbc-stridechange:
    - shard-iclb:         PASS -> FAIL [fdo#105683] / [fdo#108040]

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-mmap-wc:
    - shard-iclb:         PASS -> DMESG-FAIL [fdo#107724]

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-spr-indfb-move:
    - shard-iclb:         PASS -> FAIL [fdo#103167] +2

  * igt@kms_frontbuffer_tracking@fbcpsr-stridechange:
    - shard-iclb:         PASS -> FAIL [fdo#105683]

  * igt@kms_plane@pixel-format-pipe-b-planes-source-clamping:
    - shard-iclb:         NOTRUN -> FAIL [fdo#108948]

  * igt@kms_plane@pixel-format-pipe-c-planes-source-clamping:
    - shard-skl:          NOTRUN -> DMESG-WARN [fdo#106885]

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes:
    - shard-skl:          NOTRUN -> INCOMPLETE [fdo#104108] / [fdo#107773]

  * igt@kms_plane@plane-position-covered-pipe-b-planes:
    - shard-iclb:         PASS -> FAIL [fdo#103166]

  * igt@kms_plane_alpha_blend@pipe-b-coverage-7efc:
    - shard-skl:          PASS -> FAIL [fdo#107815]

  * igt@kms_plane_alpha_blend@pipe-c-alpha-opaque-fb:
    - shard-skl:          NOTRUN -> FAIL [fdo#108145]

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-y:
    - shard-glk:          PASS -> FAIL [fdo#103166] +2
    - shard-apl:          PASS -> FAIL [fdo#103166] +3

  * igt@kms_rotation_crc@multiplane-rotation-cropping-top:
    - shard-kbl:          PASS -> DMESG-WARN [fdo#105604]

  * igt@kms_rotation_crc@primary-yf-tiled-reflect-x-0:
    - shard-iclb:         PASS -> DMESG-WARN [fdo#107724] / [fdo#108336] +5

  * igt@kms_setmode@basic:
    - shard-apl:          PASS -> FAIL [fdo#99912]

  * igt@kms_vblank@pipe-a-ts-continuation-modeset-hang:
    - shard-iclb:         PASS -> DMESG-WARN [fdo#107724] +5

  * igt@perf_pmu@rc6-runtime-pm:
    - shard-iclb:         PASS -> FAIL [fdo#105010]

  * igt@pm_rpm@gem-mmap-cpu:
    - shard-skl:          PASS -> INCOMPLETE [fdo#107807]

  
#### Possible fixes ####

  * igt@gem_userptr_blits@readonly-unsync:
    - shard-iclb:         TIMEOUT [fdo#108887] -> PASS

  * igt@i915_suspend@forcewake:
    - shard-skl:          INCOMPLETE [fdo#104108] / [fdo#107773] -> PASS

  * igt@kms_atomic_transition@plane-all-modeset-transition-internal-panels:
    - shard-iclb:         DMESG-WARN [fdo#107724] -> PASS +12

  * igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-a:
    - shard-glk:          DMESG-WARN [fdo#107956] -> PASS

  * igt@kms_cursor_crc@cursor-128x42-random:
    - shard-apl:          FAIL [fdo#103232] -> PASS

  * igt@kms_cursor_crc@cursor-256x256-dpms:
    - shard-glk:          FAIL [fdo#103232] -> PASS +1

  * igt@kms_draw_crc@draw-method-xrgb8888-render-ytiled:
    - shard-iclb:         FAIL [fdo#103232] -> PASS

  * igt@kms_flip@flip-vs-expired-vblank-interruptible:
    - shard-skl:          FAIL [fdo#105363] -> PASS

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-mmap-gtt:
    - shard-apl:          FAIL [fdo#103167] -> PASS +1

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-move:
    - shard-glk:          FAIL [fdo#103167] -> PASS

  * igt@kms_frontbuffer_tracking@fbc-tilingchange:
    - shard-iclb:         FAIL [fdo#103167] -> PASS +3

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-indfb-draw-mmap-gtt:
    - shard-iclb:         DMESG-FAIL [fdo#107724] -> PASS +2

  * igt@kms_plane@plane-panning-top-left-pipe-b-planes:
    - shard-iclb:         DMESG-WARN [fdo#107724] / [fdo#108336] -> PASS +2

  * igt@kms_rotation_crc@multiplane-rotation-cropping-top:
    - shard-glk:          DMESG-FAIL [fdo#105763] / [fdo#106538] -> PASS

  * igt@pm_rpm@pm-caching:
    - shard-skl:          INCOMPLETE [fdo#107807] -> PASS

  
#### Warnings ####

  * igt@i915_selftest@live_contexts:
    - shard-iclb:         INCOMPLETE [fdo#108315] -> DMESG-FAIL [fdo#108569]

  * igt@i915_suspend@shrink:
    - shard-snb:          DMESG-WARN [fdo#108784] -> INCOMPLETE [fdo#105411] / [fdo#106886]

  * igt@kms_ccs@pipe-b-crc-primary-basic:
    - shard-iclb:         FAIL [fdo#107725] -> DMESG-WARN [fdo#107724] / [fdo#108336] +1

  * igt@kms_content_protection@atomic:
    - shard-apl:          INCOMPLETE [fdo#103927] -> FAIL [fdo#108597]

  * igt@kms_cursor_crc@cursor-256x256-suspend:
    - shard-iclb:         DMESG-FAIL [fdo#103232] / [fdo#107724] -> FAIL [fdo#103232]

  
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103355]: https://bugs.freedesktop.org/show_bug.cgi?id=103355
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#105010]: https://bugs.freedesktop.org/show_bug.cgi?id=105010
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#105411]: https://bugs.freedesktop.org/show_bug.cgi?id=105411
  [fdo#105604]: https://bugs.freedesktop.org/show_bug.cgi?id=105604
  [fdo#105683]: https://bugs.freedesktop.org/show_bug.cgi?id=105683
  [fdo#105763]: https://bugs.freedesktop.org/show_bug.cgi?id=105763
  [fdo#106538]: https://bugs.freedesktop.org/show_bug.cgi?id=106538
  [fdo#106885]: https://bugs.freedesktop.org/show_bug.cgi?id=106885
  [fdo#106886]: https://bugs.freedesktop.org/show_bug.cgi?id=106886
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#107725]: https://bugs.freedesktop.org/show_bug.cgi?id=107725
  [fdo#107773]: https://bugs.freedesktop.org/show_bug.cgi?id=107773
  [fdo#107807]: https://bugs.freedesktop.org/show_bug.cgi?id=107807
  [fdo#107815]: https://bugs.freedesktop.org/show_bug.cgi?id=107815
  [fdo#107956]: https://bugs.freedesktop.org/show_bug.cgi?id=107956
  [fdo#108040]: https://bugs.freedesktop.org/show_bug.cgi?id=108040
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108315]: https://bugs.freedesktop.org/show_bug.cgi?id=108315
  [fdo#108336]: https://bugs.freedesktop.org/show_bug.cgi?id=108336
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#108597]: https://bugs.freedesktop.org/show_bug.cgi?id=108597
  [fdo#108784]: https://bugs.freedesktop.org/show_bug.cgi?id=108784
  [fdo#108887]: https://bugs.freedesktop.org/show_bug.cgi?id=108887
  [fdo#108948]: https://bugs.freedesktop.org/show_bug.cgi?id=108948
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912


Participating hosts (7 -> 7)
------------------------------

  No changes in participating hosts


Build changes
-------------

    * Linux: CI_DRM_5349 -> Patchwork_11167

  CI_DRM_5349: 28f592191f34b55d93b2e8e94f2815be0f6c7598 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4754: a176905d46d072300ba57f29ac2b98a0228e0e2d @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_11167: 64c715390115f12628672ee9427044f9308623ee @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_11167/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/6] drm/i915: Remove redundant trailing request flush
  2018-12-28 17:16 [PATCH 1/6] drm/i915: Remove redundant trailing request flush Chris Wilson
                   ` (7 preceding siblings ...)
  2018-12-28 22:00 ` ✓ Fi.CI.IGT: " Patchwork
@ 2018-12-31 10:25 ` Tvrtko Ursulin
  2018-12-31 10:32   ` Chris Wilson
  8 siblings, 1 reply; 22+ messages in thread
From: Tvrtko Ursulin @ 2018-12-31 10:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 28/12/2018 17:16, Chris Wilson wrote:
> Now that we perform the request flushing inline with emitting the
> breadcrumb, we can remove the now redundant manual flush. And we can
> also remove the infrastructure that remained only for its purpose.
> 
> v2: emit_breadcrumb_sz is in dwords, but rq->reserved_space is in bytes
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_request.c          | 14 +++++++-------
>   drivers/gpu/drm/i915/intel_ringbuffer.c      | 16 ----------------
>   drivers/gpu/drm/i915/intel_ringbuffer.h      | 10 ----------
>   drivers/gpu/drm/i915/selftests/mock_engine.c |  2 --
>   4 files changed, 7 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 6cedcfea33b5..ea4620f2ac9e 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -521,10 +521,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>   
>   	reserve_gt(i915);
>   
> -	ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST);
> -	if (ret)
> -		goto err_unreserve;
> -
>   	/* Move our oldest request to the slab-cache (if not in use!) */
>   	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
>   	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
> @@ -616,9 +612,13 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>   	 * i915_request_add() call can't fail. Note that the reserve may need
>   	 * to be redone if the request is not actually submitted straight
>   	 * away, e.g. because a GPU scheduler has deferred it.
> +	 *
> +	 * Note that due to how we add reserved_space to intel_ring_begin()
> +	 * we need to double our request to ensure that if we need to wrap
> +	 * around inside i915_request_add() there is sufficient space at
> +	 * the beginning of the ring as well.

Is there a benefit of keeping this intel_ring_begin behaviour? I mean, 
could we just drop the special casing in there and always wrap the whole 
space from the beginning if either part does not fit? That would allow 
this part to pass in the true reserved space size I think.

Regards,

Tvrtko

>   	 */
> -	rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
> -	GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz);
> +	rq->reserved_space = 2 * engine->emit_breadcrumb_sz * sizeof(u32);
>   
>   	/*
>   	 * Record the position of the start of the request so that
> @@ -860,8 +860,8 @@ void i915_request_add(struct i915_request *request)
>   	 * should already have been reserved in the ring buffer. Let the ring
>   	 * know that it is time to use that space up.
>   	 */
> +	GEM_BUG_ON(request->reserved_space > request->ring->space);
>   	request->reserved_space = 0;
> -	engine->emit_flush(request, EMIT_FLUSH);
>   
>   	/*
>   	 * Record the position of the start of the breadcrumb so that
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index fc1e29305951..d773f7dd32a9 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1904,22 +1904,6 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
>   	return 0;
>   }
>   
> -int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes)
> -{
> -	GEM_BUG_ON(bytes > ring->effective_size);
> -	if (unlikely(bytes > ring->effective_size - ring->emit))
> -		bytes += ring->size - ring->emit;
> -
> -	if (unlikely(bytes > ring->space)) {
> -		int ret = wait_for_space(ring, bytes);
> -		if (unlikely(ret))
> -			return ret;
> -	}
> -
> -	GEM_BUG_ON(ring->space < bytes);
> -	return 0;
> -}
> -
>   u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
>   {
>   	struct intel_ring *ring = rq->ring;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 32606d795af3..99e2cb75d29a 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -754,7 +754,6 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
>   
>   int __must_check intel_ring_cacheline_align(struct i915_request *rq);
>   
> -int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
>   u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
>   
>   static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
> @@ -895,15 +894,6 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
>   void intel_engine_get_instdone(struct intel_engine_cs *engine,
>   			       struct intel_instdone *instdone);
>   
> -/*
> - * Arbitrary size for largest possible 'add request' sequence. The code paths
> - * are complex and variable. Empirical measurement shows that the worst case
> - * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However,
> - * we need to allocate double the largest single packet within that emission
> - * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW).
> - */
> -#define MIN_SPACE_FOR_ADD_REQUEST 336
> -
>   static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
>   {
>   	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
> index d0c44c18db42..50e1a0b1af7e 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
> @@ -148,8 +148,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
>   	const unsigned long sz = PAGE_SIZE / 2;
>   	struct mock_ring *ring;
>   
> -	BUILD_BUG_ON(MIN_SPACE_FOR_ADD_REQUEST > sz);
> -
>   	ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
>   	if (!ring)
>   		return NULL;
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/6] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs
  2018-12-28 17:16 ` [PATCH 2/6] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs Chris Wilson
@ 2018-12-31 10:28   ` Tvrtko Ursulin
  0 siblings, 0 replies; 22+ messages in thread
From: Tvrtko Ursulin @ 2018-12-31 10:28 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 28/12/2018 17:16, Chris Wilson wrote:
> Having transitioned to using PIPECONTROL to combine the flush with the
> breadcrumb write using their post-sync functions, assume that this will
> resolve the serialisation with the subsequent MI_USER_INTERRUPT. That is
> when inspecting the breadcrumb after an interrupt we can rely on the write
> being posted (i.e. the HWSP will be coherent).
> 
> Testing using gem_sync shows that the PIPECONTROL + CS stall does
> serialise the command streamer sufficient that the breadcrumb lands
> before the MI_USER_INTERRUPT. The same is not true for MI_FLUSH_DW.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_ringbuffer.c | 2 --
>   1 file changed, 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index d773f7dd32a9..1b9264883a8d 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2218,13 +2218,11 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
>   		engine->emit_flush = gen7_render_ring_flush;
>   		engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb;
>   		engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz;
> -		engine->irq_seqno_barrier = gen6_seqno_barrier;
>   	} else if (IS_GEN(dev_priv, 6)) {
>   		engine->init_context = intel_rcs_ctx_init;
>   		engine->emit_flush = gen6_render_ring_flush;
>   		engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb;
>   		engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz;
> -		engine->irq_seqno_barrier = gen6_seqno_barrier;
>   	} else if (IS_GEN(dev_priv, 5)) {
>   		engine->emit_flush = gen4_render_ring_flush;
>   	} else {
> 

If the proof was in the pudding, then:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/6] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs
  2018-12-28 17:16 ` [PATCH 3/6] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs Chris Wilson
@ 2018-12-31 10:31   ` Tvrtko Ursulin
  0 siblings, 0 replies; 22+ messages in thread
From: Tvrtko Ursulin @ 2018-12-31 10:31 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 28/12/2018 17:16, Chris Wilson wrote:
> The MI_FLUSH_DW does appear coherent with the following
> MI_USER_INTERRUPT, but only on Sandybridge. Ivybridge requires a heavier
> hammer, but on Sandybridge we can stop requiring the irq_seqno barrier.
> 
> Testcase: igt/gem_sync
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++++--
>   1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 1b9264883a8d..2fb3a364c390 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2260,7 +2260,8 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
>   
>   		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
>   		engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
> -		engine->irq_seqno_barrier = gen6_seqno_barrier;
> +		if (!IS_GEN(dev_priv, 6))
> +			engine->irq_seqno_barrier = gen6_seqno_barrier;
>   	} else {
>   		engine->emit_flush = bsd_ring_flush;
>   		if (IS_GEN(dev_priv, 5))
> @@ -2285,7 +2286,8 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
>   
>   	engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
>   	engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
> -	engine->irq_seqno_barrier = gen6_seqno_barrier;
> +	if (!IS_GEN(dev_priv, 6))
> +		engine->irq_seqno_barrier = gen6_seqno_barrier;
>   
>   	return intel_init_ring_buffer(engine);
>   }
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/6] drm/i915: Remove redundant trailing request flush
  2018-12-31 10:25 ` [PATCH 1/6] " Tvrtko Ursulin
@ 2018-12-31 10:32   ` Chris Wilson
  2018-12-31 11:24     ` Tvrtko Ursulin
  0 siblings, 1 reply; 22+ messages in thread
From: Chris Wilson @ 2018-12-31 10:32 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-12-31 10:25:41)
> 
> On 28/12/2018 17:16, Chris Wilson wrote:
> > @@ -616,9 +612,13 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
> >        * i915_request_add() call can't fail. Note that the reserve may need
> >        * to be redone if the request is not actually submitted straight
> >        * away, e.g. because a GPU scheduler has deferred it.
> > +      *
> > +      * Note that due to how we add reserved_space to intel_ring_begin()
> > +      * we need to double our request to ensure that if we need to wrap
> > +      * around inside i915_request_add() there is sufficient space at
> > +      * the beginning of the ring as well.
> 
> Is there a benefit of keeping this intel_ring_begin behaviour? I mean, 
> could we just drop the special casing in there and always wrap the whole 
> space from the beginning if either part does not fit? That would allow 
> this part to pass in the true reserved space size I think.

I was tempted to rewrite intel_ring_begin() to do the doubling itself if
it chose to wrap now that we know we only emit one packet. But I chose
the path of least resistance. The effect is miniscule as we only reserve
the extra space, but don't actually emit any extra MI_NOOPs. The effect
is that we may wait for a few bytes more than we actually require -- and
if the ring is full enough that we wait for this request, in all
likelihood we would be waiting again for the next.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/6] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7
  2018-12-28 17:16 ` [PATCH 4/6] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7 Chris Wilson
@ 2018-12-31 10:43   ` Tvrtko Ursulin
  2018-12-31 10:56     ` Chris Wilson
  0 siblings, 1 reply; 22+ messages in thread
From: Tvrtko Ursulin @ 2018-12-31 10:43 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 28/12/2018 17:16, Chris Wilson wrote:
> The irq_seqno_barrier is a tradeoff between doing work on every request
> (on the GPU) and doing work after every interrupt (on the CPU). We
> presume we have many more requests than interrupts! However, the current
> w/a for Ivybridge is an implicit delay that currently fails sporadically
> and consistently if we move the w/a into the irq handler itself. This
> makes the CPU barrier untenable for upcoming interrupt handler changes
> and so we need to replace it with a delay on the GPU before we send the
> MI_USER_INTERRUPT. As it turns out that delay is 32x MI_STORE_DWORD_IMM,
> or about 0.6us per request! Quite nasty, but the lesser of two evils
> looking to the future.
> 
> Testcase: igt/gem_sync
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_ringbuffer.c | 80 ++++++++++++++-----------
>   1 file changed, 44 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 2fb3a364c390..dd996103d495 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -443,6 +443,34 @@ static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   }
>   static const int gen6_xcs_emit_breadcrumb_sz = 4;
>   
> +#define GEN7_XCS_WA 32
> +static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> +{
> +	int i;
> +
> +	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
> +	*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
> +	*cs++ = rq->global_seqno;
> +
> +	for (i = 0; i < GEN7_XCS_WA; i++) {
> +		*cs++ = MI_STORE_DWORD_INDEX;
> +		*cs++ = I915_GEM_HWS_INDEX_ADDR;
> +		*cs++ = rq->global_seqno;
> +	}
> +
> +	*cs++ = MI_FLUSH_DW;
> +	*cs++ = 0;
> +	*cs++ = 0;
> +
> +	*cs++ = MI_USER_INTERRUPT;
> +	*cs++ = MI_NOOP;
> +
> +	rq->tail = intel_ring_offset(rq, cs);
> +	assert_ring_tail_valid(rq->ring, rq->tail);
> +}
> +static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;

Is it worth moving this before the function, and then in the function 
have a little GEM_BUG_ON using cs pointer arithmetic to check the two 
sizes match? Or even simpler the function could use 
engine->emit_breadcrumb_sz to check against.

> +#undef GEN7_XCS_WA
> +
>   static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
>   {
>   	/*
> @@ -874,31 +902,6 @@ gen5_seqno_barrier(struct intel_engine_cs *engine)
>   	usleep_range(125, 250);
>   }
>   
> -static void
> -gen6_seqno_barrier(struct intel_engine_cs *engine)
> -{
> -	struct drm_i915_private *dev_priv = engine->i915;
> -
> -	/* Workaround to force correct ordering between irq and seqno writes on
> -	 * ivb (and maybe also on snb) by reading from a CS register (like
> -	 * ACTHD) before reading the status page.
> -	 *
> -	 * Note that this effectively stalls the read by the time it takes to
> -	 * do a memory transaction, which more or less ensures that the write
> -	 * from the GPU has sufficient time to invalidate the CPU cacheline.
> -	 * Alternatively we could delay the interrupt from the CS ring to give
> -	 * the write time to land, but that would incur a delay after every
> -	 * batch i.e. much more frequent than a delay when waiting for the
> -	 * interrupt (with the same net latency).
> -	 *
> -	 * Also note that to prevent whole machine hangs on gen7, we have to
> -	 * take the spinlock to guard against concurrent cacheline access.
> -	 */
> -	spin_lock_irq(&dev_priv->uncore.lock);
> -	POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
> -	spin_unlock_irq(&dev_priv->uncore.lock);
> -}
> -
>   static void
>   gen5_irq_enable(struct intel_engine_cs *engine)
>   {
> @@ -2258,10 +2261,13 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
>   		engine->emit_flush = gen6_bsd_ring_flush;
>   		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
>   
> -		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
> -		engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
> -		if (!IS_GEN(dev_priv, 6))
> -			engine->irq_seqno_barrier = gen6_seqno_barrier;
> +		if (IS_GEN(dev_priv, 6)) {
> +			engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
> +			engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
> +		} else {
> +			engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
> +			engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
> +		}
>   	} else {
>   		engine->emit_flush = bsd_ring_flush;
>   		if (IS_GEN(dev_priv, 5))
> @@ -2284,10 +2290,13 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
>   	engine->emit_flush = gen6_ring_flush;
>   	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
>   
> -	engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
> -	engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
> -	if (!IS_GEN(dev_priv, 6))
> -		engine->irq_seqno_barrier = gen6_seqno_barrier;
> +	if (IS_GEN(dev_priv, 6)) {
> +		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
> +		engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
> +	} else {
> +		engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
> +		engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
> +	}
>   
>   	return intel_init_ring_buffer(engine);
>   }
> @@ -2305,9 +2314,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
>   	engine->irq_enable = hsw_vebox_irq_enable;
>   	engine->irq_disable = hsw_vebox_irq_disable;
>   
> -	engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
> -	engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
> -	engine->irq_seqno_barrier = gen6_seqno_barrier;
> +	engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
> +	engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
>   
>   	return intel_init_ring_buffer(engine);
>   }
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 5/6] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
  2018-12-28 17:16 ` [PATCH 5/6] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5 Chris Wilson
@ 2018-12-31 10:49   ` Tvrtko Ursulin
  2018-12-31 11:07     ` Chris Wilson
  2018-12-31 15:25     ` Chris Wilson
  0 siblings, 2 replies; 22+ messages in thread
From: Tvrtko Ursulin @ 2018-12-31 10:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 28/12/2018 17:16, Chris Wilson wrote:
> The irq_seqno_barrier is a tradeoff between doing work on every request
> (on the GPU) and doing work after every interrupt (on the CPU). We
> presume we have many more requests than interrupts! However, for
> Ironlake, the workaround is a pretty hideous usleep() and so even though
> it was found we need to repeat the MI_STORE_DWORD_IMM 8 times, doing so
> is preferable to requiring a usleep!
> 
> The additional MI_STORE_DWORD_IMM also have the side-effect of flushing
> MI operations from userspace which are not caught by MI_FLUSH!
> 
> Testcase: igt/gem_sync
> Testcase: igt/gem_exec_whisper
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_ringbuffer.c | 40 ++++++++++++++-----------
>   1 file changed, 23 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index dd996103d495..13ac01b67ead 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -881,26 +881,29 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   	rq->tail = intel_ring_offset(rq, cs);
>   	assert_ring_tail_valid(rq->ring, rq->tail);
>   }
> -
>   static const int i9xx_emit_breadcrumb_sz = 6;
>   
> -static void
> -gen5_seqno_barrier(struct intel_engine_cs *engine)
> +#define GEN5_WA_STORES 8 /* must be at least 1! */
> +static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   {
> -	/* MI_STORE are internally buffered by the GPU and not flushed
> -	 * either by MI_FLUSH or SyncFlush or any other combination of
> -	 * MI commands.
> -	 *
> -	 * "Only the submission of the store operation is guaranteed.
> -	 * The write result will be complete (coherent) some time later
> -	 * (this is practically a finite period but there is no guaranteed
> -	 * latency)."
> -	 *
> -	 * Empirically, we observe that we need a delay of at least 75us to
> -	 * be sure that the seqno write is visible by the CPU.
> -	 */
> -	usleep_range(125, 250);

How much time for 8 store dw on gen5? I mean, does it have any relation 
to this sleep?

> +	int i;
> +
> +	*cs++ = MI_FLUSH;
> +
> +	BUILD_BUG_ON(GEN5_WA_STORES < 1);
> +	for (i = 0; i < GEN5_WA_STORES; i++) {
> +		*cs++ = MI_STORE_DWORD_INDEX;
> +		*cs++ = I915_GEM_HWS_INDEX_ADDR;
> +		*cs++ = rq->global_seqno;
> +	}
> +
> +	*cs++ = MI_USER_INTERRUPT;
> +
> +	rq->tail = intel_ring_offset(rq, cs);
> +	assert_ring_tail_valid(rq->ring, rq->tail);
>   }
> +static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2;
> +#undef GEN5_WA_STORES
>   
>   static void
>   gen5_irq_enable(struct intel_engine_cs *engine)
> @@ -2148,7 +2151,6 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
>   	} else if (INTEL_GEN(dev_priv) >= 5) {
>   		engine->irq_enable = gen5_irq_enable;
>   		engine->irq_disable = gen5_irq_disable;
> -		engine->irq_seqno_barrier = gen5_seqno_barrier;
>   	} else if (INTEL_GEN(dev_priv) >= 3) {
>   		engine->irq_enable = i9xx_irq_enable;
>   		engine->irq_disable = i9xx_irq_disable;
> @@ -2191,6 +2193,10 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
>   
>   	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
>   	engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
> +	if (IS_GEN(dev_priv, 5)) {
> +		engine->emit_breadcrumb = gen5_emit_breadcrumb;
> +		engine->emit_breadcrumb_sz = gen5_emit_breadcrumb_sz;

I'll only observe in passing that we lost some consistency in this 
approximate area of the code regarding when we are happy to overwrite 
the pointers, versus when we do if-ladders-or-so to avoid that.

> +	}
>   
>   	engine->set_default_submission = i9xx_set_default_submission;
>   
> 

But if it works:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/6] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7
  2018-12-31 10:43   ` Tvrtko Ursulin
@ 2018-12-31 10:56     ` Chris Wilson
  0 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2018-12-31 10:56 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-12-31 10:43:07)
> 
> On 28/12/2018 17:16, Chris Wilson wrote:
> > The irq_seqno_barrier is a tradeoff between doing work on every request
> > (on the GPU) and doing work after every interrupt (on the CPU). We
> > presume we have many more requests than interrupts! However, the current
> > w/a for Ivybridge is an implicit delay that currently fails sporadically
> > and consistently if we move the w/a into the irq handler itself. This
> > makes the CPU barrier untenable for upcoming interrupt handler changes
> > and so we need to replace it with a delay on the GPU before we send the
> > MI_USER_INTERRUPT. As it turns out that delay is 32x MI_STORE_DWORD_IMM,
> > or about 0.6us per request! Quite nasty, but the lesser of two evils
> > looking to the future.
> > 
> > Testcase: igt/gem_sync
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/intel_ringbuffer.c | 80 ++++++++++++++-----------
> >   1 file changed, 44 insertions(+), 36 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > index 2fb3a364c390..dd996103d495 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > @@ -443,6 +443,34 @@ static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> >   }
> >   static const int gen6_xcs_emit_breadcrumb_sz = 4;
> >   
> > +#define GEN7_XCS_WA 32
> > +static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> > +{
> > +     int i;
> > +
> > +     *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
> > +     *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
> > +     *cs++ = rq->global_seqno;
> > +
> > +     for (i = 0; i < GEN7_XCS_WA; i++) {
> > +             *cs++ = MI_STORE_DWORD_INDEX;
> > +             *cs++ = I915_GEM_HWS_INDEX_ADDR;
> > +             *cs++ = rq->global_seqno;
> > +     }
> > +
> > +     *cs++ = MI_FLUSH_DW;
> > +     *cs++ = 0;
> > +     *cs++ = 0;
> > +
> > +     *cs++ = MI_USER_INTERRUPT;
> > +     *cs++ = MI_NOOP;
> > +
> > +     rq->tail = intel_ring_offset(rq, cs);
> > +     assert_ring_tail_valid(rq->ring, rq->tail);
> > +}
> > +static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;
> 
> Is it worth moving this before the function, and then in the function 
> have a little GEM_BUG_ON using cs pointer arithmetic to check the two 
> sizes match? Or even simpler the function could use 
> engine->emit_breadcrumb_sz to check against.

I have a sketch of an idea to build a request during
intel_engine_setup_common() and measure the actual breadcrumb_sz. Which
I think is the best way to remove the fragile counting by hand. (But it
required more than 5 minutes of mocking around, so it ended up on the
list of things to do later.)

Later on, we do add a check that the emit_breadcrumb matches the
breadcrumb_sz when we move the emission into i915_request_add() and can
rely on ring->emit being valid.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 5/6] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
  2018-12-31 10:49   ` Tvrtko Ursulin
@ 2018-12-31 11:07     ` Chris Wilson
  2018-12-31 11:21       ` Tvrtko Ursulin
  2018-12-31 15:25     ` Chris Wilson
  1 sibling, 1 reply; 22+ messages in thread
From: Chris Wilson @ 2018-12-31 11:07 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-12-31 10:49:37)
> 
> On 28/12/2018 17:16, Chris Wilson wrote:
> > @@ -2191,6 +2193,10 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
> >   
> >       engine->emit_breadcrumb = i9xx_emit_breadcrumb;
> >       engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
> > +     if (IS_GEN(dev_priv, 5)) {
> > +             engine->emit_breadcrumb = gen5_emit_breadcrumb;
> > +             engine->emit_breadcrumb_sz = gen5_emit_breadcrumb_sz;
> 
> I'll only observe in passing that we lost some consistency in this 
> approximate area of the code regarding when we are happy to overwrite 
> the pointers, versus when we do if-ladders-or-so to avoid that.

I hear you, but my thinking was that this was the default for gen5 where
we use the same function for both classes of engine. And emit_bb_start
already looked odd there :) You do remember inconsistency is both my
middle names?

Back to tables? With semaphores gone, we've removed the runtime
conditionals, so we just have a bunch of common functions (for reset
handling). The counter point is that some subarches (gen2) only vary in
a single function pointer, so we end up with a lot of duplicate, but
const, tables.

I definitely prefer the repetition in tables when it comes to figuring
out exactly what set of functions are used together for any particular
setup.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 5/6] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
  2018-12-31 11:07     ` Chris Wilson
@ 2018-12-31 11:21       ` Tvrtko Ursulin
  0 siblings, 0 replies; 22+ messages in thread
From: Tvrtko Ursulin @ 2018-12-31 11:21 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 31/12/2018 11:07, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-12-31 10:49:37)
>>
>> On 28/12/2018 17:16, Chris Wilson wrote:
>>> @@ -2191,6 +2193,10 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
>>>    
>>>        engine->emit_breadcrumb = i9xx_emit_breadcrumb;
>>>        engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
>>> +     if (IS_GEN(dev_priv, 5)) {
>>> +             engine->emit_breadcrumb = gen5_emit_breadcrumb;
>>> +             engine->emit_breadcrumb_sz = gen5_emit_breadcrumb_sz;
>>
>> I'll only observe in passing that we lost some consistency in this
>> approximate area of the code regarding when we are happy to overwrite
>> the pointers, versus when we do if-ladders-or-so to avoid that.
> 
> I hear you, but my thinking was that this was the default for gen5 where
> we use the same function for both classes of engine. And emit_bb_start
> already looked odd there :) You do remember inconsistency is both my
> middle names?
> 
> Back to tables? With semaphores gone, we've removed the runtime
> conditionals, so we just have a bunch of common functions (for reset
> handling). The counter point is that some subarches (gen2) only vary in
> a single function pointer, so we end up with a lot of duplicate, but
> const, tables.
> 
> I definitely prefer the repetition in tables when it comes to figuring
> out exactly what set of functions are used together for any particular
> setup.

I was also looking at intel_init_bsd_ring_buffer before in the series - 
it ends up if gen >= 6, if !gen6, if gen6, if gen5, else, something like 
that..

I think for now we apply -EDONTCARE here and someone comes back later if 
it bugs them enough.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/6] drm/i915: Remove redundant trailing request flush
  2018-12-31 10:32   ` Chris Wilson
@ 2018-12-31 11:24     ` Tvrtko Ursulin
  0 siblings, 0 replies; 22+ messages in thread
From: Tvrtko Ursulin @ 2018-12-31 11:24 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 31/12/2018 10:32, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-12-31 10:25:41)
>>
>> On 28/12/2018 17:16, Chris Wilson wrote:
>>> @@ -616,9 +612,13 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>>>         * i915_request_add() call can't fail. Note that the reserve may need
>>>         * to be redone if the request is not actually submitted straight
>>>         * away, e.g. because a GPU scheduler has deferred it.
>>> +      *
>>> +      * Note that due to how we add reserved_space to intel_ring_begin()
>>> +      * we need to double our request to ensure that if we need to wrap
>>> +      * around inside i915_request_add() there is sufficient space at
>>> +      * the beginning of the ring as well.
>>
>> Is there a benefit of keeping this intel_ring_begin behaviour? I mean,
>> could we just drop the special casing in there and always wrap the whole
>> space from the beginning if either part does not fit? That would allow
>> this part to pass in the true reserved space size I think.
> 
> I was tempted to rewrite intel_ring_begin() to do the doubling itself if
> it chose to wrap now that we know we only emit one packet. But I chose
> the path of least resistance. The effect is minuscule as we only reserve
> the extra space, but don't actually emit any extra MI_NOOPs. The effect
> is that we may wait for a few bytes more than we actually require -- and
> if the ring is full enough that we wait for this request, in all
> likelihood we would be waiting again for the next.

It's just awfully ugly that upper layer adds a hack to account for how 
lower layer handles things when both are totally ours. But okay, lets 
mark intel_ring_begin for a rewrite when things settle.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 6/6] drm/i915: Drop unused engine->irq_seqno_barrier w/a
  2018-12-28 17:16 ` [PATCH 6/6] drm/i915: Drop unused engine->irq_seqno_barrier w/a Chris Wilson
@ 2018-12-31 11:35   ` Tvrtko Ursulin
  0 siblings, 0 replies; 22+ messages in thread
From: Tvrtko Ursulin @ 2018-12-31 11:35 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 28/12/2018 17:16, Chris Wilson wrote:
> Now that we have eliminated the CPU-side irq_seqno_barrier by moving the
> delays on the GPU before emitting the MI_USER_INTERRUPT, we can remove
> the engine->irq_seqno_barrier infrastructure. Though intentionally
> slowing down the GPU is nasty, so is the code we can now remove!
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h          | 84 ------------------------
>   drivers/gpu/drm/i915/i915_gem.c          |  7 --
>   drivers/gpu/drm/i915/i915_irq.c          |  7 --
>   drivers/gpu/drm/i915/i915_request.c      |  8 +--
>   drivers/gpu/drm/i915/intel_breadcrumbs.c | 26 --------
>   drivers/gpu/drm/i915/intel_engine_cs.c   |  6 --
>   drivers/gpu/drm/i915/intel_hangcheck.c   | 10 ---
>   drivers/gpu/drm/i915/intel_ringbuffer.c  |  4 --
>   drivers/gpu/drm/i915/intel_ringbuffer.h  | 10 ---
>   9 files changed, 1 insertion(+), 161 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 287f06b9e95a..01a3d77fed41 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3586,90 +3586,6 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
>   	}
>   }
>   
> -static inline bool
> -__i915_request_irq_complete(const struct i915_request *rq)
> -{
> -	struct intel_engine_cs *engine = rq->engine;
> -	u32 seqno;
> -
> -	/* Note that the engine may have wrapped around the seqno, and
> -	 * so our request->global_seqno will be ahead of the hardware,
> -	 * even though it completed the request before wrapping. We catch
> -	 * this by kicking all the waiters before resetting the seqno
> -	 * in hardware, and also signal the fence.
> -	 */
> -	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
> -		return true;
> -
> -	/* The request was dequeued before we were awoken. We check after
> -	 * inspecting the hw to confirm that this was the same request
> -	 * that generated the HWS update. The memory barriers within
> -	 * the request execution are sufficient to ensure that a check
> -	 * after reading the value from hw matches this request.
> -	 */
> -	seqno = i915_request_global_seqno(rq);
> -	if (!seqno)
> -		return false;
> -
> -	/* Before we do the heavier coherent read of the seqno,
> -	 * check the value (hopefully) in the CPU cacheline.
> -	 */
> -	if (__i915_request_completed(rq, seqno))
> -		return true;
> -
> -	/* Ensure our read of the seqno is coherent so that we
> -	 * do not "miss an interrupt" (i.e. if this is the last
> -	 * request and the seqno write from the GPU is not visible
> -	 * by the time the interrupt fires, we will see that the
> -	 * request is incomplete and go back to sleep awaiting
> -	 * another interrupt that will never come.)
> -	 *
> -	 * Strictly, we only need to do this once after an interrupt,
> -	 * but it is easier and safer to do it every time the waiter
> -	 * is woken.
> -	 */
> -	if (engine->irq_seqno_barrier &&
> -	    test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) {
> -		struct intel_breadcrumbs *b = &engine->breadcrumbs;
> -
> -		/* The ordering of irq_posted versus applying the barrier
> -		 * is crucial. The clearing of the current irq_posted must
> -		 * be visible before we perform the barrier operation,
> -		 * such that if a subsequent interrupt arrives, irq_posted
> -		 * is reasserted and our task rewoken (which causes us to
> -		 * do another __i915_request_irq_complete() immediately
> -		 * and reapply the barrier). Conversely, if the clear
> -		 * occurs after the barrier, then an interrupt that arrived
> -		 * whilst we waited on the barrier would not trigger a
> -		 * barrier on the next pass, and the read may not see the
> -		 * seqno update.
> -		 */
> -		engine->irq_seqno_barrier(engine);
> -
> -		/* If we consume the irq, but we are no longer the bottom-half,
> -		 * the real bottom-half may not have serialised their own
> -		 * seqno check with the irq-barrier (i.e. may have inspected
> -		 * the seqno before we believe it coherent since they see
> -		 * irq_posted == false but we are still running).
> -		 */
> -		spin_lock_irq(&b->irq_lock);
> -		if (b->irq_wait && b->irq_wait->tsk != current)
> -			/* Note that if the bottom-half is changed as we
> -			 * are sending the wake-up, the new bottom-half will
> -			 * be woken by whomever made the change. We only have
> -			 * to worry about when we steal the irq-posted for
> -			 * ourself.
> -			 */
> -			wake_up_process(b->irq_wait->tsk);
> -		spin_unlock_irq(&b->irq_lock);
> -
> -		if (__i915_request_completed(rq, seqno))
> -			return true;
> -	}
> -
> -	return false;
> -}
> -
>   void i915_memcpy_init_early(struct drm_i915_private *dev_priv);
>   bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f4254e012620..7ea87c7a0e8c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3230,13 +3230,6 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine,
>   			   struct i915_request *request,
>   			   bool stalled)
>   {
> -	/*
> -	 * Make sure this write is visible before we re-enable the interrupt
> -	 * handlers on another CPU, as tasklet_enable() resolves to just
> -	 * a compiler barrier which is insufficient for our purpose here.
> -	 */
> -	smp_store_mb(engine->irq_posted, 0);
> -
>   	if (request)
>   		request = i915_gem_reset_request(engine, request, stalled);
>   
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 0c7fc9890891..fbb094ecf6c9 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1189,13 +1189,6 @@ static void notify_ring(struct intel_engine_cs *engine)
>   				rq = i915_request_get(waiter);
>   
>   			tsk = wait->tsk;
> -		} else {
> -			if (engine->irq_seqno_barrier &&
> -			    i915_seqno_passed(seqno, wait->seqno - 1)) {
> -				set_bit(ENGINE_IRQ_BREADCRUMB,
> -					&engine->irq_posted);
> -				tsk = wait->tsk;
> -			}
>   		}
>   
>   		engine->breadcrumbs.irq_count++;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index ea4620f2ac9e..1e158eb8cb97 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1179,13 +1179,7 @@ long i915_request_wait(struct i915_request *rq,
>   		set_current_state(state);
>   
>   wakeup:
> -		/*
> -		 * Carefully check if the request is complete, giving time
> -		 * for the seqno to be visible following the interrupt.
> -		 * We also have to check in case we are kicked by the GPU
> -		 * reset in order to drop the struct_mutex.
> -		 */
> -		if (__i915_request_irq_complete(rq))
> +		if (i915_request_completed(rq))
>   			break;
>   
>   		/*
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 447c5256f63a..4ed7105d7ff5 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -166,12 +166,6 @@ static void irq_enable(struct intel_engine_cs *engine)
>   	 */
>   	GEM_BUG_ON(!intel_irqs_enabled(engine->i915));
>   
> -	/* Enabling the IRQ may miss the generation of the interrupt, but
> -	 * we still need to force the barrier before reading the seqno,
> -	 * just in case.
> -	 */
> -	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
> -
>   	/* Caller disables interrupts */
>   	if (engine->irq_enable) {
>   		spin_lock(&engine->i915->irq_lock);
> @@ -683,16 +677,6 @@ static int intel_breadcrumbs_signaler(void *arg)
>   		}
>   
>   		if (unlikely(do_schedule)) {
> -			/* Before we sleep, check for a missed seqno */
> -			if (current->state & TASK_NORMAL &&
> -			    !list_empty(&b->signals) &&
> -			    engine->irq_seqno_barrier &&
> -			    test_and_clear_bit(ENGINE_IRQ_BREADCRUMB,
> -					       &engine->irq_posted)) {
> -				engine->irq_seqno_barrier(engine);
> -				intel_engine_wakeup(engine);
> -			}
> -
>   sleep:
>   			if (kthread_should_park())
>   				kthread_parkme();
> @@ -859,16 +843,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
>   	else
>   		irq_disable(engine);
>   
> -	/*
> -	 * We set the IRQ_BREADCRUMB bit when we enable the irq presuming the
> -	 * GPU is active and may have already executed the MI_USER_INTERRUPT
> -	 * before the CPU is ready to receive. However, the engine is currently
> -	 * idle (we haven't started it yet), there is no possibility for a
> -	 * missed interrupt as we enabled the irq and so we can clear the
> -	 * immediate wakeup (until a real interrupt arrives for the waiter).
> -	 */
> -	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
> -
>   	spin_unlock_irqrestore(&b->irq_lock, flags);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 1462bb49f420..189a934a63e9 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -457,7 +457,6 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
>   void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
>   {
>   	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
> -	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
>   
>   	/* After manually advancing the seqno, fake the interrupt in case
>   	 * there are any waiters for that seqno.
> @@ -1536,11 +1535,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   	spin_unlock(&b->rb_lock);
>   	local_irq_restore(flags);
>   
> -	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
> -		   engine->irq_posted,
> -		   yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
> -				  &engine->irq_posted)));
> -
>   	drm_printf(m, "HWSP:\n");
>   	hexdump(m, engine->status_page.page_addr, PAGE_SIZE);
>   
> diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
> index c3f929f59424..51e9efec5116 100644
> --- a/drivers/gpu/drm/i915/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/intel_hangcheck.c
> @@ -120,16 +120,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
>   static void hangcheck_load_sample(struct intel_engine_cs *engine,
>   				  struct intel_engine_hangcheck *hc)
>   {
> -	/* We don't strictly need an irq-barrier here, as we are not
> -	 * serving an interrupt request, be paranoid in case the
> -	 * barrier has side-effects (such as preventing a broken
> -	 * cacheline snoop) and so be sure that we can see the seqno
> -	 * advance. If the seqno should stick, due to a stale
> -	 * cacheline, we would erroneously declare the GPU hung.
> -	 */
> -	if (engine->irq_seqno_barrier)
> -		engine->irq_seqno_barrier(engine);
> -
>   	hc->acthd = intel_engine_get_active_head(engine);
>   	hc->seqno = intel_engine_get_seqno(engine);
>   }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 13ac01b67ead..f8d3090ed193 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -711,10 +711,6 @@ static int init_ring_common(struct intel_engine_cs *engine)
>   static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
>   {
>   	intel_engine_stop_cs(engine);
> -
> -	if (engine->irq_seqno_barrier)
> -		engine->irq_seqno_barrier(engine);
> -
>   	return i915_gem_find_active_request(engine);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 99e2cb75d29a..91ef00d34e91 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -365,9 +365,6 @@ struct intel_engine_cs {
>   	struct drm_i915_gem_object *default_state;
>   	void *pinned_default_state;
>   
> -	unsigned long irq_posted;
> -#define ENGINE_IRQ_BREADCRUMB 0
> -
>   	/* Rather than have every client wait upon all user interrupts,
>   	 * with the herd waking after every interrupt and each doing the
>   	 * heavyweight seqno dance, we delegate the task (of being the
> @@ -501,13 +498,6 @@ struct intel_engine_cs {
>   	 */
>   	void		(*cancel_requests)(struct intel_engine_cs *engine);
>   
> -	/* Some chipsets are not quite as coherent as advertised and need
> -	 * an expensive kick to force a true read of the up-to-date seqno.
> -	 * However, the up-to-date seqno is not always required and the last
> -	 * seen value is good enough. Note that the seqno will always be
> -	 * monotonic, even if not coherent.
> -	 */
> -	void		(*irq_seqno_barrier)(struct intel_engine_cs *engine);
>   	void		(*cleanup)(struct intel_engine_cs *engine);
>   
>   	struct intel_engine_execlists execlists;
> 

Series looks fine to me. A tiny bit more latency on old platforms to 
simplify the code seems fair. I trust you tested it well and the 
approach is robust under stress testing.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 5/6] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
  2018-12-31 10:49   ` Tvrtko Ursulin
  2018-12-31 11:07     ` Chris Wilson
@ 2018-12-31 15:25     ` Chris Wilson
  1 sibling, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2018-12-31 15:25 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-12-31 10:49:37)
> 
> On 28/12/2018 17:16, Chris Wilson wrote:
> > -static void
> > -gen5_seqno_barrier(struct intel_engine_cs *engine)
> > +#define GEN5_WA_STORES 8 /* must be at least 1! */
> > +static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> >   {
> > -     /* MI_STORE are internally buffered by the GPU and not flushed
> > -      * either by MI_FLUSH or SyncFlush or any other combination of
> > -      * MI commands.
> > -      *
> > -      * "Only the submission of the store operation is guaranteed.
> > -      * The write result will be complete (coherent) some time later
> > -      * (this is practically a finite period but there is no guaranteed
> > -      * latency)."
> > -      *
> > -      * Empirically, we observe that we need a delay of at least 75us to
> > -      * be sure that the seqno write is visible by the CPU.
> > -      */
> > -     usleep_range(125, 250);
> 
> How much time for 8 store dw on gen5? I mean, does it have any relation 
> to this sleep?

Throughput measurement puts it at around 1us — so dramatically less than
the sleep (though it is now incurred on every breadcrumb, not only after
the occasional interrupt). However, given that it solves at least one MI
incoherency issue, it is required for correct breadcrumb flushing.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: ✓ Fi.CI.BAT: success for series starting with [1/6] drm/i915: Remove redundant trailing request flush
  2018-12-28 18:18 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-12-31 15:38   ` Chris Wilson
  0 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2018-12-31 15:38 UTC (permalink / raw)
  To: Patchwork; +Cc: intel-gfx

Quoting Patchwork (2018-12-28 18:18:13)
> == Series Details ==
> 
> Series: series starting with [1/6] drm/i915: Remove redundant trailing request flush
> URL   : https://patchwork.freedesktop.org/series/54529/
> State : success
> 
> == Summary ==
> 
> CI Bug Log - changes from CI_DRM_5349 -> Patchwork_11167
> ====================================================
> 
> Summary
> -------
> 
>   **SUCCESS**
> 
>   No regressions found.

Done. Thanks for the review, and for more todo-list items ;)
Fence signaling from interrupt context coming right up...
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2018-12-31 15:38 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-28 17:16 [PATCH 1/6] drm/i915: Remove redundant trailing request flush Chris Wilson
2018-12-28 17:16 ` [PATCH 2/6] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs Chris Wilson
2018-12-31 10:28   ` Tvrtko Ursulin
2018-12-28 17:16 ` [PATCH 3/6] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs Chris Wilson
2018-12-31 10:31   ` Tvrtko Ursulin
2018-12-28 17:16 ` [PATCH 4/6] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7 Chris Wilson
2018-12-31 10:43   ` Tvrtko Ursulin
2018-12-31 10:56     ` Chris Wilson
2018-12-28 17:16 ` [PATCH 5/6] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5 Chris Wilson
2018-12-31 10:49   ` Tvrtko Ursulin
2018-12-31 11:07     ` Chris Wilson
2018-12-31 11:21       ` Tvrtko Ursulin
2018-12-31 15:25     ` Chris Wilson
2018-12-28 17:16 ` [PATCH 6/6] drm/i915: Drop unused engine->irq_seqno_barrier w/a Chris Wilson
2018-12-31 11:35   ` Tvrtko Ursulin
2018-12-28 17:48 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/6] drm/i915: Remove redundant trailing request flush Patchwork
2018-12-28 18:18 ` ✓ Fi.CI.BAT: success " Patchwork
2018-12-31 15:38   ` Chris Wilson
2018-12-28 22:00 ` ✓ Fi.CI.IGT: " Patchwork
2018-12-31 10:25 ` [PATCH 1/6] " Tvrtko Ursulin
2018-12-31 10:32   ` Chris Wilson
2018-12-31 11:24     ` Tvrtko Ursulin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.