All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Subject: [PATCH] drm/i915: Use ordered seqno write interrupt generation on gen8+ execlists
Date: Mon, 18 Jan 2016 09:02:22 +0000	[thread overview]
Message-ID: <1453107742-18195-1-git-send-email-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <1452937580-3625-3-git-send-email-chris@chris-wilson.co.uk>

Broadwell and later currently use the same unordered command sequence to
update the seqno in the HWS status page and then assert the user
interrupt. We should apply the w/a from legacy (where we do an mmio
read to delay the seqno read after the interrupt), but this is not
enough to enforce coherent seqno visibilty on Skylake. Rather than
search for the proper post-interrupt seqno barrier, use a strongly
ordered command sequence to write the seqno, then assert the user
interrupt from the ring.

v2: Move around the wa tail dwords to avoid adding duplicate code.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 84 +++++++++++++++++++++------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
 2 files changed, 56 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3ed4ab7f571e..225e1ff3bdd9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -750,23 +750,34 @@ static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
  * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
  * point, the tail *inside* the context is updated and the ELSP written to.
  */
-static void
+static int
 intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
 {
-	struct intel_engine_cs *ring = request->ring;
+	struct intel_ringbuffer *ringbuf = request->ringbuf;
 	struct drm_i915_private *dev_priv = request->i915;
 
-	intel_logical_ring_advance(request->ringbuf);
+	intel_logical_ring_advance(ringbuf);
+	request->tail = ringbuf->tail;
 
-	request->tail = request->ringbuf->tail;
+	/*
+	 * Here we add two extra NOOPs as padding to avoid
+	 * lite restore of a context with HEAD==TAIL.
+	 *
+	 * Caller must reserve WA_TAIL_DWORDS for us!
+	 */
+	intel_logical_ring_emit(ringbuf, MI_NOOP);
+	intel_logical_ring_emit(ringbuf, MI_NOOP);
+	intel_logical_ring_advance(ringbuf);
 
-	if (intel_ring_stopped(ring))
-		return;
+	if (intel_ring_stopped(request->ring))
+		return 0;
 
 	if (dev_priv->guc.execbuf_client)
 		i915_guc_submit(dev_priv->guc.execbuf_client, request);
 	else
 		execlists_context_queue(request);
+
+	return 0;
 }
 
 static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
@@ -1814,44 +1825,58 @@ static void bxt_a_set_seqno(struct intel_engine_cs *ring, u32 seqno)
 	intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
+/*
+ * Reserve space for 2 NOOPs at the end of each request to be
+ * used as a workaround for not being allowed to do lite
+ * restore with HEAD==TAIL (WaIdleLiteRestore).
+ */
+#define WA_TAIL_DWORDS 2
+
+static inline u32 hws_seqno_address(struct intel_engine_cs *engine)
+{
+	return engine->status_page.gfx_addr + I915_GEM_HWS_INDEX_ADDR;
+}
+
 static int gen8_emit_request(struct drm_i915_gem_request *request)
 {
 	struct intel_ringbuffer *ringbuf = request->ringbuf;
-	struct intel_engine_cs *ring = ringbuf->ring;
-	u32 cmd;
 	int ret;
 
-	/*
-	 * Reserve space for 2 NOOPs at the end of each request to be
-	 * used as a workaround for not being allowed to do lite
-	 * restore with HEAD==TAIL (WaIdleLiteRestore).
-	 */
-	ret = intel_logical_ring_begin(request, 8);
+	ret = intel_logical_ring_begin(request, 6 + WA_TAIL_DWORDS);
 	if (ret)
 		return ret;
 
-	cmd = MI_STORE_DWORD_IMM_GEN4;
-	cmd |= MI_GLOBAL_GTT;
-
-	intel_logical_ring_emit(ringbuf, cmd);
 	intel_logical_ring_emit(ringbuf,
-				(ring->status_page.gfx_addr +
-				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
+				(MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW);
+	intel_logical_ring_emit(ringbuf,
+				hws_seqno_address(request->ring) |
+				MI_FLUSH_DW_USE_GTT);
 	intel_logical_ring_emit(ringbuf, 0);
 	intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
 	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
 	intel_logical_ring_emit(ringbuf, MI_NOOP);
-	intel_logical_ring_advance_and_submit(request);
+	return intel_logical_ring_advance_and_submit(request);
+}
 
-	/*
-	 * Here we add two extra NOOPs as padding to avoid
-	 * lite restore of a context with HEAD==TAIL.
-	 */
-	intel_logical_ring_emit(ringbuf, MI_NOOP);
-	intel_logical_ring_emit(ringbuf, MI_NOOP);
-	intel_logical_ring_advance(ringbuf);
+static int gen8_emit_request_render(struct drm_i915_gem_request *request)
+{
+	struct intel_ringbuffer *ringbuf = request->ringbuf;
+	int ret;
 
-	return 0;
+	ret = intel_logical_ring_begin(request, 6 + WA_TAIL_DWORDS);
+	if (ret)
+		return ret;
+
+	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(5));
+	intel_logical_ring_emit(ringbuf,
+				PIPE_CONTROL_GLOBAL_GTT_IVB |
+				PIPE_CONTROL_CS_STALL |
+				PIPE_CONTROL_QW_WRITE);
+	intel_logical_ring_emit(ringbuf, hws_seqno_address(request->ring));
+	intel_logical_ring_emit(ringbuf, 0);
+	intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
+	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
+	return intel_logical_ring_advance_and_submit(request);
 }
 
 static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
@@ -2034,6 +2059,7 @@ static int logical_render_ring_init(struct drm_device *dev)
 	ring->init_context = gen8_init_rcs_context;
 	ring->cleanup = intel_fini_pipe_control;
 	ring->emit_flush = gen8_emit_flush_render;
+	ring->emit_request = gen8_emit_request_render;
 
 	ring->dev = dev;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 8fb02b21e75d..e1797d42054c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -424,6 +424,7 @@ intel_write_status_page(struct intel_engine_cs *ring,
  * The area from dword 0x30 to 0x3ff is available for driver usage.
  */
 #define I915_GEM_HWS_INDEX		0x30
+#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 #define I915_GEM_HWS_SCRATCH_INDEX	0x40
 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 
-- 
2.7.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2016-01-18  9:02 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-01-15 14:35 "missed-interrupt" syndrome on Broadwell+ Chris Wilson
2016-01-15 14:35 ` [PATCH 1/6] drm/i915: Remove forcewake dance from seqno/irq barrier on legacy gen6+ Chris Wilson
2016-01-15 17:55   ` Mika Kuoppala
2016-01-15 14:35 ` [PATCH 2/6] drm/i915: Separate out the seqno-barrier from engine->get_seqno Chris Wilson
2016-01-15 14:35 ` [PATCH 3/6] drm/i915: Broadwell execlists needs exactly the same seqno w/a as legacy Chris Wilson
2016-01-15 14:35 ` [PATCH 4/6] drm/i915: Harden detection of missed interrupts Chris Wilson
2016-01-15 14:35 ` [PATCH 5/6] drm/i915: Use simplest form for flushing the single cacheline in the HWS Chris Wilson
2016-01-15 14:35 ` [PATCH 6/6] drm/i915: Replace manual barrier() with READ_ONCE() in HWS accessor Chris Wilson
2016-01-15 15:20 ` ✗ Fi.CI.BAT: warning for series starting with [1/6] drm/i915: Remove forcewake dance from seqno/irq barrier on legacy gen6+ Patchwork
2016-01-16  9:46 ` [PATCH v2 1/6] " Chris Wilson
2016-01-16  9:46   ` [PATCH v2 2/6] drm/i915: Separate out the seqno-barrier from engine->get_seqno Chris Wilson
2016-01-16  9:46   ` [PATCH v2 3/6] drm/i915: Use ordered seqno write interrupt generation on gen8+ execlists Chris Wilson
2016-01-18  8:58     ` [PATCH] magic-clflush-fix Chris Wilson
2016-01-18  9:02       ` Chris Wilson
2016-01-18  9:02     ` Chris Wilson [this message]
2016-01-16  9:46   ` [PATCH v2 4/6] drm/i915: Harden detection of missed interrupts Chris Wilson
2016-01-18 13:07     ` Mika Kuoppala
2016-01-18 15:35       ` Chris Wilson
2016-01-16  9:46   ` [PATCH v2 5/6] drm/i915: Use simplest form for flushing the single cacheline in the HWS Chris Wilson
2016-01-16  9:46   ` [PATCH v2 6/6] drm/i915: Replace manual barrier() with READ_ONCE() in HWS accessor Chris Wilson
2016-01-16 10:01 ` ✗ Fi.CI.BAT: failure for series starting with [1/6] drm/i915: Remove forcewake dance from seqno/irq barrier on legacy gen6+ (rev6) Patchwork
2016-01-18  9:30 ` ✗ Fi.CI.BAT: failure for series starting with [1/6] drm/i915: Remove forcewake dance from seqno/irq barrier on legacy gen6+ (rev8) Patchwork
2016-01-20 13:43 [PATCH] drm/i915: Use ordered seqno write interrupt generation on gen8+ execlists Mika Kuoppala

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1453107742-18195-1-git-send-email-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=mika.kuoppala@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.