All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 4/6] drm/i915/execlists: Unify CSB access pointers
Date: Wed, 27 Jun 2018 12:38:41 +0100	[thread overview]
Message-ID: <20180627113843.7257-4-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20180627113843.7257-1-chris@chris-wilson.co.uk>

Following the removal of the last workarounds, the only CSB mmio access
is for the old vGPU interface. The mmio registers presented by vGPU do
not require forcewake and can be treated as ordinary volatile memory,
i.e. they behave just like the HWSP access just at a different location.
We can reduce the CSB access to a set of read/write/buffer pointers and
treat the various paths identically and not worry about forcewake.
(Forcewake is nightmare for worstcase latency, and we want to process
this all with irqsoff -- no latency allowed!)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c  |  12 ---
 drivers/gpu/drm/i915/intel_lrc.c        | 116 ++++++++++--------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  23 +++--
 3 files changed, 65 insertions(+), 86 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index d3264bd6e9dc..7209c22798e6 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -25,7 +25,6 @@
 #include <drm/drm_print.h>
 
 #include "i915_drv.h"
-#include "i915_vgpu.h"
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
 
@@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
 	i915_gem_batch_pool_init(&engine->batch_pool, engine);
 }
 
-static bool csb_force_mmio(struct drm_i915_private *i915)
-{
-	/* Older GVT emulation depends upon intercepting CSB mmio */
-	if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
-		return true;
-
-	return false;
-}
-
 static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 
-	execlists->csb_use_mmio = csb_force_mmio(engine->i915);
-
 	execlists->port_mask = 1;
 	BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 91656eb2f2db..368a8c74d11d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -137,6 +137,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
+#include "i915_vgpu.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_workarounds.h"
@@ -953,44 +954,23 @@ static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
-	struct drm_i915_private *i915 = engine->i915;
-
-	/* The HWSP contains a (cacheable) mirror of the CSB */
-	const u32 *buf =
-		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
-	unsigned int head, tail;
-	bool fw = false;
+	const u32 * const buf = execlists->csb_status;
+	u8 head, tail;
 
 	/* Clear before reading to catch new interrupts */
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 	smp_mb__after_atomic();
 
-	if (unlikely(execlists->csb_use_mmio)) {
-		intel_uncore_forcewake_get(i915, execlists->fw_domains);
-		fw = true;
-
-		buf = (u32 * __force)
-			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+	/* Note that csb_write, csb_status may be either in HWSP or mmio */
+	head = execlists->csb_head;
+	tail = READ_ONCE(*execlists->csb_write);
+	GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
+	if (unlikely(head == tail))
+		return;
 
-		head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-		tail = GEN8_CSB_WRITE_PTR(head);
-		head = GEN8_CSB_READ_PTR(head);
-		execlists->csb_head = head;
-	} else {
-		const int write_idx =
-			intel_hws_csb_write_index(i915) -
-			I915_HWS_CSB_BUF0_INDEX;
+	rmb(); /* Hopefully paired with a wmb() in HW */
 
-		head = execlists->csb_head;
-		tail = READ_ONCE(buf[write_idx]);
-		rmb(); /* Hopefully paired with a wmb() in HW */
-	}
-	GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
-		  engine->name,
-		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
-		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
-
-	while (head != tail) {
+	do {
 		struct i915_request *rq;
 		unsigned int status;
 		unsigned int count;
@@ -1016,12 +996,12 @@ static void process_csb(struct intel_engine_cs *engine)
 		 * status notifier.
 		 */
 
-		status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
 		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
 			  engine->name, head,
-			  status, buf[2*head + 1],
+			  buf[2 * head + 0], buf[2 * head + 1],
 			  execlists->active);
 
+		status = buf[2 * head];
 		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
 			      GEN8_CTX_STATUS_PREEMPTED))
 			execlists_set_active(execlists,
@@ -1103,16 +1083,11 @@ static void process_csb(struct intel_engine_cs *engine)
 		} else {
 			port_set(port, port_pack(rq, count));
 		}
-	}
+	} while (head != tail);
 
-	if (head != execlists->csb_head) {
-		execlists->csb_head = head;
-		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
-		       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-	}
-
-	if (unlikely(fw))
-		intel_uncore_forcewake_put(i915, execlists->fw_domains);
+	writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+	       execlists->csb_read);
+	execlists->csb_head = head;
 }
 
 /*
@@ -2430,28 +2405,11 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 static void
 logical_ring_setup(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-	enum forcewake_domains fw_domains;
-
 	intel_engine_setup_common(engine);
 
 	/* Intentionally left blank. */
 	engine->buffer = NULL;
 
-	fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
-						    RING_ELSP(engine),
-						    FW_REG_WRITE);
-
-	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
-						     RING_CONTEXT_STATUS_PTR(engine),
-						     FW_REG_READ | FW_REG_WRITE);
-
-	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
-						     RING_CONTEXT_STATUS_BUF_BASE(engine),
-						     FW_REG_READ);
-
-	engine->execlists.fw_domains = fw_domains;
-
 	tasklet_init(&engine->execlists.tasklet,
 		     execlists_submission_tasklet, (unsigned long)engine);
 
@@ -2459,34 +2417,56 @@ logical_ring_setup(struct intel_engine_cs *engine)
 	logical_ring_default_irqs(engine);
 }
 
+static bool csb_force_mmio(struct drm_i915_private *i915)
+{
+	/* Older GVT emulation depends upon intercepting CSB mmio */
+	return intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915);
+}
+
 static int logical_ring_init(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_engine_execlists * const execlists = &engine->execlists;
 	int ret;
 
 	ret = intel_engine_init_common(engine);
 	if (ret)
 		goto error;
 
-	if (HAS_LOGICAL_RING_ELSQ(engine->i915)) {
-		engine->execlists.submit_reg = engine->i915->regs +
+	if (HAS_LOGICAL_RING_ELSQ(i915)) {
+		execlists->submit_reg = i915->regs +
 			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine));
-		engine->execlists.ctrl_reg = engine->i915->regs +
+		execlists->ctrl_reg = i915->regs +
 			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine));
 	} else {
-		engine->execlists.submit_reg = engine->i915->regs +
+		execlists->submit_reg = i915->regs +
 			i915_mmio_reg_offset(RING_ELSP(engine));
 	}
 
-	engine->execlists.preempt_complete_status = ~0u;
-	if (engine->i915->preempt_context) {
+	execlists->preempt_complete_status = ~0u;
+	if (i915->preempt_context) {
 		struct intel_context *ce =
-			to_intel_context(engine->i915->preempt_context, engine);
+			to_intel_context(i915->preempt_context, engine);
 
-		engine->execlists.preempt_complete_status =
+		execlists->preempt_complete_status =
 			upper_32_bits(ce->lrc_desc);
 	}
 
-	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
+	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
+	execlists->csb_read =
+		i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
+	if (csb_force_mmio(i915)) {
+		execlists->csb_status = (u32 __force *)
+			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+
+		execlists->csb_write = (u32 __force *)execlists->csb_read;
+	} else {
+		execlists->csb_status =
+			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+
+		execlists->csb_write =
+			&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
+	}
 
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a0bc7a8222b4..5b92c5f03e1d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -300,19 +300,30 @@ struct intel_engine_execlists {
 	struct rb_node *first;
 
 	/**
-	 * @fw_domains: forcewake domains for irq tasklet
+	 * @csb_head: context status buffer head
 	 */
-	unsigned int fw_domains;
+	unsigned int csb_head;
 
 	/**
-	 * @csb_head: context status buffer head
+	 * @csb_read: control register for Context Switch buffer
+	 *
+	 * Note this register is always in mmio.
 	 */
-	unsigned int csb_head;
+	u32 __iomem *csb_read;
 
 	/**
-	 * @csb_use_mmio: access csb through mmio, instead of hwsp
+	 * @csb_write: control register for Context Switch buffer
+	 *
+	 * Note this register may be either mmio or HWSP shadow.
+	 */
+	u32 *csb_write;
+
+	/**
+	 * @csb_status: status array for Context Switch buffer
+	 *
+	 * Note these register may be either mmio or HWSP shadow.
 	 */
-	bool csb_use_mmio;
+	u32 *csb_status;
 
 	/**
 	 * @preempt_complete_status: expected CSB upon completing preemption
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2018-06-27 11:38 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-27 11:38 [PATCH 1/6] drm/i915/execlists: Pull submit after dequeue under timeline lock Chris Wilson
2018-06-27 11:38 ` [PATCH 2/6] drm/i915/execlists: Pull CSB reset under the timeline.lock Chris Wilson
2018-06-27 11:38 ` [PATCH 3/6] drm/i915/execlists: Process one CSB update at a time Chris Wilson
2018-06-27 12:58   ` Tvrtko Ursulin
2018-06-27 11:38 ` Chris Wilson [this message]
2018-06-27 11:38 ` [PATCH 5/6] drm/i915/execlists: Reset CSB write pointer after reset Chris Wilson
2018-06-27 11:38 ` [PATCH 6/6] drm/i915/execlists: Direct submission of new requests (avoid tasklet/ksoftirqd) Chris Wilson
2018-06-27 12:32 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/6] drm/i915/execlists: Pull submit after dequeue under timeline lock Patchwork
2018-06-27 12:48 ` ✗ Fi.CI.BAT: failure " Patchwork
2018-06-27 16:46 ` ✓ Fi.CI.BAT: success " Patchwork
2018-06-27 18:45 ` ✓ Fi.CI.IGT: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180627113843.7257-4-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.