All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 1/2] drm/i915/oa: Reconfigure contexts on the fly
Date: Mon,  8 Jul 2019 12:19:43 +0100	[thread overview]
Message-ID: <20190708111944.20095-1-chris@chris-wilson.co.uk> (raw)

Avoid a global idle barrier by reconfiguring each context by rewriting
them with MI_STORE_DWORD from the kernel context.

v2: We only need to determine the desired register values once, they are
the same for all contexts.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c |   2 +
 drivers/gpu/drm/i915/gt/intel_lrc.c         |   7 +-
 drivers/gpu/drm/i915/i915_perf.c            | 248 +++++++++++++++-----
 3 files changed, 195 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e367dce2a696..1f0d10bb88c1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -624,7 +624,9 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	ctx->sched.priority = I915_USER_PRIORITY(prio);
 	ctx->ring_size = PAGE_SIZE;
 
+	/* Isolate the kernel context from prying eyes and sticky fingers */
 	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
+	list_del_init(&ctx->link);
 
 	return ctx;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index e1ae1399c72b..9cc5374401e1 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1552,9 +1552,12 @@ __execlists_update_reg_state(struct intel_context *ce,
 	regs[CTX_RING_TAIL + 1] = ring->tail;
 
 	/* RPCS */
-	if (engine->class == RENDER_CLASS)
+	if (engine->class == RENDER_CLASS) {
 		regs[CTX_R_PWR_CLK_STATE + 1] =
 			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
+
+		i915_oa_init_reg_state(engine, ce, regs);
+	}
 }
 
 static int
@@ -2966,8 +2969,6 @@ static void execlists_init_reg_state(u32 *regs,
 	if (rcs) {
 		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
 		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
-
-		i915_oa_init_reg_state(engine, ce, regs);
 	}
 
 	regs[CTX_END] = MI_BATCH_BUFFER_END;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 357e63beb373..25c34a0b521e 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1630,6 +1630,27 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
 				      ~GT_NOA_ENABLE));
 }
 
+static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
+			      i915_reg_t reg)
+{
+	u32 mmio = i915_mmio_reg_offset(reg);
+	int i;
+
+	/*
+	 * This arbitrary default will select the 'EU FPU0 Pipeline
+	 * Active' event. In the future it's anticipated that there
+	 * will be an explicit 'No Event' we can select, but not yet...
+	 */
+	if (!oa_config)
+		return 0;
+
+	for (i = 0; i < oa_config->flex_regs_len; i++) {
+		if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio)
+			return oa_config->flex_regs[i].value;
+	}
+
+	return 0;
+}
 /*
  * NB: It must always remain pointer safe to run this even if the OA unit
  * has been disabled.
@@ -1663,28 +1684,8 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
 		GEN8_OA_COUNTER_RESUME);
 
 	for (i = 0; i < ARRAY_SIZE(flex_regs); i++) {
-		u32 state_offset = ctx_flexeu0 + i * 2;
-		u32 mmio = i915_mmio_reg_offset(flex_regs[i]);
-
-		/*
-		 * This arbitrary default will select the 'EU FPU0 Pipeline
-		 * Active' event. In the future it's anticipated that there
-		 * will be an explicit 'No Event' we can select, but not yet...
-		 */
-		u32 value = 0;
-
-		if (oa_config) {
-			u32 j;
-
-			for (j = 0; j < oa_config->flex_regs_len; j++) {
-				if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
-					value = oa_config->flex_regs[j].value;
-					break;
-				}
-			}
-		}
-
-		CTX_REG(reg_state, state_offset, flex_regs[i], value);
+		CTX_REG(reg_state, ctx_flexeu0 + i * 2, flex_regs[i],
+			oa_config_flex_reg(oa_config, flex_regs[i]));
 	}
 
 	CTX_REG(reg_state,
@@ -1692,6 +1693,107 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
 		intel_sseu_make_rpcs(i915, &ce->sseu));
 }
 
+struct flex {
+	i915_reg_t reg;
+	u32 offset;
+	u32 value;
+};
+
+static int
+gen8_store_flex(struct i915_request *rq,
+		struct intel_context *ce,
+		const struct flex *flex, unsigned int count)
+{
+	u32 offset;
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 4 * count);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
+	do {
+		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+		*cs++ = offset + (flex->offset + 1) * sizeof(u32);
+		*cs++ = 0;
+		*cs++ = flex->value;
+	} while (flex++, --count);
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int
+gen8_load_flex(struct i915_request *rq,
+	       struct intel_context *ce,
+	       const struct flex *flex, unsigned int count)
+{
+	u32 *cs;
+
+	GEM_BUG_ON(!count || count > 63);
+
+	cs = intel_ring_begin(rq, 2 * count + 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(count);
+	do {
+		*cs++ = i915_mmio_reg_offset(flex->reg);
+		*cs++ = flex->value;
+	} while (flex++, --count);
+	*cs++ = MI_NOOP;
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int gen8_modify_context(struct intel_context *ce,
+			       const struct flex *flex, unsigned int count)
+{
+	struct i915_request *rq;
+	int err;
+
+	lockdep_assert_held(&ce->pin_mutex);
+
+	rq = i915_request_create(ce->engine->kernel_context);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	/* Serialise with the remote context */
+	err = i915_active_request_set(&ce->ring->timeline->last_request, rq);
+	if (err)
+		goto out_add;
+
+	/* Keep the remote context alive until after we finish editing */
+	err = i915_active_ref(&ce->active, rq->fence.context, rq);
+	if (err)
+		goto out_add;
+
+	err = gen8_store_flex(rq, ce, flex, count);
+
+out_add:
+	i915_request_add(rq);
+	return err;
+}
+
+static int gen8_modify_self(struct intel_context *ce,
+			    const struct flex *flex, unsigned int count)
+{
+	struct i915_request *rq;
+	int err;
+
+	rq = i915_request_create(ce);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	err = gen8_load_flex(rq, ce, flex, count);
+
+	i915_request_add(rq);
+	return err;
+}
+
 /*
  * Manages updating the per-context aspects of the OA stream
  * configuration across all contexts.
@@ -1716,15 +1818,43 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
  *
  * Note: it's only the RCS/Render context that has any OA state.
  */
-static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
+static int gen8_configure_all_contexts(struct drm_i915_private *i915,
 				       const struct i915_oa_config *oa_config)
 {
-	unsigned int map_type = i915_coherent_map_type(dev_priv);
+	/* The MMIO offsets for Flex EU registers aren't contiguous */
+	const u32 ctx_flexeu0 = i915->perf.oa.ctx_flexeu0_offset;
+#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N))
+	struct flex regs[] = {
+		{
+			GEN8_R_PWR_CLK_STATE,
+			CTX_R_PWR_CLK_STATE,
+		},
+		{
+			GEN8_OACTXCONTROL,
+			i915->perf.oa.ctx_oactxctrl_offset,
+			((i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+			 (i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+			 GEN8_OA_COUNTER_RESUME)
+		},
+		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
+		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
+		{ EU_PERF_CNTL2, ctx_flexeuN(2) },
+		{ EU_PERF_CNTL3, ctx_flexeuN(3) },
+		{ EU_PERF_CNTL4, ctx_flexeuN(4) },
+		{ EU_PERF_CNTL5, ctx_flexeuN(5) },
+		{ EU_PERF_CNTL6, ctx_flexeuN(6) },
+	};
+#undef ctx_flexeuN
+	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx;
-	struct i915_request *rq;
-	int ret;
+	enum intel_engine_id id;
+	int err;
+	int i;
+
+	for (i = 2; i < ARRAY_SIZE(regs); i++)
+		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+	lockdep_assert_held(&i915->drm.struct_mutex);
 
 	/*
 	 * The OA register config is setup through the context image. This image
@@ -1736,58 +1866,58 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 	 * this might leave small interval of time where the OA unit is
 	 * configured at an invalid sampling period.
 	 *
-	 * So far the best way to work around this issue seems to be draining
-	 * the GPU from any submitted work.
+	 * Note that since we emit all requests from a single ring, there
+	 * is still an implicit global barrier here that may cause a high
+	 * priority context to wait for an otherwise independent low priority
+	 * context. Contexts idle at the time of reconfiguration are not
+	 * trapped behind the barrier.
 	 */
-	ret = i915_gem_wait_for_idle(dev_priv,
-				     I915_WAIT_LOCKED,
-				     MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	/* Update all contexts now that we've stalled the submission. */
-	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
+	list_for_each_entry(ctx, &i915->contexts.list, link) {
 		struct i915_gem_engines_iter it;
 		struct intel_context *ce;
 
 		for_each_gem_engine(ce,
 				    i915_gem_context_lock_engines(ctx),
 				    it) {
-			u32 *regs;
-
 			if (ce->engine->class != RENDER_CLASS)
 				continue;
 
-			/* OA settings will be set upon first use */
-			if (!ce->state)
-				continue;
-
-			regs = i915_gem_object_pin_map(ce->state->obj,
-						       map_type);
-			if (IS_ERR(regs)) {
-				i915_gem_context_unlock_engines(ctx);
-				return PTR_ERR(regs);
-			}
+			err = intel_context_lock_pinned(ce);
+			if (err)
+				break;
 
-			ce->state->obj->mm.dirty = true;
-			regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
+			regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
 
-			gen8_update_reg_state_unlocked(ce, regs, oa_config);
+			/* Otherwise OA settings will be set upon first use */
+			if (intel_context_is_pinned(ce))
+				err = gen8_modify_context(ce, regs, ARRAY_SIZE(regs));
 
-			i915_gem_object_unpin_map(ce->state->obj);
+			intel_context_unlock_pinned(ce);
+			if (err)
+				break;
 		}
 		i915_gem_context_unlock_engines(ctx);
+		if (err)
+			return err;
 	}
 
 	/*
-	 * Apply the configuration by doing one context restore of the edited
-	 * context image.
+	 * After updating all other contexts, we need to modify ourselves.
+	 * If we don't modify the kernel_context, we do not get events while
+	 * idle.
 	 */
-	rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context);
-	if (IS_ERR(rq))
-		return PTR_ERR(rq);
+	for_each_engine(engine, i915, id) {
+		struct intel_context *ce = engine->kernel_context;
 
-	i915_request_add(rq);
+		if (engine->class != RENDER_CLASS)
+			continue;
+
+		regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
+
+		err = gen8_modify_self(ce, regs, ARRAY_SIZE(regs));
+		if (err)
+			return err;
+	}
 
 	return 0;
 }
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

             reply	other threads:[~2019-07-08 11:20 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-08 11:19 Chris Wilson [this message]
2019-07-08 11:19 ` [PATCH 2/2] drm/i915: Add to timeline requires the timeline mutex Chris Wilson
2019-07-08 11:22   ` [PATCH v2] " Chris Wilson
2019-07-08 11:39   ` [PATCH v3] " Chris Wilson
2019-07-08 12:18     ` Tvrtko Ursulin
2019-07-08 12:22       ` Chris Wilson
2019-07-08 13:32         ` Chris Wilson
2019-07-08 13:40           ` Tvrtko Ursulin
2019-07-08 13:47             ` Chris Wilson
2019-07-08 13:49               ` Chris Wilson
2019-07-08 11:52 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/oa: Reconfigure contexts on the fly (rev2) Patchwork
2019-07-08 12:10 ` [PATCH 1/2] drm/i915/oa: Reconfigure contexts on the fly Tvrtko Ursulin
2019-07-08 12:16   ` Chris Wilson
2019-07-08 12:24     ` Tvrtko Ursulin
2019-07-08 13:39       ` Chris Wilson
2019-07-11  7:23         ` Tvrtko Ursulin
2019-07-11  7:59           ` Chris Wilson
2019-07-11 15:05         ` Chris Wilson
2019-07-08 12:35 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/oa: Reconfigure contexts on the fly (rev3) Patchwork
2019-07-08 17:37 ` ✓ Fi.CI.IGT: " Patchwork
2019-07-11 15:12 ` [PATCH v3] drm/i915/oa: Reconfigure contexts on the fly Chris Wilson
2019-07-11 17:59 ` ✓ Fi.CI.BAT: success for series starting with [v3] drm/i915/oa: Reconfigure contexts on the fly (rev4) Patchwork
2019-07-12 19:39 ` ✓ Fi.CI.IGT: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190708111944.20095-1-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.