All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH v2 2/2] drm/i915/perf: fix ctx_id read with GuC & ICL
Date: Fri,  1 Jun 2018 10:52:15 +0100	[thread overview]
Message-ID: <20180601095215.29279-3-lionel.g.landwerlin@intel.com> (raw)
In-Reply-To: <20180601095215.29279-1-lionel.g.landwerlin@intel.com>

One thing we didn't really understand about the OA report is that the
ContextID field (dword 2) is copy of the context descriptor (dword 1).

On Gen8->10 and without using GuC we didn't notice the issue because
we only checked the 21bits of the ContextID field in the OA reports
which matches exactly the hw_id stored into the context descriptor.

When using GuC submission we have an issue of a non matching hw_id
because GuC uses bit 20 of the hw_id to signal proxy submission. This
change introduces a mask to compare only the relevant bits.

On ICL the context descriptor format has changed and we failed to
address this. On top of using a mask we also need to shift the bits
properly.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: 1de401c08fa805 ("drm/i915/perf: enable perf support on ICL")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104252
BSpec: 1237
Testcase: igt/perf/gen8-unprivileged-single-ctx-counters
---
 drivers/gpu/drm/i915/i915_drv.h  |   1 +
 drivers/gpu/drm/i915/i915_perf.c | 123 ++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_lrc.c |   5 ++
 3 files changed, 101 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7088a1c3b6ad..b363d5830a76 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1951,6 +1951,7 @@ struct drm_i915_private {
 
 			struct intel_context *pinned_ctx;
 			u32 specific_ctx_id;
+			u32 specific_ctx_id_mask;
 
 			struct hrtimer poll_check_timer;
 			wait_queue_head_t poll_wq;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 4f0eb84b3c00..132ed3c67228 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -737,12 +737,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 			continue;
 		}
 
-		/*
-		 * XXX: Just keep the lower 21 bits for now since I'm not
-		 * entirely sure if the HW touches any of the higher bits in
-		 * this field
-		 */
-		ctx_id = report32[2] & 0x1fffff;
+		ctx_id = report32[2] & dev_priv->perf.oa.specific_ctx_id_mask;
 
 		/*
 		 * Squash whatever is in the CTX_ID field if it's marked as
@@ -1203,6 +1198,36 @@ static int i915_oa_read(struct i915_perf_stream *stream,
 	return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
 }
 
+static int oa_get_render_lrca(struct drm_i915_private *i915,
+			      struct i915_gem_context *ctx,
+			      u32 *lrca)
+{
+	struct intel_engine_cs *engine = i915->engine[RCS];
+	struct intel_context *ce;
+	int ret;
+
+	ret = i915_mutex_lock_interruptible(&i915->drm);
+	if (ret)
+		return ret;
+
+	/*
+	 * As the ID is the gtt offset of the context's vma we
+	 * pin the vma to ensure the ID remains fixed.
+	 *
+	 * NB: implied RCS engine...
+	 */
+	ce = intel_context_pin(ctx, engine);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	i915->perf.oa.pinned_ctx = ce;
+
+	*lrca = i915_ggtt_offset(ce->state);
+
+	return 0;
+}
+
 /**
  * oa_get_render_ctx_id - determine and hold ctx hw id
  * @stream: An i915-perf stream opened for OA metrics
@@ -1215,40 +1240,81 @@ static int i915_oa_read(struct i915_perf_stream *stream,
  */
 static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct drm_i915_private *i915 = stream->dev_priv;
 
-	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
-		dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id;
-	} else {
-		struct intel_engine_cs *engine = dev_priv->engine[RCS];
-		struct intel_context *ce;
+	switch (INTEL_GEN(i915)) {
+	case 7: {
 		int ret;
 
-		ret = i915_mutex_lock_interruptible(&dev_priv->drm);
+		ret = oa_get_render_lrca(i915, stream->ctx,
+					 &i915->perf.oa.specific_ctx_id);
 		if (ret)
 			return ret;
 
 		/*
-		 * As the ID is the gtt offset of the context's vma we
-		 * pin the vma to ensure the ID remains fixed.
-		 *
-		 * NB: implied RCS engine...
+		 * On Haswell we don't do any post processing of the reports
+		 * and don't need to use the mask.
 		 */
-		ce = intel_context_pin(stream->ctx, engine);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
-		if (IS_ERR(ce))
-			return PTR_ERR(ce);
+		i915->perf.oa.specific_ctx_id_mask = 0;
+		break;
+	}
 
-		dev_priv->perf.oa.pinned_ctx = ce;
+	case 8:
+	case 9:
+	case 10:
+		if (USES_GUC_SUBMISSION(i915)) {
+			u32 lrca;
+			int ret;
 
-		/*
-		 * Explicitly track the ID (instead of calling
-		 * i915_ggtt_offset() on the fly) considering the difference
-		 * with gen8+ and execlists
-		 */
-		dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(ce->state);
+			ret = oa_get_render_lrca(i915, stream->ctx, &lrca);
+			if (ret)
+				return ret;
+
+			/*
+			 * The LRCA is aligned to a page. As a result the
+			 * lower 12bits are always at 0 and reused in the
+			 * context descriptor for some flags. They won't be
+			 * part of the context ID in the OA reports, so squash
+			 * those lower bits.
+			 */
+			i915->perf.oa.specific_ctx_id =
+				(lrca + LRC_HEADER_PAGES * PAGE_SIZE) >> 12;
+
+			/*
+			 * GuC uses the top bit to signal proxy submission, so
+			 * ignore that bit if using GuC.
+			 */
+			i915->perf.oa.specific_ctx_id_mask =
+				(1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
+		} else {
+			i915->perf.oa.specific_ctx_id = stream->ctx->hw_id;
+			i915->perf.oa.specific_ctx_id_mask =
+				(1U << GEN8_CTX_ID_WIDTH) - 1;
+		}
+		break;
+
+	case 11: {
+		struct intel_engine_cs *engine = i915->engine[RCS];
+
+		i915->perf.oa.specific_ctx_id =
+			stream->ctx->hw_id << (GEN11_SW_CTX_ID_SHIFT - 32) |
+			engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
+			engine->class << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
+		i915->perf.oa.specific_ctx_id_mask =
+			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
+			((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
+			((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
+		break;
 	}
 
+	default:
+		MISSING_CASE(INTEL_GEN(i915));
+	}
+
+	DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
+			 i915->perf.oa.specific_ctx_id,
+			 i915->perf.oa.specific_ctx_id_mask);
+
 	return 0;
 }
 
@@ -1265,6 +1331,7 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
 	struct intel_context *ce;
 
 	dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
+	dev_priv->perf.oa.specific_ctx_id_mask = 0;
 
 	ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx);
 	if (ce) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d09d2b79552f..eb25afa9694f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -233,6 +233,11 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
 								/* bits 12-31 */
 	GEM_BUG_ON(desc & GENMASK_ULL(63, 32));
 
+	/*
+	 * The following 32bits are copied into the OA reports (dword 2).
+	 * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
+	 * anything below.
+	 */
 	if (INTEL_GEN(ctx->i915) >= 11) {
 		GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
 		desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2018-06-01  9:52 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-01  9:52 [PATCH v2 0/2] drm/i915/perf: fix context filtering with GuC & ICL Lionel Landwerlin
2018-06-01  9:52 ` [PATCH v2 1/2] drm/i915: drop one bit on the hw_id when using guc Lionel Landwerlin
2018-06-01 15:10   ` Chris Wilson
2018-06-01 19:24     ` Michel Thierry
2018-06-01 23:57       ` Lionel Landwerlin
2018-06-01  9:52 ` Lionel Landwerlin [this message]
2018-06-01 15:18   ` [PATCH v2 2/2] drm/i915/perf: fix ctx_id read with GuC & ICL Chris Wilson
2018-06-01 17:08     ` Lionel Landwerlin
2018-06-01 19:21       ` Michel Thierry
2018-06-01 10:50 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/perf: fix context filtering with GuC & ICL (rev2) Patchwork
2018-06-01 10:51 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-06-01 11:08 ` ✗ Fi.CI.BAT: failure " Patchwork
2018-06-01 14:00 ` ✓ Fi.CI.IGT: success " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180601095215.29279-3-lionel.g.landwerlin@intel.com \
    --to=lionel.g.landwerlin@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.