All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
To: intel-gfx@lists.freedesktop.org
Subject: [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2
Date: Fri, 27 Mar 2020 12:32:07 +0200	[thread overview]
Message-ID: <20200327103209.3099292-2-lionel.g.landwerlin@intel.com> (raw)
In-Reply-To: <20200327103209.3099292-1-lionel.g.landwerlin@intel.com>

We want to enable performance monitoring on multiple contexts to cover
the Iris use case of using 2 GEM contexts (3D & compute).

So start by splitting the OA configuration BO, which currently contains
both global & per-context register writes, into two buffer objects.

NOA muxes & OA configurations are global, while FLEXEU register
configurations are per context.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 194 ++++++++++++++++++++++---------
 1 file changed, 137 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 3222f6cd8255..f524f50abdef 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -376,7 +376,8 @@ struct i915_oa_config_bo {
 	struct llist_node node;
 
 	struct i915_oa_config *oa_config;
-	struct i915_vma *vma;
+	struct i915_vma *ctx_vma;
+	struct i915_vma *global_vma;
 };
 
 static struct ctl_table_header *sysctl_header;
@@ -412,7 +413,8 @@ i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
 static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
 {
 	i915_oa_config_put(oa_bo->oa_config);
-	i915_vma_put(oa_bo->vma);
+	i915_vma_put(oa_bo->ctx_vma);
+	i915_vma_put(oa_bo->global_vma);
 	kfree(oa_bo);
 }
 
@@ -1868,9 +1870,9 @@ static struct i915_oa_config_bo *
 alloc_oa_config_buffer(struct i915_perf_stream *stream,
 		       struct i915_oa_config *oa_config)
 {
-	struct drm_i915_gem_object *obj;
 	struct i915_oa_config_bo *oa_bo;
-	size_t config_length = 0;
+	struct drm_i915_gem_object *global_obj, *ctx_obj;
+	size_t global_config_length = 0, ctx_config_length;
 	u32 *cs;
 	int err;
 
@@ -1878,27 +1880,26 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
 	if (!oa_bo)
 		return ERR_PTR(-ENOMEM);
 
-	config_length += num_lri_dwords(oa_config->mux_regs_len);
-	config_length += num_lri_dwords(oa_config->b_counter_regs_len);
-	config_length += num_lri_dwords(oa_config->flex_regs_len);
-	config_length += 3; /* MI_BATCH_BUFFER_START */
-	config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
-
-	obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
+	/* Global configuration requires a wait for it to apply. */
+	global_config_length += num_lri_dwords(oa_config->mux_regs_len);
+	global_config_length += num_lri_dwords(oa_config->b_counter_regs_len);
+	global_config_length += 3; /* MI_BATCH_BUFFER_START */
+	global_config_length = ALIGN(sizeof(u32) * global_config_length,
+				     I915_GTT_PAGE_SIZE);
+
+	global_obj = i915_gem_object_create_shmem(stream->perf->i915,
+						  global_config_length);
+	if (IS_ERR(global_obj)) {
+		err = PTR_ERR(global_obj);
 		goto err_free;
 	}
 
-	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	cs = i915_gem_object_pin_map(global_obj, I915_MAP_WB);
 	if (IS_ERR(cs)) {
 		err = PTR_ERR(cs);
-		goto err_oa_bo;
+		goto err_global_bo;
 	}
 
-	cs = write_cs_mi_lri(cs,
-			     oa_config->mux_regs,
-			     oa_config->mux_regs_len);
 	cs = write_cs_mi_lri(cs,
 			     oa_config->b_counter_regs,
 			     oa_config->b_counter_regs_len);
@@ -1913,15 +1914,51 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
 	*cs++ = i915_ggtt_offset(stream->noa_wait);
 	*cs++ = 0;
 
-	i915_gem_object_flush_map(obj);
-	i915_gem_object_unpin_map(obj);
+	i915_gem_object_flush_map(global_obj);
+	i915_gem_object_unpin_map(global_obj);
+
+	oa_bo->global_vma = i915_vma_instance(global_obj,
+					      &stream->engine->gt->ggtt->vm,
+					      NULL);
+	if (IS_ERR(oa_bo->global_vma)) {
+		err = PTR_ERR(oa_bo->global_vma);
+		goto err_global_bo;
+	}
+
+	/* There is no known delay needed for the per context registers. */
+	ctx_config_length = 1 /* MI_BATCH_BUFFER_END */ +
+		num_lri_dwords(oa_config->flex_regs_len);
+	ctx_config_length = ALIGN(sizeof(u32) * ctx_config_length,
+				  I915_GTT_PAGE_SIZE);
+
+	ctx_obj = i915_gem_object_create_shmem(stream->perf->i915,
+					       ctx_config_length);
+	if (IS_ERR(ctx_obj)) {
+		err = PTR_ERR(ctx_obj);
+		goto err_global_vma;
+	}
+
+	cs = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_global_vma;
+	}
+
+	cs = write_cs_mi_lri(cs,
+			     oa_config->mux_regs,
+			     oa_config->mux_regs_len);
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	i915_gem_object_flush_map(ctx_obj);
+	i915_gem_object_unpin_map(ctx_obj);
 
-	oa_bo->vma = i915_vma_instance(obj,
-				       &stream->engine->gt->ggtt->vm,
-				       NULL);
-	if (IS_ERR(oa_bo->vma)) {
-		err = PTR_ERR(oa_bo->vma);
-		goto err_oa_bo;
+	oa_bo->ctx_vma = i915_vma_instance(ctx_obj,
+					   &stream->engine->gt->ggtt->vm,
+					   NULL);
+	if (IS_ERR(oa_bo->ctx_vma)) {
+		err = PTR_ERR(oa_bo->ctx_vma);
+		goto err_ctx_bo;
 	}
 
 	oa_bo->oa_config = i915_oa_config_get(oa_config);
@@ -1929,15 +1966,19 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
 
 	return oa_bo;
 
-err_oa_bo:
-	i915_gem_object_put(obj);
+err_ctx_bo:
+	i915_gem_object_put(ctx_obj);
+err_global_vma:
+	i915_vma_put(oa_bo->global_vma);
+err_global_bo:
+	i915_gem_object_put(global_obj);
 err_free:
 	kfree(oa_bo);
 	return ERR_PTR(err);
 }
 
-static struct i915_vma *
-get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
+static struct i915_oa_config_bo *
+get_oa_bo(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
 {
 	struct i915_oa_config_bo *oa_bo;
 
@@ -1950,29 +1991,29 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
 		    memcmp(oa_bo->oa_config->uuid,
 			   oa_config->uuid,
 			   sizeof(oa_config->uuid)) == 0)
-			goto out;
+			return oa_bo;
 	}
 
-	oa_bo = alloc_oa_config_buffer(stream, oa_config);
-	if (IS_ERR(oa_bo))
-		return ERR_CAST(oa_bo);
-
-out:
-	return i915_vma_get(oa_bo->vma);
+	return alloc_oa_config_buffer(stream, oa_config);
 }
 
 static struct i915_request *
 emit_oa_config(struct i915_perf_stream *stream,
 	       struct i915_oa_config *oa_config,
-	       struct intel_context *ce)
+	       struct intel_context *ce,
+	       bool global)
 {
+	struct i915_oa_config_bo *oa_bo;
 	struct i915_request *rq;
 	struct i915_vma *vma;
 	int err;
 
-	vma = get_oa_vma(stream, oa_config);
-	if (IS_ERR(vma))
-		return ERR_CAST(vma);
+	oa_bo = get_oa_bo(stream, oa_config);
+	if (IS_ERR(oa_bo))
+		return ERR_CAST(oa_bo);
+
+	vma = global ? i915_vma_get(oa_bo->global_vma) :
+		i915_vma_get(oa_bo->ctx_vma);
 
 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (err)
@@ -2019,6 +2060,7 @@ static struct i915_request *
 hsw_enable_metric_set(struct i915_perf_stream *stream)
 {
 	struct intel_uncore *uncore = stream->uncore;
+	struct i915_request *rq;
 
 	/*
 	 * PRM:
@@ -2035,7 +2077,15 @@ hsw_enable_metric_set(struct i915_perf_stream *stream)
 	intel_uncore_rmw(uncore, GEN6_UCGCTL1,
 			 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
 
-	return emit_oa_config(stream, stream->oa_config, oa_context(stream));
+	rq = emit_oa_config(stream, stream->oa_config,
+			    stream->engine->kernel_context,
+			    false /* global */);
+	if (IS_ERR(rq))
+		return rq;
+
+	return emit_oa_config(stream, stream->oa_config,
+			      stream->engine->kernel_context,
+			      true /* global */);
 }
 
 static void hsw_disable_metric_set(struct i915_perf_stream *stream)
@@ -2423,6 +2473,7 @@ gen8_enable_metric_set(struct i915_perf_stream *stream)
 {
 	struct intel_uncore *uncore = stream->uncore;
 	struct i915_oa_config *oa_config = stream->oa_config;
+	struct i915_request *rq;
 	int ret;
 
 	/*
@@ -2463,7 +2514,15 @@ gen8_enable_metric_set(struct i915_perf_stream *stream)
 	if (ret)
 		return ERR_PTR(ret);
 
-	return emit_oa_config(stream, oa_config, oa_context(stream));
+	rq = emit_oa_config(stream, oa_config,
+			    stream->engine->kernel_context,
+			    false /* global */);
+	if (IS_ERR(rq))
+		return rq;
+
+	return emit_oa_config(stream, stream->oa_config,
+			      stream->engine->kernel_context,
+			      true /* global */);
 }
 
 static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
@@ -2480,6 +2539,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream)
 	struct i915_oa_config *oa_config = stream->oa_config;
 	bool periodic = stream->periodic;
 	u32 period_exponent = stream->period_exponent;
+	struct i915_request *rq;
 	int ret;
 
 	intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
@@ -2508,17 +2568,23 @@ gen12_enable_metric_set(struct i915_perf_stream *stream)
 		return ERR_PTR(ret);
 
 	/*
-	 * For Gen12, performance counters are context
-	 * saved/restored. Only enable it for the context that
-	 * requested this.
+	 * For Gen12, performance counters are also context saved/restored on
+	 * another set of performance registers. Configure the unit dealing
+	 * with those.
 	 */
-	if (stream->ctx) {
-		ret = gen12_configure_oar_context(stream, true);
-		if (ret)
-			return ERR_PTR(ret);
-	}
+	ret = gen12_configure_oar_context(stream, true);
+	if (ret)
+		return ERR_PTR(ret);
+
+	rq = emit_oa_config(stream, oa_config,
+			    stream->engine->kernel_context,
+			    false /* global */);
+	if (IS_ERR(rq))
+		return rq;
 
-	return emit_oa_config(stream, oa_config, oa_context(stream));
+	return emit_oa_config(stream, stream->oa_config,
+			      stream->engine->kernel_context,
+			      true /* global */);
 }
 
 static void gen8_disable_metric_set(struct i915_perf_stream *stream)
@@ -3228,15 +3294,29 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
 		 * When set globally, we use a low priority kernel context,
 		 * so it will effectively take effect when idle.
 		 */
-		rq = emit_oa_config(stream, config, oa_context(stream));
-		if (!IS_ERR(rq)) {
-			config = xchg(&stream->oa_config, config);
-			i915_request_put(rq);
-		} else {
+		rq = emit_oa_config(stream, config,
+				    oa_context(stream),
+				    false /* global */);
+		if (IS_ERR(rq)) {
 			ret = PTR_ERR(rq);
+			goto err;
 		}
+
+		i915_request_put(rq);
+
+		rq = emit_oa_config(stream, config,
+				    oa_context(stream),
+				    true /* global */);
+		if (IS_ERR(rq)) {
+			ret = PTR_ERR(rq);
+			goto err;
+		}
+
+		config = xchg(&stream->oa_config, config);
+		i915_request_put(rq);
 	}
 
+err:
 	i915_oa_config_put(config);
 
 	return ret;
-- 
2.26.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2020-03-27 10:32 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-27 10:32 [Intel-gfx] [PATCH 0/3] drm/i915/perf: add support for multi context filtering Lionel Landwerlin
2020-03-27 10:32 ` Lionel Landwerlin [this message]
2020-03-27 10:40   ` [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2 Chris Wilson
2020-03-30 13:11     ` Lionel Landwerlin
2020-03-27 10:32 ` [Intel-gfx] [PATCH 2/3] drm/i915/perf: prepare driver to receive multiple ctx handles Lionel Landwerlin
2020-03-27 10:32 ` [Intel-gfx] [PATCH 3/3] drm/i915/perf: enable filtering on multiple contexts Lionel Landwerlin
2020-03-27 10:42 ` [Intel-gfx] [PATCH 0/3] drm/i915/perf: add support for multi context filtering Chris Wilson
2020-03-30 13:14   ` Lionel Landwerlin
2020-03-30 13:28     ` Chris Wilson
2020-03-27 12:08 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for " Patchwork
2020-04-06 13:55 [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2 Lionel Landwerlin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200327103209.3099292-2-lionel.g.landwerlin@intel.com \
    --to=lionel.g.landwerlin@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.