intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2
@ 2020-04-06 13:55 Lionel Landwerlin
  2020-04-06 13:55 ` [Intel-gfx] [PATCH 2/3] drm/i915/perf: prepare driver to receive multiple ctx handles Lionel Landwerlin
                   ` (5 more replies)
  0 siblings, 6 replies; 10+ messages in thread
From: Lionel Landwerlin @ 2020-04-06 13:55 UTC (permalink / raw)
  To: intel-gfx

We want to enable performance monitoring on multiple contexts to cover
the Iris use case of using 2 GEM contexts (3D & compute).

So start by splitting the OA configuration BO, which contains both
global and per-context register writes, into two sections.

NOA muxes & OA configurations are global, while FLEXEU register
configurations are per context.

v2: Use an offset into the same VMA (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 176 ++++++++++++++++++++-----------
 1 file changed, 116 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 2f78b147bb2d..e7bbb09e84a1 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -372,6 +372,7 @@ struct i915_oa_config_bo {
 
 	struct i915_oa_config *oa_config;
 	struct i915_vma *vma;
+	uint32_t per_context_offset;
 };
 
 static struct ctl_table_header *sysctl_header;
@@ -1826,37 +1827,43 @@ static struct i915_oa_config_bo *
 alloc_oa_config_buffer(struct i915_perf_stream *stream,
 		       struct i915_oa_config *oa_config)
 {
-	struct drm_i915_gem_object *obj;
 	struct i915_oa_config_bo *oa_bo;
+	struct drm_i915_gem_object *obj;
 	size_t config_length = 0;
-	u32 *cs;
+	u32 *cs_start, *cs;
 	int err;
 
 	oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
 	if (!oa_bo)
 		return ERR_PTR(-ENOMEM);
 
+	/*
+	 * Global configuration requires a jump into the NOA wait BO for it to
+	 * apply.
+	 */
 	config_length += num_lri_dwords(oa_config->mux_regs_len);
 	config_length += num_lri_dwords(oa_config->b_counter_regs_len);
-	config_length += num_lri_dwords(oa_config->flex_regs_len);
 	config_length += 3; /* MI_BATCH_BUFFER_START */
+
+	config_length += num_lri_dwords(oa_config->flex_regs_len);
+	config_length += 1 /* MI_BATCH_BUFFER_END */;
+
 	config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
 
-	obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
+	obj = i915_gem_object_create_shmem(stream->perf->i915,
+					   config_length);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		goto err_free;
 	}
 
-	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
-	if (IS_ERR(cs)) {
-		err = PTR_ERR(cs);
-		goto err_oa_bo;
+	cs_start = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(cs_start)) {
+		err = PTR_ERR(cs_start);
+		goto err_bo;
 	}
 
-	cs = write_cs_mi_lri(cs,
-			     oa_config->mux_regs,
-			     oa_config->mux_regs_len);
+	cs = cs_start;
 	cs = write_cs_mi_lri(cs,
 			     oa_config->b_counter_regs,
 			     oa_config->b_counter_regs_len);
@@ -1871,6 +1878,14 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
 	*cs++ = i915_ggtt_offset(stream->noa_wait);
 	*cs++ = 0;
 
+	oa_bo->per_context_offset = 4 * (cs - cs_start);
+
+	cs = write_cs_mi_lri(cs,
+			     oa_config->mux_regs,
+			     oa_config->mux_regs_len);
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
 	i915_gem_object_flush_map(obj);
 	i915_gem_object_unpin_map(obj);
 
@@ -1879,7 +1894,7 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
 				       NULL);
 	if (IS_ERR(oa_bo->vma)) {
 		err = PTR_ERR(oa_bo->vma);
-		goto err_oa_bo;
+		goto err_bo;
 	}
 
 	oa_bo->oa_config = i915_oa_config_get(oa_config);
@@ -1887,15 +1902,15 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
 
 	return oa_bo;
 
-err_oa_bo:
+err_bo:
 	i915_gem_object_put(obj);
 err_free:
 	kfree(oa_bo);
 	return ERR_PTR(err);
 }
 
-static struct i915_vma *
-get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
+static struct i915_oa_config_bo *
+get_oa_bo(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
 {
 	struct i915_oa_config_bo *oa_bo;
 
@@ -1908,34 +1923,31 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
 		    memcmp(oa_bo->oa_config->uuid,
 			   oa_config->uuid,
 			   sizeof(oa_config->uuid)) == 0)
-			goto out;
+			return oa_bo;
 	}
 
-	oa_bo = alloc_oa_config_buffer(stream, oa_config);
-	if (IS_ERR(oa_bo))
-		return ERR_CAST(oa_bo);
-
-out:
-	return i915_vma_get(oa_bo->vma);
+	return alloc_oa_config_buffer(stream, oa_config);
 }
 
 static int
 emit_oa_config(struct i915_perf_stream *stream,
 	       struct i915_oa_config *oa_config,
 	       struct intel_context *ce,
-	       struct i915_active *active)
+	       struct i915_active *active,
+	       bool global)
 {
+	struct i915_oa_config_bo *oa_bo;
 	struct i915_request *rq;
-	struct i915_vma *vma;
+	u64 vma_offset;
 	int err;
 
-	vma = get_oa_vma(stream, oa_config);
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
+	oa_bo = get_oa_bo(stream, oa_config);
+	if (IS_ERR(oa_bo))
+		return PTR_ERR(oa_bo);
 
-	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	err = i915_vma_pin(oa_bo->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (err)
-		goto err_vma_put;
+		return err;
 
 	intel_engine_pm_get(ce->engine);
 	rq = i915_request_create(ce);
@@ -1957,16 +1969,19 @@ emit_oa_config(struct i915_perf_stream *stream,
 			goto err_add_request;
 	}
 
-	i915_vma_lock(vma);
-	err = i915_request_await_object(rq, vma->obj, 0);
+	i915_vma_lock(oa_bo->vma);
+	err = i915_request_await_object(rq, oa_bo->vma->obj, 0);
 	if (!err)
-		err = i915_vma_move_to_active(vma, rq, 0);
-	i915_vma_unlock(vma);
+		err = i915_vma_move_to_active(oa_bo->vma, rq, 0);
+	i915_vma_unlock(oa_bo->vma);
 	if (err)
 		goto err_add_request;
 
-	err = rq->engine->emit_bb_start(rq,
-					vma->node.start, 0,
+	vma_offset = oa_bo->vma->node.start;
+	if (!global)
+		vma_offset += oa_bo->per_context_offset;
+
+	err = rq->engine->emit_bb_start(rq, vma_offset, 0,
 					I915_DISPATCH_SECURE);
 	if (err)
 		goto err_add_request;
@@ -1974,9 +1989,7 @@ emit_oa_config(struct i915_perf_stream *stream,
 err_add_request:
 	i915_request_add(rq);
 err_vma_unpin:
-	i915_vma_unpin(vma);
-err_vma_put:
-	i915_vma_put(vma);
+	i915_vma_unpin(oa_bo->vma);
 	return err;
 }
 
@@ -1990,6 +2003,7 @@ hsw_enable_metric_set(struct i915_perf_stream *stream,
 		      struct i915_active *active)
 {
 	struct intel_uncore *uncore = stream->uncore;
+	int err;
 
 	/*
 	 * PRM:
@@ -2006,9 +2020,17 @@ hsw_enable_metric_set(struct i915_perf_stream *stream,
 	intel_uncore_rmw(uncore, GEN6_UCGCTL1,
 			 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
 
-	return emit_oa_config(stream,
-			      stream->oa_config, oa_context(stream),
-			      active);
+	err = emit_oa_config(stream, stream->oa_config,
+			     oa_context(stream),
+			     active,
+			     false /* global */);
+	if (err)
+		return err;
+
+	return emit_oa_config(stream, stream->oa_config,
+			      oa_context(stream),
+			      active,
+			      true /* global */);
 }
 
 static void hsw_disable_metric_set(struct i915_perf_stream *stream)
@@ -2419,7 +2441,7 @@ gen8_enable_metric_set(struct i915_perf_stream *stream,
 {
 	struct intel_uncore *uncore = stream->uncore;
 	struct i915_oa_config *oa_config = stream->oa_config;
-	int ret;
+	int err;
 
 	/*
 	 * We disable slice/unslice clock ratio change reports on SKL since
@@ -2455,13 +2477,21 @@ gen8_enable_metric_set(struct i915_perf_stream *stream,
 	 * to make sure all slices/subslices are ON before writing to NOA
 	 * registers.
 	 */
-	ret = lrc_configure_all_contexts(stream, oa_config, active);
-	if (ret)
-		return ret;
+	err = lrc_configure_all_contexts(stream, oa_config, active);
+	if (err)
+		return err;
 
-	return emit_oa_config(stream,
-			      stream->oa_config, oa_context(stream),
-			      active);
+	err = emit_oa_config(stream, oa_config,
+			     oa_context(stream),
+			     active,
+			     false /* global */);
+	if (err)
+		return err;
+
+	return emit_oa_config(stream, stream->oa_config,
+			      oa_context(stream),
+			      active,
+			      true /* global */);
 }
 
 static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
@@ -2507,9 +2537,9 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
 		return ret;
 
 	/*
-	 * For Gen12, performance counters are context
-	 * saved/restored. Only enable it for the context that
-	 * requested this.
+	 * For Gen12, performance counters are also context saved/restored on
+	 * another set of performance registers. Configure the unit dealing
+	 * with those.
 	 */
 	if (stream->ctx) {
 		ret = gen12_configure_oar_context(stream, active);
@@ -2517,9 +2547,17 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
 			return ret;
 	}
 
-	return emit_oa_config(stream,
-			      stream->oa_config, oa_context(stream),
-			      active);
+	ret = emit_oa_config(stream, oa_config,
+			     oa_context(stream),
+			     active,
+			     false /* global */);
+	if (ret)
+		return ret;
+
+	return emit_oa_config(stream, stream->oa_config,
+			      oa_context(stream),
+			      active,
+			      true /* global */);
 }
 
 static void gen8_disable_metric_set(struct i915_perf_stream *stream)
@@ -3174,6 +3212,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
 				    unsigned long metrics_set)
 {
 	struct i915_oa_config *config;
+	struct i915_active *active = NULL;
 	long ret = stream->oa_config->id;
 
 	config = i915_perf_get_oa_config(stream->perf, metrics_set);
@@ -3181,7 +3220,11 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
 		return -EINVAL;
 
 	if (config != stream->oa_config) {
-		int err;
+		active = i915_active_create();
+		if (!active) {
+			ret = -ENOMEM;
+			goto err_config;
+		}
 
 		/*
 		 * If OA is bound to a specific context, emit the
@@ -3192,13 +3235,26 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
 		 * When set globally, we use a low priority kernel context,
 		 * so it will effectively take effect when idle.
 		 */
-		err = emit_oa_config(stream, config, oa_context(stream), NULL);
-		if (!err)
-			config = xchg(&stream->oa_config, config);
-		else
-			ret = err;
+		ret = emit_oa_config(stream, config,
+				     oa_context(stream),
+				     active,
+				     false /* global */);
+		if (ret)
+			goto err_active;
+
+		ret = emit_oa_config(stream, config,
+				     oa_context(stream),
+				     active,
+				     true /* global */);
+		if (ret)
+			goto err_active;
+
+		config = xchg(&stream->oa_config, config);
 	}
 
+err_active:
+	i915_active_put(active);
+err_config:
 	i915_oa_config_put(config);
 
 	return ret;
-- 
2.26.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Intel-gfx] [PATCH 2/3] drm/i915/perf: prepare driver to receive multiple ctx handles
  2020-04-06 13:55 [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2 Lionel Landwerlin
@ 2020-04-06 13:55 ` Lionel Landwerlin
  2020-04-06 13:55 ` [Intel-gfx] [PATCH 3/3] drm/i915/perf: enable filtering on multiple contexts Lionel Landwerlin
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Lionel Landwerlin @ 2020-04-06 13:55 UTC (permalink / raw)
  To: intel-gfx

Make all the necessary internal changes before we flip the switch.

v2: Use an unlimited number of intel contexts (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c       | 587 +++++++++++++++----------
 drivers/gpu/drm/i915/i915_perf_types.h |  23 +-
 2 files changed, 364 insertions(+), 246 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e7bbb09e84a1..008d2e55f923 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -192,6 +192,7 @@
  */
 
 #include <linux/anon_inodes.h>
+#include <linux/bsearch.h>
 #include <linux/sizes.h>
 #include <linux/uuid.h>
 
@@ -329,7 +330,8 @@ static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
  * @single_context: Whether a single or all gpu contexts should be monitored
  * @hold_preemption: Whether the preemption is disabled for the filtered
  *                   context
- * @ctx_handle: A gem ctx handle for use with @single_context
+ * @n_ctx_handles: Length of @ctx_handles
+ * @ctx_handles: An array of gem context handles
  * @metrics_set: An ID for an OA unit metric set advertised via sysfs
  * @oa_format: An OA unit HW report format
  * @oa_periodic: Whether to enable periodic OA unit sampling
@@ -349,9 +351,10 @@ static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
 struct perf_open_properties {
 	u32 sample_flags;
 
-	u64 single_context:1;
 	u64 hold_preemption:1;
-	u64 ctx_handle;
+
+	u32 n_ctx_handles;
+	u32 *ctx_handles;
 
 	/* OA sampling state */
 	int metrics_set;
@@ -625,6 +628,21 @@ static int append_oa_sample(struct i915_perf_stream *stream,
 	return 0;
 }
 
+static int ctx_id_equal(const void *key, const void *elem)
+{
+	return *((int *) elem) - *((int *) key);
+}
+
+static inline bool ctx_id_match(struct i915_perf_stream *stream,
+				u32 masked_ctx_id)
+{
+	return bsearch(&masked_ctx_id,
+		       stream->ctx_ids,
+		       stream->n_ctxs,
+		       sizeof(*stream->ctx_ids),
+		       ctx_id_equal) != NULL;
+}
+
 /**
  * Copies all buffered OA reports into userspace read() buffer.
  * @stream: An i915-perf stream opened for OA metrics
@@ -736,7 +754,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 			continue;
 		}
 
-		ctx_id = report32[2] & stream->specific_ctx_id_mask;
+		ctx_id = report32[2] & stream->ctx_id_mask;
 
 		/*
 		 * Squash whatever is in the CTX_ID field if it's marked as
@@ -781,26 +799,33 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		 * switches since it's not-uncommon for periodic samples to
 		 * identify a switch before any 'context switch' report.
 		 */
-		if (!stream->perf->exclusive_stream->ctx ||
-		    stream->specific_ctx_id == ctx_id ||
-		    stream->oa_buffer.last_ctx_id == stream->specific_ctx_id ||
-		    reason & OAREPORT_REASON_CTX_SWITCH) {
-
-			/*
-			 * While filtering for a single context we avoid
-			 * leaking the IDs of other contexts.
-			 */
-			if (stream->perf->exclusive_stream->ctx &&
-			    stream->specific_ctx_id != ctx_id) {
-				report32[2] = INVALID_CTX_ID;
-			}
-
+		if (!stream->perf->exclusive_stream->n_ctxs) {
 			ret = append_oa_sample(stream, buf, count, offset,
 					       report);
 			if (ret)
 				break;
+		} else {
+			bool ctx_match = ctx_id != INVALID_CTX_ID &&
+				ctx_id_match(stream, ctx_id);
+
+			if (ctx_match ||
+			    stream->oa_buffer.last_ctx_match ||
+			    reason & OAREPORT_REASON_CTX_SWITCH) {
+
+				/*
+				 * While filtering for a single context we avoid
+				 * leaking the IDs of other contexts.
+				 */
+				if (!ctx_match)
+					report32[2] = INVALID_CTX_ID;
+
+				ret = append_oa_sample(stream, buf, count, offset,
+						       report);
+				if (ret)
+					break;
+			}
 
-			stream->oa_buffer.last_ctx_id = ctx_id;
+			stream->oa_buffer.last_ctx_match = ctx_match;
 		}
 
 		/*
@@ -1191,138 +1216,163 @@ static int i915_oa_read(struct i915_perf_stream *stream,
 	return stream->perf->ops.read(stream, buf, count, offset);
 }
 
-static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
+static u32 get_ctx_id_mask(struct intel_engine_cs *engine)
 {
-	struct i915_gem_engines_iter it;
-	struct i915_gem_context *ctx = stream->ctx;
-	struct intel_context *ce;
-	int err;
+	switch (INTEL_GEN(engine->i915)) {
+	case 7:
+		/*
+		 * On Haswell we don't do any post processing of the reports
+		 * and don't need to use the mask.
+		 */
+		return 0;
 
-	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
-		if (ce->engine != stream->engine) /* first match! */
-			continue;
+	case 8:
+	case 9:
+	case 10:
+		if (intel_engine_in_execlists_submission_mode(engine))
+			return (1U << GEN8_CTX_ID_WIDTH) - 1;
 
 		/*
-		 * As the ID is the gtt offset of the context's vma we
-		 * pin the vma to ensure the ID remains fixed.
+		 * GuC uses the top bit to signal proxy submission, so ignore
+		 * that bit.
 		 */
-		err = intel_context_pin(ce);
-		if (err == 0) {
-			stream->pinned_ctx = ce;
-			break;
-		}
-	}
-	i915_gem_context_unlock_engines(ctx);
+		return (1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
+
+	case 11:
+	case 12:
+		/*
+		 * 0x7ff is used by idle context.
+		 */
+		BUILD_BUG_ON((GEN12_MAX_CONTEXT_HW_ID - 1) < NUM_CONTEXT_TAG);
+		return ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
 
-	return stream->pinned_ctx;
+	default:
+		MISSING_CASE(INTEL_GEN(engine->i915));
+		return 0;
+	}
 }
 
-/**
- * oa_get_render_ctx_id - determine and hold ctx hw id
- * @stream: An i915-perf stream opened for OA metrics
- *
- * Determine the render context hw id, and ensure it remains fixed for the
- * lifetime of the stream. This ensures that we don't have to worry about
- * updating the context ID in OACONTROL on the fly.
- *
- * Returns: zero on success or a negative error code
- */
-static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
+static u32 get_ctx_id(struct intel_context *ce, int idx)
 {
-	struct intel_context *ce;
-
-	ce = oa_pin_context(stream);
-	if (IS_ERR(ce))
-		return PTR_ERR(ce);
 
 	switch (INTEL_GEN(ce->engine->i915)) {
-	case 7: {
-		/*
-		 * On Haswell we don't do any post processing of the reports
-		 * and don't need to use the mask.
-		 */
-		stream->specific_ctx_id = i915_ggtt_offset(ce->state);
-		stream->specific_ctx_id_mask = 0;
-		break;
-	}
+	case 7:
+		return i915_ggtt_offset(ce->state);
 
 	case 8:
 	case 9:
 	case 10:
-		if (intel_engine_in_execlists_submission_mode(ce->engine)) {
-			stream->specific_ctx_id_mask =
-				(1U << GEN8_CTX_ID_WIDTH) - 1;
-			stream->specific_ctx_id = stream->specific_ctx_id_mask;
-		} else {
-			/*
-			 * When using GuC, the context descriptor we write in
-			 * i915 is read by GuC and rewritten before it's
-			 * actually written into the hardware. The LRCA is
-			 * what is put into the context id field of the
-			 * context descriptor by GuC. Because it's aligned to
-			 * a page, the lower 12bits are always at 0 and
-			 * dropped by GuC. They won't be part of the context
-			 * ID in the OA reports, so squash those lower bits.
-			 */
-			stream->specific_ctx_id =
-				lower_32_bits(ce->lrc_desc) >> 12;
+		if (intel_engine_in_execlists_submission_mode(ce->engine))
+			return (1U << GEN8_CTX_ID_WIDTH) - 1 - idx;
 
-			/*
-			 * GuC uses the top bit to signal proxy submission, so
-			 * ignore that bit.
-			 */
-			stream->specific_ctx_id_mask =
-				(1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
-		}
-		break;
+		/*
+		 * When using GuC, the context descriptor we write in i915 is
+		 * read by GuC and rewritten before it's actually written into
+		 * the hardware. The LRCA is what is put into the context id
+		 * field of the context descriptor by GuC. Because it's
+		 * aligned to a page, the lower 12bits are always at 0 and
+		 * dropped by GuC. They won't be part of the context ID in the
+		 * OA reports, so squash those lower bits.
+		 */
+		return lower_32_bits(ce->lrc_desc) >> 12;
 
 	case 11:
-	case 12: {
-		stream->specific_ctx_id_mask =
-			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
+	case 12:
 		/*
-		 * Pick an unused context id
-		 * 0 - (NUM_CONTEXT_TAG - 1) are used by other contexts
-		 * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
+		 * Pick an unused context id 0 -
+		 * (NUM_CONTEXT_TAG - 1) are used by other
+		 * contexts GEN12_MAX_CONTEXT_HW_ID (0x7ff) is
+		 * used by idle context
 		 */
-		stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
-		BUILD_BUG_ON((GEN12_MAX_CONTEXT_HW_ID - 1) < NUM_CONTEXT_TAG);
-		break;
-	}
+		return ((GEN12_MAX_CONTEXT_HW_ID - 1) - idx) << (GEN11_SW_CTX_ID_SHIFT - 32);
 
 	default:
 		MISSING_CASE(INTEL_GEN(ce->engine->i915));
+		return 0;
 	}
+}
+
+static int oa_get_render_ctx_ids(struct i915_perf_stream *stream)
+{
+	struct intel_context *ce;
+	int i, err;
 
-	ce->tag = stream->specific_ctx_id;
+	stream->ctx_id_mask = get_ctx_id_mask(stream->engine);
 
-	drm_dbg(&stream->perf->i915->drm,
-		"filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
-		stream->specific_ctx_id,
-		stream->specific_ctx_id_mask);
+	for (i = 0; i < stream->n_ctxs; i++) {
+		struct i915_gem_context *ctx = stream->ctxs[i];
+		struct i915_gem_engines_iter it;
+
+		for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+			if (ce->engine != stream->engine) /* first match! */
+				continue;
+
+			/*
+			 * As the ID is the gtt offset of the context's vma we
+			 * pin the vma to ensure the ID remains fixed.
+			 */
+			err = intel_context_pin(ce);
+			if (err) {
+				i915_gem_context_unlock_engines(ctx);
+				goto err;
+			}
+
+			stream->pinned_ctxs[i] = ce;
+			stream->ctx_ids[i] = get_ctx_id(ce, i);
+
+			drm_dbg(&stream->perf->i915->drm,
+				"filtering on ctx_id%i=0x%x ctx_id_mask=0x%x\n",
+				i, stream->ctx_ids[i], stream->ctx_id_mask);
+
+			ce->tag = stream->ctx_ids[i];
+
+			break;
+		}
+		i915_gem_context_unlock_engines(ctx);
+	}
 
 	return 0;
+
+err:
+	while (i--) {
+		ce = fetch_and_zero(&stream->pinned_ctxs[i]);
+		if (ce) {
+			ce->tag = 0; /* recomputed on next submission after parking */
+			intel_context_unpin(ce);
+		}
+
+		stream->ctx_ids[i] = INVALID_CTX_ID;
+	}
+
+	stream->ctx_id_mask = 0;
+
+	return err;
 }
 
 /**
- * oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold
+ * oa_put_render_ctx_ids - counterpart to oa_get_render_ctx_ids releases hold
  * @stream: An i915-perf stream opened for OA metrics
  *
  * In case anything needed doing to ensure the context HW ID would remain valid
  * for the lifetime of the stream, then that can be undone here.
  */
-static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
+static void oa_put_render_ctx_ids(struct i915_perf_stream *stream)
 {
-	struct intel_context *ce;
+	int i;
 
-	ce = fetch_and_zero(&stream->pinned_ctx);
-	if (ce) {
-		ce->tag = 0; /* recomputed on next submission after parking */
-		intel_context_unpin(ce);
+	for (i = 0; i < stream->n_ctxs; i++) {
+		struct intel_context *ce;
+
+		ce = fetch_and_zero(&stream->pinned_ctxs[i]);
+		if (ce) {
+			ce->tag = 0; /* recomputed on next submission after parking */
+			intel_context_unpin(ce);
+		}
+
+		stream->ctx_ids[i] = INVALID_CTX_ID;
 	}
 
-	stream->specific_ctx_id = INVALID_CTX_ID;
-	stream->specific_ctx_id_mask = 0;
+	stream->ctx_id_mask = 0;
 }
 
 static void
@@ -1370,8 +1420,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 	intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
 	intel_engine_pm_put(stream->engine);
 
-	if (stream->ctx)
-		oa_put_render_ctx_id(stream);
+	oa_put_render_ctx_ids(stream);
 
 	free_oa_configs(stream);
 	free_noa_wait(stream);
@@ -1463,7 +1512,7 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
 	 * reports we will forward to userspace while filtering for a single
 	 * context.
 	 */
-	stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;
+	stream->oa_buffer.last_ctx_match = false;
 
 	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
 
@@ -1517,7 +1566,7 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
 	 * reports we will forward to userspace while filtering for a single
 	 * context.
 	 */
-	stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;
+	stream->oa_buffer.last_ctx_match = false;
 
 	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
 
@@ -1993,11 +2042,6 @@ emit_oa_config(struct i915_perf_stream *stream,
 	return err;
 }
 
-static struct intel_context *oa_context(struct i915_perf_stream *stream)
-{
-	return stream->pinned_ctx ?: stream->engine->kernel_context;
-}
-
 static int
 hsw_enable_metric_set(struct i915_perf_stream *stream,
 		      struct i915_active *active)
@@ -2021,14 +2065,14 @@ hsw_enable_metric_set(struct i915_perf_stream *stream,
 			 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
 
 	err = emit_oa_config(stream, stream->oa_config,
-			     oa_context(stream),
+			     stream->engine->kernel_context,
 			     active,
 			     false /* global */);
 	if (err)
 		return err;
 
 	return emit_oa_config(stream, stream->oa_config,
-			      oa_context(stream),
+			      stream->engine->kernel_context,
 			      active,
 			      true /* global */);
 }
@@ -2235,11 +2279,10 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
 	return err;
 }
 
-static int gen12_configure_oar_context(struct i915_perf_stream *stream,
-				       struct i915_active *active)
+static int gen12_configure_oar_contexts(struct i915_perf_stream *stream,
+					struct i915_active *active)
 {
-	int err;
-	struct intel_context *ce = stream->pinned_ctx;
+	int i;
 	u32 format = stream->oa_buffer.format;
 	struct flex regs_context[] = {
 		{
@@ -2260,7 +2303,7 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
 			(active ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
 		},
 		{
-			RING_CONTEXT_CONTROL(ce->engine->mmio_base),
+			RING_CONTEXT_CONTROL(stream->engine->mmio_base),
 			CTX_CONTEXT_CONTROL,
 			_MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
 				      active ?
@@ -2269,18 +2312,28 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
 		},
 	};
 
-	/* Modify the context image of pinned context with regs_context*/
-	err = intel_context_lock_pinned(ce);
-	if (err)
-		return err;
+	for (i = 0; i < stream->n_ctxs; i++) {
+		struct intel_context *ce = stream->pinned_ctxs[i];
+		int err;
 
-	err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
-	intel_context_unlock_pinned(ce);
-	if (err)
-		return err;
+		/* Modify the context image of pinned context with regs_context*/
+		err = intel_context_lock_pinned(ce);
+		if (err)
+			return err;
+
+		err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
+		intel_context_unlock_pinned(ce);
+		if (err)
+			return err;
+
+		/* Apply regs_lri using LRI with pinned context */
+		err = gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri),
+				       active);
+		if (err)
+			return err;
+	}
 
-	/* Apply regs_lri using LRI with pinned context */
-	return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active);
+	return 0;
 }
 
 /*
@@ -2482,14 +2535,14 @@ gen8_enable_metric_set(struct i915_perf_stream *stream,
 		return err;
 
 	err = emit_oa_config(stream, oa_config,
-			     oa_context(stream),
+			     stream->engine->kernel_context,
 			     active,
 			     false /* global */);
 	if (err)
 		return err;
 
 	return emit_oa_config(stream, stream->oa_config,
-			      oa_context(stream),
+			      stream->engine->kernel_context,
 			      active,
 			      true /* global */);
 }
@@ -2541,21 +2594,19 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
 	 * another set of performance registers. Configure the unit dealing
 	 * with those.
 	 */
-	if (stream->ctx) {
-		ret = gen12_configure_oar_context(stream, active);
-		if (ret)
-			return ret;
-	}
+	ret = gen12_configure_oar_contexts(stream, active);
+	if (ret)
+		return ret;
 
 	ret = emit_oa_config(stream, oa_config,
-			     oa_context(stream),
+			     stream->engine->kernel_context,
 			     active,
 			     false /* global */);
 	if (ret)
 		return ret;
 
 	return emit_oa_config(stream, stream->oa_config,
-			      oa_context(stream),
+			      stream->engine->kernel_context,
 			      active,
 			      true /* global */);
 }
@@ -2589,8 +2640,7 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
 	gen12_configure_all_contexts(stream, NULL, NULL);
 
 	/* disable the context save/restore or OAR counters */
-	if (stream->ctx)
-		gen12_configure_oar_context(stream, NULL);
+	gen12_configure_oar_contexts(stream, NULL);
 
 	/* Make sure we disable noa to save power. */
 	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
@@ -2599,8 +2649,7 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
 static void gen7_oa_enable(struct i915_perf_stream *stream)
 {
 	struct intel_uncore *uncore = stream->uncore;
-	struct i915_gem_context *ctx = stream->ctx;
-	u32 ctx_id = stream->specific_ctx_id;
+	u32 ctx_id = stream->ctx_ids[0];
 	bool periodic = stream->periodic;
 	u32 period_exponent = stream->period_exponent;
 	u32 report_format = stream->oa_buffer.format;
@@ -2622,7 +2671,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
 			    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
 			   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
 			   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
-			   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
+			   (stream->n_ctxs ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
 			   GEN7_OACONTROL_ENABLE);
 }
 
@@ -2855,7 +2904,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	}
 
 	if (!(props->sample_flags & SAMPLE_OA_REPORT) &&
-	    (INTEL_GEN(perf->i915) < 12 || !stream->ctx)) {
+	    (INTEL_GEN(perf->i915) < 12 || !stream->n_ctxs)) {
 		DRM_DEBUG("Only OA report sampling supported\n");
 		return -EINVAL;
 	}
@@ -2903,12 +2952,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	if (stream->periodic)
 		stream->period_exponent = props->oa_period_exponent;
 
-	if (stream->ctx) {
-		ret = oa_get_render_ctx_id(stream);
-		if (ret) {
-			DRM_DEBUG("Invalid context id to filter with\n");
-			return ret;
-		}
+	ret = oa_get_render_ctx_ids(stream);
+	if (ret) {
+		DRM_DEBUG("Invalid context id to filter with\n");
+		return ret;
 	}
 
 	ret = alloc_noa_wait(stream);
@@ -2981,8 +3028,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	free_noa_wait(stream);
 
 err_noa_wait_alloc:
-	if (stream->ctx)
-		oa_put_render_ctx_id(stream);
+	oa_put_render_ctx_ids(stream);
 
 	return ret;
 }
@@ -3175,8 +3221,12 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream)
 	if (stream->ops->enable)
 		stream->ops->enable(stream);
 
-	if (stream->hold_preemption)
-		intel_context_set_nopreempt(stream->pinned_ctx);
+	if (stream->hold_preemption) {
+		int i;
+
+		for (i = 0; i < stream->n_ctxs; i++)
+			intel_context_set_nopreempt(stream->pinned_ctxs[i]);
+	}
 }
 
 /**
@@ -3201,8 +3251,12 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream)
 	/* Allow stream->ops->disable() to refer to this */
 	stream->enabled = false;
 
-	if (stream->hold_preemption)
-		intel_context_clear_nopreempt(stream->pinned_ctx);
+	if (stream->hold_preemption) {
+		int i;
+
+		for (i = 0; i < stream->n_ctxs; i++)
+			intel_context_clear_nopreempt(stream->pinned_ctxs[i]);
+	}
 
 	if (stream->ops->disable)
 		stream->ops->disable(stream);
@@ -3220,32 +3274,41 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
 		return -EINVAL;
 
 	if (config != stream->oa_config) {
+		struct intel_context *ce = stream->n_ctxs ?
+			stream->pinned_ctxs[0] : stream->engine->kernel_context;
+		int i;
+
 		active = i915_active_create();
 		if (!active) {
 			ret = -ENOMEM;
 			goto err_config;
 		}
 
-		/*
-		 * If OA is bound to a specific context, emit the
-		 * reconfiguration inline from that context. The update
-		 * will then be ordered with respect to submission on that
-		 * context.
-		 *
-		 * When set globally, we use a low priority kernel context,
-		 * so it will effectively take effect when idle.
-		 */
-		ret = emit_oa_config(stream, config,
-				     oa_context(stream),
-				     active,
-				     false /* global */);
+		for (i = 1; i < stream->n_ctxs; i++) {
+			/*
+			 * If OA is bound to a specific context, emit the
+			 * reconfiguration inline from that context. The
+			 * update will then be ordered with respect to
+			 * submission on that context.
+			 *
+			 * When set globally, we use a low priority kernel
+			 * context, so it will effectively take effect when
+			 * idle.
+			 */
+			ret = emit_oa_config(stream, config,
+					     stream->pinned_ctxs[i],
+					     active, false /* global */);
+			if (ret)
+				goto err_active;
+		}
+
+		ret = emit_oa_config(stream, config, ce,
+				     active, false /* global */);
 		if (ret)
 			goto err_active;
 
-		ret = emit_oa_config(stream, config,
-				     oa_context(stream),
-				     active,
-				     true /* global */);
+		ret = emit_oa_config(stream, config, ce,
+				     active, true /* global */);
 		if (ret)
 			goto err_active;
 
@@ -3253,7 +3316,8 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
 	}
 
 err_active:
-	i915_active_put(active);
+	if (active)
+		i915_active_put(active);
 err_config:
 	i915_oa_config_put(config);
 
@@ -3334,9 +3398,12 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
 	if (stream->ops->destroy)
 		stream->ops->destroy(stream);
 
-	if (stream->ctx)
-		i915_gem_context_put(stream->ctx);
+	while (stream->n_ctxs--)
+		i915_gem_context_put(stream->ctxs[stream->n_ctxs]);
 
+	kfree(stream->ctxs);
+	kfree(stream->pinned_ctxs);
+	kfree(stream->ctx_ids);
 	kfree(stream);
 }
 
@@ -3411,25 +3478,12 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
 			    struct perf_open_properties *props,
 			    struct drm_file *file)
 {
-	struct i915_gem_context *specific_ctx = NULL;
+	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct i915_perf_stream *stream = NULL;
 	unsigned long f_flags = 0;
 	bool privileged_op = true;
 	int stream_fd;
-	int ret;
-
-	if (props->single_context) {
-		u32 ctx_handle = props->ctx_handle;
-		struct drm_i915_file_private *file_priv = file->driver_priv;
-
-		specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle);
-		if (!specific_ctx) {
-			DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n",
-				  ctx_handle);
-			ret = -ENOENT;
-			goto err;
-		}
-	}
+	int i, ret;
 
 	/*
 	 * On Haswell the OA unit supports clock gating off for a specific
@@ -3450,17 +3504,16 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
 	 * doesn't request global stream access (i.e. query based sampling
 	 * using MI_RECORD_PERF_COUNT.
 	 */
-	if (IS_HASWELL(perf->i915) && specific_ctx)
+	if (IS_HASWELL(perf->i915) && props->n_ctx_handles > 0)
 		privileged_op = false;
-	else if (IS_GEN(perf->i915, 12) && specific_ctx &&
+	else if (IS_GEN(perf->i915, 12) && (props->n_ctx_handles > 0) &&
 		 (props->sample_flags & SAMPLE_OA_REPORT) == 0)
 		privileged_op = false;
 
 	if (props->hold_preemption) {
-		if (!props->single_context) {
+		if (!props->n_ctx_handles) {
 			DRM_DEBUG("preemption disable with no context\n");
-			ret = -EINVAL;
-			goto err;
+			return -EINVAL;
 		}
 		privileged_op = true;
 	}
@@ -3481,23 +3534,57 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
 	if (privileged_op &&
 	    i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
 		DRM_DEBUG("Insufficient privileges to open i915 perf stream\n");
-		ret = -EACCES;
-		goto err_ctx;
+		return -EACCES;
 	}
 
 	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
-	if (!stream) {
-		ret = -ENOMEM;
-		goto err_ctx;
+	if (!stream)
+		return -ENOMEM;
+
+	if (props->n_ctx_handles) {
+		gfp_t alloc_flags = GFP_KERNEL | __GFP_ZERO;
+
+		stream->ctxs = kmalloc_array(props->n_ctx_handles,
+					     sizeof(*stream->ctxs),
+					     alloc_flags);
+		if (!stream->ctxs)
+			goto err_ctx;
+
+		stream->pinned_ctxs =
+			kmalloc_array(props->n_ctx_handles,
+				      sizeof(*stream->pinned_ctxs),
+				      alloc_flags);
+		if (!stream->pinned_ctxs)
+			goto err_ctx;
+
+		stream->ctx_ids =
+			kmalloc_array(props->n_ctx_handles,
+				      sizeof(*stream->ctx_ids),
+				      alloc_flags);
+		if (!stream->ctx_ids)
+			goto err_ctx;
 	}
 
 	stream->perf = perf;
-	stream->ctx = specific_ctx;
 	stream->poll_oa_period = props->poll_oa_period;
 
+	for (i = 0; i < props->n_ctx_handles; i++) {
+		stream->ctxs[i] = i915_gem_context_lookup(file_priv,
+							  props->ctx_handles[i]);
+		if (!stream->ctxs[i]) {
+			DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n",
+				  props->ctx_handles[i]);
+
+			ret = -ENOENT;
+			goto err_ctx;
+		}
+
+		stream->n_ctxs++;
+	}
+
 	ret = i915_oa_stream_init(stream, param, props);
 	if (ret)
-		goto err_alloc;
+		goto err_ctx;
 
 	/* we avoid simply assigning stream->sample_flags = props->sample_flags
 	 * to have _stream_init check the combination of sample flags more
@@ -3532,12 +3619,13 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
 err_flags:
 	if (stream->ops->destroy)
 		stream->ops->destroy(stream);
-err_alloc:
-	kfree(stream);
 err_ctx:
-	if (specific_ctx)
-		i915_gem_context_put(specific_ctx);
-err:
+	while (stream->n_ctxs--)
+		i915_gem_context_put(stream->ctxs[stream->n_ctxs]);
+	kfree(stream->ctxs);
+	kfree(stream->pinned_ctxs);
+	kfree(stream->ctx_ids);
+	kfree(stream);
 	return ret;
 }
 
@@ -3569,7 +3657,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
 {
 	u64 __user *uprop = uprops;
 	u32 i;
-	int ret;
+	int err;
 
 	memset(props, 0, sizeof(struct perf_open_properties));
 	props->poll_oa_period = DEFAULT_POLL_PERIOD_NS;
@@ -3603,23 +3691,34 @@ static int read_properties_unlocked(struct i915_perf *perf,
 		u64 oa_period, oa_freq_hz;
 		u64 id, value;
 
-		ret = get_user(id, uprop);
-		if (ret)
-			return ret;
+		err = get_user(id, uprop);
+		if (err)
+			goto error;
 
-		ret = get_user(value, uprop + 1);
-		if (ret)
-			return ret;
+		err = get_user(value, uprop + 1);
+		if (err)
+			goto error;
 
 		if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
 			DRM_DEBUG("Unknown i915 perf property ID\n");
-			return -EINVAL;
+			err = -EINVAL;
+			goto error;
 		}
 
 		switch ((enum drm_i915_perf_property_id)id) {
 		case DRM_I915_PERF_PROP_CTX_HANDLE:
-			props->single_context = 1;
-			props->ctx_handle = value;
+			if (props->n_ctx_handles > 0) {
+				DRM_DEBUG("Context handle specified multiple times\n");
+				err = -EINVAL;
+				goto error;
+			}
+			props->ctx_handles = kmalloc_array(1, sizeof(*props->ctx_handles), GFP_KERNEL);
+			if (!props->ctx_handles) {
+				err = -ENOMEM;
+				goto error;
+			}
+			props->ctx_handles[0] = value;
+			props->n_ctx_handles = 1;
 			break;
 		case DRM_I915_PERF_PROP_SAMPLE_OA:
 			if (value)
@@ -3628,7 +3727,8 @@ static int read_properties_unlocked(struct i915_perf *perf,
 		case DRM_I915_PERF_PROP_OA_METRICS_SET:
 			if (value == 0) {
 				DRM_DEBUG("Unknown OA metric set ID\n");
-				return -EINVAL;
+				err = -EINVAL;
+				goto error;
 			}
 			props->metrics_set = value;
 			break;
@@ -3636,12 +3736,14 @@ static int read_properties_unlocked(struct i915_perf *perf,
 			if (value == 0 || value >= I915_OA_FORMAT_MAX) {
 				DRM_DEBUG("Out-of-range OA report format %llu\n",
 					  value);
-				return -EINVAL;
+				err = -EINVAL;
+				goto error;
 			}
 			if (!perf->oa_formats[value].size) {
 				DRM_DEBUG("Unsupported OA report format %llu\n",
 					  value);
-				return -EINVAL;
+				err = -EINVAL;
+				goto error;
 			}
 			props->oa_format = value;
 			break;
@@ -3649,7 +3751,8 @@ static int read_properties_unlocked(struct i915_perf *perf,
 			if (value > OA_EXPONENT_MAX) {
 				DRM_DEBUG("OA timer exponent too high (> %u)\n",
 					 OA_EXPONENT_MAX);
-				return -EINVAL;
+				err = -EINVAL;
+				goto error;
 			}
 
 			/* Theoretically we can program the OA unit to sample
@@ -3678,7 +3781,8 @@ static int read_properties_unlocked(struct i915_perf *perf,
 			    !capable(CAP_SYS_ADMIN)) {
 				DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without root privileges\n",
 					  i915_oa_max_sample_rate);
-				return -EACCES;
+				err = -EACCES;
+				goto error;
 			}
 
 			props->oa_periodic = true;
@@ -3694,13 +3798,14 @@ static int read_properties_unlocked(struct i915_perf *perf,
 					   u64_to_user_ptr(value),
 					   sizeof(user_sseu))) {
 				DRM_DEBUG("Unable to copy global sseu parameter\n");
-				return -EFAULT;
+				err = -EFAULT;
+				goto error;
 			}
 
-			ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
-			if (ret) {
+			err = get_sseu_config(&props->sseu, props->engine, &user_sseu);
+			if (err) {
 				DRM_DEBUG("Invalid SSEU configuration\n");
-				return ret;
+				goto error;
 			}
 			props->has_sseu = true;
 			break;
@@ -3709,19 +3814,25 @@ static int read_properties_unlocked(struct i915_perf *perf,
 			if (value < 100000 /* 100us */) {
 				DRM_DEBUG("OA availability timer too small (%lluns < 100us)\n",
 					  value);
-				return -EINVAL;
+				err = -EINVAL;
+				goto error;
 			}
 			props->poll_oa_period = value;
 			break;
 		case DRM_I915_PERF_PROP_MAX:
 			MISSING_CASE(id);
-			return -EINVAL;
+			err = -EINVAL;
+			goto error;
 		}
 
 		uprop += 2;
 	}
 
 	return 0;
+
+error:
+	kfree(props->ctx_handles);
+	return err;
 }
 
 /**
@@ -3781,6 +3892,8 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
 	ret = i915_perf_open_ioctl_locked(perf, param, &props, file);
 	mutex_unlock(&perf->lock);
 
+	kfree(props.ctx_handles);
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index a36a455ae336..e010ae2d9d66 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -160,10 +160,15 @@ struct i915_perf_stream {
 	int sample_size;
 
 	/**
-	 * @ctx: %NULL if measuring system-wide across all contexts or a
-	 * specific context that is being monitored.
+	 * @n_ctxs: Number of contexts pinned for the recording.
 	 */
-	struct i915_gem_context *ctx;
+	u32 n_ctxs;
+
+	/**
+	 * @ctxs: %NULL if measuring system-wide across all contexts, or a
+	 * list of the specific contexts that are being monitored.
+	 */
+	struct i915_gem_context **ctxs;
 
 	/**
 	 * @enabled: Whether the stream is currently enabled, considering
@@ -200,17 +205,17 @@ struct i915_perf_stream {
 	/**
 	 * @pinned_ctx: The OA context specific information.
 	 */
-	struct intel_context *pinned_ctx;
+	struct intel_context **pinned_ctxs;
 
 	/**
-	 * @specific_ctx_id: The id of the specific context.
+	 * @ctx_ids: The ids of the specific contexts.
 	 */
-	u32 specific_ctx_id;
+	u32 *ctx_ids;
 
 	/**
-	 * @specific_ctx_id_mask: The mask used to masking specific_ctx_id bits.
+	 * @ctx_id_mask: The mask used for masking the context id bits.
 	 */
-	u32 specific_ctx_id_mask;
+	u32 ctx_id_mask;
 
 	/**
 	 * @poll_check_timer: High resolution timer that will periodically
@@ -246,7 +251,7 @@ struct i915_perf_stream {
 	struct {
 		struct i915_vma *vma;
 		u8 *vaddr;
-		u32 last_ctx_id;
+		bool last_ctx_match;
 		int format;
 		int format_size;
 		int size_exponent;
-- 
2.26.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Intel-gfx] [PATCH 3/3] drm/i915/perf: enable filtering on multiple contexts
  2020-04-06 13:55 [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2 Lionel Landwerlin
  2020-04-06 13:55 ` [Intel-gfx] [PATCH 2/3] drm/i915/perf: prepare driver to receive multiple ctx handles Lionel Landwerlin
@ 2020-04-06 13:55 ` Lionel Landwerlin
  2020-04-06 14:19 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915/perf: break OA config buffer object in 2 Patchwork
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Lionel Landwerlin @ 2020-04-06 13:55 UTC (permalink / raw)
  To: intel-gfx

Add 2 new properties to the i915-perf open ioctl to specify an array
of GEM context handles as well as the length of the array.

This can be used by drivers using multiple GEM contexts to implement a
single GL context.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 58 ++++++++++++++++++++++++++++++--
 include/uapi/drm/i915_drm.h      | 21 ++++++++++++
 2 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 008d2e55f923..48f77a7253bc 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3656,7 +3656,8 @@ static int read_properties_unlocked(struct i915_perf *perf,
 				    struct perf_open_properties *props)
 {
 	u64 __user *uprop = uprops;
-	u32 i;
+	u32 __user *uctx_handles = NULL;
+	u32 i, n_uctx_handles = 0;
 	int err;
 
 	memset(props, 0, sizeof(struct perf_open_properties));
@@ -3707,7 +3708,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
 
 		switch ((enum drm_i915_perf_property_id)id) {
 		case DRM_I915_PERF_PROP_CTX_HANDLE:
-			if (props->n_ctx_handles > 0) {
+			if (props->n_ctx_handles > 0 || n_uctx_handles > 0) {
 				DRM_DEBUG("Context handle specified multiple times\n");
 				err = -EINVAL;
 				goto error;
@@ -3819,6 +3820,38 @@ static int read_properties_unlocked(struct i915_perf *perf,
 			}
 			props->poll_oa_period = value;
 			break;
+		case DRM_I915_PERF_PROP_CTX_HANDLE_ARRAY:
+			/* HSW can only filter in HW and only on a single
+			 * context.
+			 */
+			if (IS_HASWELL(perf->i915)) {
+				DRM_DEBUG("Multi context filter not supported on HSW\n");
+				err = -ENODEV;
+				goto error;
+			}
+			uctx_handles = u64_to_user_ptr(value);
+			break;
+		case DRM_I915_PERF_PROP_CTX_HANDLE_ARRAY_LENGTH:
+			if (IS_HASWELL(perf->i915)) {
+				DRM_DEBUG("Multi context filter not supported on HSW\n");
+				err = -ENODEV;
+				goto error;
+			}
+			if (props->n_ctx_handles > 0 || n_uctx_handles > 0) {
+				DRM_DEBUG("Context handle specified multiple times\n");
+				err = -EINVAL;
+				goto error;
+			}
+			props->ctx_handles =
+				kmalloc_array(value,
+					      sizeof(*props->ctx_handles),
+					      GFP_KERNEL);
+			if (!props->ctx_handles) {
+				err = -ENOMEM;
+				goto error;
+			}
+			n_uctx_handles = value;
+			break;
 		case DRM_I915_PERF_PROP_MAX:
 			MISSING_CASE(id);
 			err = -EINVAL;
@@ -3828,6 +3861,21 @@ static int read_properties_unlocked(struct i915_perf *perf,
 		uprop += 2;
 	}
 
+	if (n_uctx_handles > 0 && props->n_ctx_handles > 0) {
+		DRM_DEBUG("Context handle specified multiple times\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	for (i = 0; i < n_uctx_handles; i++) {
+		err = get_user(props->ctx_handles[i], uctx_handles);
+		if (err)
+			goto error;
+
+		uctx_handles++;
+		props->n_ctx_handles++;
+	}
+
 	return 0;
 
 error:
@@ -4611,8 +4659,12 @@ int i915_perf_ioctl_version(void)
 	 *
 	 * 5: Add DRM_I915_PERF_PROP_POLL_OA_PERIOD parameter that controls the
 	 *    interval for the hrtimer used to check for OA data.
+	 *
+	 * 6: Add DRM_I915_PERF_PROP_CTX_HANDLE_ARRAY &
+	 *    DRM_I915_PERF_PROP_CTX_HANDLE_ARRAY_LENGTH to allow an
+	 *    application to monitor/pin multiple contexts.
 	 */
-	return 5;
+	return 6;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 14b67cd6b54b..f80e7932d728 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1993,6 +1993,27 @@ enum drm_i915_perf_property_id {
 	 */
 	DRM_I915_PERF_PROP_POLL_OA_PERIOD,
 
+	/**
+	 * Specifies an array of u32 GEM context handles to filter reports
+	 * with.
+	 *
+	 * Using this parameter is incompatible with using
+	 * DRM_I915_PERF_PROP_CTX_HANDLE.
+	 *
+	 * This property is available in perf revision 6.
+	 */
+	DRM_I915_PERF_PROP_CTX_HANDLE_ARRAY,
+
+	/**
+	 * Specifies the length of the array specified with
+	 * DRM_I915_PERF_PROP_CTX_HANDLE_ARRAY.
+	 *
+	 * The length must be in the range [1, 4].
+	 *
+	 * This property is available in perf revision 6.
+	 */
+	DRM_I915_PERF_PROP_CTX_HANDLE_ARRAY_LENGTH,
+
 	DRM_I915_PERF_PROP_MAX /* non-ABI */
 };
 
-- 
2.26.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915/perf: break OA config buffer object in 2
  2020-04-06 13:55 [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2 Lionel Landwerlin
  2020-04-06 13:55 ` [Intel-gfx] [PATCH 2/3] drm/i915/perf: prepare driver to receive multiple ctx handles Lionel Landwerlin
  2020-04-06 13:55 ` [Intel-gfx] [PATCH 3/3] drm/i915/perf: enable filtering on multiple contexts Lionel Landwerlin
@ 2020-04-06 14:19 ` Patchwork
  2020-04-06 14:36 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2020-04-06 14:19 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915/perf: break OA config buffer object in 2
URL   : https://patchwork.freedesktop.org/series/75550/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
d78a933fe510 drm/i915/perf: break OA config buffer object in 2
-:27: CHECK:PREFER_KERNEL_TYPES: Prefer kernel type 'u32' over 'uint32_t'
#27: FILE: drivers/gpu/drm/i915/i915_perf.c:375:
+	uint32_t per_context_offset;

total: 0 errors, 0 warnings, 1 checks, 325 lines checked
122d52d6d979 drm/i915/perf: prepare driver to receive multiple ctx handles
-:53: CHECK:SPACING: No space is necessary after a cast
#53: FILE: drivers/gpu/drm/i915/i915_perf.c:633:
+	return *((int *) elem) - *((int *) key);

-:108: CHECK:BRACES: Blank lines aren't necessary after an open brace '{'
#108: FILE: drivers/gpu/drm/i915/i915_perf.c:814:
+			    reason & OAREPORT_REASON_CTX_SWITCH) {
+

-:903: WARNING:LONG_LINE: line over 100 characters
#903: FILE: drivers/gpu/drm/i915/i915_perf.c:3715:
+			props->ctx_handles = kmalloc_array(1, sizeof(*props->ctx_handles), GFP_KERNEL);

total: 0 errors, 1 warnings, 2 checks, 1007 lines checked
18728c0bc637 drm/i915/perf: enable filtering on multiple contexts

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Intel-gfx] ✗ Fi.CI.DOCS: warning for series starting with [1/3] drm/i915/perf: break OA config buffer object in 2
  2020-04-06 13:55 [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2 Lionel Landwerlin
                   ` (2 preceding siblings ...)
  2020-04-06 14:19 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915/perf: break OA config buffer object in 2 Patchwork
@ 2020-04-06 14:36 ` Patchwork
  2020-04-06 14:43 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
  2020-04-06 19:11 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  5 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2020-04-06 14:36 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915/perf: break OA config buffer object in 2
URL   : https://patchwork.freedesktop.org/series/75550/
State : warning

== Summary ==

$ make htmldocs 2>&1 > /dev/null | grep i915
./drivers/gpu/drm/i915/i915_perf_types.h:319: warning: Function parameter or member 'pinned_ctxs' not described in 'i915_perf_stream'
./drivers/gpu/drm/i915/i915_perf_types.h:319: warning: Function parameter or member 'ctx_ids' not described in 'i915_perf_stream'

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/3] drm/i915/perf: break OA config buffer object in 2
  2020-04-06 13:55 [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2 Lionel Landwerlin
                   ` (3 preceding siblings ...)
  2020-04-06 14:36 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
@ 2020-04-06 14:43 ` Patchwork
  2020-04-06 19:11 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  5 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2020-04-06 14:43 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915/perf: break OA config buffer object in 2
URL   : https://patchwork.freedesktop.org/series/75550/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8260 -> Patchwork_17220
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/index.html

Known issues
------------

  Here are the changes found in Patchwork_17220 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_module_load@reload:
    - fi-skl-6770hq:      [PASS][1] -> [DMESG-WARN][2] ([i915#203]) +1 similar issue
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/fi-skl-6770hq/igt@i915_module_load@reload.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/fi-skl-6770hq/igt@i915_module_load@reload.html

  * igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence:
    - fi-skl-6770hq:      [PASS][3] -> [SKIP][4] ([fdo#109271]) +5 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence.html

  * igt@kms_pipe_crc_basic@read-crc-pipe-b:
    - fi-skl-6770hq:      [PASS][5] -> [DMESG-WARN][6] ([i915#106])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-b.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/fi-skl-6770hq/igt@kms_pipe_crc_basic@read-crc-pipe-b.html

  
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [i915#106]: https://gitlab.freedesktop.org/drm/intel/issues/106
  [i915#203]: https://gitlab.freedesktop.org/drm/intel/issues/203


Participating hosts (53 -> 46)
------------------------------

  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8260 -> Patchwork_17220

  CI-20190529: 20190529
  CI_DRM_8260: fa5519e01f097b7f69259be38606ff5f1bc3cc6c @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5572: 6c124b5c8501d900966c033ac86c3dc55c16a2da @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17220: 18728c0bc637aab4293092615a268bbd3e9f83a5 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

18728c0bc637 drm/i915/perf: enable filtering on multiple contexts
122d52d6d979 drm/i915/perf: prepare driver to receive multiple ctx handles
d78a933fe510 drm/i915/perf: break OA config buffer object in 2

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [1/3] drm/i915/perf: break OA config buffer object in 2
  2020-04-06 13:55 [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2 Lionel Landwerlin
                   ` (4 preceding siblings ...)
  2020-04-06 14:43 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2020-04-06 19:11 ` Patchwork
  5 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2020-04-06 19:11 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915/perf: break OA config buffer object in 2
URL   : https://patchwork.freedesktop.org/series/75550/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8260_full -> Patchwork_17220_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_17220_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_17220_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_17220_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_ctx_persistence@engines-mixed-process@vcs1:
    - shard-tglb:         [PASS][1] -> [FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-tglb8/igt@gem_ctx_persistence@engines-mixed-process@vcs1.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-tglb7/igt@gem_ctx_persistence@engines-mixed-process@vcs1.html

  
Known issues
------------

  Here are the changes found in Patchwork_17220_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_workarounds@suspend-resume-context:
    - shard-apl:          [PASS][3] -> [DMESG-WARN][4] ([i915#180]) +4 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-apl2/igt@gem_workarounds@suspend-resume-context.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-apl8/igt@gem_workarounds@suspend-resume-context.html

  * igt@gem_workarounds@suspend-resume-fd:
    - shard-kbl:          [PASS][5] -> [DMESG-WARN][6] ([i915#180]) +1 similar issue
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-kbl3/igt@gem_workarounds@suspend-resume-fd.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-kbl1/igt@gem_workarounds@suspend-resume-fd.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-skl:          [PASS][7] -> [FAIL][8] ([i915#454])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-skl3/igt@i915_pm_dc@dc6-psr.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-skl10/igt@i915_pm_dc@dc6-psr.html

  * igt@i915_pm_rc6_residency@rc6-idle:
    - shard-snb:          [PASS][9] -> [FAIL][10] ([i915#1066])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-snb4/igt@i915_pm_rc6_residency@rc6-idle.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-snb6/igt@i915_pm_rc6_residency@rc6-idle.html

  * igt@kms_cursor_crc@pipe-a-cursor-128x128-onscreen:
    - shard-apl:          [PASS][11] -> [FAIL][12] ([i915#54] / [i915#95])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-apl7/igt@kms_cursor_crc@pipe-a-cursor-128x128-onscreen.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-apl8/igt@kms_cursor_crc@pipe-a-cursor-128x128-onscreen.html

  * igt@kms_flip@2x-dpms-vs-vblank-race:
    - shard-glk:          [PASS][13] -> [FAIL][14] ([i915#407])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-glk8/igt@kms_flip@2x-dpms-vs-vblank-race.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-glk3/igt@kms_flip@2x-dpms-vs-vblank-race.html

  * igt@kms_plane_alpha_blend@pipe-b-coverage-7efc:
    - shard-skl:          [PASS][15] -> [FAIL][16] ([fdo#108145] / [i915#265])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-skl1/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-skl10/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html

  * igt@kms_psr@psr2_sprite_render:
    - shard-iclb:         [PASS][17] -> [SKIP][18] ([fdo#109441])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-iclb2/igt@kms_psr@psr2_sprite_render.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-iclb8/igt@kms_psr@psr2_sprite_render.html

  * igt@perf@blocking:
    - shard-hsw:          [PASS][19] -> [INCOMPLETE][20] ([i915#61]) +11 similar issues
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-hsw8/igt@perf@blocking.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-hsw6/igt@perf@blocking.html

  * igt@perf@oa-formats:
    - shard-hsw:          [PASS][21] -> [INCOMPLETE][22] ([CI#80] / [i915#61])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-hsw7/igt@perf@oa-formats.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-hsw1/igt@perf@oa-formats.html

  
#### Possible fixes ####

  * igt@gem_media_fill:
    - shard-kbl:          [DMESG-WARN][23] ([i915#165]) -> [PASS][24]
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-kbl2/igt@gem_media_fill.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-kbl4/igt@gem_media_fill.html

  * igt@gem_softpin@noreloc-s3:
    - shard-kbl:          [DMESG-WARN][25] ([i915#180] / [i915#93] / [i915#95]) -> [PASS][26]
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-kbl6/igt@gem_softpin@noreloc-s3.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-kbl3/igt@gem_softpin@noreloc-s3.html

  * igt@gen9_exec_parse@allowed-all:
    - shard-glk:          [DMESG-WARN][27] ([i915#716]) -> [PASS][28]
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-glk6/igt@gen9_exec_parse@allowed-all.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-glk3/igt@gen9_exec_parse@allowed-all.html

  * igt@kms_color@pipe-b-ctm-0-5:
    - shard-kbl:          [DMESG-WARN][29] ([i915#78]) -> [PASS][30] +1 similar issue
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-kbl2/igt@kms_color@pipe-b-ctm-0-5.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-kbl4/igt@kms_color@pipe-b-ctm-0-5.html

  * igt@kms_cursor_crc@pipe-a-cursor-128x42-sliding:
    - shard-apl:          [FAIL][31] ([i915#54] / [i915#95]) -> [PASS][32] +1 similar issue
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-apl3/igt@kms_cursor_crc@pipe-a-cursor-128x42-sliding.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-apl6/igt@kms_cursor_crc@pipe-a-cursor-128x42-sliding.html

  * igt@kms_dp_aux_dev:
    - shard-iclb:         [DMESG-FAIL][33] ([i915#1645]) -> [PASS][34]
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-iclb7/igt@kms_dp_aux_dev.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-iclb6/igt@kms_dp_aux_dev.html

  * igt@kms_fbcon_fbt@fbc:
    - shard-kbl:          [FAIL][35] ([i915#1121] / [i915#64]) -> [PASS][36]
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-kbl2/igt@kms_fbcon_fbt@fbc.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-kbl4/igt@kms_fbcon_fbt@fbc.html

  * igt@kms_flip@2x-wf_vblank-ts-check:
    - shard-glk:          [FAIL][37] ([i915#34]) -> [PASS][38]
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-glk2/igt@kms_flip@2x-wf_vblank-ts-check.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-glk9/igt@kms_flip@2x-wf_vblank-ts-check.html

  * igt@kms_hdr@bpc-switch-suspend:
    - shard-skl:          [FAIL][39] ([i915#1188]) -> [PASS][40]
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-skl8/igt@kms_hdr@bpc-switch-suspend.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-skl9/igt@kms_hdr@bpc-switch-suspend.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes:
    - shard-kbl:          [DMESG-WARN][41] ([i915#180]) -> [PASS][42]
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-kbl3/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-kbl2/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes:
    - shard-apl:          [DMESG-WARN][43] ([i915#180]) -> [PASS][44] +3 similar issues
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-apl4/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-apl3/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html

  * igt@kms_plane_lowres@pipe-a-tiling-x:
    - shard-glk:          [FAIL][45] ([i915#899]) -> [PASS][46]
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-glk7/igt@kms_plane_lowres@pipe-a-tiling-x.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-glk2/igt@kms_plane_lowres@pipe-a-tiling-x.html

  * igt@kms_psr@psr2_dpms:
    - shard-iclb:         [SKIP][47] ([fdo#109441]) -> [PASS][48] +1 similar issue
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-iclb3/igt@kms_psr@psr2_dpms.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-iclb2/igt@kms_psr@psr2_dpms.html

  * igt@kms_setmode@basic:
    - shard-apl:          [FAIL][49] ([i915#31]) -> [PASS][50]
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-apl7/igt@kms_setmode@basic.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-apl4/igt@kms_setmode@basic.html

  * igt@perf@gen12-mi-rpc:
    - shard-tglb:         [FAIL][51] ([i915#1085]) -> [PASS][52]
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8260/shard-tglb7/igt@perf@gen12-mi-rpc.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/shard-tglb8/igt@perf@gen12-mi-rpc.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [CI#80]: https://gitlab.freedesktop.org/gfx-ci/i915-infra/issues/80
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [i915#1066]: https://gitlab.freedesktop.org/drm/intel/issues/1066
  [i915#1085]: https://gitlab.freedesktop.org/drm/intel/issues/1085
  [i915#1121]: https://gitlab.freedesktop.org/drm/intel/issues/1121
  [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
  [i915#1542]: https://gitlab.freedesktop.org/drm/intel/issues/1542
  [i915#1645]: https://gitlab.freedesktop.org/drm/intel/issues/1645
  [i915#165]: https://gitlab.freedesktop.org/drm/intel/issues/165
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
  [i915#34]: https://gitlab.freedesktop.org/drm/intel/issues/34
  [i915#407]: https://gitlab.freedesktop.org/drm/intel/issues/407
  [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
  [i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
  [i915#61]: https://gitlab.freedesktop.org/drm/intel/issues/61
  [i915#64]: https://gitlab.freedesktop.org/drm/intel/issues/64
  [i915#716]: https://gitlab.freedesktop.org/drm/intel/issues/716
  [i915#78]: https://gitlab.freedesktop.org/drm/intel/issues/78
  [i915#899]: https://gitlab.freedesktop.org/drm/intel/issues/899
  [i915#93]: https://gitlab.freedesktop.org/drm/intel/issues/93
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8260 -> Patchwork_17220

  CI-20190529: 20190529
  CI_DRM_8260: fa5519e01f097b7f69259be38606ff5f1bc3cc6c @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5572: 6c124b5c8501d900966c033ac86c3dc55c16a2da @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17220: 18728c0bc637aab4293092615a268bbd3e9f83a5 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17220/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2
  2020-03-27 10:40   ` Chris Wilson
@ 2020-03-30 13:11     ` Lionel Landwerlin
  0 siblings, 0 replies; 10+ messages in thread
From: Lionel Landwerlin @ 2020-03-30 13:11 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 27/03/2020 12:40, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2020-03-27 10:32:07)
>> We want to enable performance monitoring on multiple contexts to cover
>> the Iris use case of using 2 GEM contexts (3D & compute).
>>
>> So start by breaking the OA configuration BO which contains global &
>> per context register writes.
>>
>> NOA muxes & OA configurations are global, while FLEXEU register
>> configurations are per context.
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_perf.c | 194 ++++++++++++++++++++++---------
>>   1 file changed, 137 insertions(+), 57 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> index 3222f6cd8255..f524f50abdef 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -376,7 +376,8 @@ struct i915_oa_config_bo {
>>          struct llist_node node;
>>   
>>          struct i915_oa_config *oa_config;
>> -       struct i915_vma *vma;
>> +       struct i915_vma *ctx_vma;
>> +       struct i915_vma *global_vma;
> What's the allocation like? Worth packing into one vma and use an
> offset?
> -Chris

Good point, thanks!


-Lionel

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2
  2020-03-27 10:32 ` [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2 Lionel Landwerlin
@ 2020-03-27 10:40   ` Chris Wilson
  2020-03-30 13:11     ` Lionel Landwerlin
  0 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2020-03-27 10:40 UTC (permalink / raw)
  To: Lionel Landwerlin, intel-gfx

Quoting Lionel Landwerlin (2020-03-27 10:32:07)
> We want to enable performance monitoring on multiple contexts to cover
> the Iris use case of using 2 GEM contexts (3D & compute).
> 
> So start by breaking the OA configuration BO which contains global &
> per context register writes.
> 
> NOA muxes & OA configurations are global, while FLEXEU register
> configurations are per context.
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_perf.c | 194 ++++++++++++++++++++++---------
>  1 file changed, 137 insertions(+), 57 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 3222f6cd8255..f524f50abdef 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -376,7 +376,8 @@ struct i915_oa_config_bo {
>         struct llist_node node;
>  
>         struct i915_oa_config *oa_config;
> -       struct i915_vma *vma;
> +       struct i915_vma *ctx_vma;
> +       struct i915_vma *global_vma;

What's the allocation like? Worth packing into one vma and using an
offset?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2
  2020-03-27 10:32 [Intel-gfx] [PATCH 0/3] drm/i915/perf: add support for multi context filtering Lionel Landwerlin
@ 2020-03-27 10:32 ` Lionel Landwerlin
  2020-03-27 10:40   ` Chris Wilson
  0 siblings, 1 reply; 10+ messages in thread
From: Lionel Landwerlin @ 2020-03-27 10:32 UTC (permalink / raw)
  To: intel-gfx

We want to enable performance monitoring on multiple contexts to cover
the Iris use case of using 2 GEM contexts (3D & compute).

So start by splitting in two the OA configuration BO, which contains both
global & per-context register writes.

NOA muxes & OA configurations are global, while FLEXEU register
configurations are per context.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 194 ++++++++++++++++++++++---------
 1 file changed, 137 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 3222f6cd8255..f524f50abdef 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -376,7 +376,8 @@ struct i915_oa_config_bo {
 	struct llist_node node;
 
 	struct i915_oa_config *oa_config;
-	struct i915_vma *vma;
+	struct i915_vma *ctx_vma;
+	struct i915_vma *global_vma;
 };
 
 static struct ctl_table_header *sysctl_header;
@@ -412,7 +413,8 @@ i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
 static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
 {
 	i915_oa_config_put(oa_bo->oa_config);
-	i915_vma_put(oa_bo->vma);
+	i915_vma_put(oa_bo->ctx_vma);
+	i915_vma_put(oa_bo->global_vma);
 	kfree(oa_bo);
 }
 
@@ -1868,9 +1870,9 @@ static struct i915_oa_config_bo *
 alloc_oa_config_buffer(struct i915_perf_stream *stream,
 		       struct i915_oa_config *oa_config)
 {
-	struct drm_i915_gem_object *obj;
 	struct i915_oa_config_bo *oa_bo;
-	size_t config_length = 0;
+	struct drm_i915_gem_object *global_obj, *ctx_obj;
+	size_t global_config_length = 0, ctx_config_length;
 	u32 *cs;
 	int err;
 
@@ -1878,27 +1880,26 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
 	if (!oa_bo)
 		return ERR_PTR(-ENOMEM);
 
-	config_length += num_lri_dwords(oa_config->mux_regs_len);
-	config_length += num_lri_dwords(oa_config->b_counter_regs_len);
-	config_length += num_lri_dwords(oa_config->flex_regs_len);
-	config_length += 3; /* MI_BATCH_BUFFER_START */
-	config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
-
-	obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
+	/* Global configuration requires a wait for it to apply. */
+	global_config_length += num_lri_dwords(oa_config->mux_regs_len);
+	global_config_length += num_lri_dwords(oa_config->b_counter_regs_len);
+	global_config_length += 3; /* MI_BATCH_BUFFER_START */
+	global_config_length = ALIGN(sizeof(u32) * global_config_length,
+				     I915_GTT_PAGE_SIZE);
+
+	global_obj = i915_gem_object_create_shmem(stream->perf->i915,
+						  global_config_length);
+	if (IS_ERR(global_obj)) {
+		err = PTR_ERR(global_obj);
 		goto err_free;
 	}
 
-	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	cs = i915_gem_object_pin_map(global_obj, I915_MAP_WB);
 	if (IS_ERR(cs)) {
 		err = PTR_ERR(cs);
-		goto err_oa_bo;
+		goto err_global_bo;
 	}
 
-	cs = write_cs_mi_lri(cs,
-			     oa_config->mux_regs,
-			     oa_config->mux_regs_len);
 	cs = write_cs_mi_lri(cs,
 			     oa_config->b_counter_regs,
 			     oa_config->b_counter_regs_len);
@@ -1913,15 +1914,51 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
 	*cs++ = i915_ggtt_offset(stream->noa_wait);
 	*cs++ = 0;
 
-	i915_gem_object_flush_map(obj);
-	i915_gem_object_unpin_map(obj);
+	i915_gem_object_flush_map(global_obj);
+	i915_gem_object_unpin_map(global_obj);
+
+	oa_bo->global_vma = i915_vma_instance(global_obj,
+					      &stream->engine->gt->ggtt->vm,
+					      NULL);
+	if (IS_ERR(oa_bo->global_vma)) {
+		err = PTR_ERR(oa_bo->global_vma);
+		goto err_global_bo;
+	}
+
+	/* There is no known delay needed for the per context registers. */
+	ctx_config_length = 1 /* MI_BATCH_BUFFER_END */ +
+		num_lri_dwords(oa_config->flex_regs_len);
+	ctx_config_length = ALIGN(sizeof(u32) * ctx_config_length,
+				  I915_GTT_PAGE_SIZE);
+
+	ctx_obj = i915_gem_object_create_shmem(stream->perf->i915,
+					       ctx_config_length);
+	if (IS_ERR(ctx_obj)) {
+		err = PTR_ERR(ctx_obj);
+		goto err_global_vma;
+	}
+
+	cs = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_global_vma;
+	}
+
+	cs = write_cs_mi_lri(cs,
+			     oa_config->mux_regs,
+			     oa_config->mux_regs_len);
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	i915_gem_object_flush_map(ctx_obj);
+	i915_gem_object_unpin_map(ctx_obj);
 
-	oa_bo->vma = i915_vma_instance(obj,
-				       &stream->engine->gt->ggtt->vm,
-				       NULL);
-	if (IS_ERR(oa_bo->vma)) {
-		err = PTR_ERR(oa_bo->vma);
-		goto err_oa_bo;
+	oa_bo->ctx_vma = i915_vma_instance(ctx_obj,
+					   &stream->engine->gt->ggtt->vm,
+					   NULL);
+	if (IS_ERR(oa_bo->ctx_vma)) {
+		err = PTR_ERR(oa_bo->ctx_vma);
+		goto err_ctx_bo;
 	}
 
 	oa_bo->oa_config = i915_oa_config_get(oa_config);
@@ -1929,15 +1966,19 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
 
 	return oa_bo;
 
-err_oa_bo:
-	i915_gem_object_put(obj);
+err_ctx_bo:
+	i915_gem_object_put(ctx_obj);
+err_global_vma:
+	i915_vma_put(oa_bo->global_vma);
+err_global_bo:
+	i915_gem_object_put(global_obj);
 err_free:
 	kfree(oa_bo);
 	return ERR_PTR(err);
 }
 
-static struct i915_vma *
-get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
+static struct i915_oa_config_bo *
+get_oa_bo(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
 {
 	struct i915_oa_config_bo *oa_bo;
 
@@ -1950,29 +1991,29 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
 		    memcmp(oa_bo->oa_config->uuid,
 			   oa_config->uuid,
 			   sizeof(oa_config->uuid)) == 0)
-			goto out;
+			return oa_bo;
 	}
 
-	oa_bo = alloc_oa_config_buffer(stream, oa_config);
-	if (IS_ERR(oa_bo))
-		return ERR_CAST(oa_bo);
-
-out:
-	return i915_vma_get(oa_bo->vma);
+	return alloc_oa_config_buffer(stream, oa_config);
 }
 
 static struct i915_request *
 emit_oa_config(struct i915_perf_stream *stream,
 	       struct i915_oa_config *oa_config,
-	       struct intel_context *ce)
+	       struct intel_context *ce,
+	       bool global)
 {
+	struct i915_oa_config_bo *oa_bo;
 	struct i915_request *rq;
 	struct i915_vma *vma;
 	int err;
 
-	vma = get_oa_vma(stream, oa_config);
-	if (IS_ERR(vma))
-		return ERR_CAST(vma);
+	oa_bo = get_oa_bo(stream, oa_config);
+	if (IS_ERR(oa_bo))
+		return ERR_CAST(oa_bo);
+
+	vma = global ? i915_vma_get(oa_bo->global_vma) :
+		i915_vma_get(oa_bo->ctx_vma);
 
 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (err)
@@ -2019,6 +2060,7 @@ static struct i915_request *
 hsw_enable_metric_set(struct i915_perf_stream *stream)
 {
 	struct intel_uncore *uncore = stream->uncore;
+	struct i915_request *rq;
 
 	/*
 	 * PRM:
@@ -2035,7 +2077,15 @@ hsw_enable_metric_set(struct i915_perf_stream *stream)
 	intel_uncore_rmw(uncore, GEN6_UCGCTL1,
 			 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
 
-	return emit_oa_config(stream, stream->oa_config, oa_context(stream));
+	rq = emit_oa_config(stream, stream->oa_config,
+			    stream->engine->kernel_context,
+			    false /* global */);
+	if (IS_ERR(rq))
+		return rq;
+
+	return emit_oa_config(stream, stream->oa_config,
+			      stream->engine->kernel_context,
+			      true /* global */);
 }
 
 static void hsw_disable_metric_set(struct i915_perf_stream *stream)
@@ -2423,6 +2473,7 @@ gen8_enable_metric_set(struct i915_perf_stream *stream)
 {
 	struct intel_uncore *uncore = stream->uncore;
 	struct i915_oa_config *oa_config = stream->oa_config;
+	struct i915_request *rq;
 	int ret;
 
 	/*
@@ -2463,7 +2514,15 @@ gen8_enable_metric_set(struct i915_perf_stream *stream)
 	if (ret)
 		return ERR_PTR(ret);
 
-	return emit_oa_config(stream, oa_config, oa_context(stream));
+	rq = emit_oa_config(stream, oa_config,
+			    stream->engine->kernel_context,
+			    false /* global */);
+	if (IS_ERR(rq))
+		return rq;
+
+	return emit_oa_config(stream, stream->oa_config,
+			      stream->engine->kernel_context,
+			      true /* global */);
 }
 
 static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
@@ -2480,6 +2539,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream)
 	struct i915_oa_config *oa_config = stream->oa_config;
 	bool periodic = stream->periodic;
 	u32 period_exponent = stream->period_exponent;
+	struct i915_request *rq;
 	int ret;
 
 	intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
@@ -2508,17 +2568,23 @@ gen12_enable_metric_set(struct i915_perf_stream *stream)
 		return ERR_PTR(ret);
 
 	/*
-	 * For Gen12, performance counters are context
-	 * saved/restored. Only enable it for the context that
-	 * requested this.
+	 * For Gen12, performance counters are also context saved/restored on
+	 * another set of performance registers. Configure the unit dealing
+	 * with those.
 	 */
-	if (stream->ctx) {
-		ret = gen12_configure_oar_context(stream, true);
-		if (ret)
-			return ERR_PTR(ret);
-	}
+	ret = gen12_configure_oar_context(stream, true);
+	if (ret)
+		return ERR_PTR(ret);
+
+	rq = emit_oa_config(stream, oa_config,
+			    stream->engine->kernel_context,
+			    false /* global */);
+	if (IS_ERR(rq))
+		return rq;
 
-	return emit_oa_config(stream, oa_config, oa_context(stream));
+	return emit_oa_config(stream, stream->oa_config,
+			      stream->engine->kernel_context,
+			      true /* global */);
 }
 
 static void gen8_disable_metric_set(struct i915_perf_stream *stream)
@@ -3228,15 +3294,29 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
 		 * When set globally, we use a low priority kernel context,
 		 * so it will effectively take effect when idle.
 		 */
-		rq = emit_oa_config(stream, config, oa_context(stream));
-		if (!IS_ERR(rq)) {
-			config = xchg(&stream->oa_config, config);
-			i915_request_put(rq);
-		} else {
+		rq = emit_oa_config(stream, config,
+				    oa_context(stream),
+				    false /* global */);
+		if (IS_ERR(rq)) {
 			ret = PTR_ERR(rq);
+			goto err;
 		}
+
+		i915_request_put(rq);
+
+		rq = emit_oa_config(stream, config,
+				    oa_context(stream),
+				    true /* global */);
+		if (IS_ERR(rq)) {
+			ret = PTR_ERR(rq);
+			goto err;
+		}
+
+		config = xchg(&stream->oa_config, config);
+		i915_request_put(rq);
 	}
 
+err:
 	i915_oa_config_put(config);
 
 	return ret;
-- 
2.26.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-04-06 19:11 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-06 13:55 [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2 Lionel Landwerlin
2020-04-06 13:55 ` [Intel-gfx] [PATCH 2/3] drm/i915/perf: prepare driver to receive multiple ctx handles Lionel Landwerlin
2020-04-06 13:55 ` [Intel-gfx] [PATCH 3/3] drm/i915/perf: enable filtering on multiple contexts Lionel Landwerlin
2020-04-06 14:19 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915/perf: break OA config buffer object in 2 Patchwork
2020-04-06 14:36 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
2020-04-06 14:43 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-04-06 19:11 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2020-03-27 10:32 [Intel-gfx] [PATCH 0/3] drm/i915/perf: add support for multi context filtering Lionel Landwerlin
2020-03-27 10:32 ` [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2 Lionel Landwerlin
2020-03-27 10:40   ` Chris Wilson
2020-03-30 13:11     ` Lionel Landwerlin

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).