All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v13 0/6] Per context dynamic (sub)slice power-gating
@ 2018-09-17 11:30 Tvrtko Ursulin
  2018-09-17 11:30 ` [PATCH 1/6] drm/i915/execlists: Move RPCS setup to context pin Tvrtko Ursulin
                   ` (14 more replies)
  0 siblings, 15 replies; 35+ messages in thread
From: Tvrtko Ursulin @ 2018-09-17 11:30 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Fix for Icelake input validation logic and more review feedback.

Lionel Landwerlin (2):
  drm/i915: Record the sseu configuration per-context & engine
  drm/i915/perf: lock powergating configuration to default when active

Tvrtko Ursulin (4):
  drm/i915/execlists: Move RPCS setup to context pin
  drm/i915: Add timeline barrier support
  drm/i915: Expose RPCS (SSEU) configuration to userspace
  drm/i915/icl: Support co-existence between per-context SSEU and OA

 drivers/gpu/drm/i915/i915_drv.h               |  14 +
 drivers/gpu/drm/i915/i915_gem_context.c       | 320 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context.h       |  10 +
 drivers/gpu/drm/i915/i915_perf.c              |  13 +-
 drivers/gpu/drm/i915/i915_request.c           |  13 +
 drivers/gpu/drm/i915/i915_request.h           |  10 +
 drivers/gpu/drm/i915/i915_timeline.c          |   3 +
 drivers/gpu/drm/i915/i915_timeline.h          |  27 ++
 drivers/gpu/drm/i915/intel_lrc.c              | 100 ++++--
 drivers/gpu/drm/i915/intel_lrc.h              |   2 +
 .../gpu/drm/i915/selftests/mock_timeline.c    |   2 +
 include/uapi/drm/i915_drm.h                   |  43 +++
 12 files changed, 520 insertions(+), 37 deletions(-)

-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 1/6] drm/i915/execlists: Move RPCS setup to context pin
  2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2018-09-17 11:30 ` Tvrtko Ursulin
  2018-09-17 11:43   ` Chris Wilson
  2018-09-17 11:30 ` [PATCH 2/6] drm/i915: Record the sseu configuration per-context & engine Tvrtko Ursulin
                   ` (13 subsequent siblings)
  14 siblings, 1 reply; 35+ messages in thread
From: Tvrtko Ursulin @ 2018-09-17 11:30 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Configuring RPCS in the context image just before pin is sufficient and will
come in extra handy in one of the following patches.

v2:
 * Split image setup a bit differently. (Chris Wilson)

v3:
 * Update context image after reset as well - otherwise the application
   of pinned default state clears the RPCS.

v4:
 * Use local variable throughout the function. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> # v2
---
 drivers/gpu/drm/i915/intel_lrc.c | 45 ++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a51be16ddaac..54446b6aeb7a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1305,6 +1305,24 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
 	return i915_vma_pin(vma, 0, 0, flags);
 }
 
+static u32 make_rpcs(struct drm_i915_private *dev_priv);
+
+static void
+__execlists_update_reg_state(struct intel_engine_cs *engine,
+			     struct intel_context *ce)
+{
+	u32 *regs = ce->lrc_reg_state;
+	struct intel_ring *ring = ce->ring;
+
+	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma);
+	regs[CTX_RING_HEAD + 1] = ring->head;
+	regs[CTX_RING_TAIL + 1] = ring->tail;
+
+	/* RPCS */
+	if (engine->class == RENDER_CLASS)
+		regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915);
+}
+
 static struct intel_context *
 __execlists_context_pin(struct intel_engine_cs *engine,
 			struct i915_gem_context *ctx,
@@ -1343,10 +1361,8 @@ __execlists_context_pin(struct intel_engine_cs *engine,
 	GEM_BUG_ON(!intel_ring_offset_valid(ce->ring, ce->ring->head));
 
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
-	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
-		i915_ggtt_offset(ce->ring->vma);
-	ce->lrc_reg_state[CTX_RING_HEAD + 1] = ce->ring->head;
-	ce->lrc_reg_state[CTX_RING_TAIL + 1] = ce->ring->tail;
+
+	__execlists_update_reg_state(engine, ce);
 
 	ce->state->obj->pin_global++;
 	i915_gem_context_get(ctx);
@@ -1955,14 +1971,14 @@ static void execlists_reset(struct intel_engine_cs *engine,
 		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
 		       engine->context_size - PAGE_SIZE);
 	}
-	execlists_init_reg_state(regs,
-				 request->gem_context, engine, request->ring);
 
 	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
-	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma);
-
 	request->ring->head = intel_ring_wrap(request->ring, request->postfix);
-	regs[CTX_RING_HEAD + 1] = request->ring->head;
+
+	execlists_init_reg_state(regs, request->gem_context, engine,
+				 request->ring);
+
+	__execlists_update_reg_state(engine, request->hw_context);
 
 	intel_ring_update_space(request->ring);
 
@@ -2710,8 +2726,7 @@ static void execlists_init_reg_state(u32 *regs,
 
 	if (rcs) {
 		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
-		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
-			make_rpcs(dev_priv));
+		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
 
 		i915_oa_init_reg_state(engine, ctx, regs);
 	}
@@ -2872,12 +2887,8 @@ void intel_lr_context_resume(struct drm_i915_private *i915)
 
 			intel_ring_reset(ce->ring, 0);
 
-			if (ce->pin_count) { /* otherwise done in context_pin */
-				u32 *regs = ce->lrc_reg_state;
-
-				regs[CTX_RING_HEAD + 1] = ce->ring->head;
-				regs[CTX_RING_TAIL + 1] = ce->ring->tail;
-			}
+			if (ce->pin_count) /* otherwise done in context_pin */
+				__execlists_update_reg_state(engine, ce);
 		}
 	}
 }
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 2/6] drm/i915: Record the sseu configuration per-context & engine
  2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
  2018-09-17 11:30 ` [PATCH 1/6] drm/i915/execlists: Move RPCS setup to context pin Tvrtko Ursulin
@ 2018-09-17 11:30 ` Tvrtko Ursulin
  2018-09-17 11:30 ` [PATCH 3/6] drm/i915/perf: lock powergating configuration to default when active Tvrtko Ursulin
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 35+ messages in thread
From: Tvrtko Ursulin @ 2018-09-17 11:30 UTC (permalink / raw)
  To: Intel-gfx

From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

We want to expose the ability to reconfigure the slices, subslice and
eu per context and per engine. To facilitate that, store the current
configuration on the context for each engine, which is initially set
to the device default upon creation.

v2: record sseu configuration per context & engine (Chris)

v3: introduce the i915_gem_context_sseu to store powergating
    programming, sseu_dev_info has grown quite a bit (Lionel)

v4: rename i915_gem_sseu into intel_sseu (Chris)
    use to_intel_context() (Chris)

v5: More to_intel_context() (Tvrtko)
    Switch intel_sseu from union to struct (Tvrtko)
    Move context default sseu in existing loop (Chris)

v6: s/intel_sseu_from_device_sseu/intel_device_default_sseu/ (Tvrtko)

Tvrtko Ursulin:

v7:
 * Pass intel_sseu by pointer instead of value to make_rpcs.
 * Rebase for make_rpcs changes.

v8:
 * Rebase for RPCS edit on pin.

v9:
 * Rebase for context image setup changes.

v10:
 * Rename dev_priv to i915. (Chris Wilson)

v11:
 * Rebase.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         | 14 +++++++++++
 drivers/gpu/drm/i915/i915_gem_context.c |  2 ++
 drivers/gpu/drm/i915/i915_gem_context.h |  4 ++++
 drivers/gpu/drm/i915/i915_request.h     | 10 ++++++++
 drivers/gpu/drm/i915/intel_lrc.c        | 31 +++++++++++++------------
 5 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7d4daa7412f1..6b7ae63e47c3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3479,6 +3479,20 @@ mkwrite_device_info(struct drm_i915_private *dev_priv)
 	return (struct intel_device_info *)&dev_priv->info;
 }
 
+static inline struct intel_sseu
+intel_device_default_sseu(struct drm_i915_private *i915)
+{
+	const struct sseu_dev_info *sseu = &INTEL_INFO(i915)->sseu;
+	struct intel_sseu value = {
+		.slice_mask = sseu->slice_mask,
+		.subslice_mask = sseu->subslice_mask[0],
+		.min_eus_per_subslice = sseu->max_eus_per_subslice,
+		.max_eus_per_subslice = sseu->max_eus_per_subslice,
+	};
+
+	return value;
+}
+
 /* modesetting */
 extern void intel_modeset_init_hw(struct drm_device *dev);
 extern int intel_modeset_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index f772593b99ab..0b8cc748648b 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -343,6 +343,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 		struct intel_context *ce = &ctx->__engine[n];
 
 		ce->gem_context = ctx;
+		/* Use the whole device by default */
+		ce->sseu = intel_device_default_sseu(dev_priv);
 	}
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 08165f6a0a84..7510de738b35 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -31,6 +31,7 @@
 
 #include "i915_gem.h"
 #include "i915_scheduler.h"
+#include "intel_device_info.h"
 
 struct pid;
 
@@ -170,6 +171,9 @@ struct i915_gem_context {
 		int pin_count;
 
 		const struct intel_context_ops *ops;
+
+		/** sseu: Control eu/slice partitioning */
+		struct intel_sseu sseu;
 	} __engine[I915_NUM_ENGINES];
 
 	/** ring_size: size for allocating the per-engine ring buffer */
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 7fa94b024968..3a4be20ea74a 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -39,6 +39,16 @@ struct drm_i915_gem_object;
 struct i915_request;
 struct i915_timeline;
 
+/*
+ * Powergating configuration for a particular (context,engine).
+ */
+struct intel_sseu {
+	u8 slice_mask;
+	u8 subslice_mask;
+	u8 min_eus_per_subslice;
+	u8 max_eus_per_subslice;
+};
+
 struct intel_wait {
 	struct rb_node node;
 	struct task_struct *tsk;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 54446b6aeb7a..1491032b675b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1305,7 +1305,8 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
 	return i915_vma_pin(vma, 0, 0, flags);
 }
 
-static u32 make_rpcs(struct drm_i915_private *dev_priv);
+static u32
+make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
 
 static void
 __execlists_update_reg_state(struct intel_engine_cs *engine,
@@ -1320,7 +1321,8 @@ __execlists_update_reg_state(struct intel_engine_cs *engine,
 
 	/* RPCS */
 	if (engine->class == RENDER_CLASS)
-		regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915);
+		regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915,
+							  &ce->sseu);
 }
 
 static struct intel_context *
@@ -2506,18 +2508,19 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine)
 }
 
 static u32
-make_rpcs(struct drm_i915_private *dev_priv)
+make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu)
 {
-	bool subslice_pg = INTEL_INFO(dev_priv)->sseu.has_subslice_pg;
-	u8 slices = hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask);
-	u8 subslices = hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]);
+	const struct sseu_dev_info *sseu = &INTEL_INFO(i915)->sseu;
+	bool subslice_pg = sseu->has_subslice_pg;
+	u8 slices = hweight8(ctx_sseu->slice_mask);
+	u8 subslices = hweight8(ctx_sseu->subslice_mask);
 	u32 rpcs = 0;
 
 	/*
 	 * No explicit RPCS request is needed to ensure full
 	 * slice/subslice/EU enablement prior to Gen9.
 	*/
-	if (INTEL_GEN(dev_priv) < 9)
+	if (INTEL_GEN(i915) < 9)
 		return 0;
 
 	/*
@@ -2545,7 +2548,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
 	 * subslices are enabled, or a count between one and four on the first
 	 * slice.
 	 */
-	if (IS_GEN11(dev_priv) && slices == 1 && subslices >= 4) {
+	if (IS_GEN11(i915) && slices == 1 && subslices >= 4) {
 		GEM_BUG_ON(subslices & 1);
 
 		subslice_pg = false;
@@ -2558,10 +2561,10 @@ make_rpcs(struct drm_i915_private *dev_priv)
 	 * must make an explicit request through RPCS for full
 	 * enablement.
 	*/
-	if (INTEL_INFO(dev_priv)->sseu.has_slice_pg) {
+	if (sseu->has_slice_pg) {
 		u32 mask, val = slices;
 
-		if (INTEL_GEN(dev_priv) >= 11) {
+		if (INTEL_GEN(i915) >= 11) {
 			mask = GEN11_RPCS_S_CNT_MASK;
 			val <<= GEN11_RPCS_S_CNT_SHIFT;
 		} else {
@@ -2586,18 +2589,16 @@ make_rpcs(struct drm_i915_private *dev_priv)
 		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
 	}
 
-	if (INTEL_INFO(dev_priv)->sseu.has_eu_pg) {
+	if (sseu->has_eu_pg) {
 		u32 val;
 
-		val = INTEL_INFO(dev_priv)->sseu.eu_per_subslice <<
-		      GEN8_RPCS_EU_MIN_SHIFT;
+		val = ctx_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
 		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
 		val &= GEN8_RPCS_EU_MIN_MASK;
 
 		rpcs |= val;
 
-		val = INTEL_INFO(dev_priv)->sseu.eu_per_subslice <<
-		      GEN8_RPCS_EU_MAX_SHIFT;
+		val = ctx_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
 		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
 		val &= GEN8_RPCS_EU_MAX_MASK;
 
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 3/6] drm/i915/perf: lock powergating configuration to default when active
  2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
  2018-09-17 11:30 ` [PATCH 1/6] drm/i915/execlists: Move RPCS setup to context pin Tvrtko Ursulin
  2018-09-17 11:30 ` [PATCH 2/6] drm/i915: Record the sseu configuration per-context & engine Tvrtko Ursulin
@ 2018-09-17 11:30 ` Tvrtko Ursulin
  2018-09-17 11:50   ` Chris Wilson
  2018-09-17 11:30 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin
                   ` (11 subsequent siblings)
  14 siblings, 1 reply; 35+ messages in thread
From: Tvrtko Ursulin @ 2018-09-17 11:30 UTC (permalink / raw)
  To: Intel-gfx

From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

If some of the contexts submitting workloads to the GPU have been
configured to shut down slices/subslices, we might lose the NOA
configurations written in the NOA muxes.

One possible solution to this problem is to reprogram the NOA muxes
when we switch to a new context. We initially tried this in the
workaround batchbuffer but some concerns were raised about the cost
of reprogramming at every context switch. This solution is also not
without consequences from the userspace point of view. Reprogramming
of the muxes can only happen once the powergating configuration has
changed (which happens after context switch). This means for a window
of time during the recording, counters recorded by the OA unit might
be invalid. This requires userspace dealing with OA reports to discard
the invalid values.

Minimizing the reprogramming could be implemented by tracking of the
last programmed configuration somewhere in GGTT and use MI_PREDICATE
to discard some of the programming commands, but the command streamer
would still have to parse all the MI_LRI instructions in the
workaround batchbuffer.

Another solution, which this change implements, is to simply disregard
the user requested configuration for the period of time when i915/perf
is active. There is no known issue with this apart from a performance
penalty for some media workloads that benefit from running on a
partially powergated GPU. We already prevent RC6 from affecting the
programming so it doesn't sound completely unreasonable to hold on
powergating for the same reason.

v2: Leave RPCS programming in intel_lrc.c (Lionel)

v3: Update for s/union intel_sseu/struct intel_sseu/ (Lionel)
    More to_intel_context() (Tvrtko)
    s/dev_priv/i915/ (Tvrtko)

Tvrtko Ursulin:

v4:
 * Rebase for make_rpcs changes.

v5:
 * Apply OA restriction from make_rpcs directly.

v6:
 * Rebase for context image setup changes.

v7:
 * Move stream assignment before metric enable.

v8:
 * Rebase.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 13 +++++++++----
 drivers/gpu/drm/i915/intel_lrc.c | 31 ++++++++++++++++++++-----------
 drivers/gpu/drm/i915/intel_lrc.h |  2 ++
 3 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 664b96bb65a3..46ddd1913fee 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1677,6 +1677,11 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
 
 		CTX_REG(reg_state, state_offset, flex_regs[i], value);
 	}
+
+	CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
+		gen8_make_rpcs(dev_priv,
+			       &to_intel_context(ctx,
+						 dev_priv->engine[RCS])->sseu));
 }
 
 /*
@@ -2092,6 +2097,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	if (ret)
 		goto err_lock;
 
+	stream->ops = &i915_oa_stream_ops;
+	dev_priv->perf.oa.exclusive_stream = stream;
+
 	ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
 						      stream->oa_config);
 	if (ret) {
@@ -2099,15 +2107,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		goto err_enable;
 	}
 
-	stream->ops = &i915_oa_stream_ops;
-
-	dev_priv->perf.oa.exclusive_stream = stream;
-
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	return 0;
 
 err_enable:
+	dev_priv->perf.oa.exclusive_stream = NULL;
 	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1491032b675b..cf2e0848fa55 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1305,9 +1305,6 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
 	return i915_vma_pin(vma, 0, 0, flags);
 }
 
-static u32
-make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
-
 static void
 __execlists_update_reg_state(struct intel_engine_cs *engine,
 			     struct intel_context *ce)
@@ -1321,8 +1318,8 @@ __execlists_update_reg_state(struct intel_engine_cs *engine,
 
 	/* RPCS */
 	if (engine->class == RENDER_CLASS)
-		regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915,
-							  &ce->sseu);
+		regs[CTX_R_PWR_CLK_STATE + 1] = gen8_make_rpcs(engine->i915,
+							       &ce->sseu);
 }
 
 static struct intel_context *
@@ -2507,13 +2504,12 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine)
 	return logical_ring_init(engine);
 }
 
-static u32
-make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu)
+u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
 {
 	const struct sseu_dev_info *sseu = &INTEL_INFO(i915)->sseu;
 	bool subslice_pg = sseu->has_subslice_pg;
-	u8 slices = hweight8(ctx_sseu->slice_mask);
-	u8 subslices = hweight8(ctx_sseu->subslice_mask);
+	struct intel_sseu ctx_sseu;
+	u8 slices, subslices;
 	u32 rpcs = 0;
 
 	/*
@@ -2523,6 +2519,19 @@ make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu)
 	if (INTEL_GEN(i915) < 9)
 		return 0;
 
+	/*
+	 * If i915/perf is active, we want a stable powergating configuration
+	 * on the system. The most natural configuration to take in that case
+	 * is the default (i.e maximum the hardware can do).
+	 */
+	if (unlikely(i915->perf.oa.exclusive_stream))
+		ctx_sseu = intel_device_default_sseu(i915);
+	else
+		ctx_sseu = *req_sseu;
+
+	slices = hweight8(ctx_sseu.slice_mask);
+	subslices = hweight8(ctx_sseu.subslice_mask);
+
 	/*
 	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
 	 * wide and Icelake has up to eight subslices, specfial programming is
@@ -2592,13 +2601,13 @@ make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu)
 	if (sseu->has_eu_pg) {
 		u32 val;
 
-		val = ctx_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
+		val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
 		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
 		val &= GEN8_RPCS_EU_MIN_MASK;
 
 		rpcs |= val;
 
-		val = ctx_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
+		val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
 		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
 		val &= GEN8_RPCS_EU_MAX_MASK;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index f5a5502ecf70..a4e28cc55fda 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -104,4 +104,6 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
 
+u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
+
 #endif /* _INTEL_LRC_H_ */
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 4/6] drm/i915: Add timeline barrier support
  2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  2018-09-17 11:30 ` [PATCH 3/6] drm/i915/perf: lock powergating configuration to default when active Tvrtko Ursulin
@ 2018-09-17 11:30 ` Tvrtko Ursulin
  2018-09-17 11:30 ` [PATCH 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace Tvrtko Ursulin
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 35+ messages in thread
From: Tvrtko Ursulin @ 2018-09-17 11:30 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Timeline barrier allows serialization between different timelines.

After calling i915_timeline_set_barrier with a request, all following
submissions on this timeline will be set up as depending on this request,
or barrier. Once the barrier has been completed it automatically gets
cleared and things continue as normal.

This facility will be used by the upcoming context SSEU code.

v2:
 * Assert barrier has been retired on timeline_fini. (Chris Wilson)
 * Fix mock_timeline.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c           | 13 +++++++++
 drivers/gpu/drm/i915/i915_timeline.c          |  3 +++
 drivers/gpu/drm/i915/i915_timeline.h          | 27 +++++++++++++++++++
 .../gpu/drm/i915/selftests/mock_timeline.c    |  2 ++
 4 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a492385b2089..76fc80330c85 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -644,6 +644,15 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
+static int add_timeline_barrier(struct i915_request *rq)
+{
+	struct i915_request *barrier =
+		i915_gem_active_raw(&rq->timeline->barrier,
+				    &rq->i915->drm.struct_mutex);
+
+	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
 /**
  * i915_request_alloc - allocate a request structure
  *
@@ -808,6 +817,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
+	ret = add_timeline_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
 	/* Unconditionally invalidate GPU caches and TLBs. */
 	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 4667cc08c416..5a87c5bd5154 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -37,6 +37,8 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
+
+	init_request_active(&timeline->barrier, NULL);
 }
 
 /**
@@ -69,6 +71,7 @@ void i915_timelines_park(struct drm_i915_private *i915)
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index a2c2c3ab5fb0..c8526ab44dbc 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -72,6 +72,16 @@ struct i915_timeline {
 	 */
 	u32 global_sync[I915_NUM_ENGINES];
 
+	/**
+	 * Barrier provides the ability to serialize ordering between different
+	 * timelines.
+	 *
+	 * Users can call i915_timeline_set_barrier which will make all
+	 * subsequent submissions be executed only after this barrier has been
+	 * completed.
+	 */
+	struct i915_gem_active barrier;
+
 	struct list_head link;
 	const char *name;
 
@@ -125,4 +135,21 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 
 void i915_timelines_park(struct drm_i915_private *i915);
 
+/**
+ * i915_timeline_set_barrier - orders submission between different timelines
+ * @timeline: timeline to set the barrier on
+ * @rq: request after which new submissions can proceed
+ *
+ * Sets the passed in request as the serialization point for all subsequent
+ * submissions on @timeline. Subsequent requests will not be submitted to GPU
+ * until the barrier has been completed.
+ */
+static inline void
+i915_timeline_set_barrier(struct i915_timeline *timeline,
+			  struct i915_request *rq)
+{
+	GEM_BUG_ON(timeline->fence_context == rq->timeline->fence_context);
+	i915_gem_active_set(&timeline->barrier, rq);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index dcf3b16f5a07..a718b64c988e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -19,6 +19,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 
 	i915_syncmap_init(&timeline->sync);
 
+	init_request_active(&timeline->barrier, NULL);
+
 	INIT_LIST_HEAD(&timeline->link);
 }
 
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace
  2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (3 preceding siblings ...)
  2018-09-17 11:30 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin
@ 2018-09-17 11:30 ` Tvrtko Ursulin
  2018-09-17 11:48   ` Chris Wilson
  2018-09-18 13:43   ` [PATCH v18 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only) Tvrtko Ursulin
  2018-09-17 11:30 ` [PATCH 6/6] drm/i915/icl: Support co-existence between per-context SSEU and OA Tvrtko Ursulin
                   ` (9 subsequent siblings)
  14 siblings, 2 replies; 35+ messages in thread
From: Tvrtko Ursulin @ 2018-09-17 11:30 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We want to allow userspace to reconfigure the subslice configuration for
its own use case. To do so, we expose a context parameter to allow
adjustment of the RPCS register stored within the context image (and
currently not accessible via LRI). If the context is adjusted before
first use, the adjustment is for "free"; otherwise if the context is
active we flush the context off the GPU (stalling all users), forcing
the GPU to save the context to memory where we can modify it and so
ensure that the register is reloaded on next execution.

The overhead of managing additional EU subslices can be significant,
especially in multi-context workloads. Non-GPGPU contexts should
preferably disable the subslices they are not using, and others should
fine-tune the number to match their workload.

We expose complete control over the RPCS register, allowing
configuration of slice/subslice, via masks packed into a u64 for
simplicity. For example,

	struct drm_i915_gem_context_param arg;
	struct drm_i915_gem_context_param_sseu sseu = { .class = 0,
	                                                .instance = 0, };

	memset(&arg, 0, sizeof(arg));
	arg.ctx_id = ctx;
	arg.param = I915_CONTEXT_PARAM_SSEU;
	arg.value = (uintptr_t) &sseu;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg) == 0) {
		sseu.subslice_mask = 0;

		drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
	}

could be used to disable all subslices where supported.

v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)

v3: Add ability to program this per engine (Chris)

v4: Move most get_sseu() into i915_gem_context.c (Lionel)

v5: Validate sseu configuration against the device's capabilities (Lionel)

v6: Change context powergating settings through MI_SDM on kernel context (Chris)

v7: Synchronize the requests following a powergating setting change using a global
    dependency (Chris)
    Iterate timelines through dev_priv.gt.active_rings (Tvrtko)
    Disable RPCS configuration setting for non capable users (Lionel/Tvrtko)

v8: s/union intel_sseu/struct intel_sseu/ (Lionel)
    s/dev_priv/i915/ (Tvrtko)
    Change uapi class/instance fields to u16 (Tvrtko)
    Bump mask fields to 64bits (Lionel)
    Don't return EPERM when dynamic sseu is disabled (Tvrtko)

v9: Import context image into kernel context's ppgtt only when
    reconfiguring powergated slice/subslices (Chris)
    Use aliasing ppgtt when needed (Michel)

Tvrtko Ursulin:

v10:
 * Update for upstream changes.
 * Request submit needs a RPM reference.
 * Reject on !FULL_PPGTT for simplicity.
 * Pull out get/set param to helpers for readability and less indent.
 * Use i915_request_await_dma_fence in add_global_barrier to skip waits
   on the same timeline and avoid GEM_BUG_ON.
 * No need to explicitly assign a NULL pointer to engine in legacy mode.
 * No need to move gen8_make_rpcs up.
 * Factored out global barrier as prep patch.
 * Allow to only CAP_SYS_ADMIN if !Gen11.

v11:
 * Remove engine vfunc in favour of local helper. (Chris Wilson)
 * Stop retiring requests before updates since it is not needed
   (Chris Wilson)
 * Implement direct CPU update path for idle contexts. (Chris Wilson)
 * Left side dependency needs only be on the same context timeline.
   (Chris Wilson)
 * It is sufficient to order the timeline. (Chris Wilson)
 * Reject !RCS configuration attempts with -ENODEV for now.

v12:
 * Rebase for make_rpcs.

v13:
 * Centralize SSEU normalization to make_rpcs.
 * Type width checking (uAPI <-> implementation).
 * Gen11 restrictions uAPI checks.
 * Gen11 subslice count differences handling.
 Chris Wilson:
 * args->size handling fixes.
 * Update context image from GGTT.
 * Postpone context image update to pinning.
 * Use i915_gem_active_raw instead of last_request_on_engine.

v14:
 * Add activity tracker on intel_context to fix the lifetime issues
   and simplify the code. (Chris Wilson)

v15:
 * Fix context pin leak if no space in ring by simplifying the
   context pinning sequence.

v16:
 * Rebase for context get/set param locking changes.
 * Just -ENODEV on !Gen11. (Joonas)

v17:
 * Fix one Gen11 subslice enablement rule.
 * Handle error from i915_sw_fence_await_sw_fence_gfp. (Chris Wilson)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107634
Issue: https://github.com/intel/media-driver/issues/267
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Zhipeng Gong <zhipeng.gong@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> # v15
---
 drivers/gpu/drm/i915/i915_gem_context.c | 318 +++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context.h |   6 +
 drivers/gpu/drm/i915/intel_lrc.c        |   4 +-
 include/uapi/drm/i915_drm.h             |  43 ++++
 4 files changed, 368 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 0b8cc748648b..b38e8b8ec772 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,6 +90,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include "intel_lrc_reg.h"
 #include "intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -322,6 +323,14 @@ static u32 default_desc_template(const struct drm_i915_private *i915,
 	return desc;
 }
 
+static void intel_context_retire(struct i915_gem_active *active,
+				 struct i915_request *rq)
+{
+	struct intel_context *ce = container_of(active, typeof(*ce), active);
+
+	intel_context_unpin(ce);
+}
+
 static struct i915_gem_context *
 __create_hw_context(struct drm_i915_private *dev_priv,
 		    struct drm_i915_file_private *file_priv)
@@ -345,6 +354,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 		ce->gem_context = ctx;
 		/* Use the whole device by default */
 		ce->sseu = intel_device_default_sseu(dev_priv);
+
+		init_request_active(&ce->active, intel_context_retire);
 	}
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
@@ -846,6 +857,56 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+static int get_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_engine_cs *engine;
+	struct intel_context *ce;
+	int ret;
+
+	if (args->size == 0)
+		goto out;
+	else if (args->size < sizeof(user_sseu))
+		return -EINVAL;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.rsvd1 || user_sseu.rsvd2)
+		return -EINVAL;
+
+	engine = intel_engine_lookup_user(ctx->i915,
+					  user_sseu.class,
+					  user_sseu.instance);
+	if (!engine)
+		return -EINVAL;
+
+	/* Only use for mutex here is to serialize get_param and set_param. */
+	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	ce = to_intel_context(ctx, engine);
+
+	user_sseu.slice_mask = ce->sseu.slice_mask;
+	user_sseu.subslice_mask = ce->sseu.subslice_mask;
+	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
+	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
+
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
+			 sizeof(user_sseu)))
+		return -EFAULT;
+
+out:
+	args->size = sizeof(user_sseu);
+
+	return 0;
+}
+
 int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
@@ -858,15 +919,17 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	if (!ctx)
 		return -ENOENT;
 
-	args->size = 0;
 	switch (args->param) {
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 		ret = -EINVAL;
 		break;
 	case I915_CONTEXT_PARAM_NO_ZEROMAP:
+		args->size = 0;
 		args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
 		break;
 	case I915_CONTEXT_PARAM_GTT_SIZE:
+		args->size = 0;
+
 		if (ctx->ppgtt)
 			args->value = ctx->ppgtt->vm.total;
 		else if (to_i915(dev)->mm.aliasing_ppgtt)
@@ -875,14 +938,20 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 			args->value = to_i915(dev)->ggtt.vm.total;
 		break;
 	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+		args->size = 0;
 		args->value = i915_gem_context_no_error_capture(ctx);
 		break;
 	case I915_CONTEXT_PARAM_BANNABLE:
+		args->size = 0;
 		args->value = i915_gem_context_is_bannable(ctx);
 		break;
 	case I915_CONTEXT_PARAM_PRIORITY:
+		args->size = 0;
 		args->value = ctx->sched.priority;
 		break;
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = get_sseu(ctx, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -892,6 +961,249 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+static int gen8_emit_rpcs_config(struct i915_request *rq,
+				 struct intel_context *ce,
+				 struct intel_sseu sseu)
+{
+	u64 offset;
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	offset = ce->state->node.start +
+		LRC_STATE_PN * PAGE_SIZE +
+		(CTX_R_PWR_CLK_STATE + 1) * 4;
+
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	*cs++ = lower_32_bits(offset);
+	*cs++ = upper_32_bits(offset);
+	*cs++ = gen8_make_rpcs(rq->i915, &sseu);
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int
+gen8_modify_rpcs_gpu(struct intel_context *ce,
+		     struct intel_engine_cs *engine,
+		     struct intel_sseu sseu)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct i915_request *rq, *prev;
+	int ret;
+
+	GEM_BUG_ON(!ce->pin_count);
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	/* Submitting requests etc needs the hw awake. */
+	intel_runtime_pm_get(i915);
+
+	rq = i915_request_alloc(engine, i915->kernel_context);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		goto out_put;
+	}
+
+	/* Queue this switch after all other activity by this context. */
+	prev = i915_gem_active_raw(&ce->ring->timeline->last_request,
+				   &i915->drm.struct_mutex);
+	if (prev && !i915_request_completed(prev)) {
+		ret = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+						       &prev->submit,
+						       I915_FENCE_GFP);
+		if (ret < 0)
+			goto out_add;
+	}
+
+	ret = gen8_emit_rpcs_config(rq, ce, sseu);
+	if (ret)
+		goto out_add;
+
+	/* Order all following requests to be after. */
+	i915_timeline_set_barrier(ce->ring->timeline, rq);
+
+	/*
+	 * Guarantee context image and the timeline remains pinned until the
+	 * modifying request is retired by setting the ce activity tracker.
+	 *
+	 * But we only need to take one pin on the account of it. Or in other
+	 * words transfer the pinned ce object to tracked active request.
+	 */
+	if (!i915_gem_active_isset(&ce->active))
+		__intel_context_pin(ce);
+	i915_gem_active_set(&ce->active, rq);
+
+out_add:
+	i915_request_add(rq);
+out_put:
+	intel_runtime_pm_put(i915);
+
+	return ret;
+}
+
+static int
+i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
+				  struct intel_engine_cs *engine,
+				  struct intel_sseu sseu)
+{
+	struct intel_context *ce = to_intel_context(ctx, engine);
+	int ret;
+
+	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
+	GEM_BUG_ON(engine->id != RCS);
+
+	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	/* Nothing to do if unmodified. */
+	if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
+		goto out;
+
+	/*
+	 * If context is not idle we have to submit an ordered request to modify
+	 * its context image via the kernel context. Pristine and idle contexts
+	 * will be configured on pinning.
+	 */
+	if (ce->pin_count)
+		ret = gen8_modify_rpcs_gpu(ce, engine, sseu);
+
+	if (!ret)
+		ce->sseu = sseu;
+
+out:
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	return ret;
+}
+
+static int
+user_to_context_sseu(struct drm_i915_private *i915,
+		     const struct drm_i915_gem_context_param_sseu *user,
+		     struct intel_sseu *context)
+{
+	const struct sseu_dev_info *device = &INTEL_INFO(i915)->sseu;
+
+	/* No zeros in any field. */
+	if (!user->slice_mask || !user->subslice_mask ||
+	    !user->min_eus_per_subslice || !user->max_eus_per_subslice)
+		return -EINVAL;
+
+	/* Max >= min. */
+	if (user->max_eus_per_subslice < user->min_eus_per_subslice)
+		return -EINVAL;
+
+	/* Check validity against hardware. */
+	if (user->slice_mask & ~device->slice_mask)
+		return -EINVAL;
+
+	if (user->subslice_mask & ~device->subslice_mask[0])
+		return -EINVAL;
+
+	if (user->max_eus_per_subslice > device->max_eus_per_subslice)
+		return -EINVAL;
+
+	/*
+	 * Some future proofing on the types since the uAPI is wider than the
+	 * current internal implementation.
+	 */
+	if (WARN_ON((fls(user->slice_mask) >
+		     sizeof(context->slice_mask) * BITS_PER_BYTE) ||
+		    (fls(user->subslice_mask) >
+		     sizeof(context->subslice_mask) * BITS_PER_BYTE) ||
+		    overflows_type(user->min_eus_per_subslice,
+				   context->min_eus_per_subslice) ||
+		    overflows_type(user->max_eus_per_subslice,
+				   context->max_eus_per_subslice)))
+		return -EINVAL;
+
+	context->slice_mask = user->slice_mask;
+	context->subslice_mask = user->subslice_mask;
+	context->min_eus_per_subslice = user->min_eus_per_subslice;
+	context->max_eus_per_subslice = user->max_eus_per_subslice;
+
+	/* Part specific restrictions. */
+	if (IS_GEN11(i915)) {
+		unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
+		unsigned int req_s = hweight8(context->slice_mask);
+		unsigned int req_ss = hweight8(context->subslice_mask);
+
+		/*
+		 * Only full subslice enablement is possible if more than one
+		 * slice is turned on.
+		 */
+		if (req_s > 1 && req_ss != hw_ss_per_s)
+			return -EINVAL;
+
+		/*
+		 * If more than four (SScount bitfield limit) subslices are
+		 * requested then the number has to be even.
+		 */
+		if (req_ss > 4 && (req_ss & 1))
+			return -EINVAL;
+
+		/*
+		 * If only one slice is enabled and subslice count is below the
+		 * device full enablement, it must be at most half of the all
+		 * available subslices.
+		 */
+		if (req_s == 1 && req_ss < hw_ss_per_s &&
+		    req_ss > (hw_ss_per_s / 2))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int set_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_engine_cs *engine;
+	struct intel_sseu sseu;
+	int ret;
+
+	if (args->size < sizeof(user_sseu))
+		return -EINVAL;
+
+	if (!IS_GEN11(i915))
+		return -ENODEV;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.rsvd1 || user_sseu.rsvd2)
+		return -EINVAL;
+
+	engine = intel_engine_lookup_user(i915,
+					  user_sseu.class,
+					  user_sseu.instance);
+	if (!engine)
+		return -EINVAL;
+
+	/* Only render engine supports RPCS configuration. */
+	if (engine->class != RENDER_CLASS)
+		return -ENODEV;
+
+	ret = user_to_context_sseu(i915, &user_sseu, &sseu);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	if (ret)
+		return ret;
+
+	args->size = sizeof(user_sseu);
+
+	return 0;
+}
+
 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
@@ -953,7 +1265,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				ctx->sched.priority = priority;
 		}
 		break;
-
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = set_sseu(ctx, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 7510de738b35..c4aebfe7d4d2 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -170,6 +170,12 @@ struct i915_gem_context {
 		u64 lrc_desc;
 		int pin_count;
 
+		/**
+		 * active: Active tracker for the external rq activity on this
+		 * intel_context object.
+		 */
+		struct i915_gem_active active;
+
 		const struct intel_context_ops *ops;
 
 		/** sseu: Control eu/slice partitioning */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cf2e0848fa55..b5603e977a3f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2557,7 +2557,9 @@ u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
 	 * subslices are enabled, or a count between one and four on the first
 	 * slice.
 	 */
-	if (IS_GEN11(i915) && slices == 1 && subslices >= 4) {
+	if (IS_GEN11(i915) &&
+	    slices == 1 &&
+	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
 		GEM_BUG_ON(subslices & 1);
 
 		subslice_pg = false;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a4446f452040..e195c38b15a6 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1478,9 +1478,52 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+	/*
+	 * When using the following param, value should be a pointer to
+	 * drm_i915_gem_context_param_sseu.
+	 */
+#define I915_CONTEXT_PARAM_SSEU		0x7
 	__u64 value;
 };
 
+struct drm_i915_gem_context_param_sseu {
+	/*
+	 * Engine class & instance to be configured or queried.
+	 */
+	__u16 class;
+	__u16 instance;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd1;
+
+	/*
+	 * Mask of slices to enable for the context. Valid values are a subset
+	 * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+	 */
+	__u64 slice_mask;
+
+	/*
+	 * Mask of subslices to enable for the context. Valid values are a
+	 * subset of the bitmask value returned by I915_PARAM_SUBSLICE_MASK.
+	 */
+	__u64 subslice_mask;
+
+	/*
+	 * Minimum/Maximum number of EUs to enable per subslice for the
+	 * context. min_eus_per_subslice must be less than or equal to
+	 * max_eus_per_subslice.
+	 */
+	__u16 min_eus_per_subslice;
+	__u16 max_eus_per_subslice;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd2;
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 6/6] drm/i915/icl: Support co-existence between per-context SSEU and OA
  2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (4 preceding siblings ...)
  2018-09-17 11:30 ` [PATCH 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace Tvrtko Ursulin
@ 2018-09-17 11:30 ` Tvrtko Ursulin
  2018-10-01  8:49   ` Tvrtko Ursulin
  2018-10-01  9:50   ` Lionel Landwerlin
  2018-09-17 11:44 ` ✗ Fi.CI.CHECKPATCH: warning for Per context dynamic (sub)slice power-gating (rev4) Patchwork
                   ` (8 subsequent siblings)
  14 siblings, 2 replies; 35+ messages in thread
From: Tvrtko Ursulin @ 2018-09-17 11:30 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

When OA is active we want to lock the powergating configuration, but on
Icelake, users like the media stack will have issues if we lock to the
full device configuration.

Instead lock to a subset of (sub)slices which are currently a known
working configuration for all users.

v2:
 * Fix commit message spelling.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b5603e977a3f..cded1f1d9ec2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2521,13 +2521,28 @@ u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
 
 	/*
 	 * If i915/perf is active, we want a stable powergating configuration
-	 * on the system. The most natural configuration to take in that case
-	 * is the default (i.e maximum the hardware can do).
+	 * on the system.
+	 *
+	 * We could choose full enablement, but on ICL we know there are use
+	 * cases which disable slices for functional, apart from performance
+	 * reasons. So in this case we select a known stable subset.
 	 */
-	if (unlikely(i915->perf.oa.exclusive_stream))
-		ctx_sseu = intel_device_default_sseu(i915);
-	else
+	if (!i915->perf.oa.exclusive_stream) {
 		ctx_sseu = *req_sseu;
+	} else {
+		ctx_sseu = intel_device_default_sseu(i915);
+
+		if (IS_GEN11(i915)) {
+			/*
+			 * We only need subslice count so it doesn't matter
+			 * which ones we select - just turn on low bits in the
+			 * amount of half of all available subslices per slice.
+			 */
+			ctx_sseu.subslice_mask =
+				~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
+			ctx_sseu.slice_mask = 0x1;
+		}
+	}
 
 	slices = hweight8(ctx_sseu.slice_mask);
 	subslices = hweight8(ctx_sseu.subslice_mask);
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [PATCH 1/6] drm/i915/execlists: Move RPCS setup to context pin
  2018-09-17 11:30 ` [PATCH 1/6] drm/i915/execlists: Move RPCS setup to context pin Tvrtko Ursulin
@ 2018-09-17 11:43   ` Chris Wilson
  0 siblings, 0 replies; 35+ messages in thread
From: Chris Wilson @ 2018-09-17 11:43 UTC (permalink / raw)
  To: Intel-gfx, Tvrtko Ursulin

Quoting Tvrtko Ursulin (2018-09-17 12:30:53)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Configuring RPCS in context image just before pin is sufficient and will
> come extra handy in one of the following patches.
> 
> v2:
>  * Split image setup a bit differently. (Chris Wilson)
> 
> v3:
>  * Update context image after reset as well - otherwise the application
>    of pinned default state clears the RPCS.
> 
> v4:
>  * Use local variable throughout the function. (Chris Wilson)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> # v2

> @@ -1955,14 +1971,14 @@ static void execlists_reset(struct intel_engine_cs *engine,
>                        engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
>                        engine->context_size - PAGE_SIZE);
>         }
> -       execlists_init_reg_state(regs,
> -                                request->gem_context, engine, request->ring);
>  
>         /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
> -       regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma);
> -
>         request->ring->head = intel_ring_wrap(request->ring, request->postfix);
> -       regs[CTX_RING_HEAD + 1] = request->ring->head;
> +
> +       execlists_init_reg_state(regs, request->gem_context, engine,
> +                                request->ring);
> +
> +       __execlists_update_reg_state(engine, request->hw_context);

Probably need to reinforce the comments that the context state after the
guilty reset is our arbitrary defaults and not the state userspace
expects.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for Per context dynamic (sub)slice power-gating (rev4)
  2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (5 preceding siblings ...)
  2018-09-17 11:30 ` [PATCH 6/6] drm/i915/icl: Support co-existence between per-context SSEU and OA Tvrtko Ursulin
@ 2018-09-17 11:44 ` Patchwork
  2018-09-17 11:46 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (7 subsequent siblings)
  14 siblings, 0 replies; 35+ messages in thread
From: Patchwork @ 2018-09-17 11:44 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev4)
URL   : https://patchwork.freedesktop.org/series/48194/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
d32c1a36efbb drm/i915/execlists: Move RPCS setup to context pin
275b17e9ba45 drm/i915: Record the sseu configuration per-context & engine
15ae17fc2bb0 drm/i915/perf: lock powergating configuration to default when active
6c15d4d43452 drm/i915: Add timeline barrier support
f6f383926f56 drm/i915: Expose RPCS (SSEU) configuration to userspace
-:40: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#40: 
v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)

total: 0 errors, 1 warnings, 0 checks, 456 lines checked
00297a7d4c51 drm/i915/icl: Support co-existence between per-context SSEU and OA

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* ✗ Fi.CI.SPARSE: warning for Per context dynamic (sub)slice power-gating (rev4)
  2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (6 preceding siblings ...)
  2018-09-17 11:44 ` ✗ Fi.CI.CHECKPATCH: warning for Per context dynamic (sub)slice power-gating (rev4) Patchwork
@ 2018-09-17 11:46 ` Patchwork
  2018-09-17 12:02 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 35+ messages in thread
From: Patchwork @ 2018-09-17 11:46 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev4)
URL   : https://patchwork.freedesktop.org/series/48194/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915/execlists: Move RPCS setup to context pin
Okay!

Commit: drm/i915: Record the sseu configuration per-context & engine
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3718:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3732:16: warning: expression using sizeof(void)

Commit: drm/i915/perf: lock powergating configuration to default when active
Okay!

Commit: drm/i915: Add timeline barrier support
Okay!

Commit: drm/i915: Expose RPCS (SSEU) configuration to userspace
+drivers/gpu/drm/i915/intel_lrc.c:2562:25: warning: expression using sizeof(void)

Commit: drm/i915/icl: Support co-existence between per-context SSEU and OA
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace
  2018-09-17 11:30 ` [PATCH 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace Tvrtko Ursulin
@ 2018-09-17 11:48   ` Chris Wilson
  2018-09-18 13:43   ` [PATCH v18 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only) Tvrtko Ursulin
  1 sibling, 0 replies; 35+ messages in thread
From: Chris Wilson @ 2018-09-17 11:48 UTC (permalink / raw)
  To: Intel-gfx, Tvrtko Ursulin

Quoting Tvrtko Ursulin (2018-09-17 12:30:57)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> We want to allow userspace to reconfigure the subslice configuration for
> its own use case. To do so, we expose a context parameter to allow
> adjustment of the RPCS register stored within the context image (and
> currently not accessible via LRI). If the context is adjusted before
> first use, the adjustment is for "free"; otherwise if the context is
> active we flush the context off the GPU (stalling all users) and force
> the GPU to save the context to memory where we can modify it and so
> ensure that the register is reloaded on next execution.
> 
> The overhead of managing additional EU subslices can be significant,
> especially in multi-context workloads. Non-GPGPU contexts should
> preferably disable the subslices it is not using, and others should
> fine-tune the number to match their workload.
> 
> We expose complete control over the RPCS register, allowing
> configuration of slice/subslice, via masks packed into a u64 for
> simplicity. For example,
> 
>         struct drm_i915_gem_context_param arg;
>         struct drm_i915_gem_context_param_sseu sseu = { .class = 0,
>                                                         .instance = 0, };
> 
>         memset(&arg, 0, sizeof(arg));
>         arg.ctx_id = ctx;
>         arg.param = I915_CONTEXT_PARAM_SSEU;
>         arg.value = (uintptr_t) &sseu;
>         if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg) == 0) {
>                 sseu.packed.subslice_mask = 0;
> 
>                 drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
>         }
> 
> could be used to disable all subslices where supported.
> 
> v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)
> 
> v3: Add ability to program this per engine (Chris)
> 
> v4: Move most get_sseu() into i915_gem_context.c (Lionel)
> 
> v5: Validate sseu configuration against the device's capabilities (Lionel)
> 
> v6: Change context powergating settings through MI_SDM on kernel context (Chris)
> 
> v7: Synchronize the requests following a powergating setting change using a global
>     dependency (Chris)
>     Iterate timelines through dev_priv.gt.active_rings (Tvrtko)
>     Disable RPCS configuration setting for non capable users (Lionel/Tvrtko)
> 
> v8: s/union intel_sseu/struct intel_sseu/ (Lionel)
>     s/dev_priv/i915/ (Tvrtko)
>     Change uapi class/instance fields to u16 (Tvrtko)
>     Bump mask fields to 64bits (Lionel)
>     Don't return EPERM when dynamic sseu is disabled (Tvrtko)
> 
> v9: Import context image into kernel context's ppgtt only when
>     reconfiguring powergated slice/subslices (Chris)
>     Use aliasing ppgtt when needed (Michel)
> 
> Tvrtko Ursulin:
> 
> v10:
>  * Update for upstream changes.
>  * Request submit needs a RPM reference.
>  * Reject on !FULL_PPGTT for simplicity.
>  * Pull out get/set param to helpers for readability and less indent.
>  * Use i915_request_await_dma_fence in add_global_barrier to skip waits
>    on the same timeline and avoid GEM_BUG_ON.
>  * No need to explicitly assign a NULL pointer to engine in legacy mode.
>  * No need to move gen8_make_rpcs up.
>  * Factored out global barrier as prep patch.
>  * Allow to only CAP_SYS_ADMIN if !Gen11.
> 
> v11:
>  * Remove engine vfunc in favour of local helper. (Chris Wilson)
>  * Stop retiring requests before updates since it is not needed
>    (Chris Wilson)
>  * Implement direct CPU update path for idle contexts. (Chris Wilson)
>  * Left side dependency needs only be on the same context timeline.
>    (Chris Wilson)
>  * It is sufficient to order the timeline. (Chris Wilson)
>  * Reject !RCS configuration attempts with -ENODEV for now.
> 
> v12:
>  * Rebase for make_rpcs.
> 
> v13:
>  * Centralize SSEU normalization to make_rpcs.
>  * Type width checking (uAPI <-> implementation).
>  * Gen11 restrictions uAPI checks.
>  * Gen11 subslice count differences handling.
>  Chris Wilson:
>  * args->size handling fixes.
>  * Update context image from GGTT.
>  * Postpone context image update to pinning.
>  * Use i915_gem_active_raw instead of last_request_on_engine.
> 
> v14:
>  * Add activity tracker on intel_context to fix the lifetime issues
>    and simplify the code. (Chris Wilson)
> 
> v15:
>  * Fix context pin leak if no space in ring by simplifying the
>    context pinning sequence.
> 
> v16:
>  * Rebase for context get/set param locking changes.
>  * Just -ENODEV on !Gen11. (Joonas)
> 
> v17:
>  * Fix one Gen11 subslice enablement rule.
>  * Handle error from i915_sw_fence_await_sw_fence_gfp. (Chris Wilson)
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107634
> Issue: https://github.com/intel/media-driver/issues/267
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Zhipeng Gong <zhipeng.gong@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> # v15
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 3/6] drm/i915/perf: lock powergating configuration to default when active
  2018-09-17 11:30 ` [PATCH 3/6] drm/i915/perf: lock powergating configuration to default when active Tvrtko Ursulin
@ 2018-09-17 11:50   ` Chris Wilson
  0 siblings, 0 replies; 35+ messages in thread
From: Chris Wilson @ 2018-09-17 11:50 UTC (permalink / raw)
  To: Intel-gfx, Tvrtko Ursulin

Quoting Tvrtko Ursulin (2018-09-17 12:30:55)
> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> 
> If some of the contexts submitting workloads to the GPU have been
> configured to shut down slices/subslices, we might lose the NOA
> configurations written in the NOA muxes.
> 
> One possible solution to this problem is to reprogram the NOA muxes
> when we switch to a new context. We initially tried this in the
> workaround batchbuffer but some concerns were raised about the cost
> of reprogramming at every context switch. This solution is also not
> without consequences from the userspace point of view. Reprogramming
> of the muxes can only happen once the powergating configuration has
> changed (which happens after context switch). This means for a window
> of time during the recording, counters recorded by the OA unit might
> be invalid. This requires userspace dealing with OA reports to discard
> the invalid values.
> 
> Minimizing the reprogramming could be implemented by tracking of the
> last programmed configuration somewhere in GGTT and use MI_PREDICATE
> to discard some of the programming commands, but the command streamer
> would still have to parse all the MI_LRI instructions in the
> workaround batchbuffer.
> 
> Another solution, which this change implements, is to simply disregard
> the user requested configuration for the period of time when i915/perf
> is active. There is no known issue with this apart from a performance
> penalty for some media workloads that benefit from running on a
> partially powergated GPU. We already prevent RC6 from affecting the
> programming so it doesn't sound completely unreasonable to hold on
> powergating for the same reason.
> 
> v2: Leave RPCS programming in intel_lrc.c (Lionel)
> 
> v3: Update for s/union intel_sseu/struct intel_sseu/ (Lionel)
>     More to_intel_context() (Tvrtko)
>     s/dev_priv/i915/ (Tvrtko)
> 
> Tvrtko Ursulin:
> 
> v4:
>  * Rebase for make_rpcs changes.
> 
> v5:
>  * Apply OA restriction from make_rpcs directly.
> 
> v6:
>  * Rebase for context image setup changes.
> 
> v7:
>  * Move stream assignment before metric enable.
> 
> v8:
>  * Rebase.
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* ✓ Fi.CI.BAT: success for Per context dynamic (sub)slice power-gating (rev4)
  2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (7 preceding siblings ...)
  2018-09-17 11:46 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-09-17 12:02 ` Patchwork
  2018-09-17 13:04 ` ✗ Fi.CI.IGT: failure " Patchwork
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 35+ messages in thread
From: Patchwork @ 2018-09-17 12:02 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev4)
URL   : https://patchwork.freedesktop.org/series/48194/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4833 -> Patchwork_10203 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/48194/revisions/4/mbox/

== Known issues ==

  Here are the changes found in Patchwork_10203 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@gem_exec_suspend@basic-s3:
      fi-skl-caroline:    NOTRUN -> INCOMPLETE (fdo#107556, fdo#104108)

    igt@kms_frontbuffer_tracking@basic:
      fi-hsw-peppy:       PASS -> DMESG-WARN (fdo#102614)

    igt@kms_pipe_crc_basic@nonblocking-crc-pipe-b-frame-sequence:
      fi-byt-clapper:     PASS -> FAIL (fdo#103191, fdo#107362)

    igt@kms_psr@primary_mmap_gtt:
      {fi-cnl-u}:         NOTRUN -> FAIL (fdo#107383) +3

    
    ==== Possible fixes ====

    igt@drv_module_reload@basic-reload:
      fi-blb-e6850:       INCOMPLETE (fdo#107718) -> PASS

    igt@drv_selftest@live_hangcheck:
      fi-glk-j4005:       INCOMPLETE (k.org#198133, fdo#103359) -> PASS

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
      fi-icl-u:           INCOMPLETE (fdo#107713) -> PASS

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
      fi-snb-2520m:       DMESG-FAIL (fdo#103713) -> PASS

    igt@kms_psr@primary_page_flip:
      fi-kbl-r:           FAIL (fdo#107336) -> PASS

    igt@kms_setmode@basic-clone-single-crtc:
      fi-snb-2520m:       DMESG-WARN (fdo#103713) -> PASS

    
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
  fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
  fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359
  fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
  fdo#104108 https://bugs.freedesktop.org/show_bug.cgi?id=104108
  fdo#107336 https://bugs.freedesktop.org/show_bug.cgi?id=107336
  fdo#107362 https://bugs.freedesktop.org/show_bug.cgi?id=107362
  fdo#107383 https://bugs.freedesktop.org/show_bug.cgi?id=107383
  fdo#107556 https://bugs.freedesktop.org/show_bug.cgi?id=107556
  fdo#107713 https://bugs.freedesktop.org/show_bug.cgi?id=107713
  fdo#107718 https://bugs.freedesktop.org/show_bug.cgi?id=107718
  k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133


== Participating hosts (46 -> 44) ==

  Additional (2): fi-cnl-u fi-skl-caroline 
  Missing    (4): fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-hsw-4200u 


== Build changes ==

    * Linux: CI_DRM_4833 -> Patchwork_10203

  CI_DRM_4833: 75bb460b367a614d10b0fba220143bee42657d7e @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4644: 0b59bb3231ab481959528c5c7b3a98762772e1b0 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_10203: 00297a7d4c51196e972eba87810c64b51aac6f05 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

00297a7d4c51 drm/i915/icl: Support co-existence between per-context SSEU and OA
f6f383926f56 drm/i915: Expose RPCS (SSEU) configuration to userspace
6c15d4d43452 drm/i915: Add timeline barrier support
15ae17fc2bb0 drm/i915/perf: lock powergating configuration to default when active
275b17e9ba45 drm/i915: Record the sseu configuration per-context & engine
d32c1a36efbb drm/i915/execlists: Move RPCS setup to context pin

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10203/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* ✗ Fi.CI.IGT: failure for Per context dynamic (sub)slice power-gating (rev4)
  2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (8 preceding siblings ...)
  2018-09-17 12:02 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-09-17 13:04 ` Patchwork
  2018-09-18 14:06 ` ✗ Fi.CI.CHECKPATCH: warning for Per context dynamic (sub)slice power-gating (rev5) Patchwork
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 35+ messages in thread
From: Patchwork @ 2018-09-17 13:04 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev4)
URL   : https://patchwork.freedesktop.org/series/48194/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4833_full -> Patchwork_10203_full =

== Summary - FAILURE ==

  Serious unknown changes coming with Patchwork_10203_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_10203_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_10203_full:

  === IGT changes ===

    ==== Possible regressions ====

    igt@gem_ctx_param@invalid-param-get:
      shard-apl:          PASS -> FAIL
      shard-glk:          PASS -> FAIL
      shard-snb:          PASS -> FAIL
      shard-hsw:          PASS -> FAIL
      shard-kbl:          PASS -> FAIL

    
    ==== Warnings ====

    igt@kms_vblank@pipe-b-query-forked-hang:
      shard-snb:          PASS -> SKIP +1

    
== Known issues ==

  Here are the changes found in Patchwork_10203_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@gem_ppgtt@blt-vs-render-ctxn:
      shard-kbl:          PASS -> INCOMPLETE (fdo#106023, fdo#103665)

    igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-a:
      shard-apl:          PASS -> DMESG-WARN (fdo#107956)

    igt@perf_pmu@init-sema-vcs1:
      shard-snb:          PASS -> INCOMPLETE (fdo#105411)

    
    ==== Possible fixes ====

    igt@kms_frontbuffer_tracking@fbc-stridechange:
      shard-glk:          FAIL (fdo#103167) -> PASS

    igt@kms_setmode@basic:
      shard-kbl:          FAIL (fdo#99912) -> PASS

    igt@kms_vblank@pipe-c-ts-continuation-dpms-suspend:
      shard-kbl:          INCOMPLETE (fdo#103665) -> PASS

    
  fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
  fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665
  fdo#105411 https://bugs.freedesktop.org/show_bug.cgi?id=105411
  fdo#106023 https://bugs.freedesktop.org/show_bug.cgi?id=106023
  fdo#107956 https://bugs.freedesktop.org/show_bug.cgi?id=107956
  fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4833 -> Patchwork_10203

  CI_DRM_4833: 75bb460b367a614d10b0fba220143bee42657d7e @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4644: 0b59bb3231ab481959528c5c7b3a98762772e1b0 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_10203: 00297a7d4c51196e972eba87810c64b51aac6f05 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10203/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v18 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
  2018-09-17 11:30 ` [PATCH 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace Tvrtko Ursulin
  2018-09-17 11:48   ` Chris Wilson
@ 2018-09-18 13:43   ` Tvrtko Ursulin
  1 sibling, 0 replies; 35+ messages in thread
From: Tvrtko Ursulin @ 2018-09-18 13:43 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We want to allow userspace to reconfigure the subslice configuration on a
per context basis.

This is required for the functional requirement of shutting down non-VME
enabled sub-slices on Gen11 parts.

To do so, we expose a context parameter to allow adjustment of the RPCS
register stored within the context image (and currently not accessible via
LRI).

If the context is adjusted before first use or whilst idle, the adjustment
is for "free"; otherwise if the context is active we queue a request to do
so (using the kernel context), following all other activity by that
context, which is also marked as barrier for all following submission
against the same context.

Since the overhead of device re-configuration during context switching can
be significant, especially in multi-context workloads, we limit this new
uAPI to only support the Gen11 VME use case. In this use case either the
device is fully enabled, or exactly one slice and half of the subslices
are enabled.

Example usage:

	struct drm_i915_gem_context_param_sseu sseu = { };
	struct drm_i915_gem_context_param arg =
		{ .param = I915_CONTEXT_PARAM_SSEU,
		  .ctx_id = gem_context_create(fd),
		  .size = sizeof(sseu),
		  .value = to_user_pointer(&sseu)
		};

	/* Query device defaults. */
	gem_context_get_param(fd, &arg);

	/* Set VME configuration on a 1x6x8 part. */
	sseu.slice_mask = 0x1;
	sseu.subslice_mask = 0xe0;
	gem_context_set_param(fd, &arg);

v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)

v3: Add ability to program this per engine (Chris)

v4: Move most get_sseu() into i915_gem_context.c (Lionel)

v5: Validate sseu configuration against the device's capabilities (Lionel)

v6: Change context powergating settings through MI_SDM on kernel context (Chris)

v7: Synchronize the requests following a powergating setting change using a global
    dependency (Chris)
    Iterate timelines through dev_priv.gt.active_rings (Tvrtko)
    Disable RPCS configuration setting for non capable users (Lionel/Tvrtko)

v8: s/union intel_sseu/struct intel_sseu/ (Lionel)
    s/dev_priv/i915/ (Tvrtko)
    Change uapi class/instance fields to u16 (Tvrtko)
    Bump mask fields to 64bits (Lionel)
    Don't return EPERM when dynamic sseu is disabled (Tvrtko)

v9: Import context image into kernel context's ppgtt only when
    reconfiguring powergated slice/subslices (Chris)
    Use aliasing ppgtt when needed (Michel)

Tvrtko Ursulin:

v10:
 * Update for upstream changes.
 * Request submit needs a RPM reference.
 * Reject on !FULL_PPGTT for simplicity.
 * Pull out get/set param to helpers for readability and less indent.
 * Use i915_request_await_dma_fence in add_global_barrier to skip waits
   on the same timeline and avoid GEM_BUG_ON.
 * No need to explicitly assign a NULL pointer to engine in legacy mode.
 * No need to move gen8_make_rpcs up.
 * Factored out global barrier as prep patch.
 * Allow to only CAP_SYS_ADMIN if !Gen11.

v11:
 * Remove engine vfunc in favour of local helper. (Chris Wilson)
 * Stop retiring requests before updates since it is not needed
   (Chris Wilson)
 * Implement direct CPU update path for idle contexts. (Chris Wilson)
 * Left side dependency needs only be on the same context timeline.
   (Chris Wilson)
 * It is sufficient to order the timeline. (Chris Wilson)
 * Reject !RCS configuration attempts with -ENODEV for now.

v12:
 * Rebase for make_rpcs.

v13:
 * Centralize SSEU normalization to make_rpcs.
 * Type width checking (uAPI <-> implementation).
 * Gen11 restrictions uAPI checks.
 * Gen11 subslice count differences handling.
 Chris Wilson:
 * args->size handling fixes.
 * Update context image from GGTT.
 * Postpone context image update to pinning.
 * Use i915_gem_active_raw instead of last_request_on_engine.

v14:
 * Add activity tracker on intel_context to fix the lifetime issues
   and simplify the code. (Chris Wilson)

v15:
 * Fix context pin leak if no space in ring by simplifying the
   context pinning sequence.

v16:
 * Rebase for context get/set param locking changes.
 * Just -ENODEV on !Gen11. (Joonas)

v17:
 * Fix one Gen11 subslice enablement rule.
 * Handle error from i915_sw_fence_await_sw_fence_gfp. (Chris Wilson)

v18:
 * Update commit message. (Joonas)
 * Restrict uAPI to VME use case. (Joonas)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107634
Issue: https://github.com/intel/media-driver/issues/267
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Zhipeng Gong <zhipeng.gong@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> # v17
---
 drivers/gpu/drm/i915/i915_gem_context.c | 340 +++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context.h |   6 +
 drivers/gpu/drm/i915/intel_lrc.c        |   4 +-
 include/uapi/drm/i915_drm.h             |  43 +++
 4 files changed, 390 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 0b8cc748648b..aee8f8392ef7 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,6 +90,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include "intel_lrc_reg.h"
 #include "intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -322,6 +323,14 @@ static u32 default_desc_template(const struct drm_i915_private *i915,
 	return desc;
 }
 
+static void intel_context_retire(struct i915_gem_active *active,
+				 struct i915_request *rq)
+{
+	struct intel_context *ce = container_of(active, typeof(*ce), active);
+
+	intel_context_unpin(ce);
+}
+
 static struct i915_gem_context *
 __create_hw_context(struct drm_i915_private *dev_priv,
 		    struct drm_i915_file_private *file_priv)
@@ -345,6 +354,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 		ce->gem_context = ctx;
 		/* Use the whole device by default */
 		ce->sseu = intel_device_default_sseu(dev_priv);
+
+		init_request_active(&ce->active, intel_context_retire);
 	}
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
@@ -846,6 +857,56 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+static int get_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_engine_cs *engine;
+	struct intel_context *ce;
+	int ret;
+
+	if (args->size == 0)
+		goto out;
+	else if (args->size < sizeof(user_sseu))
+		return -EINVAL;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.rsvd1 || user_sseu.rsvd2)
+		return -EINVAL;
+
+	engine = intel_engine_lookup_user(ctx->i915,
+					  user_sseu.class,
+					  user_sseu.instance);
+	if (!engine)
+		return -EINVAL;
+
+	/* Only use for mutex here is to serialize get_param and set_param. */
+	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	ce = to_intel_context(ctx, engine);
+
+	user_sseu.slice_mask = ce->sseu.slice_mask;
+	user_sseu.subslice_mask = ce->sseu.subslice_mask;
+	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
+	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
+
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
+			 sizeof(user_sseu)))
+		return -EFAULT;
+
+out:
+	args->size = sizeof(user_sseu);
+
+	return 0;
+}
+
 int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
@@ -858,15 +919,17 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	if (!ctx)
 		return -ENOENT;
 
-	args->size = 0;
 	switch (args->param) {
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 		ret = -EINVAL;
 		break;
 	case I915_CONTEXT_PARAM_NO_ZEROMAP:
+		args->size = 0;
 		args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
 		break;
 	case I915_CONTEXT_PARAM_GTT_SIZE:
+		args->size = 0;
+
 		if (ctx->ppgtt)
 			args->value = ctx->ppgtt->vm.total;
 		else if (to_i915(dev)->mm.aliasing_ppgtt)
@@ -875,14 +938,20 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 			args->value = to_i915(dev)->ggtt.vm.total;
 		break;
 	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+		args->size = 0;
 		args->value = i915_gem_context_no_error_capture(ctx);
 		break;
 	case I915_CONTEXT_PARAM_BANNABLE:
+		args->size = 0;
 		args->value = i915_gem_context_is_bannable(ctx);
 		break;
 	case I915_CONTEXT_PARAM_PRIORITY:
+		args->size = 0;
 		args->value = ctx->sched.priority;
 		break;
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = get_sseu(ctx, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -892,6 +961,271 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+static int gen8_emit_rpcs_config(struct i915_request *rq,
+				 struct intel_context *ce,
+				 struct intel_sseu sseu)
+{
+	u64 offset;
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	offset = ce->state->node.start +
+		LRC_STATE_PN * PAGE_SIZE +
+		(CTX_R_PWR_CLK_STATE + 1) * 4;
+
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	*cs++ = lower_32_bits(offset);
+	*cs++ = upper_32_bits(offset);
+	*cs++ = gen8_make_rpcs(rq->i915, &sseu);
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int
+gen8_modify_rpcs_gpu(struct intel_context *ce,
+		     struct intel_engine_cs *engine,
+		     struct intel_sseu sseu)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct i915_request *rq, *prev;
+	int ret;
+
+	GEM_BUG_ON(!ce->pin_count);
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	/* Submitting requests etc needs the hw awake. */
+	intel_runtime_pm_get(i915);
+
+	rq = i915_request_alloc(engine, i915->kernel_context);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		goto out_put;
+	}
+
+	/* Queue this switch after all other activity by this context. */
+	prev = i915_gem_active_raw(&ce->ring->timeline->last_request,
+				   &i915->drm.struct_mutex);
+	if (prev && !i915_request_completed(prev)) {
+		ret = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+						       &prev->submit,
+						       I915_FENCE_GFP);
+		if (ret < 0)
+			goto out_add;
+	}
+
+	ret = gen8_emit_rpcs_config(rq, ce, sseu);
+	if (ret)
+		goto out_add;
+
+	/* Order all following requests to be after. */
+	i915_timeline_set_barrier(ce->ring->timeline, rq);
+
+	/*
+	 * Guarantee context image and the timeline remains pinned until the
+	 * modifying request is retired by setting the ce activity tracker.
+	 *
+	 * But we only need to take one pin on the account of it. Or in other
+	 * words transfer the pinned ce object to tracked active request.
+	 */
+	if (!i915_gem_active_isset(&ce->active))
+		__intel_context_pin(ce);
+	i915_gem_active_set(&ce->active, rq);
+
+out_add:
+	i915_request_add(rq);
+out_put:
+	intel_runtime_pm_put(i915);
+
+	return ret;
+}
+
+static int
+i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
+				  struct intel_engine_cs *engine,
+				  struct intel_sseu sseu)
+{
+	struct intel_context *ce = to_intel_context(ctx, engine);
+	int ret;
+
+	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
+	GEM_BUG_ON(engine->id != RCS);
+
+	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	/* Nothing to do if unmodified. */
+	if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
+		goto out;
+
+	/*
+	 * If context is not idle we have to submit an ordered request to modify
+	 * its context image via the kernel context. Pristine and idle contexts
+	 * will be configured on pinning.
+	 */
+	if (ce->pin_count)
+		ret = gen8_modify_rpcs_gpu(ce, engine, sseu);
+
+	if (!ret)
+		ce->sseu = sseu;
+
+out:
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	return ret;
+}
+
+static int
+user_to_context_sseu(struct drm_i915_private *i915,
+		     const struct drm_i915_gem_context_param_sseu *user,
+		     struct intel_sseu *context)
+{
+	const struct sseu_dev_info *device = &INTEL_INFO(i915)->sseu;
+
+	/* No zeros in any field. */
+	if (!user->slice_mask || !user->subslice_mask ||
+	    !user->min_eus_per_subslice || !user->max_eus_per_subslice)
+		return -EINVAL;
+
+	/* Max > min. */
+	if (user->max_eus_per_subslice < user->min_eus_per_subslice)
+		return -EINVAL;
+
+	/* Check validity against hardware. */
+	if (user->slice_mask & ~device->slice_mask)
+		return -EINVAL;
+
+	if (user->subslice_mask & ~device->subslice_mask[0])
+		return -EINVAL;
+
+	if (user->max_eus_per_subslice > device->max_eus_per_subslice)
+		return -EINVAL;
+
+	/*
+	 * Some future proofing on the types since the uAPI is wider than the
+	 * current internal implementation.
+	 */
+	if (WARN_ON((fls(user->slice_mask) >
+		     sizeof(context->slice_mask) * BITS_PER_BYTE) ||
+		    (fls(user->subslice_mask) >
+		     sizeof(context->subslice_mask) * BITS_PER_BYTE) ||
+		    overflows_type(user->min_eus_per_subslice,
+				   context->min_eus_per_subslice) ||
+		    overflows_type(user->max_eus_per_subslice,
+				   context->max_eus_per_subslice)))
+		return -EINVAL;
+
+	context->slice_mask = user->slice_mask;
+	context->subslice_mask = user->subslice_mask;
+	context->min_eus_per_subslice = user->min_eus_per_subslice;
+	context->max_eus_per_subslice = user->max_eus_per_subslice;
+
+	/* Part specific restrictions. */
+	if (IS_GEN11(i915)) {
+		unsigned int hw_s = hweight8(device->slice_mask);
+		unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
+		unsigned int req_s = hweight8(context->slice_mask);
+		unsigned int req_ss = hweight8(context->subslice_mask);
+
+		/*
+		 * Only full subslice enablement is possible if more than one
+		 * slice is turned on.
+		 */
+		if (req_s > 1 && req_ss != hw_ss_per_s)
+			return -EINVAL;
+
+		/*
+		 * If more than four (SScount bitfield limit) subslices are
+		 * requested then the number has to be even.
+		 */
+		if (req_ss > 4 && (req_ss & 1))
+			return -EINVAL;
+
+		/*
+		 * If only one slice is enabled and subslice count is below the
+		 * device full enablement, it must be at most half of the all
+		 * available subslices.
+		 */
+		if (req_s == 1 && req_ss < hw_ss_per_s &&
+		    req_ss > (hw_ss_per_s / 2))
+			return -EINVAL;
+
+		/* ABI restriction - VME use case only. */
+
+		/* All slices or one slice only. */
+		if (req_s != 1 && req_s != hw_s)
+			return -EINVAL;
+
+		/*
+		 * Half subslices or full enablement only when one slice is
+		 * enabled.
+		 */
+		if (req_s == 1 &&
+		    (req_ss != hw_ss_per_s && req_ss != (hw_ss_per_s / 2)))
+			return -EINVAL;
+
+		/* No EU configuration changes. */
+		if ((user->min_eus_per_subslice !=
+		     device->max_eus_per_subslice) ||
+		    (user->max_eus_per_subslice !=
+		     device->max_eus_per_subslice))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int set_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_engine_cs *engine;
+	struct intel_sseu sseu;
+	int ret;
+
+	if (args->size < sizeof(user_sseu))
+		return -EINVAL;
+
+	if (!IS_GEN11(i915))
+		return -ENODEV;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.rsvd1 || user_sseu.rsvd2)
+		return -EINVAL;
+
+	engine = intel_engine_lookup_user(i915,
+					  user_sseu.class,
+					  user_sseu.instance);
+	if (!engine)
+		return -EINVAL;
+
+	/* Only render engine supports RPCS configuration. */
+	if (engine->class != RENDER_CLASS)
+		return -ENODEV;
+
+	ret = user_to_context_sseu(i915, &user_sseu, &sseu);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	if (ret)
+		return ret;
+
+	args->size = sizeof(user_sseu);
+
+	return 0;
+}
+
 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
@@ -953,7 +1287,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				ctx->sched.priority = priority;
 		}
 		break;
-
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = set_sseu(ctx, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 7510de738b35..c4aebfe7d4d2 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -170,6 +170,12 @@ struct i915_gem_context {
 		u64 lrc_desc;
 		int pin_count;
 
+		/**
+		 * active: Active tracker for the external rq activity on this
+		 * intel_context object.
+		 */
+		struct i915_gem_active active;
+
 		const struct intel_context_ops *ops;
 
 		/** sseu: Control eu/slice partitioning */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cf2e0848fa55..b5603e977a3f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2557,7 +2557,9 @@ u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
 	 * subslices are enabled, or a count between one and four on the first
 	 * slice.
 	 */
-	if (IS_GEN11(i915) && slices == 1 && subslices >= 4) {
+	if (IS_GEN11(i915) &&
+	    slices == 1 &&
+	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
 		GEM_BUG_ON(subslices & 1);
 
 		subslice_pg = false;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a4446f452040..e195c38b15a6 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1478,9 +1478,52 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+	/*
+	 * When using the following param, value should be a pointer to
+	 * drm_i915_gem_context_param_sseu.
+	 */
+#define I915_CONTEXT_PARAM_SSEU		0x7
 	__u64 value;
 };
 
+struct drm_i915_gem_context_param_sseu {
+	/*
+	 * Engine class & instance to be configured or queried.
+	 */
+	__u16 class;
+	__u16 instance;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd1;
+
+	/*
+	 * Mask of slices to enable for the context. Valid values are a subset
+	 * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+	 */
+	__u64 slice_mask;
+
+	/*
+	 * Mask of subslices to enable for the context. Valid values are a
+	 * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK.
+	 */
+	__u64 subslice_mask;
+
+	/*
+	 * Minimum/Maximum number of EUs to enable per subslice for the
+	 * context. min_eus_per_subslice must be inferior or equal to
+	 * max_eus_per_subslice.
+	 */
+	__u16 min_eus_per_subslice;
+	__u16 max_eus_per_subslice;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd2;
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for Per context dynamic (sub)slice power-gating (rev5)
  2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (9 preceding siblings ...)
  2018-09-17 13:04 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2018-09-18 14:06 ` Patchwork
  2018-09-18 14:09 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 35+ messages in thread
From: Patchwork @ 2018-09-18 14:06 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev5)
URL   : https://patchwork.freedesktop.org/series/48194/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
1edac7cf7ca4 drm/i915/execlists: Move RPCS setup to context pin
f72f696a198a drm/i915: Record the sseu configuration per-context & engine
fd0031f4d6e0 drm/i915/perf: lock powergating configuration to default when active
038123e5663c drm/i915: Add timeline barrier support
95b770166b65 drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
-:47: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#47: 
v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)

-:489: CHECK:UNNECESSARY_PARENTHESES: Unnecessary parentheses around 'user->min_eus_per_subslice !=
 		     device->max_eus_per_subslice'
#489: FILE: drivers/gpu/drm/i915/i915_gem_context.c:1174:
+		if ((user->min_eus_per_subslice !=
+		     device->max_eus_per_subslice) ||
+		    (user->max_eus_per_subslice !=
+		     device->max_eus_per_subslice))

-:489: CHECK:UNNECESSARY_PARENTHESES: Unnecessary parentheses around 'user->max_eus_per_subslice !=
 		     device->max_eus_per_subslice'
#489: FILE: drivers/gpu/drm/i915/i915_gem_context.c:1174:
+		if ((user->min_eus_per_subslice !=
+		     device->max_eus_per_subslice) ||
+		    (user->max_eus_per_subslice !=
+		     device->max_eus_per_subslice))

total: 0 errors, 1 warnings, 2 checks, 478 lines checked
170dc663d586 drm/i915/icl: Support co-existence between per-context SSEU and OA

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* ✗ Fi.CI.SPARSE: warning for Per context dynamic (sub)slice power-gating (rev5)
  2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (10 preceding siblings ...)
  2018-09-18 14:06 ` ✗ Fi.CI.CHECKPATCH: warning for Per context dynamic (sub)slice power-gating (rev5) Patchwork
@ 2018-09-18 14:09 ` Patchwork
  2018-09-18 14:25 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 35+ messages in thread
From: Patchwork @ 2018-09-18 14:09 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev5)
URL   : https://patchwork.freedesktop.org/series/48194/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915/execlists: Move RPCS setup to context pin
Okay!

Commit: drm/i915: Record the sseu configuration per-context & engine
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3718:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3732:16: warning: expression using sizeof(void)

Commit: drm/i915/perf: lock powergating configuration to default when active
Okay!

Commit: drm/i915: Add timeline barrier support
Okay!

Commit: drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
+drivers/gpu/drm/i915/intel_lrc.c:2562:25: warning: expression using sizeof(void)

Commit: drm/i915/icl: Support co-existence between per-context SSEU and OA
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* ✓ Fi.CI.BAT: success for Per context dynamic (sub)slice power-gating (rev5)
  2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (11 preceding siblings ...)
  2018-09-18 14:09 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-09-18 14:25 ` Patchwork
  2018-09-18 16:05 ` ✗ Fi.CI.IGT: failure " Patchwork
  2018-10-01 16:35 ` ✗ Fi.CI.BAT: failure for Per context dynamic (sub)slice power-gating (rev6) Patchwork
  14 siblings, 0 replies; 35+ messages in thread
From: Patchwork @ 2018-09-18 14:25 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev5)
URL   : https://patchwork.freedesktop.org/series/48194/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4836 -> Patchwork_10212 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/48194/revisions/5/mbox/

== Known issues ==

  Here are the changes found in Patchwork_10212 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_module_reload@basic-reload:
      fi-ilk-650:         PASS -> DMESG-WARN (fdo#106387)

    igt@drv_selftest@live_hangcheck:
      fi-cfl-guc:         PASS -> DMESG-FAIL (fdo#107710)

    igt@drv_selftest@mock_hugepages:
      fi-bwr-2160:        PASS -> DMESG-FAIL (fdo#107930)

    igt@gem_exec_suspend@basic-s3:
      fi-bdw-samus:       PASS -> INCOMPLETE (fdo#107773)

    igt@kms_frontbuffer_tracking@basic:
      fi-byt-clapper:     NOTRUN -> FAIL (fdo#103167)

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
      fi-byt-clapper:     NOTRUN -> INCOMPLETE (fdo#102657)

    igt@kms_psr@primary_page_flip:
      fi-kbl-r:           PASS -> FAIL (fdo#107336)

    
    ==== Possible fixes ====

    igt@drv_getparams_basic@basic-subslice-total:
      fi-snb-2520m:       DMESG-WARN (fdo#103713) -> PASS +9

    igt@drv_module_reload@basic-reload:
      fi-blb-e6850:       INCOMPLETE (fdo#107718) -> PASS

    igt@gem_mmap_gtt@basic-read-write:
      fi-glk-dsi:         INCOMPLETE (fdo#103359, k.org#198133) -> PASS

    igt@prime_vgem@basic-fence-flip:
      fi-ilk-650:         FAIL (fdo#104008) -> PASS

    
  fdo#102657 https://bugs.freedesktop.org/show_bug.cgi?id=102657
  fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
  fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359
  fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
  fdo#104008 https://bugs.freedesktop.org/show_bug.cgi?id=104008
  fdo#106387 https://bugs.freedesktop.org/show_bug.cgi?id=106387
  fdo#107336 https://bugs.freedesktop.org/show_bug.cgi?id=107336
  fdo#107710 https://bugs.freedesktop.org/show_bug.cgi?id=107710
  fdo#107718 https://bugs.freedesktop.org/show_bug.cgi?id=107718
  fdo#107773 https://bugs.freedesktop.org/show_bug.cgi?id=107773
  fdo#107930 https://bugs.freedesktop.org/show_bug.cgi?id=107930
  k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133


== Participating hosts (47 -> 45) ==

  Additional (3): fi-bsw-kefka fi-byt-clapper fi-elk-e7500 
  Missing    (5): fi-hsw-4770r fi-ctg-p8600 fi-byt-squawks fi-bsw-cyan fi-ilk-m540 


== Build changes ==

    * Linux: CI_DRM_4836 -> Patchwork_10212

  CI_DRM_4836: b2b0444aa439ade1ed809a91a19d382fbb5e7700 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4645: 03b90a39ed12a568c9da752466ea708d6348e110 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_10212: 170dc663d5861cbc82f0b2d4ebf5884a154d3671 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

170dc663d586 drm/i915/icl: Support co-existence between per-context SSEU and OA
95b770166b65 drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
038123e5663c drm/i915: Add timeline barrier support
fd0031f4d6e0 drm/i915/perf: lock powergating configuration to default when active
f72f696a198a drm/i915: Record the sseu configuration per-context & engine
1edac7cf7ca4 drm/i915/execlists: Move RPCS setup to context pin

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10212/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* ✗ Fi.CI.IGT: failure for Per context dynamic (sub)slice power-gating (rev5)
  2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (12 preceding siblings ...)
  2018-09-18 14:25 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-09-18 16:05 ` Patchwork
  2018-10-01 16:35 ` ✗ Fi.CI.BAT: failure for Per context dynamic (sub)slice power-gating (rev6) Patchwork
  14 siblings, 0 replies; 35+ messages in thread
From: Patchwork @ 2018-09-18 16:05 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev5)
URL   : https://patchwork.freedesktop.org/series/48194/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4836_full -> Patchwork_10212_full =

== Summary - FAILURE ==

  Serious unknown changes coming with Patchwork_10212_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_10212_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_10212_full:

  === IGT changes ===

    ==== Possible regressions ====

    igt@gem_ctx_param@invalid-param-get:
      shard-apl:          PASS -> FAIL
      shard-glk:          PASS -> FAIL
      shard-snb:          PASS -> FAIL
      shard-hsw:          PASS -> FAIL
      shard-kbl:          PASS -> FAIL

    
    ==== Warnings ====

    igt@pm_rc6_residency@rc6-accuracy:
      shard-snb:          PASS -> SKIP

    
== Known issues ==

  Here are the changes found in Patchwork_10212_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@gem_gtt_cpu_tlb:
      shard-glk:          PASS -> DMESG-WARN (fdo#105763, fdo#106538) +1

    igt@kms_busy@extended-pageflip-hang-newfb-render-a:
      shard-glk:          PASS -> DMESG-WARN (fdo#107956)

    igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-a:
      shard-apl:          PASS -> DMESG-WARN (fdo#107956)

    igt@kms_setmode@basic:
      shard-apl:          PASS -> FAIL (fdo#99912)
      shard-kbl:          PASS -> FAIL (fdo#99912)

    
    ==== Possible fixes ====

    igt@kms_frontbuffer_tracking@fbc-2p-indfb-fliptrack:
      shard-glk:          FAIL (fdo#103167) -> PASS

    igt@pm_rpm@pm-tiling:
      shard-apl:          DMESG-WARN (fdo#103558, fdo#105602) -> PASS +1

    
  fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
  fdo#103558 https://bugs.freedesktop.org/show_bug.cgi?id=103558
  fdo#105602 https://bugs.freedesktop.org/show_bug.cgi?id=105602
  fdo#105763 https://bugs.freedesktop.org/show_bug.cgi?id=105763
  fdo#106538 https://bugs.freedesktop.org/show_bug.cgi?id=106538
  fdo#107956 https://bugs.freedesktop.org/show_bug.cgi?id=107956
  fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4836 -> Patchwork_10212

  CI_DRM_4836: b2b0444aa439ade1ed809a91a19d382fbb5e7700 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4645: 03b90a39ed12a568c9da752466ea708d6348e110 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_10212: 170dc663d5861cbc82f0b2d4ebf5884a154d3671 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10212/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 6/6] drm/i915/icl: Support co-existence between per-context SSEU and OA
  2018-09-17 11:30 ` [PATCH 6/6] drm/i915/icl: Support co-existence between per-context SSEU and OA Tvrtko Ursulin
@ 2018-10-01  8:49   ` Tvrtko Ursulin
  2018-10-01  9:50   ` Lionel Landwerlin
  1 sibling, 0 replies; 35+ messages in thread
From: Tvrtko Ursulin @ 2018-10-01  8:49 UTC (permalink / raw)
  To: Tvrtko Ursulin, Intel-gfx



Hi,

One final unreviewed patch on this series. Hopefully uncontroversial 
enough for a quick review?

Thanks,

Tvrtko

On 17/09/2018 12:30, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> When OA is active we want to lock the powergating configuration, but on
> Icelake, users like the media stack will have issues if we lock to the
> full device configuration.
> 
> Instead lock to a subset of (sub)slices which are currently a known
> working configuration for all users.
> 
> v2:
>   * Fix commit message spelling.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 25 ++++++++++++++++++++-----
>   1 file changed, 20 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b5603e977a3f..cded1f1d9ec2 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2521,13 +2521,28 @@ u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
>   
>   	/*
>   	 * If i915/perf is active, we want a stable powergating configuration
> -	 * on the system. The most natural configuration to take in that case
> -	 * is the default (i.e maximum the hardware can do).
> +	 * on the system.
> +	 *
> +	 * We could choose full enablement, but on ICL we know there are use
> +	 * cases which disable slices for functional, apart for performance
> +	 * reasons. So in this case we select a known stable subset.
>   	 */
> -	if (unlikely(i915->perf.oa.exclusive_stream))
> -		ctx_sseu = intel_device_default_sseu(i915);
> -	else
> +	if (!i915->perf.oa.exclusive_stream) {
>   		ctx_sseu = *req_sseu;
> +	} else {
> +		ctx_sseu = intel_device_default_sseu(i915);
> +
> +		if (IS_GEN11(i915)) {
> +			/*
> +			 * We only need subslice count so it doesn't matter
> +			 * which ones we select - just turn of low bits in the
> +			 * amount of half of all available subslices per slice.
> +			 */
> +			ctx_sseu.subslice_mask =
> +				~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
> +			ctx_sseu.slice_mask = 0x1;
> +		}
> +	}
>   
>   	slices = hweight8(ctx_sseu.slice_mask);
>   	subslices = hweight8(ctx_sseu.subslice_mask);
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 6/6] drm/i915/icl: Support co-existence between per-context SSEU and OA
  2018-09-17 11:30 ` [PATCH 6/6] drm/i915/icl: Support co-existence between per-context SSEU and OA Tvrtko Ursulin
  2018-10-01  8:49   ` Tvrtko Ursulin
@ 2018-10-01  9:50   ` Lionel Landwerlin
  2018-10-01 10:20     ` Tvrtko Ursulin
  1 sibling, 1 reply; 35+ messages in thread
From: Lionel Landwerlin @ 2018-10-01  9:50 UTC (permalink / raw)
  To: Tvrtko Ursulin, Intel-gfx

On 17/09/2018 13:30, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> When OA is active we want to lock the powergating configuration, but on
> Icelake, users like the media stack will have issues if we lock to the
> full device configuration.
>
> Instead lock to a subset of (sub)slices which are currently a known
> working configuration for all users.
>
> v2:
>   * Fix commit message spelling.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 25 ++++++++++++++++++++-----
>   1 file changed, 20 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b5603e977a3f..cded1f1d9ec2 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2521,13 +2521,28 @@ u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
>   
>   	/*
>   	 * If i915/perf is active, we want a stable powergating configuration
> -	 * on the system. The most natural configuration to take in that case
> -	 * is the default (i.e maximum the hardware can do).
> +	 * on the system.
> +	 *
> +	 * We could choose full enablement, but on ICL we know there are use
> +	 * cases which disable slices for functional, apart for performance
> +	 * reasons. So in this case we select a known stable subset.
>   	 */
> -	if (unlikely(i915->perf.oa.exclusive_stream))
> -		ctx_sseu = intel_device_default_sseu(i915);
> -	else
> +	if (!i915->perf.oa.exclusive_stream) {
>   		ctx_sseu = *req_sseu;
> +	} else {
> +		ctx_sseu = intel_device_default_sseu(i915);
> +
> +		if (IS_GEN11(i915)) {
> +			/*
> +			 * We only need subslice count so it doesn't matter
> +			 * which ones we select - just turn of low bits in the

s/turn of/turn off/


> +			 * amount of half of all available subslices per slice.
> +			 */
> +			ctx_sseu.subslice_mask =
> +				~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));


I would go with :


ctx_sseu.subslice_mask = ctx_sseu.subslice_mask & 0xf;


Documentation says that the first 4 subslices are the "big" ones 
(gathered from the fusing register fields which go from 
slice0-subslice[0-3] then slice1-subslice[0-3], etc...), so this should 
be equally media/3d capable.



> +			ctx_sseu.slice_mask = 0x1;
> +		}
> +	}
>   
>   	slices = hweight8(ctx_sseu.slice_mask);
>   	subslices = hweight8(ctx_sseu.subslice_mask);


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 6/6] drm/i915/icl: Support co-existence between per-context SSEU and OA
  2018-10-01  9:50   ` Lionel Landwerlin
@ 2018-10-01 10:20     ` Tvrtko Ursulin
  2018-10-01 11:06       ` Lionel Landwerlin
  0 siblings, 1 reply; 35+ messages in thread
From: Tvrtko Ursulin @ 2018-10-01 10:20 UTC (permalink / raw)
  To: Lionel Landwerlin, Tvrtko Ursulin, Intel-gfx


On 01/10/2018 10:50, Lionel Landwerlin wrote:
> On 17/09/2018 13:30, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> When OA is active we want to lock the powergating configuration, but on
>> Icelake, users like the media stack will have issues if we lock to the
>> full device configuration.
>>
>> Instead lock to a subset of (sub)slices which are currently a known
>> working configuration for all users.
>>
>> v2:
>>   * Fix commit message spelling.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_lrc.c | 25 ++++++++++++++++++++-----
>>   1 file changed, 20 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index b5603e977a3f..cded1f1d9ec2 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -2521,13 +2521,28 @@ u32 gen8_make_rpcs(struct drm_i915_private 
>> *i915, struct intel_sseu *req_sseu)
>>       /*
>>        * If i915/perf is active, we want a stable powergating 
>> configuration
>> -     * on the system. The most natural configuration to take in that 
>> case
>> -     * is the default (i.e maximum the hardware can do).
>> +     * on the system.
>> +     *
>> +     * We could choose full enablement, but on ICL we know there are use
>> +     * cases which disable slices for functional, apart for performance
>> +     * reasons. So in this case we select a known stable subset.
>>        */
>> -    if (unlikely(i915->perf.oa.exclusive_stream))
>> -        ctx_sseu = intel_device_default_sseu(i915);
>> -    else
>> +    if (!i915->perf.oa.exclusive_stream) {
>>           ctx_sseu = *req_sseu;
>> +    } else {
>> +        ctx_sseu = intel_device_default_sseu(i915);
>> +
>> +        if (IS_GEN11(i915)) {
>> +            /*
>> +             * We only need subslice count so it doesn't matter
>> +             * which ones we select - just turn of low bits in the
> 
> s/turn of/turn off/

Yep, thanks.

> 
>> +             * amount of half of all available subslices per slice.
>> +             */
>> +            ctx_sseu.subslice_mask =
>> +                ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
> 
> 
> I would go with :
> 
> 
> ctx_sseu.subslice_mask = ctx_sseu.subslice_mask & 0xf;
> 
> 
> Documentation says that the first 4 subslices are the "big" ones 
> (gathered from the fusing register fields which go from 
> slice0-subslice[0-3] then slice1-subslice[0-3], etc...), so this should 
> be equally media/3d capable.

Doesn't work I think - one 1x6x8 part I've seen has a subslice mask of 
0b11111100 and there we want to have three subslices enabled.

Regards,

Tvrtko

> 
> 
>> +            ctx_sseu.slice_mask = 0x1;
>> +        }
>> +    }
>>       slices = hweight8(ctx_sseu.slice_mask);
>>       subslices = hweight8(ctx_sseu.subslice_mask);
> 
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 6/6] drm/i915/icl: Support co-existence between per-context SSEU and OA
  2018-10-01 10:20     ` Tvrtko Ursulin
@ 2018-10-01 11:06       ` Lionel Landwerlin
  2018-10-01 11:42         ` Tvrtko Ursulin
  0 siblings, 1 reply; 35+ messages in thread
From: Lionel Landwerlin @ 2018-10-01 11:06 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, Intel-gfx

On 01/10/2018 12:20, Tvrtko Ursulin wrote:
>
> On 01/10/2018 10:50, Lionel Landwerlin wrote:
>> On 17/09/2018 13:30, Tvrtko Ursulin wrote:
>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>
>>> When OA is active we want to lock the powergating configuration, but on
>>> Icelake, users like the media stack will have issues if we lock to the
>>> full device configuration.
>>>
>>> Instead lock to a subset of (sub)slices which are currently a known
>>> working configuration for all users.
>>>
>>> v2:
>>>   * Fix commit message spelling.
>>>
>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/intel_lrc.c | 25 ++++++++++++++++++++-----
>>>   1 file changed, 20 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>> index b5603e977a3f..cded1f1d9ec2 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -2521,13 +2521,28 @@ u32 gen8_make_rpcs(struct drm_i915_private 
>>> *i915, struct intel_sseu *req_sseu)
>>>       /*
>>>        * If i915/perf is active, we want a stable powergating 
>>> configuration
>>> -     * on the system. The most natural configuration to take in 
>>> that case
>>> -     * is the default (i.e maximum the hardware can do).
>>> +     * on the system.
>>> +     *
>>> +     * We could choose full enablement, but on ICL we know there 
>>> are use
>>> +     * cases which disable slices for functional, apart for 
>>> performance
>>> +     * reasons. So in this case we select a known stable subset.
>>>        */
>>> -    if (unlikely(i915->perf.oa.exclusive_stream))
>>> -        ctx_sseu = intel_device_default_sseu(i915);
>>> -    else
>>> +    if (!i915->perf.oa.exclusive_stream) {
>>>           ctx_sseu = *req_sseu;
>>> +    } else {
>>> +        ctx_sseu = intel_device_default_sseu(i915);
>>> +
>>> +        if (IS_GEN11(i915)) {
>>> +            /*
>>> +             * We only need subslice count so it doesn't matter
>>> +             * which ones we select - just turn of low bits in the
>>
>> s/turn of/turn off/
>
> Yep, thanks.
>
>>
>>> +             * amount of half of all available subslices per slice.
>>> +             */
>>> +            ctx_sseu.subslice_mask =
>>> +                ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
>>
>>
>> I would go with :
>>
>>
>> ctx_sseu.subslice_mask = ctx_sseu.subslice_mask & 0xf;
>>
>>
>> Documentation says that the first 4 subslices are the "big" ones 
>> (gathered from the fusing register fields which go from 
>> slice0-subslice[0-3] then slice1-subslice[0-3], etc...), so this 
>> should be equally media/3d capable.
>
> Doesn't work I think - one 1x6x8 part I've seen has a subslice mask of 
> 0b11111100 and there we want to have three subslices enabled.
>
> Regards,
>
> Tvrtko


Thanks, then the fusing fields don't map onto big/small sets of 
subslices.

I guess the fusing might be in this pattern (Small/Big) : SBSBSBSB

I was hoping we could know so that we program the powergating at the 
most capable configuration.


-

Lionel


>
>>
>>
>>> +            ctx_sseu.slice_mask = 0x1;
>>> +        }
>>> +    }
>>>       slices = hweight8(ctx_sseu.slice_mask);
>>>       subslices = hweight8(ctx_sseu.subslice_mask);
>>
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 6/6] drm/i915/icl: Support co-existence between per-context SSEU and OA
  2018-10-01 11:06       ` Lionel Landwerlin
@ 2018-10-01 11:42         ` Tvrtko Ursulin
  2018-10-01 14:18           ` Lionel Landwerlin
  0 siblings, 1 reply; 35+ messages in thread
From: Tvrtko Ursulin @ 2018-10-01 11:42 UTC (permalink / raw)
  To: Lionel Landwerlin, Tvrtko Ursulin, Intel-gfx


On 01/10/2018 12:06, Lionel Landwerlin wrote:
> On 01/10/2018 12:20, Tvrtko Ursulin wrote:
>>
>> On 01/10/2018 10:50, Lionel Landwerlin wrote:
>>> On 17/09/2018 13:30, Tvrtko Ursulin wrote:
>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>
>>>> When OA is active we want to lock the powergating configuration, but on
>>>> Icelake, users like the media stack will have issues if we lock to the
>>>> full device configuration.
>>>>
>>>> Instead lock to a subset of (sub)slices which are currently a known
>>>> working configuration for all users.
>>>>
>>>> v2:
>>>>   * Fix commit message spelling.
>>>>
>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>> ---
>>>>   drivers/gpu/drm/i915/intel_lrc.c | 25 ++++++++++++++++++++-----
>>>>   1 file changed, 20 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>>> index b5603e977a3f..cded1f1d9ec2 100644
>>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>>> @@ -2521,13 +2521,28 @@ u32 gen8_make_rpcs(struct drm_i915_private 
>>>> *i915, struct intel_sseu *req_sseu)
>>>>       /*
>>>>        * If i915/perf is active, we want a stable powergating 
>>>> configuration
>>>> -     * on the system. The most natural configuration to take in 
>>>> that case
>>>> -     * is the default (i.e maximum the hardware can do).
>>>> +     * on the system.
>>>> +     *
>>>> +     * We could choose full enablement, but on ICL we know there 
>>>> are use
>>>> +     * cases which disable slices for functional, apart for 
>>>> performance
>>>> +     * reasons. So in this case we select a known stable subset.
>>>>        */
>>>> -    if (unlikely(i915->perf.oa.exclusive_stream))
>>>> -        ctx_sseu = intel_device_default_sseu(i915);
>>>> -    else
>>>> +    if (!i915->perf.oa.exclusive_stream) {
>>>>           ctx_sseu = *req_sseu;
>>>> +    } else {
>>>> +        ctx_sseu = intel_device_default_sseu(i915);
>>>> +
>>>> +        if (IS_GEN11(i915)) {
>>>> +            /*
>>>> +             * We only need subslice count so it doesn't matter
>>>> +             * which ones we select - just turn of low bits in the
>>>
>>> s/turn of/turn off/
>>
>> Yep, thanks.
>>
>>>
>>>> +             * amount of half of all available subslices per slice.
>>>> +             */
>>>> +            ctx_sseu.subslice_mask =
>>>> +                ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
>>>
>>>
>>> I would go with :
>>>
>>>
>>> ctx_sseu.subslice_mask = ctx_sseu.subslice_mask & 0xf;
>>>
>>>
>>> Documentation says that the first 4 subslices are the "big" ones 
>>> (gathered from the fusing register fields which go from 
>>> slice0-subslice[0-3] then slice1-subslice[0-3], etc...), so this 
>>> should be equally media/3d capable.
>>
>> Doesn't work I think - one 1x6x8 part I've seen has a subslice mask of 
>> 0b11111100 and there we want to have three subslices enabled.
>>
>> Regards,
>>
>> Tvrtko
> 
> 
> Thanks, then the fusing fields don't match for a big/small sets of 
> subslices.
> 
> I guess the fusing might be in this pattern (Small/Big) : SBSBSBSB
> 
> I was hoping we could know so that we program the powergating at the 
> most capable configuration.

It will be the most capable configuration. Did you forget the RPCS 
register in ICL works with counts and not masks? According to that 
Bspec table we cannot select anything but the most capable configuration.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 6/6] drm/i915/icl: Support co-existence between per-context SSEU and OA
  2018-10-01 11:42         ` Tvrtko Ursulin
@ 2018-10-01 14:18           ` Lionel Landwerlin
  2018-10-01 15:05             ` Tvrtko Ursulin
  2018-10-01 15:26             ` [PATCH v3] " Tvrtko Ursulin
  0 siblings, 2 replies; 35+ messages in thread
From: Lionel Landwerlin @ 2018-10-01 14:18 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, Intel-gfx

On 01/10/2018 13:42, Tvrtko Ursulin wrote:
>
> On 01/10/2018 12:06, Lionel Landwerlin wrote:
>> On 01/10/2018 12:20, Tvrtko Ursulin wrote:
>>>
>>> On 01/10/2018 10:50, Lionel Landwerlin wrote:
>>>> On 17/09/2018 13:30, Tvrtko Ursulin wrote:
>>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>
>>>>> When OA is active we want to lock the powergating configuration, 
>>>>> but on
>>>>> Icelake, users like the media stack will have issues if we lock to 
>>>>> the
>>>>> full device configuration.
>>>>>
>>>>> Instead lock to a subset of (sub)slices which are currently a known
>>>>> working configuration for all users.
>>>>>
>>>>> v2:
>>>>>   * Fix commit message spelling.
>>>>>
>>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>>> ---
>>>>>   drivers/gpu/drm/i915/intel_lrc.c | 25 ++++++++++++++++++++-----
>>>>>   1 file changed, 20 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>>>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>>>> index b5603e977a3f..cded1f1d9ec2 100644
>>>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>>>> @@ -2521,13 +2521,28 @@ u32 gen8_make_rpcs(struct drm_i915_private 
>>>>> *i915, struct intel_sseu *req_sseu)
>>>>>       /*
>>>>>        * If i915/perf is active, we want a stable powergating 
>>>>> configuration
>>>>> -     * on the system. The most natural configuration to take in 
>>>>> that case
>>>>> -     * is the default (i.e maximum the hardware can do).
>>>>> +     * on the system.
>>>>> +     *
>>>>> +     * We could choose full enablement, but on ICL we know there 
>>>>> are use
>>>>> +     * cases which disable slices for functional, apart for 
>>>>> performance
>>>>> +     * reasons. So in this case we select a known stable subset.
>>>>>        */
>>>>> -    if (unlikely(i915->perf.oa.exclusive_stream))
>>>>> -        ctx_sseu = intel_device_default_sseu(i915);
>>>>> -    else
>>>>> +    if (!i915->perf.oa.exclusive_stream) {
>>>>>           ctx_sseu = *req_sseu;
>>>>> +    } else {
>>>>> +        ctx_sseu = intel_device_default_sseu(i915);
>>>>> +
>>>>> +        if (IS_GEN11(i915)) {
>>>>> +            /*
>>>>> +             * We only need subslice count so it doesn't matter
>>>>> +             * which ones we select - just turn of low bits in the
>>>>
>>>> s/turn of/turn off/
>>>
>>> Yep, thanks.
>>>
>>>>
>>>>> +             * amount of half of all available subslices per slice.
>>>>> +             */
>>>>> +            ctx_sseu.subslice_mask =
>>>>> +                ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
>>>>
>>>>
>>>> I would go with :
>>>>
>>>>
>>>> ctx_sseu.subslice_mask = ctx_sseu.subslice_mask & 0xf;
>>>>
>>>>
>>>> Documentation says that the first 4 subslices are the "big" ones 
>>>> (gathered from the fusing register fields which go from 
>>>> slice0-subslice[0-3] then slice1-subslice[0-3], etc...), so this 
>>>> should be equally media/3d capable.
>>>
>>> Doesn't work I think - one 1x6x8 part I've seen has a subslice mask 
>>> of 0b11111100 and there we want to have three subslices enabled.
>>>
>>> Regards,
>>>
>>> Tvrtko
>>
>>
>> Thanks, then the fusing fields don't match for a big/small sets of 
>> subslices.
>>
>> I guess the fusing might be in this pattern (Small/Big) : SBSBSBSB
>>
>> I was hoping we could know so that we program the powergating at the 
>> most capable configuration.
>
> It will be the most capable configuration. Did you forget the RPCS 
> register in ICL works with counts and not masks? According the that 
> Bspec table we cannot select anything but the most capable configuration.
>
> Regards,
>
> Tvrtko
>
Sorry, I must have forgotten. I was trying to read the docs again 
looking for something that said the fusing always happened in pairs.


I'm confident we need to limit the subslices when OA is on and if you 
know where the details are written down (maybe add the bspec number?), 
this is :


Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 6/6] drm/i915/icl: Support co-existence between per-context SSEU and OA
  2018-10-01 14:18           ` Lionel Landwerlin
@ 2018-10-01 15:05             ` Tvrtko Ursulin
  2018-10-01 15:26             ` [PATCH v3] " Tvrtko Ursulin
  1 sibling, 0 replies; 35+ messages in thread
From: Tvrtko Ursulin @ 2018-10-01 15:05 UTC (permalink / raw)
  To: Lionel Landwerlin, Tvrtko Ursulin, Intel-gfx


On 01/10/2018 15:18, Lionel Landwerlin wrote:
> On 01/10/2018 13:42, Tvrtko Ursulin wrote:
>>
>> On 01/10/2018 12:06, Lionel Landwerlin wrote:
>>> On 01/10/2018 12:20, Tvrtko Ursulin wrote:
>>>>
>>>> On 01/10/2018 10:50, Lionel Landwerlin wrote:
>>>>> On 17/09/2018 13:30, Tvrtko Ursulin wrote:
>>>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>>
>>>>>> When OA is active we want to lock the powergating configuration, 
>>>>>> but on
>>>>>> Icelake, users like the media stack will have issues if we lock to 
>>>>>> the
>>>>>> full device configuration.
>>>>>>
>>>>>> Instead lock to a subset of (sub)slices which are currently a known
>>>>>> working configuration for all users.
>>>>>>
>>>>>> v2:
>>>>>>   * Fix commit message spelling.
>>>>>>
>>>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>>>> ---
>>>>>>   drivers/gpu/drm/i915/intel_lrc.c | 25 ++++++++++++++++++++-----
>>>>>>   1 file changed, 20 insertions(+), 5 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>>>>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>>>>> index b5603e977a3f..cded1f1d9ec2 100644
>>>>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>>>>> @@ -2521,13 +2521,28 @@ u32 gen8_make_rpcs(struct drm_i915_private 
>>>>>> *i915, struct intel_sseu *req_sseu)
>>>>>>       /*
>>>>>>        * If i915/perf is active, we want a stable powergating 
>>>>>> configuration
>>>>>> -     * on the system. The most natural configuration to take in 
>>>>>> that case
>>>>>> -     * is the default (i.e maximum the hardware can do).
>>>>>> +     * on the system.
>>>>>> +     *
>>>>>> +     * We could choose full enablement, but on ICL we know there 
>>>>>> are use
>>>>>> +     * cases which disable slices for functional, apart for 
>>>>>> performance
>>>>>> +     * reasons. So in this case we select a known stable subset.
>>>>>>        */
>>>>>> -    if (unlikely(i915->perf.oa.exclusive_stream))
>>>>>> -        ctx_sseu = intel_device_default_sseu(i915);
>>>>>> -    else
>>>>>> +    if (!i915->perf.oa.exclusive_stream) {
>>>>>>           ctx_sseu = *req_sseu;
>>>>>> +    } else {
>>>>>> +        ctx_sseu = intel_device_default_sseu(i915);
>>>>>> +
>>>>>> +        if (IS_GEN11(i915)) {
>>>>>> +            /*
>>>>>> +             * We only need subslice count so it doesn't matter
>>>>>> +             * which ones we select - just turn of low bits in the
>>>>>
>>>>> s/turn of/turn off/
>>>>
>>>> Yep, thanks.
>>>>
>>>>>
>>>>>> +             * amount of half of all available subslices per slice.
>>>>>> +             */
>>>>>> +            ctx_sseu.subslice_mask =
>>>>>> +                ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
>>>>>
>>>>>
>>>>> I would go with :
>>>>>
>>>>>
>>>>> ctx_sseu.subslice_mask = ctx_sseu.subslice_mask & 0xf;
>>>>>
>>>>>
>>>>> Documentation says that the first 4 subslices are the "big" ones 
>>>>> (gathered from the fusing register fields which go from 
>>>>> slice0-subslice[0-3] then slice1-subslice[0-3], etc...), so this 
>>>>> should be equally media/3d capable.
>>>>
>>>> Doesn't work I think - one 1x6x8 part I've seen has a subslice mask 
>>>> of 0b11111100 and there we want to have three subslices enabled.
>>>>
>>>> Regards,
>>>>
>>>> Tvrtko
>>>
>>>
>>> Thanks, then the fusing fields don't match for a big/small sets of 
>>> subslices.
>>>
>>> I guess the fusing might be in this pattern (Small/Big) : SBSBSBSB
>>>
>>> I was hoping we could know so that we program the powergating at the 
>>> most capable configuration.
>>
>> It will be the most capable configuration. Did you forget the RPCS 
>> register in ICL works with counts and not masks? According to that 
>> Bspec table we cannot select anything but the most capable configuration.
>>
>> Regards,
>>
>> Tvrtko
>>
> Sorry, I must have forgotten. I was trying to read the docs again 
> looking for something that said the fusing always happened in pairs.
> 
> 
> I'm confident we need to limit the subslices when OA is on and if you 
> know where the details are written down (maybe add the bspec number?), 
> this is :

Hm I was certain I have put the link in some time ago, but you are 
right, I have not so will do!

> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Thanks!

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v3] drm/i915/icl: Support co-existence between per-context SSEU and OA
  2018-10-01 14:18           ` Lionel Landwerlin
  2018-10-01 15:05             ` Tvrtko Ursulin
@ 2018-10-01 15:26             ` Tvrtko Ursulin
  1 sibling, 0 replies; 35+ messages in thread
From: Tvrtko Ursulin @ 2018-10-01 15:26 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

When OA is active we want to lock the powergating configuration, but on
Icelake, users like the media stack will have issues if we lock to the
full device configuration.

Instead lock to a subset of (sub)slices which are currently a known
working configuration for all users.

v2:
 * Fix commit message spelling.

v3:
 Lionel:
 * Add bspec reference.
 * Fix spelling in comment.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Bspec: 21140
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c6c5938684cf..2e8151192fc4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2577,13 +2577,28 @@ u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
 
 	/*
 	 * If i915/perf is active, we want a stable powergating configuration
-	 * on the system. The most natural configuration to take in that case
-	 * is the default (i.e maximum the hardware can do).
+	 * on the system.
+	 *
+	 * We could choose full enablement, but on ICL we know there are use
+	 * cases which disable slices for functional as well as performance
+	 * reasons. So in this case we select a known stable subset.
 	 */
-	if (unlikely(i915->perf.oa.exclusive_stream))
-		ctx_sseu = intel_device_default_sseu(i915);
-	else
+	if (!i915->perf.oa.exclusive_stream) {
 		ctx_sseu = *req_sseu;
+	} else {
+		ctx_sseu = intel_device_default_sseu(i915);
+
+		if (IS_GEN11(i915)) {
+			/*
+			 * We only need subslice count so it doesn't matter
+			 * which ones we select - just turn on low bits in the
+			 * amount of half of all available subslices per slice.
+			 */
+			ctx_sseu.subslice_mask =
+				~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
+			ctx_sseu.slice_mask = 0x1;
+		}
+	}
 
 	slices = hweight8(ctx_sseu.slice_mask);
 	subslices = hweight8(ctx_sseu.subslice_mask);
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* ✗ Fi.CI.BAT: failure for Per context dynamic (sub)slice power-gating (rev6)
  2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (13 preceding siblings ...)
  2018-09-18 16:05 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2018-10-01 16:35 ` Patchwork
  14 siblings, 0 replies; 35+ messages in thread
From: Patchwork @ 2018-10-01 16:35 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev6)
URL   : https://patchwork.freedesktop.org/series/48194/
State : failure

== Summary ==

Applying: drm/i915/execlists: Move RPCS setup to context pin
Applying: drm/i915: Record the sseu configuration per-context & engine
Applying: drm/i915/perf: lock powergating configuration to default when active
Applying: drm/i915: Add timeline barrier support
Applying: drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
Using index info to reconstruct a base tree...
M	drivers/gpu/drm/i915/i915_gem_context.c
M	drivers/gpu/drm/i915/intel_lrc.c
M	include/uapi/drm/i915_drm.h
Falling back to patching base and 3-way merge...
Auto-merging include/uapi/drm/i915_drm.h
Auto-merging drivers/gpu/drm/i915/intel_lrc.c
Auto-merging drivers/gpu/drm/i915/i915_gem_context.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/i915_gem_context.c
error: Failed to merge in the changes.
Patch failed at 0005 drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
Use 'git am --show-current-patch' to see the failed patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 4/6] drm/i915: Add timeline barrier support
  2019-01-24 11:42 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin
@ 2019-01-24 13:27   ` Chris Wilson
  0 siblings, 0 replies; 35+ messages in thread
From: Chris Wilson @ 2019-01-24 13:27 UTC (permalink / raw)
  To: Intel-gfx, Tvrtko Ursulin

Quoting Tvrtko Ursulin (2019-01-24 11:42:01)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Timeline barrier allows serialization between different timelines.
> 
> After calling i915_timeline_set_barrier with a request, all following
> submissions on this timeline will be set up as depending on this request,
> or barrier. Once the barrier has been completed it automatically gets
> cleared and things continue as normal.
> 
> This facility will be used by the upcoming context SSEU code.
> 
> v2:
>  * Assert barrier has been retired on timeline_fini. (Chris Wilson)
>  * Fix mock_timeline.
> 
> v3:
>  * Improved comment language. (Chris Wilson)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

I don't think it makes a difference right away, but we should be pulling
the timeline barrier into i915_gem_switch_to_kernel_context().
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 4/6] drm/i915: Add timeline barrier support
  2019-01-24 11:41 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2019-01-24 11:42 ` Tvrtko Ursulin
  2019-01-24 13:27   ` Chris Wilson
  0 siblings, 1 reply; 35+ messages in thread
From: Tvrtko Ursulin @ 2019-01-24 11:42 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Timeline barrier allows serialization between different timelines.

After calling i915_timeline_set_barrier with a request, all following
submissions on this timeline will be set up as depending on this request,
or barrier. Once the barrier has been completed it automatically gets
cleared and things continue as normal.

This facility will be used by the upcoming context SSEU code.

v2:
 * Assert barrier has been retired on timeline_fini. (Chris Wilson)
 * Fix mock_timeline.

v3:
 * Improved comment language. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c           | 13 +++++++++
 drivers/gpu/drm/i915/i915_timeline.c          |  3 +++
 drivers/gpu/drm/i915/i915_timeline.h          | 27 +++++++++++++++++++
 .../gpu/drm/i915/selftests/mock_timeline.c    |  2 ++
 4 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index f941e40fd373..ea659c620461 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -517,6 +517,15 @@ i915_request_alloc_slow(struct intel_context *ce)
 	return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL);
 }
 
+static int add_timeline_barrier(struct i915_request *rq)
+{
+	struct i915_request *barrier =
+		i915_gem_active_raw(&rq->timeline->barrier,
+				    &rq->i915->drm.struct_mutex);
+
+	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
 /**
  * i915_request_alloc - allocate a request structure
  *
@@ -660,6 +669,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
+	ret = add_timeline_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
 	ret = engine->request_alloc(rq);
 	if (ret)
 		goto err_unwind;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 4667cc08c416..5a87c5bd5154 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -37,6 +37,8 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
+
+	init_request_active(&timeline->barrier, NULL);
 }
 
 /**
@@ -69,6 +71,7 @@ void i915_timelines_park(struct drm_i915_private *i915)
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 38c1e15e927a..af6c05333d76 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -64,6 +64,16 @@ struct i915_timeline {
 	 */
 	struct i915_syncmap *sync;
 
+	/**
+	 * Barrier provides the ability to serialize ordering between different
+	 * timelines.
+	 *
+	 * Users can call i915_timeline_set_barrier which will make all
+	 * subsequent submissions to this timeline be executed only after the
+	 * barrier has been completed.
+	 */
+	struct i915_gem_active barrier;
+
 	struct list_head link;
 	const char *name;
 
@@ -136,4 +146,21 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 
 void i915_timelines_park(struct drm_i915_private *i915);
 
+/**
+ * i915_timeline_set_barrier - orders submission between different timelines
+ * @timeline: timeline to set the barrier on
+ * @rq: request after which new submissions can proceed
+ *
+ * Sets the passed in request as the serialization point for all subsequent
+ * submissions on @timeline. Subsequent requests will not be submitted to GPU
+ * until the barrier has been completed.
+ */
+static inline void
+i915_timeline_set_barrier(struct i915_timeline *timeline,
+			  struct i915_request *rq)
+{
+	GEM_BUG_ON(timeline->fence_context == rq->timeline->fence_context);
+	i915_gem_active_set(&timeline->barrier, rq);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index dcf3b16f5a07..a718b64c988e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -19,6 +19,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 
 	i915_syncmap_init(&timeline->sync);
 
+	init_request_active(&timeline->barrier, NULL);
+
 	INIT_LIST_HEAD(&timeline->link);
 }
 
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 4/6] drm/i915: Add timeline barrier support
  2019-01-15 14:47 [PATCH 0/6] Add uAPI to support ICL VME hardware for new media-driver Joonas Lahtinen
@ 2019-01-15 14:47 ` Joonas Lahtinen
  0 siblings, 0 replies; 35+ messages in thread
From: Joonas Lahtinen @ 2019-01-15 14:47 UTC (permalink / raw)
  To: Intel graphics driver community testing & development
  Cc: Jani Nikula, Takashi Iwai, Timo Aaltonen, Carl Zhang, Stephane Marchesin

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Timeline barrier allows serialization between different timelines.

After calling i915_timeline_set_barrier with a request, all following
submissions on this timeline will be set up as depending on this request,
or barrier. Once the barrier has been completed it automatically gets
cleared and things continue as normal.

This facility will be used by the upcoming context SSEU code.

v2:
 * Assert barrier has been retired on timeline_fini. (Chris Wilson)
 * Fix mock_timeline.

v3:
 * Improved comment language. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c           | 13 +++++++++
 drivers/gpu/drm/i915/i915_timeline.c          |  3 +++
 drivers/gpu/drm/i915/i915_timeline.h          | 27 +++++++++++++++++++
 .../gpu/drm/i915/selftests/mock_timeline.c    |  2 ++
 4 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d1355154886a..496217305a00 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -509,6 +509,15 @@ i915_request_alloc_slow(struct intel_context *ce)
 	return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL);
 }
 
+static int add_timeline_barrier(struct i915_request *rq)
+{
+	struct i915_request *barrier =
+		i915_gem_active_raw(&rq->timeline->barrier,
+				    &rq->i915->drm.struct_mutex);
+
+	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
 /**
  * i915_request_alloc - allocate a request structure
  *
@@ -652,6 +661,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
+	ret = add_timeline_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
 	ret = engine->request_alloc(rq);
 	if (ret)
 		goto err_unwind;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 4667cc08c416..5a87c5bd5154 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -37,6 +37,8 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
+
+	init_request_active(&timeline->barrier, NULL);
 }
 
 /**
@@ -69,6 +71,7 @@ void i915_timelines_park(struct drm_i915_private *i915)
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 38c1e15e927a..af6c05333d76 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -64,6 +64,16 @@ struct i915_timeline {
 	 */
 	struct i915_syncmap *sync;
 
+	/**
+	 * Barrier provides the ability to serialize ordering between different
+	 * timelines.
+	 *
+	 * Users can call i915_timeline_set_barrier which will make all
+	 * subsequent submissions to this timeline be executed only after the
+	 * barrier has been completed.
+	 */
+	struct i915_gem_active barrier;
+
 	struct list_head link;
 	const char *name;
 
@@ -136,4 +146,21 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 
 void i915_timelines_park(struct drm_i915_private *i915);
 
+/**
+ * i915_timeline_set_barrier - orders submission between different timelines
+ * @timeline: timeline to set the barrier on
+ * @rq: request after which new submissions can proceed
+ *
+ * Sets the passed in request as the serialization point for all subsequent
+ * submissions on @timeline. Subsequent requests will not be submitted to GPU
+ * until the barrier has been completed.
+ */
+static inline void
+i915_timeline_set_barrier(struct i915_timeline *timeline,
+			  struct i915_request *rq)
+{
+	GEM_BUG_ON(timeline->fence_context == rq->timeline->fence_context);
+	i915_gem_active_set(&timeline->barrier, rq);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index dcf3b16f5a07..a718b64c988e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -19,6 +19,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 
 	i915_syncmap_init(&timeline->sync);
 
+	init_request_active(&timeline->barrier, NULL);
+
 	INIT_LIST_HEAD(&timeline->link);
 }
 
-- 
2.17.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 4/6] drm/i915: Add timeline barrier support
  2019-01-14 13:57 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2019-01-14 13:57 ` Tvrtko Ursulin
  0 siblings, 0 replies; 35+ messages in thread
From: Tvrtko Ursulin @ 2019-01-14 13:57 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Timeline barrier allows serialization between different timelines.

After calling i915_timeline_set_barrier with a request, all following
submissions on this timeline will be set up as depending on this request,
or barrier. Once the barrier has been completed it automatically gets
cleared and things continue as normal.

This facility will be used by the upcoming context SSEU code.

v2:
 * Assert barrier has been retired on timeline_fini. (Chris Wilson)
 * Fix mock_timeline.

v3:
 * Improved comment language. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c           | 13 +++++++++
 drivers/gpu/drm/i915/i915_timeline.c          |  3 +++
 drivers/gpu/drm/i915/i915_timeline.h          | 27 +++++++++++++++++++
 .../gpu/drm/i915/selftests/mock_timeline.c    |  2 ++
 4 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d1355154886a..496217305a00 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -509,6 +509,15 @@ i915_request_alloc_slow(struct intel_context *ce)
 	return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL);
 }
 
+static int add_timeline_barrier(struct i915_request *rq)
+{
+	struct i915_request *barrier =
+		i915_gem_active_raw(&rq->timeline->barrier,
+				    &rq->i915->drm.struct_mutex);
+
+	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
 /**
  * i915_request_alloc - allocate a request structure
  *
@@ -652,6 +661,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
+	ret = add_timeline_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
 	ret = engine->request_alloc(rq);
 	if (ret)
 		goto err_unwind;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 4667cc08c416..5a87c5bd5154 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -37,6 +37,8 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
+
+	init_request_active(&timeline->barrier, NULL);
 }
 
 /**
@@ -69,6 +71,7 @@ void i915_timelines_park(struct drm_i915_private *i915)
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 38c1e15e927a..af6c05333d76 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -64,6 +64,16 @@ struct i915_timeline {
 	 */
 	struct i915_syncmap *sync;
 
+	/**
+	 * Barrier provides the ability to serialize ordering between different
+	 * timelines.
+	 *
+	 * Users can call i915_timeline_set_barrier which will make all
+	 * subsequent submissions to this timeline be executed only after the
+	 * barrier has been completed.
+	 */
+	struct i915_gem_active barrier;
+
 	struct list_head link;
 	const char *name;
 
@@ -136,4 +146,21 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 
 void i915_timelines_park(struct drm_i915_private *i915);
 
+/**
+ * i915_timeline_set_barrier - orders submission between different timelines
+ * @timeline: timeline to set the barrier on
+ * @rq: request after which new submissions can proceed
+ *
+ * Sets the passed in request as the serialization point for all subsequent
+ * submissions on @timeline. Subsequent requests will not be submitted to GPU
+ * until the barrier has been completed.
+ */
+static inline void
+i915_timeline_set_barrier(struct i915_timeline *timeline,
+			  struct i915_request *rq)
+{
+	GEM_BUG_ON(timeline->fence_context == rq->timeline->fence_context);
+	i915_gem_active_set(&timeline->barrier, rq);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index dcf3b16f5a07..a718b64c988e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -19,6 +19,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 
 	i915_syncmap_init(&timeline->sync);
 
+	init_request_active(&timeline->barrier, NULL);
+
 	INIT_LIST_HEAD(&timeline->link);
 }
 
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 4/6] drm/i915: Add timeline barrier support
  2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2019-01-08 15:12 ` Tvrtko Ursulin
  0 siblings, 0 replies; 35+ messages in thread
From: Tvrtko Ursulin @ 2019-01-08 15:12 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Timeline barrier allows serialization between different timelines.

After calling i915_timeline_set_barrier with a request, all following
submissions on this timeline will be set up as depending on this request,
or barrier. Once the barrier has been completed it automatically gets
cleared and things continue as normal.

This facility will be used by the upcoming context SSEU code.

v2:
 * Assert barrier has been retired on timeline_fini. (Chris Wilson)
 * Fix mock_timeline.

v3:
 * Improved comment language. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c           | 13 +++++++++
 drivers/gpu/drm/i915/i915_timeline.c          |  3 +++
 drivers/gpu/drm/i915/i915_timeline.h          | 27 +++++++++++++++++++
 .../gpu/drm/i915/selftests/mock_timeline.c    |  2 ++
 4 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 1e158eb8cb97..b0bbaecac744 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -477,6 +477,15 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
+static int add_timeline_barrier(struct i915_request *rq)
+{
+	struct i915_request *barrier =
+		i915_gem_active_raw(&rq->timeline->barrier,
+				    &rq->i915->drm.struct_mutex);
+
+	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
 /**
  * i915_request_alloc - allocate a request structure
  *
@@ -628,6 +637,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
+	ret = add_timeline_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
 	ret = engine->request_alloc(rq);
 	if (ret)
 		goto err_unwind;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 4667cc08c416..5a87c5bd5154 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -37,6 +37,8 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
+
+	init_request_active(&timeline->barrier, NULL);
 }
 
 /**
@@ -69,6 +71,7 @@ void i915_timelines_park(struct drm_i915_private *i915)
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 38c1e15e927a..af6c05333d76 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -64,6 +64,16 @@ struct i915_timeline {
 	 */
 	struct i915_syncmap *sync;
 
+	/**
+	 * Barrier provides the ability to serialize ordering between different
+	 * timelines.
+	 *
+	 * Users can call i915_timeline_set_barrier which will make all
+	 * subsequent submissions to this timeline be executed only after the
+	 * barrier has been completed.
+	 */
+	struct i915_gem_active barrier;
+
 	struct list_head link;
 	const char *name;
 
@@ -136,4 +146,21 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 
 void i915_timelines_park(struct drm_i915_private *i915);
 
+/**
+ * i915_timeline_set_barrier - orders submission between different timelines
+ * @timeline: timeline to set the barrier on
+ * @rq: request after which new submissions can proceed
+ *
+ * Sets the passed in request as the serialization point for all subsequent
+ * submissions on @timeline. Subsequent requests will not be submitted to GPU
+ * until the barrier has been completed.
+ */
+static inline void
+i915_timeline_set_barrier(struct i915_timeline *timeline,
+			  struct i915_request *rq)
+{
+	GEM_BUG_ON(timeline->fence_context == rq->timeline->fence_context);
+	i915_gem_active_set(&timeline->barrier, rq);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index dcf3b16f5a07..a718b64c988e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -19,6 +19,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 
 	i915_syncmap_init(&timeline->sync);
 
+	init_request_active(&timeline->barrier, NULL);
+
 	INIT_LIST_HEAD(&timeline->link);
 }
 
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 4/6] drm/i915: Add timeline barrier support
  2018-11-13 14:35 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2018-11-13 14:35 ` Tvrtko Ursulin
  0 siblings, 0 replies; 35+ messages in thread
From: Tvrtko Ursulin @ 2018-11-13 14:35 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Timeline barrier allows serialization between different timelines.

After calling i915_timeline_set_barrier with a request, all following
submissions on this timeline will be set up to depend on that request
(the barrier). Once the barrier has completed, it is automatically
cleared and submissions continue as normal.

This facility will be used by the upcoming context SSEU code.

v2:
 * Assert barrier has been retired on timeline_fini. (Chris Wilson)
 * Fix mock_timeline.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c           | 13 +++++++++
 drivers/gpu/drm/i915/i915_timeline.c          |  3 +++
 drivers/gpu/drm/i915/i915_timeline.h          | 27 +++++++++++++++++++
 .../gpu/drm/i915/selftests/mock_timeline.c    |  2 ++
 4 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 71107540581d..d1b2ebfc0ff3 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -563,6 +563,15 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
+static int add_timeline_barrier(struct i915_request *rq)
+{
+	struct i915_request *barrier =
+		i915_gem_active_raw(&rq->timeline->barrier,
+				    &rq->i915->drm.struct_mutex);
+
+	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
 /**
  * i915_request_alloc - allocate a request structure
  *
@@ -716,6 +725,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
+	ret = add_timeline_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
 	/* Unconditionally invalidate GPU caches and TLBs. */
 	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 4667cc08c416..5a87c5bd5154 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -37,6 +37,8 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
+
+	init_request_active(&timeline->barrier, NULL);
 }
 
 /**
@@ -69,6 +71,7 @@ void i915_timelines_park(struct drm_i915_private *i915)
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index a2c2c3ab5fb0..c8526ab44dbc 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -72,6 +72,16 @@ struct i915_timeline {
 	 */
 	u32 global_sync[I915_NUM_ENGINES];
 
+	/**
+	 * Barrier provides the ability to serialize ordering between different
+	 * timelines.
+	 *
+	 * Users can call i915_timeline_set_barrier which will make all
+	 * subsequent submissions be executed only after this barrier has been
+	 * completed.
+	 */
+	struct i915_gem_active barrier;
+
 	struct list_head link;
 	const char *name;
 
@@ -125,4 +135,21 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 
 void i915_timelines_park(struct drm_i915_private *i915);
 
+/**
+ * i915_timeline_set_barrier - orders submission between different timelines
+ * @timeline: timeline to set the barrier on
+ * @rq: request after which new submissions can proceed
+ *
+ * Sets the passed in request as the serialization point for all subsequent
+ * submissions on @timeline. Subsequent requests will not be submitted to GPU
+ * until the barrier has been completed.
+ */
+static inline void
+i915_timeline_set_barrier(struct i915_timeline *timeline,
+			  struct i915_request *rq)
+{
+	GEM_BUG_ON(timeline->fence_context == rq->timeline->fence_context);
+	i915_gem_active_set(&timeline->barrier, rq);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index dcf3b16f5a07..a718b64c988e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -19,6 +19,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 
 	i915_syncmap_init(&timeline->sync);
 
+	init_request_active(&timeline->barrier, NULL);
+
 	INIT_LIST_HEAD(&timeline->link);
 }
 
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 4/6] drm/i915: Add timeline barrier support
  2018-09-14 16:09 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2018-09-14 16:09 ` Tvrtko Ursulin
  0 siblings, 0 replies; 35+ messages in thread
From: Tvrtko Ursulin @ 2018-09-14 16:09 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Timeline barrier allows serialization between different timelines.

After calling i915_timeline_set_barrier with a request, all following
submissions on this timeline will be set up to depend on that request
(the barrier). Once the barrier has completed, it is automatically
cleared and submissions continue as normal.

This facility will be used by the upcoming context SSEU code.

v2:
 * Assert barrier has been retired on timeline_fini. (Chris Wilson)
 * Fix mock_timeline.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c           | 13 +++++++++
 drivers/gpu/drm/i915/i915_timeline.c          |  3 +++
 drivers/gpu/drm/i915/i915_timeline.h          | 27 +++++++++++++++++++
 .../gpu/drm/i915/selftests/mock_timeline.c    |  2 ++
 4 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a492385b2089..76fc80330c85 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -644,6 +644,15 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
+static int add_timeline_barrier(struct i915_request *rq)
+{
+	struct i915_request *barrier =
+		i915_gem_active_raw(&rq->timeline->barrier,
+				    &rq->i915->drm.struct_mutex);
+
+	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
 /**
  * i915_request_alloc - allocate a request structure
  *
@@ -808,6 +817,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
+	ret = add_timeline_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
 	/* Unconditionally invalidate GPU caches and TLBs. */
 	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 4667cc08c416..5a87c5bd5154 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -37,6 +37,8 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
+
+	init_request_active(&timeline->barrier, NULL);
 }
 
 /**
@@ -69,6 +71,7 @@ void i915_timelines_park(struct drm_i915_private *i915)
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index a2c2c3ab5fb0..c8526ab44dbc 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -72,6 +72,16 @@ struct i915_timeline {
 	 */
 	u32 global_sync[I915_NUM_ENGINES];
 
+	/**
+	 * Barrier provides the ability to serialize ordering between different
+	 * timelines.
+	 *
+	 * Users can call i915_timeline_set_barrier which will make all
+	 * subsequent submissions be executed only after this barrier has been
+	 * completed.
+	 */
+	struct i915_gem_active barrier;
+
 	struct list_head link;
 	const char *name;
 
@@ -125,4 +135,21 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 
 void i915_timelines_park(struct drm_i915_private *i915);
 
+/**
+ * i915_timeline_set_barrier - orders submission between different timelines
+ * @timeline: timeline to set the barrier on
+ * @rq: request after which new submissions can proceed
+ *
+ * Sets the passed in request as the serialization point for all subsequent
+ * submissions on @timeline. Subsequent requests will not be submitted to GPU
+ * until the barrier has been completed.
+ */
+static inline void
+i915_timeline_set_barrier(struct i915_timeline *timeline,
+			  struct i915_request *rq)
+{
+	GEM_BUG_ON(timeline->fence_context == rq->timeline->fence_context);
+	i915_gem_active_set(&timeline->barrier, rq);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index dcf3b16f5a07..a718b64c988e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -19,6 +19,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 
 	i915_syncmap_init(&timeline->sync);
 
+	init_request_active(&timeline->barrier, NULL);
+
 	INIT_LIST_HEAD(&timeline->link);
 }
 
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2019-01-24 13:27 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2018-09-17 11:30 ` [PATCH 1/6] drm/i915/execlists: Move RPCS setup to context pin Tvrtko Ursulin
2018-09-17 11:43   ` Chris Wilson
2018-09-17 11:30 ` [PATCH 2/6] drm/i915: Record the sseu configuration per-context & engine Tvrtko Ursulin
2018-09-17 11:30 ` [PATCH 3/6] drm/i915/perf: lock powergating configuration to default when active Tvrtko Ursulin
2018-09-17 11:50   ` Chris Wilson
2018-09-17 11:30 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin
2018-09-17 11:30 ` [PATCH 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace Tvrtko Ursulin
2018-09-17 11:48   ` Chris Wilson
2018-09-18 13:43   ` [PATCH v18 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only) Tvrtko Ursulin
2018-09-17 11:30 ` [PATCH 6/6] drm/i915/icl: Support co-existence between per-context SSEU and OA Tvrtko Ursulin
2018-10-01  8:49   ` Tvrtko Ursulin
2018-10-01  9:50   ` Lionel Landwerlin
2018-10-01 10:20     ` Tvrtko Ursulin
2018-10-01 11:06       ` Lionel Landwerlin
2018-10-01 11:42         ` Tvrtko Ursulin
2018-10-01 14:18           ` Lionel Landwerlin
2018-10-01 15:05             ` Tvrtko Ursulin
2018-10-01 15:26             ` [PATCH v3] " Tvrtko Ursulin
2018-09-17 11:44 ` ✗ Fi.CI.CHECKPATCH: warning for Per context dynamic (sub)slice power-gating (rev4) Patchwork
2018-09-17 11:46 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-09-17 12:02 ` ✓ Fi.CI.BAT: success " Patchwork
2018-09-17 13:04 ` ✗ Fi.CI.IGT: failure " Patchwork
2018-09-18 14:06 ` ✗ Fi.CI.CHECKPATCH: warning for Per context dynamic (sub)slice power-gating (rev5) Patchwork
2018-09-18 14:09 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-09-18 14:25 ` ✓ Fi.CI.BAT: success " Patchwork
2018-09-18 16:05 ` ✗ Fi.CI.IGT: failure " Patchwork
2018-10-01 16:35 ` ✗ Fi.CI.BAT: failure for Per context dynamic (sub)slice power-gating (rev6) Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2019-01-24 11:41 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2019-01-24 11:42 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin
2019-01-24 13:27   ` Chris Wilson
2019-01-15 14:47 [PATCH 0/6] Add uAPI to support ICL VME hardware for new media-driver Joonas Lahtinen
2019-01-15 14:47 ` [PATCH 4/6] drm/i915: Add timeline barrier support Joonas Lahtinen
2019-01-14 13:57 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2019-01-14 13:57 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin
2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2019-01-08 15:12 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin
2018-11-13 14:35 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2018-11-13 14:35 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin
2018-09-14 16:09 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2018-09-14 16:09 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.