All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/6] Per context dynamic (sub)slice power-gating
@ 2019-01-08 15:12 Tvrtko Ursulin
  2019-01-08 15:12 ` [PATCH 1/6] drm/i915/execlists: Move RPCS setup to context pin Tvrtko Ursulin
                   ` (13 more replies)
  0 siblings, 14 replies; 25+ messages in thread
From: Tvrtko Ursulin @ 2019-01-08 15:12 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Changes since last version:

 * Squashed OA interaction patches into a single patch.
 * Small tidy in selftest.

Lionel Landwerlin (2):
  drm/i915: Record the sseu configuration per-context & engine
  drm/i915/perf: lock powergating configuration to default when active

Tvrtko Ursulin (4):
  drm/i915/execlists: Move RPCS setup to context pin
  drm/i915: Add timeline barrier support
  drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
  drm/i915/selftests: Context SSEU reconfiguration tests

 drivers/gpu/drm/i915/i915_drv.h               |  14 +
 drivers/gpu/drm/i915/i915_gem_context.c       | 354 ++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context.h       |  10 +
 drivers/gpu/drm/i915/i915_perf.c              |  13 +-
 drivers/gpu/drm/i915/i915_request.c           |  13 +
 drivers/gpu/drm/i915/i915_request.h           |  10 +
 drivers/gpu/drm/i915/i915_timeline.c          |   3 +
 drivers/gpu/drm/i915/i915_timeline.h          |  27 +
 drivers/gpu/drm/i915/intel_lrc.c              | 100 ++--
 drivers/gpu/drm/i915/intel_lrc.h              |   2 +
 .../gpu/drm/i915/selftests/i915_gem_context.c | 481 ++++++++++++++++++
 .../gpu/drm/i915/selftests/mock_timeline.c    |   2 +
 include/uapi/drm/i915_drm.h                   |  43 ++
 13 files changed, 1035 insertions(+), 37 deletions(-)

-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 1/6] drm/i915/execlists: Move RPCS setup to context pin
  2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2019-01-08 15:12 ` Tvrtko Ursulin
  2019-01-08 15:12 ` [PATCH 2/6] drm/i915: Record the sseu configuration per-context & engine Tvrtko Ursulin
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 25+ messages in thread
From: Tvrtko Ursulin @ 2019-01-08 15:12 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Configuring RPCS in context image just before pin is sufficient and will
come extra handy in one of the following patches.

v2:
 * Split image setup a bit differently. (Chris Wilson)

v3:
 * Update context image after reset as well - otherwise the application
   of pinned default state clears the RPCS.

v4:
 * Use local variable throughout the function. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 45 ++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6c98fb7cebf2..229d620cf157 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1170,6 +1170,24 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
 	return i915_vma_pin(vma, 0, 0, flags);
 }
 
+static u32 make_rpcs(struct drm_i915_private *dev_priv);
+
+static void
+__execlists_update_reg_state(struct intel_engine_cs *engine,
+			     struct intel_context *ce)
+{
+	u32 *regs = ce->lrc_reg_state;
+	struct intel_ring *ring = ce->ring;
+
+	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma);
+	regs[CTX_RING_HEAD + 1] = ring->head;
+	regs[CTX_RING_TAIL + 1] = ring->tail;
+
+	/* RPCS */
+	if (engine->class == RENDER_CLASS)
+		regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915);
+}
+
 static struct intel_context *
 __execlists_context_pin(struct intel_engine_cs *engine,
 			struct i915_gem_context *ctx,
@@ -1208,10 +1226,8 @@ __execlists_context_pin(struct intel_engine_cs *engine,
 	GEM_BUG_ON(!intel_ring_offset_valid(ce->ring, ce->ring->head));
 
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
-	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
-		i915_ggtt_offset(ce->ring->vma);
-	ce->lrc_reg_state[CTX_RING_HEAD + 1] = ce->ring->head;
-	ce->lrc_reg_state[CTX_RING_TAIL + 1] = ce->ring->tail;
+
+	__execlists_update_reg_state(engine, ce);
 
 	ce->state->obj->pin_global++;
 	i915_gem_context_get(ctx);
@@ -1835,14 +1851,14 @@ static void execlists_reset(struct intel_engine_cs *engine,
 		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
 		       engine->context_size - PAGE_SIZE);
 	}
-	execlists_init_reg_state(regs,
-				 request->gem_context, engine, request->ring);
 
 	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
-	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma);
-
 	request->ring->head = intel_ring_wrap(request->ring, request->postfix);
-	regs[CTX_RING_HEAD + 1] = request->ring->head;
+
+	execlists_init_reg_state(regs, request->gem_context, engine,
+				 request->ring);
+
+	__execlists_update_reg_state(engine, request->hw_context);
 
 	intel_ring_update_space(request->ring);
 
@@ -2534,8 +2550,7 @@ static void execlists_init_reg_state(u32 *regs,
 
 	if (rcs) {
 		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
-		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
-			make_rpcs(dev_priv));
+		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
 
 		i915_oa_init_reg_state(engine, ctx, regs);
 	}
@@ -2696,12 +2711,8 @@ void intel_lr_context_resume(struct drm_i915_private *i915)
 
 			intel_ring_reset(ce->ring, 0);
 
-			if (ce->pin_count) { /* otherwise done in context_pin */
-				u32 *regs = ce->lrc_reg_state;
-
-				regs[CTX_RING_HEAD + 1] = ce->ring->head;
-				regs[CTX_RING_TAIL + 1] = ce->ring->tail;
-			}
+			if (ce->pin_count) /* otherwise done in context_pin */
+				__execlists_update_reg_state(engine, ce);
 		}
 	}
 }
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 2/6] drm/i915: Record the sseu configuration per-context & engine
  2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
  2019-01-08 15:12 ` [PATCH 1/6] drm/i915/execlists: Move RPCS setup to context pin Tvrtko Ursulin
@ 2019-01-08 15:12 ` Tvrtko Ursulin
  2019-01-08 15:12 ` [PATCH 3/6] drm/i915/perf: lock powergating configuration to default when active Tvrtko Ursulin
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 25+ messages in thread
From: Tvrtko Ursulin @ 2019-01-08 15:12 UTC (permalink / raw)
  To: Intel-gfx

From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

We want to expose the ability to reconfigure the slices, subslice and
eu per context and per engine. To facilitate that, store the current
configuration on the context for each engine, which is initially set
to the device default upon creation.

v2: record sseu configuration per context & engine (Chris)

v3: introduce the i915_gem_context_sseu to store powergating
    programming, sseu_dev_info has grown quite a bit (Lionel)

v4: rename i915_gem_sseu into intel_sseu (Chris)
    use to_intel_context() (Chris)

v5: More to_intel_context() (Tvrtko)
    Switch intel_sseu from union to struct (Tvrtko)
    Move context default sseu in existing loop (Chris)

v6: s/intel_sseu_from_device_sseu/intel_device_default_sseu/ (Tvrtko)

Tvrtko Ursulin:

v7:
 * Pass intel_sseu by pointer instead of value to make_rpcs.
 * Rebase for make_rpcs changes.

v8:
 * Rebase for RPCS edit on pin.

v9:
 * Rebase for context image setup changes.

v10:
 * Rename dev_priv to i915. (Chris Wilson)

v11:
 * Rebase.

v12:
 * Rebase for IS_GEN changes.

v13:
 * Rebase for RUNTIME_INFO.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         | 14 +++++++++++
 drivers/gpu/drm/i915/i915_gem_context.c |  2 ++
 drivers/gpu/drm/i915/i915_gem_context.h |  4 ++++
 drivers/gpu/drm/i915/i915_request.h     | 10 ++++++++
 drivers/gpu/drm/i915/intel_lrc.c        | 31 +++++++++++++------------
 5 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 17a017645c5d..d57438d87bc0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3300,6 +3300,20 @@ mkwrite_device_info(struct drm_i915_private *dev_priv)
 	return (struct intel_device_info *)INTEL_INFO(dev_priv);
 }
 
+static inline struct intel_sseu
+intel_device_default_sseu(struct drm_i915_private *i915)
+{
+	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
+	struct intel_sseu value = {
+		.slice_mask = sseu->slice_mask,
+		.subslice_mask = sseu->subslice_mask[0],
+		.min_eus_per_subslice = sseu->max_eus_per_subslice,
+		.max_eus_per_subslice = sseu->max_eus_per_subslice,
+	};
+
+	return value;
+}
+
 /* modesetting */
 extern void intel_modeset_init_hw(struct drm_device *dev);
 extern int intel_modeset_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 5905b6d8f291..a565643e9a26 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -343,6 +343,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 		struct intel_context *ce = &ctx->__engine[n];
 
 		ce->gem_context = ctx;
+		/* Use the whole device by default */
+		ce->sseu = intel_device_default_sseu(dev_priv);
 	}
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index f6d870b1f73e..ef04e422cf9a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -31,6 +31,7 @@
 
 #include "i915_gem.h"
 #include "i915_scheduler.h"
+#include "intel_device_info.h"
 
 struct pid;
 
@@ -171,6 +172,9 @@ struct i915_gem_context {
 		int pin_count;
 
 		const struct intel_context_ops *ops;
+
+		/** sseu: Control eu/slice partitioning */
+		struct intel_sseu sseu;
 	} __engine[I915_NUM_ENGINES];
 
 	/** ring_size: size for allocating the per-engine ring buffer */
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index d014b0605445..907bd8f11aeb 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -38,6 +38,16 @@ struct drm_i915_gem_object;
 struct i915_request;
 struct i915_timeline;
 
+/*
+ * Powergating configuration for a particular (context,engine).
+ */
+struct intel_sseu {
+	u8 slice_mask;
+	u8 subslice_mask;
+	u8 min_eus_per_subslice;
+	u8 max_eus_per_subslice;
+};
+
 struct intel_wait {
 	struct rb_node node;
 	struct task_struct *tsk;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 229d620cf157..6df792bc5067 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1170,7 +1170,8 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
 	return i915_vma_pin(vma, 0, 0, flags);
 }
 
-static u32 make_rpcs(struct drm_i915_private *dev_priv);
+static u32
+make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
 
 static void
 __execlists_update_reg_state(struct intel_engine_cs *engine,
@@ -1185,7 +1186,8 @@ __execlists_update_reg_state(struct intel_engine_cs *engine,
 
 	/* RPCS */
 	if (engine->class == RENDER_CLASS)
-		regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915);
+		regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915,
+							  &ce->sseu);
 }
 
 static struct intel_context *
@@ -2326,18 +2328,19 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine)
 }
 
 static u32
-make_rpcs(struct drm_i915_private *dev_priv)
+make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu)
 {
-	bool subslice_pg = RUNTIME_INFO(dev_priv)->sseu.has_subslice_pg;
-	u8 slices = hweight8(RUNTIME_INFO(dev_priv)->sseu.slice_mask);
-	u8 subslices = hweight8(RUNTIME_INFO(dev_priv)->sseu.subslice_mask[0]);
+	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
+	bool subslice_pg = sseu->has_subslice_pg;
+	u8 slices = hweight8(ctx_sseu->slice_mask);
+	u8 subslices = hweight8(ctx_sseu->subslice_mask);
 	u32 rpcs = 0;
 
 	/*
 	 * No explicit RPCS request is needed to ensure full
 	 * slice/subslice/EU enablement prior to Gen9.
 	*/
-	if (INTEL_GEN(dev_priv) < 9)
+	if (INTEL_GEN(i915) < 9)
 		return 0;
 
 	/*
@@ -2365,7 +2368,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
 	 * subslices are enabled, or a count between one and four on the first
 	 * slice.
 	 */
-	if (IS_GEN(dev_priv, 11) && slices == 1 && subslices >= 4) {
+	if (IS_GEN(i915, 11) && slices == 1 && subslices >= 4) {
 		GEM_BUG_ON(subslices & 1);
 
 		subslice_pg = false;
@@ -2378,10 +2381,10 @@ make_rpcs(struct drm_i915_private *dev_priv)
 	 * must make an explicit request through RPCS for full
 	 * enablement.
 	*/
-	if (RUNTIME_INFO(dev_priv)->sseu.has_slice_pg) {
+	if (sseu->has_slice_pg) {
 		u32 mask, val = slices;
 
-		if (INTEL_GEN(dev_priv) >= 11) {
+		if (INTEL_GEN(i915) >= 11) {
 			mask = GEN11_RPCS_S_CNT_MASK;
 			val <<= GEN11_RPCS_S_CNT_SHIFT;
 		} else {
@@ -2406,18 +2409,16 @@ make_rpcs(struct drm_i915_private *dev_priv)
 		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
 	}
 
-	if (RUNTIME_INFO(dev_priv)->sseu.has_eu_pg) {
+	if (sseu->has_eu_pg) {
 		u32 val;
 
-		val = RUNTIME_INFO(dev_priv)->sseu.eu_per_subslice <<
-		      GEN8_RPCS_EU_MIN_SHIFT;
+		val = ctx_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
 		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
 		val &= GEN8_RPCS_EU_MIN_MASK;
 
 		rpcs |= val;
 
-		val = RUNTIME_INFO(dev_priv)->sseu.eu_per_subslice <<
-		      GEN8_RPCS_EU_MAX_SHIFT;
+		val = ctx_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
 		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
 		val &= GEN8_RPCS_EU_MAX_MASK;
 
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 3/6] drm/i915/perf: lock powergating configuration to default when active
  2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
  2019-01-08 15:12 ` [PATCH 1/6] drm/i915/execlists: Move RPCS setup to context pin Tvrtko Ursulin
  2019-01-08 15:12 ` [PATCH 2/6] drm/i915: Record the sseu configuration per-context & engine Tvrtko Ursulin
@ 2019-01-08 15:12 ` Tvrtko Ursulin
  2019-01-08 15:12 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 25+ messages in thread
From: Tvrtko Ursulin @ 2019-01-08 15:12 UTC (permalink / raw)
  To: Intel-gfx

From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

If some of the contexts submitting workloads to the GPU have been
configured to shutdown slices/subslices, we might loose the NOA
configurations written in the NOA muxes.

One possible solution to this problem is to reprogram the NOA muxes
when we switch to a new context. We initially tried this in the
workaround batchbuffer but some concerns where raised about the cost
of reprogramming at every context switch. This solution is also not
without consequences from the userspace point of view. Reprogramming
of the muxes can only happen once the powergating configuration has
changed (which happens after context switch). This means for a window
of time during the recording, counters recorded by the OA unit might
be invalid. This requires userspace dealing with OA reports to discard
the invalid values.

Minimizing the reprogramming could be implemented by tracking of the
last programmed configuration somewhere in GGTT and use MI_PREDICATE
to discard some of the programming commands, but the command streamer
would still have to parse all the MI_LRI instructions in the
workaround batchbuffer.

Another solution, which this change implements, is to simply disregard
the user requested configuration for the period of time when i915/perf
is active.

On most platforms there are no issues with this apart from a performance
penality for some media workloads that benefit from running on a partially
powergated GPU. We already prevent RC6 from affecting the programming so
it doesn't sound completely unreasonable to hold on powergating for the
same reason.

On Icelake however there would a functional problem if the slices not-
containing the VME block were left enabled with a running media workload
which explicitly disabled them. To avoid a GPU hang in this case, on
Icelake we lock the enablement to only slices which contain VME blocks.
Downside is that it means degraded GPU performance when OA is active but
there is no known alternative solution for this.

v2: Leave RPCS programming in intel_lrc.c (Lionel)

v3: Update for s/union intel_sseu/struct intel_sseu/ (Lionel)
    More to_intel_context() (Tvrtko)
    s/dev_priv/i915/ (Tvrtko)

Tvrtko Ursulin:

v4:
 * Rebase for make_rpcs changes.

v5:
 * Apply OA restriction from make_rpcs directly.

v6:
 * Rebase for context image setup changes.

v7:
 * Move stream assignment before metric enable.

v8-9:
 * Rebase.

v10:
 * Squashed with ICL support patch.

Bspec: 21140
Co-Developed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> # v9
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 13 ++++++---
 drivers/gpu/drm/i915/intel_lrc.c | 46 ++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/intel_lrc.h |  2 ++
 3 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 5b1ae5ed97b3..67e3fc12e84c 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1677,6 +1677,11 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
 
 		CTX_REG(reg_state, state_offset, flex_regs[i], value);
 	}
+
+	CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
+		gen8_make_rpcs(dev_priv,
+			       &to_intel_context(ctx,
+						 dev_priv->engine[RCS])->sseu));
 }
 
 /*
@@ -2098,21 +2103,21 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	if (ret)
 		goto err_lock;
 
+	stream->ops = &i915_oa_stream_ops;
+	dev_priv->perf.oa.exclusive_stream = stream;
+
 	ret = dev_priv->perf.oa.ops.enable_metric_set(stream);
 	if (ret) {
 		DRM_DEBUG("Unable to enable metric set\n");
 		goto err_enable;
 	}
 
-	stream->ops = &i915_oa_stream_ops;
-
-	dev_priv->perf.oa.exclusive_stream = stream;
-
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	return 0;
 
 err_enable:
+	dev_priv->perf.oa.exclusive_stream = NULL;
 	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6df792bc5067..bca09a497f27 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1170,9 +1170,6 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
 	return i915_vma_pin(vma, 0, 0, flags);
 }
 
-static u32
-make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
-
 static void
 __execlists_update_reg_state(struct intel_engine_cs *engine,
 			     struct intel_context *ce)
@@ -1186,8 +1183,8 @@ __execlists_update_reg_state(struct intel_engine_cs *engine,
 
 	/* RPCS */
 	if (engine->class == RENDER_CLASS)
-		regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915,
-							  &ce->sseu);
+		regs[CTX_R_PWR_CLK_STATE + 1] = gen8_make_rpcs(engine->i915,
+							       &ce->sseu);
 }
 
 static struct intel_context *
@@ -2327,13 +2324,12 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine)
 	return logical_ring_init(engine);
 }
 
-static u32
-make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu)
+u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
 {
 	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
 	bool subslice_pg = sseu->has_subslice_pg;
-	u8 slices = hweight8(ctx_sseu->slice_mask);
-	u8 subslices = hweight8(ctx_sseu->subslice_mask);
+	struct intel_sseu ctx_sseu;
+	u8 slices, subslices;
 	u32 rpcs = 0;
 
 	/*
@@ -2343,6 +2339,34 @@ make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu)
 	if (INTEL_GEN(i915) < 9)
 		return 0;
 
+	/*
+	 * If i915/perf is active, we want a stable powergating configuration
+	 * on the system.
+	 *
+	 * We could choose full enablement, but on ICL we know there are use
+	 * cases which disable slices for functional, apart for performance
+	 * reasons. So in this case we select a known stable subset.
+	 */
+	if (!i915->perf.oa.exclusive_stream) {
+		ctx_sseu = *req_sseu;
+	} else {
+		ctx_sseu = intel_device_default_sseu(i915);
+
+		if (IS_GEN(i915, 11)) {
+			/*
+			 * We only need subslice count so it doesn't matter
+			 * which ones we select - just turn off low bits in the
+			 * amount of half of all available subslices per slice.
+			 */
+			ctx_sseu.subslice_mask =
+				~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
+			ctx_sseu.slice_mask = 0x1;
+		}
+	}
+
+	slices = hweight8(ctx_sseu.slice_mask);
+	subslices = hweight8(ctx_sseu.subslice_mask);
+
 	/*
 	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
 	 * wide and Icelake has up to eight subslices, specfial programming is
@@ -2412,13 +2436,13 @@ make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu)
 	if (sseu->has_eu_pg) {
 		u32 val;
 
-		val = ctx_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
+		val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
 		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
 		val &= GEN8_RPCS_EU_MIN_MASK;
 
 		rpcs |= val;
 
-		val = ctx_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
+		val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
 		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
 		val &= GEN8_RPCS_EU_MAX_MASK;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index f5a5502ecf70..a4e28cc55fda 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -104,4 +104,6 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
 
+u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
+
 #endif /* _INTEL_LRC_H_ */
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 4/6] drm/i915: Add timeline barrier support
  2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  2019-01-08 15:12 ` [PATCH 3/6] drm/i915/perf: lock powergating configuration to default when active Tvrtko Ursulin
@ 2019-01-08 15:12 ` Tvrtko Ursulin
  2019-01-08 15:12 ` [PATCH 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only) Tvrtko Ursulin
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 25+ messages in thread
From: Tvrtko Ursulin @ 2019-01-08 15:12 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Timeline barrier allows serialization between different timelines.

After calling i915_timeline_set_barrier with a request, all following
submissions on this timeline will be set up as depending on this request,
or barrier. Once the barrier has been completed it automatically gets
cleared and things continue as normal.

This facility will be used by the upcoming context SSEU code.

v2:
 * Assert barrier has been retired on timeline_fini. (Chris Wilson)
 * Fix mock_timeline.

v3:
 * Improved comment language. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c           | 13 +++++++++
 drivers/gpu/drm/i915/i915_timeline.c          |  3 +++
 drivers/gpu/drm/i915/i915_timeline.h          | 27 +++++++++++++++++++
 .../gpu/drm/i915/selftests/mock_timeline.c    |  2 ++
 4 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 1e158eb8cb97..b0bbaecac744 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -477,6 +477,15 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
+static int add_timeline_barrier(struct i915_request *rq)
+{
+	struct i915_request *barrier =
+		i915_gem_active_raw(&rq->timeline->barrier,
+				    &rq->i915->drm.struct_mutex);
+
+	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
 /**
  * i915_request_alloc - allocate a request structure
  *
@@ -628,6 +637,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
+	ret = add_timeline_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
 	ret = engine->request_alloc(rq);
 	if (ret)
 		goto err_unwind;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 4667cc08c416..5a87c5bd5154 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -37,6 +37,8 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
+
+	init_request_active(&timeline->barrier, NULL);
 }
 
 /**
@@ -69,6 +71,7 @@ void i915_timelines_park(struct drm_i915_private *i915)
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 38c1e15e927a..af6c05333d76 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -64,6 +64,16 @@ struct i915_timeline {
 	 */
 	struct i915_syncmap *sync;
 
+	/**
+	 * Barrier provides the ability to serialize ordering between different
+	 * timelines.
+	 *
+	 * Users can call i915_timeline_set_barrier which will make all
+	 * subsequent submissions to this timeline be executed only after the
+	 * barrier has been completed.
+	 */
+	struct i915_gem_active barrier;
+
 	struct list_head link;
 	const char *name;
 
@@ -136,4 +146,21 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 
 void i915_timelines_park(struct drm_i915_private *i915);
 
+/**
+ * i915_timeline_set_barrier - orders submission between different timelines
+ * @timeline: timeline to set the barrier on
+ * @rq: request after which new submissions can proceed
+ *
+ * Sets the passed in request as the serialization point for all subsequent
+ * submissions on @timeline. Subsequent requests will not be submitted to GPU
+ * until the barrier has been completed.
+ */
+static inline void
+i915_timeline_set_barrier(struct i915_timeline *timeline,
+			  struct i915_request *rq)
+{
+	GEM_BUG_ON(timeline->fence_context == rq->timeline->fence_context);
+	i915_gem_active_set(&timeline->barrier, rq);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index dcf3b16f5a07..a718b64c988e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -19,6 +19,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 
 	i915_syncmap_init(&timeline->sync);
 
+	init_request_active(&timeline->barrier, NULL);
+
 	INIT_LIST_HEAD(&timeline->link);
 }
 
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
  2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (3 preceding siblings ...)
  2019-01-08 15:12 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin
@ 2019-01-08 15:12 ` Tvrtko Ursulin
  2019-01-08 16:29   ` [PATCH v24 " Tvrtko Ursulin
  2019-01-08 15:12 ` [PATCH 6/6] drm/i915/selftests: Context SSEU reconfiguration tests Tvrtko Ursulin
                   ` (8 subsequent siblings)
  13 siblings, 1 reply; 25+ messages in thread
From: Tvrtko Ursulin @ 2019-01-08 15:12 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We want to allow userspace to reconfigure the subslice configuration on a
per context basis.

This is required for the functional requirement of shutting down non-VME
enabled sub-slices on Gen11 parts.

To do so, we expose a context parameter to allow adjustment of the RPCS
register stored within the context image (and currently not accessible via
LRI).

If the context is adjusted before first use or whilst idle, the adjustment
is for "free"; otherwise if the context is active we queue a request to do
so (using the kernel context), following all other activity by that
context, which is also marked as barrier for all following submission
against the same context.

Since the overhead of device re-configuration during context switching can
be significant, especially in multi-context workloads, we limit this new
uAPI to only support the Gen11 VME use case. In this use case either the
device is fully enabled, and exactly one slice and half of the subslices
are enabled.

Example usage:

	struct drm_i915_gem_context_param_sseu sseu = { };
	struct drm_i915_gem_context_param arg =
		{ .param = I915_CONTEXT_PARAM_SSEU,
		  .ctx_id = gem_context_create(fd),
		  .size = sizeof(sseu),
		  .value = to_user_pointer(&sseu)
		};

	/* Query device defaults. */
	gem_context_get_param(fd, &arg);

	/* Set VME configuration on a 1x6x8 part. */
	sseu.slice_mask = 0x1;
	sseu.subslice_mask = 0xe0;
	gem_context_set_param(fd, &arg);

v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)

v3: Add ability to program this per engine (Chris)

v4: Move most get_sseu() into i915_gem_context.c (Lionel)

v5: Validate sseu configuration against the device's capabilities (Lionel)

v6: Change context powergating settings through MI_SDM on kernel context (Chris)

v7: Synchronize the requests following a powergating setting change using a global
    dependency (Chris)
    Iterate timelines through dev_priv.gt.active_rings (Tvrtko)
    Disable RPCS configuration setting for non capable users (Lionel/Tvrtko)

v8: s/union intel_sseu/struct intel_sseu/ (Lionel)
    s/dev_priv/i915/ (Tvrtko)
    Change uapi class/instance fields to u16 (Tvrtko)
    Bump mask fields to 64bits (Lionel)
    Don't return EPERM when dynamic sseu is disabled (Tvrtko)

v9: Import context image into kernel context's ppgtt only when
    reconfiguring powergated slice/subslices (Chris)
    Use aliasing ppgtt when needed (Michel)

Tvrtko Ursulin:

v10:
 * Update for upstream changes.
 * Request submit needs a RPM reference.
 * Reject on !FULL_PPGTT for simplicity.
 * Pull out get/set param to helpers for readability and less indent.
 * Use i915_request_await_dma_fence in add_global_barrier to skip waits
   on the same timeline and avoid GEM_BUG_ON.
 * No need to explicitly assign a NULL pointer to engine in legacy mode.
 * No need to move gen8_make_rpcs up.
 * Factored out global barrier as prep patch.
 * Allow to only CAP_SYS_ADMIN if !Gen11.

v11:
 * Remove engine vfunc in favour of local helper. (Chris Wilson)
 * Stop retiring requests before updates since it is not needed
   (Chris Wilson)
 * Implement direct CPU update path for idle contexts. (Chris Wilson)
 * Left side dependency needs only be on the same context timeline.
   (Chris Wilson)
 * It is sufficient to order the timeline. (Chris Wilson)
 * Reject !RCS configuration attempts with -ENODEV for now.

v12:
 * Rebase for make_rpcs.

v13:
 * Centralize SSEU normalization to make_rpcs.
 * Type width checking (uAPI <-> implementation).
 * Gen11 restrictions uAPI checks.
 * Gen11 subslice count differences handling.
 Chris Wilson:
 * args->size handling fixes.
 * Update context image from GGTT.
 * Postpone context image update to pinning.
 * Use i915_gem_active_raw instead of last_request_on_engine.

v14:
 * Add activity tracker on intel_context to fix the lifetime issues
   and simplify the code. (Chris Wilson)

v15:
 * Fix context pin leak if no space in ring by simplifying the
   context pinning sequence.

v16:
 * Rebase for context get/set param locking changes.
 * Just -ENODEV on !Gen11. (Joonas)

v17:
 * Fix one Gen11 subslice enablement rule.
 * Handle error from i915_sw_fence_await_sw_fence_gfp. (Chris Wilson)

v18:
 * Update commit message. (Joonas)
 * Restrict uAPI to VME use case. (Joonas)

v19:
 * Rebase.

v20:
 * Rebase for ce->active_tracker.

v21:
 * Rebase for IS_GEN changes.

v22:
 * Reserve uAPI for flags straight away. (Chris Wilson)

v23:
 * Rebase for RUNTIME_INFO.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107634
Issue: https://github.com/intel/media-driver/issues/267
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Zhipeng Gong <zhipeng.gong@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> # v21
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 341 +++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context.h |   6 +
 drivers/gpu/drm/i915/intel_lrc.c        |   4 +-
 include/uapi/drm/i915_drm.h             |  43 +++
 4 files changed, 391 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index a565643e9a26..1ab7d6980c36 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,6 +90,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include "intel_lrc_reg.h"
 #include "intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -322,6 +323,15 @@ static u32 default_desc_template(const struct drm_i915_private *i915,
 	return desc;
 }
 
+static void intel_context_retire(struct i915_gem_active *active,
+				 struct i915_request *rq)
+{
+	struct intel_context *ce =
+		container_of(active, typeof(*ce), active_tracker);
+
+	intel_context_unpin(ce);
+}
+
 static struct i915_gem_context *
 __create_hw_context(struct drm_i915_private *dev_priv,
 		    struct drm_i915_file_private *file_priv)
@@ -345,6 +355,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 		ce->gem_context = ctx;
 		/* Use the whole device by default */
 		ce->sseu = intel_device_default_sseu(dev_priv);
+
+		init_request_active(&ce->active_tracker, intel_context_retire);
 	}
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
@@ -842,6 +854,56 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+static int get_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_engine_cs *engine;
+	struct intel_context *ce;
+	int ret;
+
+	if (args->size == 0)
+		goto out;
+	else if (args->size < sizeof(user_sseu))
+		return -EINVAL;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.flags || user_sseu.rsvd)
+		return -EINVAL;
+
+	engine = intel_engine_lookup_user(ctx->i915,
+					  user_sseu.class,
+					  user_sseu.instance);
+	if (!engine)
+		return -EINVAL;
+
+	/* Only use for mutex here is to serialize get_param and set_param. */
+	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	ce = to_intel_context(ctx, engine);
+
+	user_sseu.slice_mask = ce->sseu.slice_mask;
+	user_sseu.subslice_mask = ce->sseu.subslice_mask;
+	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
+	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
+
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
+			 sizeof(user_sseu)))
+		return -EFAULT;
+
+out:
+	args->size = sizeof(user_sseu);
+
+	return 0;
+}
+
 int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
@@ -854,15 +916,17 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	if (!ctx)
 		return -ENOENT;
 
-	args->size = 0;
 	switch (args->param) {
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 		ret = -EINVAL;
 		break;
 	case I915_CONTEXT_PARAM_NO_ZEROMAP:
+		args->size = 0;
 		args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
 		break;
 	case I915_CONTEXT_PARAM_GTT_SIZE:
+		args->size = 0;
+
 		if (ctx->ppgtt)
 			args->value = ctx->ppgtt->vm.total;
 		else if (to_i915(dev)->mm.aliasing_ppgtt)
@@ -871,14 +935,20 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 			args->value = to_i915(dev)->ggtt.vm.total;
 		break;
 	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+		args->size = 0;
 		args->value = i915_gem_context_no_error_capture(ctx);
 		break;
 	case I915_CONTEXT_PARAM_BANNABLE:
+		args->size = 0;
 		args->value = i915_gem_context_is_bannable(ctx);
 		break;
 	case I915_CONTEXT_PARAM_PRIORITY:
+		args->size = 0;
 		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
 		break;
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = get_sseu(ctx, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -888,6 +958,271 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+static int gen8_emit_rpcs_config(struct i915_request *rq,
+				 struct intel_context *ce,
+				 struct intel_sseu sseu)
+{
+	u64 offset;
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	offset = ce->state->node.start +
+		LRC_STATE_PN * PAGE_SIZE +
+		(CTX_R_PWR_CLK_STATE + 1) * 4;
+
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	*cs++ = lower_32_bits(offset);
+	*cs++ = upper_32_bits(offset);
+	*cs++ = gen8_make_rpcs(rq->i915, &sseu);
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int
+gen8_modify_rpcs_gpu(struct intel_context *ce,
+		     struct intel_engine_cs *engine,
+		     struct intel_sseu sseu)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct i915_request *rq, *prev;
+	int ret;
+
+	GEM_BUG_ON(!ce->pin_count);
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	/* Submitting requests etc needs the hw awake. */
+	intel_runtime_pm_get(i915);
+
+	rq = i915_request_alloc(engine, i915->kernel_context);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		goto out_put;
+	}
+
+	/* Queue this switch after all other activity by this context. */
+	prev = i915_gem_active_raw(&ce->ring->timeline->last_request,
+				   &i915->drm.struct_mutex);
+	if (prev && !i915_request_completed(prev)) {
+		ret = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+						       &prev->submit,
+						       I915_FENCE_GFP);
+		if (ret < 0)
+			goto out_add;
+	}
+
+	ret = gen8_emit_rpcs_config(rq, ce, sseu);
+	if (ret)
+		goto out_add;
+
+	/* Order all following requests to be after. */
+	i915_timeline_set_barrier(ce->ring->timeline, rq);
+
+	/*
+	 * Guarantee context image and the timeline remains pinned until the
+	 * modifying request is retired by setting the ce activity tracker.
+	 *
+	 * But we only need to take one pin on the account of it. Or in other
+	 * words transfer the pinned ce object to tracked active request.
+	 */
+	if (!i915_gem_active_isset(&ce->active_tracker))
+		__intel_context_pin(ce);
+	i915_gem_active_set(&ce->active_tracker, rq);
+
+out_add:
+	i915_request_add(rq);
+out_put:
+	intel_runtime_pm_put(i915);
+
+	return ret;
+}
+
+static int
+i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
+				  struct intel_engine_cs *engine,
+				  struct intel_sseu sseu)
+{
+	struct intel_context *ce = to_intel_context(ctx, engine);
+	int ret;
+
+	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
+	GEM_BUG_ON(engine->id != RCS);
+
+	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	/* Nothing to do if unmodified. */
+	if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
+		goto out;
+
+	/*
+	 * If context is not idle we have to submit an ordered request to modify
+	 * its context image via the kernel context. Pristine and idle contexts
+	 * will be configured on pinning.
+	 */
+	if (ce->pin_count)
+		ret = gen8_modify_rpcs_gpu(ce, engine, sseu);
+
+	if (!ret)
+		ce->sseu = sseu;
+
+out:
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	return ret;
+}
+
+static int
+user_to_context_sseu(struct drm_i915_private *i915,
+		     const struct drm_i915_gem_context_param_sseu *user,
+		     struct intel_sseu *context)
+{
+	const struct sseu_dev_info *device = &RUNTIME_INFO(i915)->sseu;
+
+	/* No zeros in any field. */
+	if (!user->slice_mask || !user->subslice_mask ||
+	    !user->min_eus_per_subslice || !user->max_eus_per_subslice)
+		return -EINVAL;
+
+	/* Max > min. */
+	if (user->max_eus_per_subslice < user->min_eus_per_subslice)
+		return -EINVAL;
+
+	/* Check validity against hardware. */
+	if (user->slice_mask & ~device->slice_mask)
+		return -EINVAL;
+
+	if (user->subslice_mask & ~device->subslice_mask[0])
+		return -EINVAL;
+
+	if (user->max_eus_per_subslice > device->max_eus_per_subslice)
+		return -EINVAL;
+
+	/*
+	 * Some future proofing on the types since the uAPI is wider than the
+	 * current internal implementation.
+	 */
+	if (WARN_ON((fls(user->slice_mask) >
+		     sizeof(context->slice_mask) * BITS_PER_BYTE) ||
+		    (fls(user->subslice_mask) >
+		     sizeof(context->subslice_mask) * BITS_PER_BYTE) ||
+		    overflows_type(user->min_eus_per_subslice,
+				   context->min_eus_per_subslice) ||
+		    overflows_type(user->max_eus_per_subslice,
+				   context->max_eus_per_subslice)))
+		return -EINVAL;
+
+	context->slice_mask = user->slice_mask;
+	context->subslice_mask = user->subslice_mask;
+	context->min_eus_per_subslice = user->min_eus_per_subslice;
+	context->max_eus_per_subslice = user->max_eus_per_subslice;
+
+	/* Part specific restrictions. */
+	if (IS_GEN(i915, 11)) {
+		unsigned int hw_s = hweight8(device->slice_mask);
+		unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
+		unsigned int req_s = hweight8(context->slice_mask);
+		unsigned int req_ss = hweight8(context->subslice_mask);
+
+		/*
+		 * Only full subslice enablement is possible if more than one
+		 * slice is turned on.
+		 */
+		if (req_s > 1 && req_ss != hw_ss_per_s)
+			return -EINVAL;
+
+		/*
+		 * If more than four (SScount bitfield limit) subslices are
+		 * requested then the number has to be even.
+		 */
+		if (req_ss > 4 && (req_ss & 1))
+			return -EINVAL;
+
+		/*
+		 * If only one slice is enabled and subslice count is below the
+		 * device full enablement, it must be at most half of the all
+		 * available subslices.
+		 */
+		if (req_s == 1 && req_ss < hw_ss_per_s &&
+		    req_ss > (hw_ss_per_s / 2))
+			return -EINVAL;
+
+		/* ABI restriction - VME use case only. */
+
+		/* All slices or one slice only. */
+		if (req_s != 1 && req_s != hw_s)
+			return -EINVAL;
+
+		/*
+		 * Half subslices or full enablement only when one slice is
+		 * enabled.
+		 */
+		if (req_s == 1 &&
+		    (req_ss != hw_ss_per_s && req_ss != (hw_ss_per_s / 2)))
+			return -EINVAL;
+
+		/* No EU configuration changes. */
+		if ((user->min_eus_per_subslice !=
+		     device->max_eus_per_subslice) ||
+		    (user->max_eus_per_subslice !=
+		     device->max_eus_per_subslice))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int set_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_engine_cs *engine;
+	struct intel_sseu sseu;
+	int ret;
+
+	if (args->size < sizeof(user_sseu))
+		return -EINVAL;
+
+	if (!IS_GEN(i915, 11))
+		return -ENODEV;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.flags || user_sseu.rsvd)
+		return -EINVAL;
+
+	engine = intel_engine_lookup_user(i915,
+					  user_sseu.class,
+					  user_sseu.instance);
+	if (!engine)
+		return -EINVAL;
+
+	/* Only render engine supports RPCS configuration. */
+	if (engine->class != RENDER_CLASS)
+		return -ENODEV;
+
+	ret = user_to_context_sseu(i915, &user_sseu, &sseu);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	if (ret)
+		return ret;
+
+	args->size = sizeof(user_sseu);
+
+	return 0;
+}
+
 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
@@ -950,7 +1285,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 					I915_USER_PRIORITY(priority);
 		}
 		break;
-
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = set_sseu(ctx, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index ef04e422cf9a..2ded4b8c9b9f 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -171,6 +171,12 @@ struct i915_gem_context {
 		u64 lrc_desc;
 		int pin_count;
 
+		/**
+		 * active_tracker: Active tracker for the external rq activity
+		 * on this intel_context object.
+		 */
+		struct i915_gem_active active_tracker;
+
 		const struct intel_context_ops *ops;
 
 		/** sseu: Control eu/slice partitioning */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index bca09a497f27..e9c6876a5897 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2392,7 +2392,9 @@ u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
 	 * subslices are enabled, or a count between one and four on the first
 	 * slice.
 	 */
-	if (IS_GEN(i915, 11) && slices == 1 && subslices >= 4) {
+	if (IS_GEN(i915, 11) &&
+	    slices == 1 &&
+	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
 		GEM_BUG_ON(subslices & 1);
 
 		subslice_pg = false;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 298b2e197744..f90f23ab4ca3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1486,9 +1486,52 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+	/*
+	 * When using the following param, value should be a pointer to
+	 * drm_i915_gem_context_param_sseu.
+	 */
+#define I915_CONTEXT_PARAM_SSEU		0x7
 	__u64 value;
 };
 
+struct drm_i915_gem_context_param_sseu {
+	/*
+	 * Engine class & instance to be configured or queried.
+	 */
+	__u16 class;
+	__u16 instance;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 flags;
+
+	/*
+	 * Mask of slices to enable for the context. Valid values are a subset
+	 * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+	 */
+	__u64 slice_mask;
+
+	/*
+	 * Mask of subslices to enable for the context. Valid values are a
+	 * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK.
+	 */
+	__u64 subslice_mask;
+
+	/*
+	 * Minimum/Maximum number of EUs to enable per subslice for the
+	 * context. min_eus_per_subslice must be inferior or equal to
+	 * max_eus_per_subslice.
+	 */
+	__u16 min_eus_per_subslice;
+	__u16 max_eus_per_subslice;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd;
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 6/6] drm/i915/selftests: Context SSEU reconfiguration tests
  2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (4 preceding siblings ...)
  2019-01-08 15:12 ` [PATCH 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only) Tvrtko Ursulin
@ 2019-01-08 15:12 ` Tvrtko Ursulin
  2019-01-08 15:37 ` ✗ Fi.CI.CHECKPATCH: warning for Per context dynamic (sub)slice power-gating (rev11) Patchwork
                   ` (7 subsequent siblings)
  13 siblings, 0 replies; 25+ messages in thread
From: Tvrtko Ursulin @ 2019-01-08 15:12 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Exercise the context image reconfiguration logic for idle and busy
contexts, with the resets thrown into the mix as well.

Free from the uAPI restrictions this test runs on all Gen9+ platforms
with slice power gating.

v2:
 * Rename some helpers for clarity.
 * Include subtest names in error logs.
 * Remove unnecessary function export.

v3:
 * Rebase for RUNTIME_INFO.

v4:
 * Fix incomplete unexport from v2. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c       |  31 +-
 .../gpu/drm/i915/selftests/i915_gem_context.c | 481 ++++++++++++++++++
 2 files changed, 502 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 1ab7d6980c36..c395141e26b5 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1043,23 +1043,19 @@ gen8_modify_rpcs_gpu(struct intel_context *ce,
 }
 
 static int
-i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
-				  struct intel_engine_cs *engine,
-				  struct intel_sseu sseu)
+__i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
+				    struct intel_engine_cs *engine,
+				    struct intel_sseu sseu)
 {
 	struct intel_context *ce = to_intel_context(ctx, engine);
-	int ret;
+	int ret = 0;
 
 	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
 	GEM_BUG_ON(engine->id != RCS);
 
-	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	/* Nothing to do if unmodified. */
 	if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
-		goto out;
+		return 0;
 
 	/*
 	 * If context is not idle we have to submit an ordered request to modify
@@ -1072,7 +1068,22 @@ i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
 	if (!ret)
 		ce->sseu = sseu;
 
-out:
+	return ret;
+}
+
+static int
+i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
+				  struct intel_engine_cs *engine,
+				  struct intel_sseu sseu)
+{
+	int ret;
+
+	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+
 	mutex_unlock(&ctx->i915->drm.struct_mutex);
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index d00cdf3c2939..c940381e2c66 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -27,6 +27,8 @@
 #include "../i915_selftest.h"
 #include "i915_random.h"
 #include "igt_flush_test.h"
+#include "igt_reset.h"
+#include "igt_spinner.h"
 
 #include "mock_drm.h"
 #include "mock_gem_device.h"
@@ -650,6 +652,484 @@ static int igt_ctx_exec(void *arg)
 	return err;
 }
 
+static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj;
+	u32 *cmd;
+	int err;
+
+	if (INTEL_GEN(vma->vm->i915) < 8)
+		return ERR_PTR(-EINVAL);
+
+	obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(cmd)) {
+		err = PTR_ERR(cmd);
+		goto err;
+	}
+
+	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
+	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
+	*cmd++ = lower_32_bits(vma->node.start);
+	*cmd++ = upper_32_bits(vma->node.start);
+	*cmd = MI_BATCH_BUFFER_END;
+
+	i915_gem_object_unpin_map(obj);
+
+	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	if (err)
+		goto err;
+
+	vma = i915_vma_instance(obj, vma->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		goto err;
+
+	return vma;
+
+err:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
+}
+
+static int
+emit_rpcs_query(struct drm_i915_gem_object *obj,
+		struct i915_gem_context *ctx,
+		struct intel_engine_cs *engine,
+		struct i915_request **rq_out)
+{
+	struct i915_address_space *vm;
+	struct i915_request *rq;
+	struct i915_vma *batch;
+	struct i915_vma *vma;
+	int err;
+
+	GEM_BUG_ON(!ctx->ppgtt);
+	GEM_BUG_ON(!intel_engine_can_store_dword(engine));
+
+	vm = &ctx->ppgtt->vm;
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	if (err)
+		return err;
+
+	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
+	if (err)
+		return err;
+
+	batch = rpcs_query_batch(vma);
+	if (IS_ERR(batch)) {
+		err = PTR_ERR(batch);
+		goto err_vma;
+	}
+
+	rq = i915_request_alloc(engine, ctx);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_batch;
+	}
+
+	err = engine->emit_bb_start(rq, batch->node.start, batch->node.size, 0);
+	if (err)
+		goto err_request;
+
+	err = i915_vma_move_to_active(batch, rq, 0);
+	if (err)
+		goto skip_request;
+
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	if (err)
+		goto skip_request;
+
+	i915_gem_object_set_active_reference(batch->obj);
+	i915_vma_unpin(batch);
+	i915_vma_close(batch);
+
+	i915_vma_unpin(vma);
+
+	*rq_out = i915_request_get(rq);
+
+	i915_request_add(rq);
+
+	return 0;
+
+skip_request:
+	i915_request_skip(rq, err);
+err_request:
+	i915_request_add(rq);
+err_batch:
+	i915_vma_unpin(batch);
+err_vma:
+	i915_vma_unpin(vma);
+
+	return err;
+}
+
+#define TEST_IDLE	(1 << 0)
+#define TEST_BUSY	(1 << 1)
+#define TEST_RESET	(1 << 2)
+
+static int
+__sseu_prepare(struct drm_i915_private *i915,
+	       const char *name,
+	       unsigned int flags,
+	       struct i915_gem_context *ctx,
+	       struct intel_engine_cs *engine,
+	       struct igt_spinner **spin_out)
+{
+	int ret = 0;
+
+	if (flags & (TEST_BUSY | TEST_RESET)) {
+		struct igt_spinner *spin;
+		struct i915_request *rq;
+
+		spin = kzalloc(sizeof(*spin), GFP_KERNEL);
+		if (!spin) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		ret = igt_spinner_init(spin, i915);
+		if (ret)
+			return ret;
+
+		rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP);
+		if (IS_ERR(rq)) {
+			ret = PTR_ERR(rq);
+			igt_spinner_fini(spin);
+			kfree(spin);
+			goto out;
+		}
+
+		i915_request_add(rq);
+
+		if (!igt_wait_for_spinner(spin, rq)) {
+			pr_err("%s: Spinner failed to start!\n", name);
+			igt_spinner_end(spin);
+			igt_spinner_fini(spin);
+			kfree(spin);
+			ret = -ETIMEDOUT;
+			goto out;
+		}
+
+		*spin_out = spin;
+	}
+
+out:
+	return ret;
+}
+
+static int
+__read_slice_count(struct drm_i915_private *i915,
+		   struct i915_gem_context *ctx,
+		   struct intel_engine_cs *engine,
+		   struct drm_i915_gem_object *obj,
+		   struct igt_spinner *spin,
+		   u32 *rpcs)
+{
+	struct i915_request *rq = NULL;
+	u32 s_mask, s_shift;
+	unsigned int cnt;
+	u32 *buf, val;
+	long ret;
+
+	ret = emit_rpcs_query(obj, ctx, engine, &rq);
+	if (ret)
+		return ret;
+
+	if (spin)
+		igt_spinner_end(spin);
+
+	ret = i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT);
+	if (ret <= 0)
+		return ret < 0 ? ret : -ETIME;
+
+	i915_request_put(rq);
+
+	buf = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(buf)) {
+		ret = PTR_ERR(buf);
+		return ret;
+	}
+
+	if (INTEL_GEN(i915) >= 11) {
+		s_mask = GEN11_RPCS_S_CNT_MASK;
+		s_shift = GEN11_RPCS_S_CNT_SHIFT;
+	} else {
+		s_mask = GEN8_RPCS_S_CNT_MASK;
+		s_shift = GEN8_RPCS_S_CNT_SHIFT;
+	}
+
+	val = *buf;
+	cnt = (val & s_mask) >> s_shift;
+	*rpcs = val;
+
+	i915_gem_object_unpin_map(obj);
+
+	return cnt;
+}
+
+static void __err_rpcs(u32 rpcs, unsigned int slices)
+{
+	pr_info("RPCS=0x%x; %u%sx%u%s\n",
+		rpcs, slices,
+		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
+		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
+		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");
+}
+
+static int
+__sseu_finish(struct drm_i915_private *i915,
+	      const char *name,
+	      unsigned int flags,
+	      struct i915_gem_context *ctx,
+	      struct i915_gem_context *kctx,
+	      struct intel_engine_cs *engine,
+	      struct drm_i915_gem_object *obj,
+	      unsigned int expected,
+	      struct igt_spinner *spin)
+{
+	unsigned int slices =
+		hweight32(intel_device_default_sseu(i915).slice_mask);
+	u32 rpcs = 0;
+	int ret = 0;
+
+	if (flags & TEST_RESET) {
+		ret = i915_reset_engine(engine, "sseu");
+		if (ret)
+			goto out;
+	}
+
+	ret = __read_slice_count(i915, ctx, engine, obj,
+				 flags & TEST_RESET ? NULL : spin, &rpcs);
+	if (ret < 0) {
+		goto out;
+	} else if (ret != expected) {
+		pr_err("%s: Context slice count %d is not %u!\n",
+		       name, ret, expected);
+		__err_rpcs(rpcs, ret);
+		ret = -EINVAL;
+		goto out;
+	} else {
+		ret = 0;
+	}
+
+	ret = __read_slice_count(i915, kctx, engine, obj, NULL, &rpcs);
+	if (ret < 0) {
+		goto out;
+	} else if (ret != slices) {
+		pr_err("%s: Kernel context slice count %d is not %u!\n",
+		       name, ret, slices);
+		__err_rpcs(rpcs, ret);
+		ret = -EINVAL;
+		goto out;
+	} else {
+		ret = 0;
+	}
+
+out:
+	if (spin)
+		igt_spinner_end(spin);
+
+	if (flags & TEST_IDLE) {
+		ret = i915_gem_wait_for_idle(i915,
+					     I915_WAIT_LOCKED,
+					     MAX_SCHEDULE_TIMEOUT);
+		if (ret)
+			return ret;
+
+		ret = __read_slice_count(i915, ctx, engine, obj, NULL, &rpcs);
+		if (ret < 0) {
+			return ret;
+		} else if (ret != expected) {
+			pr_err("%s: Context slice count %d is not %u after idle!\n",
+			       name, ret, expected);
+			__err_rpcs(rpcs, ret);
+			return -EINVAL;
+		} else {
+			ret = 0;
+		}
+	}
+
+	return ret;
+
+}
+
+static int
+__sseu_test(struct drm_i915_private *i915,
+	    const char *name,
+	    unsigned int flags,
+	    struct i915_gem_context *ctx,
+	    struct intel_engine_cs *engine,
+	    struct drm_i915_gem_object *obj,
+	    struct intel_sseu sseu)
+{
+	struct igt_spinner *spin = NULL;
+	struct i915_gem_context *kctx;
+	int ret;
+
+	kctx = kernel_context(i915);
+	if (IS_ERR(kctx))
+		return PTR_ERR(kctx);
+
+	ret = __sseu_prepare(i915, name, flags, ctx, engine, &spin);
+	if (ret)
+		goto out;
+
+	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	if (ret)
+		goto out;
+
+	ret = __sseu_finish(i915, name, flags, ctx, kctx, engine, obj,
+			    hweight32(sseu.slice_mask), spin);
+
+out:
+	if (spin) {
+		igt_spinner_end(spin);
+		igt_spinner_fini(spin);
+		kfree(spin);
+	}
+
+	kernel_context_close(kctx);
+
+	return ret;
+}
+
+static int
+__igt_ctx_sseu(struct drm_i915_private *i915,
+	       const char *name,
+	       unsigned int flags)
+{
+	struct intel_sseu default_sseu = intel_device_default_sseu(i915);
+	struct intel_engine_cs *engine = i915->engine[RCS];
+	struct drm_i915_gem_object *obj;
+	struct i915_gem_context *ctx;
+	struct intel_sseu pg_sseu;
+	struct drm_file *file;
+	int ret;
+
+	if (INTEL_GEN(i915) < 9)
+		return 0;
+
+	if (!RUNTIME_INFO(i915)->sseu.has_slice_pg)
+		return 0;
+
+	if (hweight32(default_sseu.slice_mask) < 2)
+		return 0;
+
+	/*
+	 * Gen11 VME friendly power-gated configuration with half enabled
+	 * sub-slices.
+	 */
+	pg_sseu = default_sseu;
+	pg_sseu.slice_mask = 1;
+	pg_sseu.subslice_mask =
+		~(~0 << (hweight32(default_sseu.subslice_mask) / 2));
+
+	pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
+		name, flags, hweight32(default_sseu.slice_mask),
+		hweight32(pg_sseu.slice_mask));
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	if (flags & TEST_RESET)
+		igt_global_reset_lock(i915);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	ctx = i915_gem_create_context(i915, file->driver_priv);
+	if (IS_ERR(ctx)) {
+		ret = PTR_ERR(ctx);
+		goto out_unlock;
+	}
+
+	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		ret = PTR_ERR(obj);
+		goto out_unlock;
+	}
+
+	intel_runtime_pm_get(i915);
+
+	/* First set the default mask. */
+	ret = __sseu_test(i915, name, flags, ctx, engine, obj, default_sseu);
+	if (ret)
+		goto out_fail;
+
+	/* Then set a power-gated configuration. */
+	ret = __sseu_test(i915, name, flags, ctx, engine, obj, pg_sseu);
+	if (ret)
+		goto out_fail;
+
+	/* Back to defaults. */
+	ret = __sseu_test(i915, name, flags, ctx, engine, obj, default_sseu);
+	if (ret)
+		goto out_fail;
+
+	/* One last power-gated configuration for the road. */
+	ret = __sseu_test(i915, name, flags, ctx, engine, obj, pg_sseu);
+	if (ret)
+		goto out_fail;
+
+out_fail:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		ret = -EIO;
+
+	i915_gem_object_put(obj);
+
+	intel_runtime_pm_put(i915);
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	if (flags & TEST_RESET)
+		igt_global_reset_unlock(i915);
+
+	mock_file_free(i915, file);
+
+	if (ret)
+		pr_err("%s: Failed with %d!\n", name, ret);
+
+	return ret;
+}
+
+static int igt_ctx_sseu(void *arg)
+{
+	struct {
+		const char *name;
+		unsigned int flags;
+	} *phase, phases[] = {
+		{ .name = "basic", .flags = 0 },
+		{ .name = "idle", .flags = TEST_IDLE },
+		{ .name = "busy", .flags = TEST_BUSY },
+		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
+		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
+		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
+	};
+	unsigned int i;
+	int ret = 0;
+
+	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
+	     i++, phase++)
+		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);
+
+	return ret;
+}
+
 static int igt_ctx_readonly(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -1232,6 +1712,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
 		SUBTEST(live_nop_switch),
 		SUBTEST(igt_ctx_exec),
 		SUBTEST(igt_ctx_readonly),
+		SUBTEST(igt_ctx_sseu),
 		SUBTEST(igt_vm_isolation),
 	};
 
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for Per context dynamic (sub)slice power-gating (rev11)
  2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (5 preceding siblings ...)
  2019-01-08 15:12 ` [PATCH 6/6] drm/i915/selftests: Context SSEU reconfiguration tests Tvrtko Ursulin
@ 2019-01-08 15:37 ` Patchwork
  2019-01-08 15:40 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 25+ messages in thread
From: Patchwork @ 2019-01-08 15:37 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev11)
URL   : https://patchwork.freedesktop.org/series/48194/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
3350b5e6ed18 drm/i915/execlists: Move RPCS setup to context pin
961bc57575da drm/i915: Record the sseu configuration per-context & engine
37c8e6fcbba7 drm/i915/perf: lock powergating configuration to default when active
-:72: WARNING:BAD_SIGN_OFF: 'Co-developed-by:' is the preferred signature form
#72: 
Co-Developed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

total: 0 errors, 1 warnings, 0 checks, 126 lines checked
f2b12bcf0ca7 drm/i915: Add timeline barrier support
d9a1551457de drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
-:47: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#47: 
v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)

-:506: CHECK:UNNECESSARY_PARENTHESES: Unnecessary parentheses around 'user->min_eus_per_subslice !=
 		     device->max_eus_per_subslice'
#506: FILE: drivers/gpu/drm/i915/i915_gem_context.c:1171:
+		if ((user->min_eus_per_subslice !=
+		     device->max_eus_per_subslice) ||
+		    (user->max_eus_per_subslice !=
+		     device->max_eus_per_subslice))

-:506: CHECK:UNNECESSARY_PARENTHESES: Unnecessary parentheses around 'user->max_eus_per_subslice !=
 		     device->max_eus_per_subslice'
#506: FILE: drivers/gpu/drm/i915/i915_gem_context.c:1171:
+		if ((user->min_eus_per_subslice !=
+		     device->max_eus_per_subslice) ||
+		    (user->max_eus_per_subslice !=
+		     device->max_eus_per_subslice))

total: 0 errors, 1 warnings, 2 checks, 479 lines checked
93a3f1fd397c drm/i915/selftests: Context SSEU reconfiguration tests
-:407: WARNING:UNNECESSARY_ELSE: else is not generally useful after a break or return
#407: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:961:
+			return -EINVAL;
+		} else {

-:414: CHECK:BRACES: Blank lines aren't necessary before a close brace '}'
#414: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:968:
+
+}

total: 0 errors, 1 warnings, 1 checks, 550 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* ✗ Fi.CI.SPARSE: warning for Per context dynamic (sub)slice power-gating (rev11)
  2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (6 preceding siblings ...)
  2019-01-08 15:37 ` ✗ Fi.CI.CHECKPATCH: warning for Per context dynamic (sub)slice power-gating (rev11) Patchwork
@ 2019-01-08 15:40 ` Patchwork
  2019-01-08 16:13 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 25+ messages in thread
From: Patchwork @ 2019-01-08 15:40 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev11)
URL   : https://patchwork.freedesktop.org/series/48194/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915/execlists: Move RPCS setup to context pin
Okay!

Commit: drm/i915: Record the sseu configuration per-context & engine
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3546:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3560:16: warning: expression using sizeof(void)

Commit: drm/i915/perf: lock powergating configuration to default when active
Okay!

Commit: drm/i915: Add timeline barrier support
Okay!

Commit: drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
+drivers/gpu/drm/i915/intel_lrc.c:2397:25: warning: expression using sizeof(void)

Commit: drm/i915/selftests: Context SSEU reconfiguration tests
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:1220:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:1220:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:1220:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:1220:25: warning: expression using sizeof(void)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* ✓ Fi.CI.BAT: success for Per context dynamic (sub)slice power-gating (rev11)
  2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (7 preceding siblings ...)
  2019-01-08 15:40 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-01-08 16:13 ` Patchwork
  2019-01-08 18:38 ` ✗ Fi.CI.CHECKPATCH: warning for Per context dynamic (sub)slice power-gating (rev12) Patchwork
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 25+ messages in thread
From: Patchwork @ 2019-01-08 16:13 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev11)
URL   : https://patchwork.freedesktop.org/series/48194/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5378 -> Patchwork_11254
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/48194/revisions/11/

Known issues
------------

  Here are the changes found in Patchwork_11254 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@amdgpu/amd_basic@cs-compute:
    - fi-kbl-8809g:       NOTRUN -> FAIL [fdo#108094]

  * igt@amdgpu/amd_prime@amd-to-i915:
    - fi-kbl-8809g:       NOTRUN -> FAIL [fdo#107341]

  * igt@i915_module_load@reload:
    - fi-kbl-7567u:       PASS -> DMESG-WARN [fdo#103558] / [fdo#105602]

  * igt@i915_module_load@reload-with-fault-injection:
    - fi-kbl-7567u:       PASS -> DMESG-WARN [fdo#105602]

  * igt@i915_selftest@live_hangcheck:
    - fi-bwr-2160:        PASS -> DMESG-FAIL [fdo#108735]

  * igt@prime_vgem@basic-fence-flip:
    - fi-gdg-551:         PASS -> FAIL [fdo#103182]

  
#### Possible fixes ####

  * igt@amdgpu/amd_basic@userptr:
    - fi-kbl-8809g:       DMESG-WARN [fdo#108965] -> PASS

  * igt@gem_exec_suspend@basic-s4-devices:
    - fi-bsw-kefka:       DMESG-WARN -> PASS
    - fi-blb-e6850:       INCOMPLETE [fdo#107718] -> PASS

  * igt@i915_selftest@live_hangcheck:
    - fi-apl-guc:         DMESG-FAIL [fdo#109228] -> PASS

  * igt@kms_frontbuffer_tracking@basic:
    - fi-byt-clapper:     FAIL [fdo#103167] -> PASS

  * igt@kms_pipe_crc_basic@hang-read-crc-pipe-a:
    - fi-byt-clapper:     FAIL [fdo#103191] / [fdo#107362] -> PASS

  * igt@kms_psr@sprite_plane_onoff:
    - fi-skl-6700hq:      FAIL [fdo#107383] -> PASS +3

  
#### Warnings ####

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
    - fi-byt-clapper:     INCOMPLETE [fdo#102657] -> FAIL [fdo#103191] / [fdo#107362]

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#102657]: https://bugs.freedesktop.org/show_bug.cgi?id=102657
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103182]: https://bugs.freedesktop.org/show_bug.cgi?id=103182
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#103558]: https://bugs.freedesktop.org/show_bug.cgi?id=103558
  [fdo#105602]: https://bugs.freedesktop.org/show_bug.cgi?id=105602
  [fdo#107341]: https://bugs.freedesktop.org/show_bug.cgi?id=107341
  [fdo#107362]: https://bugs.freedesktop.org/show_bug.cgi?id=107362
  [fdo#107383]: https://bugs.freedesktop.org/show_bug.cgi?id=107383
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#108094]: https://bugs.freedesktop.org/show_bug.cgi?id=108094
  [fdo#108735]: https://bugs.freedesktop.org/show_bug.cgi?id=108735
  [fdo#108915]: https://bugs.freedesktop.org/show_bug.cgi?id=108915
  [fdo#108965]: https://bugs.freedesktop.org/show_bug.cgi?id=108965
  [fdo#109228]: https://bugs.freedesktop.org/show_bug.cgi?id=109228
  [fdo#109241]: https://bugs.freedesktop.org/show_bug.cgi?id=109241


Participating hosts (48 -> 45)
------------------------------

  Additional (1): fi-icl-y 
  Missing    (4): fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-glk-j4005 


Build changes
-------------

    * Linux: CI_DRM_5378 -> Patchwork_11254

  CI_DRM_5378: 96b07848e43c024bd6a5a44970371c4866140a1c @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4756: 75081c6bfb9998bd7cbf35a7ac0578c683fe55a8 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_11254: 93a3f1fd397c89dbdf32aa9ed034bd1d2550a735 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

93a3f1fd397c drm/i915/selftests: Context SSEU reconfiguration tests
d9a1551457de drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
f2b12bcf0ca7 drm/i915: Add timeline barrier support
37c8e6fcbba7 drm/i915/perf: lock powergating configuration to default when active
961bc57575da drm/i915: Record the sseu configuration per-context & engine
3350b5e6ed18 drm/i915/execlists: Move RPCS setup to context pin

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_11254/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH v24 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
  2019-01-08 15:12 ` [PATCH 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only) Tvrtko Ursulin
@ 2019-01-08 16:29   ` Tvrtko Ursulin
  2019-01-11 14:22     ` Tvrtko Ursulin
  2019-01-14  9:39     ` [PATCH v25 " Tvrtko Ursulin
  0 siblings, 2 replies; 25+ messages in thread
From: Tvrtko Ursulin @ 2019-01-08 16:29 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We want to allow userspace to reconfigure the subslice configuration on a
per context basis.

This is required for the functional requirement of shutting down non-VME
enabled sub-slices on Gen11 parts.

To do so, we expose a context parameter to allow adjustment of the RPCS
register stored within the context image (and currently not accessible via
LRI).

If the context is adjusted before first use or whilst idle, the adjustment
is for "free"; otherwise if the context is active we queue a request to do
so (using the kernel context), following all other activity by that
context, which is also marked as barrier for all following submission
against the same context.

Since the overhead of device re-configuration during context switching can
be significant, especially in multi-context workloads, we limit this new
uAPI to only support the Gen11 VME use case. In this use case either the
device is fully enabled, and exactly one slice and half of the subslices
are enabled.

Example usage:

	struct drm_i915_gem_context_param_sseu sseu = { };
	struct drm_i915_gem_context_param arg =
		{ .param = I915_CONTEXT_PARAM_SSEU,
		  .ctx_id = gem_context_create(fd),
		  .size = sizeof(sseu),
		  .value = to_user_pointer(&sseu)
		};

	/* Query device defaults. */
	gem_context_get_param(fd, &arg);

	/* Set VME configuration on a 1x6x8 part. */
	sseu.slice_mask = 0x1;
	sseu.subslice_mask = 0xe0;
	gem_context_set_param(fd, &arg);

v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)

v3: Add ability to program this per engine (Chris)

v4: Move most get_sseu() into i915_gem_context.c (Lionel)

v5: Validate sseu configuration against the device's capabilities (Lionel)

v6: Change context powergating settings through MI_SDM on kernel context (Chris)

v7: Synchronize the requests following a powergating setting change using a global
    dependency (Chris)
    Iterate timelines through dev_priv.gt.active_rings (Tvrtko)
    Disable RPCS configuration setting for non capable users (Lionel/Tvrtko)

v8: s/union intel_sseu/struct intel_sseu/ (Lionel)
    s/dev_priv/i915/ (Tvrtko)
    Change uapi class/instance fields to u16 (Tvrtko)
    Bump mask fields to 64bits (Lionel)
    Don't return EPERM when dynamic sseu is disabled (Tvrtko)

v9: Import context image into kernel context's ppgtt only when
    reconfiguring powergated slice/subslices (Chris)
    Use aliasing ppgtt when needed (Michel)

Tvrtko Ursulin:

v10:
 * Update for upstream changes.
 * Request submit needs a RPM reference.
 * Reject on !FULL_PPGTT for simplicity.
 * Pull out get/set param to helpers for readability and less indent.
 * Use i915_request_await_dma_fence in add_global_barrier to skip waits
   on the same timeline and avoid GEM_BUG_ON.
 * No need to explicitly assign a NULL pointer to engine in legacy mode.
 * No need to move gen8_make_rpcs up.
 * Factored out global barrier as prep patch.
 * Allow to only CAP_SYS_ADMIN if !Gen11.

v11:
 * Remove engine vfunc in favour of local helper. (Chris Wilson)
 * Stop retiring requests before updates since it is not needed
   (Chris Wilson)
 * Implement direct CPU update path for idle contexts. (Chris Wilson)
 * Left side dependency needs only be on the same context timeline.
   (Chris Wilson)
 * It is sufficient to order the timeline. (Chris Wilson)
 * Reject !RCS configuration attempts with -ENODEV for now.

v12:
 * Rebase for make_rpcs.

v13:
 * Centralize SSEU normalization to make_rpcs.
 * Type width checking (uAPI <-> implementation).
 * Gen11 restrictions uAPI checks.
 * Gen11 subslice count differences handling.
 Chris Wilson:
 * args->size handling fixes.
 * Update context image from GGTT.
 * Postpone context image update to pinning.
 * Use i915_gem_active_raw instead of last_request_on_engine.

v14:
 * Add activity tracker on intel_context to fix the lifetime issues
   and simplify the code. (Chris Wilson)

v15:
 * Fix context pin leak if no space in ring by simplifying the
   context pinning sequence.

v16:
 * Rebase for context get/set param locking changes.
 * Just -ENODEV on !Gen11. (Joonas)

v17:
 * Fix one Gen11 subslice enablement rule.
 * Handle error from i915_sw_fence_await_sw_fence_gfp. (Chris Wilson)

v18:
 * Update commit message. (Joonas)
 * Restrict uAPI to VME use case. (Joonas)

v19:
 * Rebase.

v20:
 * Rebase for ce->active_tracker.

v21:
 * Rebase for IS_GEN changes.

v22:
 * Reserve uAPI for flags straight away. (Chris Wilson)

v23:
 * Rebase for RUNTIME_INFO.

v24:
 * Added some headline docs for the uapi usage. (Joonas/Chris)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107634
Issue: https://github.com/intel/media-driver/issues/267
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Zhipeng Gong <zhipeng.gong@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> # v21
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 341 +++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context.h |   6 +
 drivers/gpu/drm/i915/intel_lrc.c        |   4 +-
 include/uapi/drm/i915_drm.h             |  64 +++++
 4 files changed, 412 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index a565643e9a26..1ab7d6980c36 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,6 +90,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include "intel_lrc_reg.h"
 #include "intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -322,6 +323,15 @@ static u32 default_desc_template(const struct drm_i915_private *i915,
 	return desc;
 }
 
+static void intel_context_retire(struct i915_gem_active *active,
+				 struct i915_request *rq)
+{
+	struct intel_context *ce =
+		container_of(active, typeof(*ce), active_tracker);
+
+	intel_context_unpin(ce);
+}
+
 static struct i915_gem_context *
 __create_hw_context(struct drm_i915_private *dev_priv,
 		    struct drm_i915_file_private *file_priv)
@@ -345,6 +355,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 		ce->gem_context = ctx;
 		/* Use the whole device by default */
 		ce->sseu = intel_device_default_sseu(dev_priv);
+
+		init_request_active(&ce->active_tracker, intel_context_retire);
 	}
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
@@ -842,6 +854,56 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+static int get_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_engine_cs *engine;
+	struct intel_context *ce;
+	int ret;
+
+	if (args->size == 0)
+		goto out;
+	else if (args->size < sizeof(user_sseu))
+		return -EINVAL;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.flags || user_sseu.rsvd)
+		return -EINVAL;
+
+	engine = intel_engine_lookup_user(ctx->i915,
+					  user_sseu.class,
+					  user_sseu.instance);
+	if (!engine)
+		return -EINVAL;
+
+	/* Only use for mutex here is to serialize get_param and set_param. */
+	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	ce = to_intel_context(ctx, engine);
+
+	user_sseu.slice_mask = ce->sseu.slice_mask;
+	user_sseu.subslice_mask = ce->sseu.subslice_mask;
+	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
+	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
+
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
+			 sizeof(user_sseu)))
+		return -EFAULT;
+
+out:
+	args->size = sizeof(user_sseu);
+
+	return 0;
+}
+
 int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
@@ -854,15 +916,17 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	if (!ctx)
 		return -ENOENT;
 
-	args->size = 0;
 	switch (args->param) {
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 		ret = -EINVAL;
 		break;
 	case I915_CONTEXT_PARAM_NO_ZEROMAP:
+		args->size = 0;
 		args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
 		break;
 	case I915_CONTEXT_PARAM_GTT_SIZE:
+		args->size = 0;
+
 		if (ctx->ppgtt)
 			args->value = ctx->ppgtt->vm.total;
 		else if (to_i915(dev)->mm.aliasing_ppgtt)
@@ -871,14 +935,20 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 			args->value = to_i915(dev)->ggtt.vm.total;
 		break;
 	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+		args->size = 0;
 		args->value = i915_gem_context_no_error_capture(ctx);
 		break;
 	case I915_CONTEXT_PARAM_BANNABLE:
+		args->size = 0;
 		args->value = i915_gem_context_is_bannable(ctx);
 		break;
 	case I915_CONTEXT_PARAM_PRIORITY:
+		args->size = 0;
 		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
 		break;
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = get_sseu(ctx, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -888,6 +958,271 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+static int gen8_emit_rpcs_config(struct i915_request *rq,
+				 struct intel_context *ce,
+				 struct intel_sseu sseu)
+{
+	u64 offset;
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	offset = ce->state->node.start +
+		LRC_STATE_PN * PAGE_SIZE +
+		(CTX_R_PWR_CLK_STATE + 1) * 4;
+
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	*cs++ = lower_32_bits(offset);
+	*cs++ = upper_32_bits(offset);
+	*cs++ = gen8_make_rpcs(rq->i915, &sseu);
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int
+gen8_modify_rpcs_gpu(struct intel_context *ce,
+		     struct intel_engine_cs *engine,
+		     struct intel_sseu sseu)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct i915_request *rq, *prev;
+	int ret;
+
+	GEM_BUG_ON(!ce->pin_count);
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	/* Submitting requests etc needs the hw awake. */
+	intel_runtime_pm_get(i915);
+
+	rq = i915_request_alloc(engine, i915->kernel_context);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		goto out_put;
+	}
+
+	/* Queue this switch after all other activity by this context. */
+	prev = i915_gem_active_raw(&ce->ring->timeline->last_request,
+				   &i915->drm.struct_mutex);
+	if (prev && !i915_request_completed(prev)) {
+		ret = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+						       &prev->submit,
+						       I915_FENCE_GFP);
+		if (ret < 0)
+			goto out_add;
+	}
+
+	ret = gen8_emit_rpcs_config(rq, ce, sseu);
+	if (ret)
+		goto out_add;
+
+	/* Order all following requests to be after. */
+	i915_timeline_set_barrier(ce->ring->timeline, rq);
+
+	/*
+	 * Guarantee context image and the timeline remains pinned until the
+	 * modifying request is retired by setting the ce activity tracker.
+	 *
+	 * But we only need to take one pin on the account of it. Or in other
+	 * words transfer the pinned ce object to tracked active request.
+	 */
+	if (!i915_gem_active_isset(&ce->active_tracker))
+		__intel_context_pin(ce);
+	i915_gem_active_set(&ce->active_tracker, rq);
+
+out_add:
+	i915_request_add(rq);
+out_put:
+	intel_runtime_pm_put(i915);
+
+	return ret;
+}
+
+static int
+i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
+				  struct intel_engine_cs *engine,
+				  struct intel_sseu sseu)
+{
+	struct intel_context *ce = to_intel_context(ctx, engine);
+	int ret;
+
+	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
+	GEM_BUG_ON(engine->id != RCS);
+
+	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	/* Nothing to do if unmodified. */
+	if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
+		goto out;
+
+	/*
+	 * If context is not idle we have to submit an ordered request to modify
+	 * its context image via the kernel context. Pristine and idle contexts
+	 * will be configured on pinning.
+	 */
+	if (ce->pin_count)
+		ret = gen8_modify_rpcs_gpu(ce, engine, sseu);
+
+	if (!ret)
+		ce->sseu = sseu;
+
+out:
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	return ret;
+}
+
+static int
+user_to_context_sseu(struct drm_i915_private *i915,
+		     const struct drm_i915_gem_context_param_sseu *user,
+		     struct intel_sseu *context)
+{
+	const struct sseu_dev_info *device = &RUNTIME_INFO(i915)->sseu;
+
+	/* No zeros in any field. */
+	if (!user->slice_mask || !user->subslice_mask ||
+	    !user->min_eus_per_subslice || !user->max_eus_per_subslice)
+		return -EINVAL;
+
+	/* Max > min. */
+	if (user->max_eus_per_subslice < user->min_eus_per_subslice)
+		return -EINVAL;
+
+	/* Check validity against hardware. */
+	if (user->slice_mask & ~device->slice_mask)
+		return -EINVAL;
+
+	if (user->subslice_mask & ~device->subslice_mask[0])
+		return -EINVAL;
+
+	if (user->max_eus_per_subslice > device->max_eus_per_subslice)
+		return -EINVAL;
+
+	/*
+	 * Some future proofing on the types since the uAPI is wider than the
+	 * current internal implementation.
+	 */
+	if (WARN_ON((fls(user->slice_mask) >
+		     sizeof(context->slice_mask) * BITS_PER_BYTE) ||
+		    (fls(user->subslice_mask) >
+		     sizeof(context->subslice_mask) * BITS_PER_BYTE) ||
+		    overflows_type(user->min_eus_per_subslice,
+				   context->min_eus_per_subslice) ||
+		    overflows_type(user->max_eus_per_subslice,
+				   context->max_eus_per_subslice)))
+		return -EINVAL;
+
+	context->slice_mask = user->slice_mask;
+	context->subslice_mask = user->subslice_mask;
+	context->min_eus_per_subslice = user->min_eus_per_subslice;
+	context->max_eus_per_subslice = user->max_eus_per_subslice;
+
+	/* Part specific restrictions. */
+	if (IS_GEN(i915, 11)) {
+		unsigned int hw_s = hweight8(device->slice_mask);
+		unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
+		unsigned int req_s = hweight8(context->slice_mask);
+		unsigned int req_ss = hweight8(context->subslice_mask);
+
+		/*
+		 * Only full subslice enablement is possible if more than one
+		 * slice is turned on.
+		 */
+		if (req_s > 1 && req_ss != hw_ss_per_s)
+			return -EINVAL;
+
+		/*
+		 * If more than four (SScount bitfield limit) subslices are
+		 * requested then the number has to be even.
+		 */
+		if (req_ss > 4 && (req_ss & 1))
+			return -EINVAL;
+
+		/*
+		 * If only one slice is enabled and subslice count is below the
+		 * device full enablement, it must be at most half of the all
+		 * available subslices.
+		 */
+		if (req_s == 1 && req_ss < hw_ss_per_s &&
+		    req_ss > (hw_ss_per_s / 2))
+			return -EINVAL;
+
+		/* ABI restriction - VME use case only. */
+
+		/* All slices or one slice only. */
+		if (req_s != 1 && req_s != hw_s)
+			return -EINVAL;
+
+		/*
+		 * Half subslices or full enablement only when one slice is
+		 * enabled.
+		 */
+		if (req_s == 1 &&
+		    (req_ss != hw_ss_per_s && req_ss != (hw_ss_per_s / 2)))
+			return -EINVAL;
+
+		/* No EU configuration changes. */
+		if ((user->min_eus_per_subslice !=
+		     device->max_eus_per_subslice) ||
+		    (user->max_eus_per_subslice !=
+		     device->max_eus_per_subslice))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int set_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_engine_cs *engine;
+	struct intel_sseu sseu;
+	int ret;
+
+	if (args->size < sizeof(user_sseu))
+		return -EINVAL;
+
+	if (!IS_GEN(i915, 11))
+		return -ENODEV;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.flags || user_sseu.rsvd)
+		return -EINVAL;
+
+	engine = intel_engine_lookup_user(i915,
+					  user_sseu.class,
+					  user_sseu.instance);
+	if (!engine)
+		return -EINVAL;
+
+	/* Only render engine supports RPCS configuration. */
+	if (engine->class != RENDER_CLASS)
+		return -ENODEV;
+
+	ret = user_to_context_sseu(i915, &user_sseu, &sseu);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	if (ret)
+		return ret;
+
+	args->size = sizeof(user_sseu);
+
+	return 0;
+}
+
 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
@@ -950,7 +1285,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 					I915_USER_PRIORITY(priority);
 		}
 		break;
-
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = set_sseu(ctx, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index ef04e422cf9a..2ded4b8c9b9f 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -171,6 +171,12 @@ struct i915_gem_context {
 		u64 lrc_desc;
 		int pin_count;
 
+		/**
+		 * active_tracker: Active tracker for the external rq activity
+		 * on this intel_context object.
+		 */
+		struct i915_gem_active active_tracker;
+
 		const struct intel_context_ops *ops;
 
 		/** sseu: Control eu/slice partitioning */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index bca09a497f27..e9c6876a5897 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2392,7 +2392,9 @@ u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
 	 * subslices are enabled, or a count between one and four on the first
 	 * slice.
 	 */
-	if (IS_GEN(i915, 11) && slices == 1 && subslices >= 4) {
+	if (IS_GEN(i915, 11) &&
+	    slices == 1 &&
+	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
 		GEM_BUG_ON(subslices & 1);
 
 		subslice_pg = false;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 298b2e197744..55dcbbf54cb9 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1486,9 +1486,73 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+	/*
+	 * When using the following param, value should be a pointer to
+	 * drm_i915_gem_context_param_sseu.
+	 */
+#define I915_CONTEXT_PARAM_SSEU		0x7
 	__u64 value;
 };
 
+/**
+ * Context SSEU programming
+ *
+ * It may be necessary for either functional or performance reason to configure
+ * a context to run with a reduced number of SSEU (where SSEU stands for Slice/
+ * Sub-slice/EU).
+ *
+ * This is done by configuring SSEU configuration using the below
+ * @struct drm_i915_gem_context_param_sseu for every supported engine which
+ * userspace intends to use.
+ *
+ * Not all GPUs or engines support this functionality in which case an error
+ * code -ENODEV will be returned.
+ *
+ * Also, flexibility of possible SSEU configuration permutations varies between
+ * GPU generations and software imposed limitations. Requesting such a
+ * combination will return an error code of -EINVAL.
+ *
+ * NOTE: When perf/OA is active the context's SSEU configuration is ignored in
+ * favour of a single global setting.
+ */
+struct drm_i915_gem_context_param_sseu {
+	/*
+	 * Engine class & instance to be configured or queried.
+	 */
+	__u16 class;
+	__u16 instance;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 flags;
+
+	/*
+	 * Mask of slices to enable for the context. Valid values are a subset
+	 * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+	 */
+	__u64 slice_mask;
+
+	/*
+	 * Mask of subslices to enable for the context. Valid values are a
+	 * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK.
+	 */
+	__u64 subslice_mask;
+
+	/*
+	 * Minimum/Maximum number of EUs to enable per subslice for the
+	 * context. min_eus_per_subslice must be inferior or equal to
+	 * max_eus_per_subslice.
+	 */
+	__u16 min_eus_per_subslice;
+	__u16 max_eus_per_subslice;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd;
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for Per context dynamic (sub)slice power-gating (rev12)
  2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (8 preceding siblings ...)
  2019-01-08 16:13 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-01-08 18:38 ` Patchwork
  2019-01-08 18:40 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 25+ messages in thread
From: Patchwork @ 2019-01-08 18:38 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev12)
URL   : https://patchwork.freedesktop.org/series/48194/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
c919f2ba4d24 drm/i915/execlists: Move RPCS setup to context pin
65954284c0af drm/i915: Record the sseu configuration per-context & engine
557c7dad1ad9 drm/i915/perf: lock powergating configuration to default when active
-:72: WARNING:BAD_SIGN_OFF: 'Co-developed-by:' is the preferred signature form
#72: 
Co-Developed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

total: 0 errors, 1 warnings, 0 checks, 126 lines checked
823fb1c4bf8f drm/i915: Add timeline barrier support
509b28e8038e drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
-:47: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#47: 
v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)

-:509: CHECK:UNNECESSARY_PARENTHESES: Unnecessary parentheses around 'user->min_eus_per_subslice !=
 		     device->max_eus_per_subslice'
#509: FILE: drivers/gpu/drm/i915/i915_gem_context.c:1171:
+		if ((user->min_eus_per_subslice !=
+		     device->max_eus_per_subslice) ||
+		    (user->max_eus_per_subslice !=
+		     device->max_eus_per_subslice))

-:509: CHECK:UNNECESSARY_PARENTHESES: Unnecessary parentheses around 'user->max_eus_per_subslice !=
 		     device->max_eus_per_subslice'
#509: FILE: drivers/gpu/drm/i915/i915_gem_context.c:1171:
+		if ((user->min_eus_per_subslice !=
+		     device->max_eus_per_subslice) ||
+		    (user->max_eus_per_subslice !=
+		     device->max_eus_per_subslice))

total: 0 errors, 1 warnings, 2 checks, 500 lines checked
625fca5d7b43 drm/i915/selftests: Context SSEU reconfiguration tests
-:407: WARNING:UNNECESSARY_ELSE: else is not generally useful after a break or return
#407: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:961:
+			return -EINVAL;
+		} else {

-:414: CHECK:BRACES: Blank lines aren't necessary before a close brace '}'
#414: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:968:
+
+}

total: 0 errors, 1 warnings, 1 checks, 550 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* ✗ Fi.CI.SPARSE: warning for Per context dynamic (sub)slice power-gating (rev12)
  2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (9 preceding siblings ...)
  2019-01-08 18:38 ` ✗ Fi.CI.CHECKPATCH: warning for Per context dynamic (sub)slice power-gating (rev12) Patchwork
@ 2019-01-08 18:40 ` Patchwork
  2019-01-08 19:00 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 25+ messages in thread
From: Patchwork @ 2019-01-08 18:40 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev12)
URL   : https://patchwork.freedesktop.org/series/48194/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915/execlists: Move RPCS setup to context pin
Okay!

Commit: drm/i915: Record the sseu configuration per-context & engine
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3546:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3560:16: warning: expression using sizeof(void)

Commit: drm/i915/perf: lock powergating configuration to default when active
Okay!

Commit: drm/i915: Add timeline barrier support
Okay!

Commit: drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
+drivers/gpu/drm/i915/intel_lrc.c:2397:25: warning: expression using sizeof(void)

Commit: drm/i915/selftests: Context SSEU reconfiguration tests
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:1220:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:1220:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:1220:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:1220:25: warning: expression using sizeof(void)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* ✓ Fi.CI.BAT: success for Per context dynamic (sub)slice power-gating (rev12)
  2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (10 preceding siblings ...)
  2019-01-08 18:40 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-01-08 19:00 ` Patchwork
  2019-01-09  8:12 ` ✗ Fi.CI.IGT: failure " Patchwork
  2019-01-14 12:32 ` ✗ Fi.CI.BAT: failure for Per context dynamic (sub)slice power-gating (rev13) Patchwork
  13 siblings, 0 replies; 25+ messages in thread
From: Patchwork @ 2019-01-08 19:00 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev12)
URL   : https://patchwork.freedesktop.org/series/48194/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5379 -> Patchwork_11256
====================================================

Summary
-------

  **WARNING**

  Minor unknown changes coming with Patchwork_11256 need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_11256, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/48194/revisions/12/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_11256:

### IGT changes ###

#### Warnings ####

  * igt@pm_rpm@basic-pci-d3-state:
    - fi-byt-j1900:       SKIP -> PASS

  
Known issues
------------

  Here are the changes found in Patchwork_11256 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - fi-kbl-7567u:       PASS -> INCOMPLETE [fdo#103665]

  
#### Possible fixes ####

  * igt@gem_exec_basic@gtt-bsd2:
    - fi-skl-6770hq:      DMESG-WARN [fdo#105541] -> PASS

  * igt@i915_selftest@live_execlists:
    - fi-apl-guc:         INCOMPLETE [fdo#103927] -> PASS

  * igt@i915_selftest@live_hangcheck:
    - fi-bwr-2160:        DMESG-FAIL [fdo#108735] -> PASS

  * igt@kms_frontbuffer_tracking@basic:
    - fi-hsw-peppy:       DMESG-WARN [fdo#102614] -> PASS

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - fi-byt-clapper:     FAIL [fdo#103191] / [fdo#107362] -> PASS

  * igt@pm_rpm@basic-rte:
    - fi-byt-j1900:       FAIL [fdo#108800] -> PASS

  
  [fdo#102614]: https://bugs.freedesktop.org/show_bug.cgi?id=102614
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#105541]: https://bugs.freedesktop.org/show_bug.cgi?id=105541
  [fdo#107362]: https://bugs.freedesktop.org/show_bug.cgi?id=107362
  [fdo#108735]: https://bugs.freedesktop.org/show_bug.cgi?id=108735
  [fdo#108800]: https://bugs.freedesktop.org/show_bug.cgi?id=108800


Participating hosts (49 -> 45)
------------------------------

  Missing    (4): fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-bdw-samus 


Build changes
-------------

    * Linux: CI_DRM_5379 -> Patchwork_11256

  CI_DRM_5379: cae04ddb2142baf68abdf7855e28c00a34b721b7 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4756: 75081c6bfb9998bd7cbf35a7ac0578c683fe55a8 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_11256: 625fca5d7b43de1374e10cebeffd5a354cb57180 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

625fca5d7b43 drm/i915/selftests: Context SSEU reconfiguration tests
509b28e8038e drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
823fb1c4bf8f drm/i915: Add timeline barrier support
557c7dad1ad9 drm/i915/perf: lock powergating configuration to default when active
65954284c0af drm/i915: Record the sseu configuration per-context & engine
c919f2ba4d24 drm/i915/execlists: Move RPCS setup to context pin

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_11256/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* ✗ Fi.CI.IGT: failure for Per context dynamic (sub)slice power-gating (rev12)
  2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (11 preceding siblings ...)
  2019-01-08 19:00 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-01-09  8:12 ` Patchwork
  2019-01-14 12:32 ` ✗ Fi.CI.BAT: failure for Per context dynamic (sub)slice power-gating (rev13) Patchwork
  13 siblings, 0 replies; 25+ messages in thread
From: Patchwork @ 2019-01-09  8:12 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev12)
URL   : https://patchwork.freedesktop.org/series/48194/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_5379_full -> Patchwork_11256_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_11256_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_11256_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_11256_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_ctx_param@invalid-param-get:
    - shard-skl:          PASS -> FAIL
    - shard-apl:          PASS -> FAIL
    - shard-iclb:         PASS -> FAIL
    - shard-glk:          PASS -> FAIL
    - shard-hsw:          PASS -> FAIL
    - shard-kbl:          PASS -> FAIL

  
Known issues
------------

  Here are the changes found in Patchwork_11256_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_schedule@pi-ringfull-render:
    - shard-skl:          NOTRUN -> FAIL [fdo#103158]

  * igt@kms_atomic_transition@plane-all-transition:
    - shard-iclb:         PASS -> DMESG-WARN [fdo#107724] / [fdo#109225]

  * igt@kms_busy@extended-pageflip-hang-newfb-render-b:
    - shard-snb:          PASS -> INCOMPLETE [fdo#105411]

  * igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-b:
    - shard-iclb:         NOTRUN -> DMESG-WARN [fdo#107956]

  * igt@kms_color@pipe-b-ctm-max:
    - shard-apl:          PASS -> FAIL [fdo#108147]

  * igt@kms_cursor_crc@cursor-256x256-dpms:
    - shard-glk:          PASS -> FAIL [fdo#103232] +4

  * igt@kms_cursor_crc@cursor-256x85-onscreen:
    - shard-apl:          PASS -> FAIL [fdo#103232]

  * igt@kms_cursor_crc@cursor-64x21-random:
    - shard-iclb:         NOTRUN -> FAIL [fdo#103232]

  * igt@kms_draw_crc@draw-method-xrgb2101010-render-untiled:
    - shard-iclb:         PASS -> WARN [fdo#108336] +3

  * igt@kms_frontbuffer_tracking@fbc-1p-offscren-pri-shrfb-draw-blt:
    - shard-glk:          PASS -> FAIL [fdo#103167]

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-blt:
    - shard-iclb:         PASS -> DMESG-FAIL [fdo#107724] +5

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-spr-indfb-draw-mmap-wc:
    - shard-iclb:         PASS -> FAIL [fdo#103167] +4

  * igt@kms_frontbuffer_tracking@psr-1p-primscrn-cur-indfb-draw-pwrite:
    - shard-iclb:         PASS -> DMESG-WARN [fdo#107724] / [fdo#108336] +11

  * igt@kms_plane@pixel-format-pipe-a-planes-source-clamping:
    - shard-skl:          NOTRUN -> DMESG-WARN [fdo#106885] +1

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes:
    - shard-iclb:         PASS -> DMESG-FAIL [fdo#103166] / [fdo#107724]

  * igt@kms_plane_alpha_blend@pipe-a-alpha-7efc:
    - shard-skl:          NOTRUN -> FAIL [fdo#107815] / [fdo#108145]

  * igt@kms_plane_alpha_blend@pipe-a-alpha-basic:
    - shard-kbl:          NOTRUN -> FAIL [fdo#108145] / [fdo#108590]

  * igt@kms_plane_multiple@atomic-pipe-c-tiling-x:
    - shard-glk:          PASS -> FAIL [fdo#103166]

  * igt@kms_plane_multiple@atomic-pipe-c-tiling-y:
    - shard-iclb:         PASS -> FAIL [fdo#103166]

  * igt@kms_rotation_crc@multiplane-rotation-cropping-top:
    - shard-kbl:          NOTRUN -> DMESG-FAIL [fdo#108950]

  * igt@kms_setmode@basic:
    - shard-kbl:          PASS -> FAIL [fdo#99912]

  * igt@kms_vblank@pipe-a-ts-continuation-dpms-suspend:
    - shard-kbl:          PASS -> INCOMPLETE [fdo#103665]

  * igt@perf_pmu@event-wait-rcs0:
    - shard-iclb:         PASS -> DMESG-WARN [fdo#107724] +19

  * igt@perf_pmu@rc6-runtime-pm-long:
    - shard-skl:          PASS -> FAIL [fdo#105010]

  * igt@pm_rpm@reg-read-ioctl:
    - shard-skl:          PASS -> INCOMPLETE [fdo#107807]

  * igt@pm_rpm@system-suspend-execbuf:
    - shard-skl:          PASS -> INCOMPLETE [fdo#104108] / [fdo#107807]

  * igt@pm_rps@min-max-config-loaded:
    - shard-apl:          PASS -> FAIL [fdo#102250]

  
#### Possible fixes ####

  * igt@kms_atomic_transition@plane-all-transition-fencing:
    - shard-iclb:         DMESG-WARN [fdo#107724] / [fdo#109225] -> PASS

  * igt@kms_color@pipe-b-ctm-0-25:
    - shard-skl:          FAIL [fdo#108682] -> PASS

  * igt@kms_color@pipe-b-legacy-gamma:
    - shard-apl:          FAIL [fdo#104782] -> PASS

  * igt@kms_color@pipe-c-ctm-max:
    - shard-apl:          FAIL [fdo#108147] -> PASS

  * igt@kms_concurrent@pipe-b:
    - shard-iclb:         DMESG-WARN [fdo#107724] -> PASS +14

  * igt@kms_cursor_crc@cursor-256x256-onscreen:
    - shard-apl:          FAIL [fdo#103232] -> PASS

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-pwrite:
    - shard-iclb:         FAIL [fdo#103167] -> PASS +1

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-move:
    - shard-apl:          FAIL [fdo#103167] -> PASS

  * igt@kms_frontbuffer_tracking@psr-1p-primscrn-pri-indfb-draw-blt:
    - shard-iclb:         DMESG-WARN [fdo#107724] / [fdo#108336] -> PASS +3

  * igt@kms_plane@pixel-format-pipe-a-planes:
    - shard-apl:          FAIL [fdo#103166] -> PASS +1

  * igt@kms_plane_multiple@atomic-pipe-c-tiling-x:
    - shard-iclb:         FAIL [fdo#103166] -> PASS +1

  * igt@kms_rotation_crc@multiplane-rotation:
    - shard-iclb:         DMESG-FAIL [fdo#107724] -> PASS +5

  * igt@kms_sysfs_edid_timing:
    - shard-iclb:         FAIL [fdo#100047] -> PASS

  * igt@kms_universal_plane@universal-plane-pipe-a-functional:
    - shard-iclb:         DMESG-FAIL [fdo#103166] / [fdo#107724] -> PASS

  * igt@pm_rpm@debugfs-read:
    - shard-skl:          INCOMPLETE [fdo#107807] -> PASS

  * igt@pm_rpm@system-suspend-modeset:
    - shard-skl:          INCOMPLETE [fdo#104108] / [fdo#107807] -> PASS

  
#### Warnings ####

  * igt@gem_ppgtt@blt-vs-render-ctx0:
    - shard-skl:          TIMEOUT [fdo#108039] -> INCOMPLETE [fdo#106887]

  * igt@kms_ccs@pipe-c-crc-sprite-planes-basic:
    - shard-iclb:         FAIL [fdo#107725] -> DMESG-WARN [fdo#107724] / [fdo#108336]

  * igt@kms_cursor_crc@cursor-256x256-onscreen:
    - shard-iclb:         FAIL [fdo#103232] -> DMESG-WARN [fdo#107724] / [fdo#108336]

  * igt@kms_cursor_crc@cursor-64x64-onscreen:
    - shard-iclb:         DMESG-WARN [fdo#107724] / [fdo#108336] -> FAIL [fdo#103232]

  * igt@kms_fbcon_fbt@psr-suspend:
    - shard-iclb:         FAIL [fdo#107882] -> DMESG-FAIL [fdo#107724]

  * igt@pm_backlight@fade_with_suspend:
    - shard-iclb:         DMESG-FAIL [fdo#107724] -> FAIL [fdo#107847]

  
  [fdo#100047]: https://bugs.freedesktop.org/show_bug.cgi?id=100047
  [fdo#102250]: https://bugs.freedesktop.org/show_bug.cgi?id=102250
  [fdo#103158]: https://bugs.freedesktop.org/show_bug.cgi?id=103158
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#104782]: https://bugs.freedesktop.org/show_bug.cgi?id=104782
  [fdo#105010]: https://bugs.freedesktop.org/show_bug.cgi?id=105010
  [fdo#105411]: https://bugs.freedesktop.org/show_bug.cgi?id=105411
  [fdo#106885]: https://bugs.freedesktop.org/show_bug.cgi?id=106885
  [fdo#106887]: https://bugs.freedesktop.org/show_bug.cgi?id=106887
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#107725]: https://bugs.freedesktop.org/show_bug.cgi?id=107725
  [fdo#107807]: https://bugs.freedesktop.org/show_bug.cgi?id=107807
  [fdo#107815]: https://bugs.freedesktop.org/show_bug.cgi?id=107815
  [fdo#107847]: https://bugs.freedesktop.org/show_bug.cgi?id=107847
  [fdo#107882]: https://bugs.freedesktop.org/show_bug.cgi?id=107882
  [fdo#107956]: https://bugs.freedesktop.org/show_bug.cgi?id=107956
  [fdo#108039]: https://bugs.freedesktop.org/show_bug.cgi?id=108039
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108147]: https://bugs.freedesktop.org/show_bug.cgi?id=108147
  [fdo#108336]: https://bugs.freedesktop.org/show_bug.cgi?id=108336
  [fdo#108590]: https://bugs.freedesktop.org/show_bug.cgi?id=108590
  [fdo#108682]: https://bugs.freedesktop.org/show_bug.cgi?id=108682
  [fdo#108950]: https://bugs.freedesktop.org/show_bug.cgi?id=108950
  [fdo#109225]: https://bugs.freedesktop.org/show_bug.cgi?id=109225
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912


Participating hosts (7 -> 7)
------------------------------

  No changes in participating hosts


Build changes
-------------

    * Linux: CI_DRM_5379 -> Patchwork_11256

  CI_DRM_5379: cae04ddb2142baf68abdf7855e28c00a34b721b7 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4756: 75081c6bfb9998bd7cbf35a7ac0578c683fe55a8 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_11256: 625fca5d7b43de1374e10cebeffd5a354cb57180 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_11256/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v24 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
  2019-01-08 16:29   ` [PATCH v24 " Tvrtko Ursulin
@ 2019-01-11 14:22     ` Tvrtko Ursulin
  2019-01-14  9:39     ` [PATCH v25 " Tvrtko Ursulin
  1 sibling, 0 replies; 25+ messages in thread
From: Tvrtko Ursulin @ 2019-01-11 14:22 UTC (permalink / raw)
  To: Intel-gfx


On 08/01/2019 16:29, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> We want to allow userspace to reconfigure the subslice configuration on a
> per context basis.
> 
> This is required for the functional requirement of shutting down non-VME
> enabled sub-slices on Gen11 parts.
> 
> To do so, we expose a context parameter to allow adjustment of the RPCS
> register stored within the context image (and currently not accessible via
> LRI).
> 
> If the context is adjusted before first use or whilst idle, the adjustment
> is for "free"; otherwise if the context is active we queue a request to do
> so (using the kernel context), following all other activity by that
> context, which is also marked as barrier for all following submission
> against the same context.
> 
> Since the overhead of device re-configuration during context switching can
> be significant, especially in multi-context workloads, we limit this new
> uAPI to only support the Gen11 VME use case. In this use case either the
> device is fully enabled, and exactly one slice and half of the subslices
> are enabled.
> 
> Example usage:
> 
> 	struct drm_i915_gem_context_param_sseu sseu = { };
> 	struct drm_i915_gem_context_param arg =
> 		{ .param = I915_CONTEXT_PARAM_SSEU,
> 		  .ctx_id = gem_context_create(fd),
> 		  .size = sizeof(sseu),
> 		  .value = to_user_pointer(&sseu)
> 		};
> 
> 	/* Query device defaults. */
> 	gem_context_get_param(fd, &arg);
> 
> 	/* Set VME configuration on a 1x6x8 part. */
> 	sseu.slice_mask = 0x1;
> 	sseu.subslice_mask = 0xe0;
> 	gem_context_set_param(fd, &arg);
> 
> v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)
> 
> v3: Add ability to program this per engine (Chris)
> 
> v4: Move most get_sseu() into i915_gem_context.c (Lionel)
> 
> v5: Validate sseu configuration against the device's capabilities (Lionel)
> 
> v6: Change context powergating settings through MI_SDM on kernel context (Chris)
> 
> v7: Synchronize the requests following a powergating setting change using a global
>      dependency (Chris)
>      Iterate timelines through dev_priv.gt.active_rings (Tvrtko)
>      Disable RPCS configuration setting for non capable users (Lionel/Tvrtko)
> 
> v8: s/union intel_sseu/struct intel_sseu/ (Lionel)
>      s/dev_priv/i915/ (Tvrtko)
>      Change uapi class/instance fields to u16 (Tvrtko)
>      Bump mask fields to 64bits (Lionel)
>      Don't return EPERM when dynamic sseu is disabled (Tvrtko)
> 
> v9: Import context image into kernel context's ppgtt only when
>      reconfiguring powergated slice/subslices (Chris)
>      Use aliasing ppgtt when needed (Michel)
> 
> Tvrtko Ursulin:
> 
> v10:
>   * Update for upstream changes.
>   * Request submit needs a RPM reference.
>   * Reject on !FULL_PPGTT for simplicity.
>   * Pull out get/set param to helpers for readability and less indent.
>   * Use i915_request_await_dma_fence in add_global_barrier to skip waits
>     on the same timeline and avoid GEM_BUG_ON.
>   * No need to explicitly assign a NULL pointer to engine in legacy mode.
>   * No need to move gen8_make_rpcs up.
>   * Factored out global barrier as prep patch.
>   * Allow to only CAP_SYS_ADMIN if !Gen11.
> 
> v11:
>   * Remove engine vfunc in favour of local helper. (Chris Wilson)
>   * Stop retiring requests before updates since it is not needed
>     (Chris Wilson)
>   * Implement direct CPU update path for idle contexts. (Chris Wilson)
>   * Left side dependency needs only be on the same context timeline.
>     (Chris Wilson)
>   * It is sufficient to order the timeline. (Chris Wilson)
>   * Reject !RCS configuration attempts with -ENODEV for now.
> 
> v12:
>   * Rebase for make_rpcs.
> 
> v13:
>   * Centralize SSEU normalization to make_rpcs.
>   * Type width checking (uAPI <-> implementation).
>   * Gen11 restrictions uAPI checks.
>   * Gen11 subslice count differences handling.
>   Chris Wilson:
>   * args->size handling fixes.
>   * Update context image from GGTT.
>   * Postpone context image update to pinning.
>   * Use i915_gem_active_raw instead of last_request_on_engine.
> 
> v14:
>   * Add activity tracker on intel_context to fix the lifetime issues
>     and simplify the code. (Chris Wilson)
> 
> v15:
>   * Fix context pin leak if no space in ring by simplifying the
>     context pinning sequence.
> 
> v16:
>   * Rebase for context get/set param locking changes.
>   * Just -ENODEV on !Gen11. (Joonas)
> 
> v17:
>   * Fix one Gen11 subslice enablement rule.
>   * Handle error from i915_sw_fence_await_sw_fence_gfp. (Chris Wilson)
> 
> v18:
>   * Update commit message. (Joonas)
>   * Restrict uAPI to VME use case. (Joonas)
> 
> v19:
>   * Rebase.
> 
> v20:
>   * Rebase for ce->active_tracker.
> 
> v21:
>   * Rebase for IS_GEN changes.
> 
> v22:
>   * Reserve uAPI for flags straight away. (Chris Wilson)
> 
> v23:
>   * Rebase for RUNTIME_INFO.
> 
> v24:
>   * Added some headline docs for the uapi usage. (Joonas/Chris)
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107634
> Issue: https://github.com/intel/media-driver/issues/267
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Zhipeng Gong <zhipeng.gong@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> # v21
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c | 341 +++++++++++++++++++++++-
>   drivers/gpu/drm/i915/i915_gem_context.h |   6 +
>   drivers/gpu/drm/i915/intel_lrc.c        |   4 +-
>   include/uapi/drm/i915_drm.h             |  64 +++++
>   4 files changed, 412 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index a565643e9a26..1ab7d6980c36 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -90,6 +90,7 @@
>   #include <drm/i915_drm.h>
>   #include "i915_drv.h"
>   #include "i915_trace.h"
> +#include "intel_lrc_reg.h"
>   #include "intel_workarounds.h"
>   
>   #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
> @@ -322,6 +323,15 @@ static u32 default_desc_template(const struct drm_i915_private *i915,
>   	return desc;
>   }
>   
> +static void intel_context_retire(struct i915_gem_active *active,
> +				 struct i915_request *rq)
> +{
> +	struct intel_context *ce =
> +		container_of(active, typeof(*ce), active_tracker);
> +
> +	intel_context_unpin(ce);
> +}
> +
>   static struct i915_gem_context *
>   __create_hw_context(struct drm_i915_private *dev_priv,
>   		    struct drm_i915_file_private *file_priv)
> @@ -345,6 +355,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>   		ce->gem_context = ctx;
>   		/* Use the whole device by default */
>   		ce->sseu = intel_device_default_sseu(dev_priv);
> +
> +		init_request_active(&ce->active_tracker, intel_context_retire);
>   	}
>   
>   	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> @@ -842,6 +854,56 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>   	return 0;
>   }
>   
> +static int get_sseu(struct i915_gem_context *ctx,
> +		    struct drm_i915_gem_context_param *args)
> +{
> +	struct drm_i915_gem_context_param_sseu user_sseu;
> +	struct intel_engine_cs *engine;
> +	struct intel_context *ce;
> +	int ret;
> +
> +	if (args->size == 0)
> +		goto out;
> +	else if (args->size < sizeof(user_sseu))
> +		return -EINVAL;
> +
> +	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
> +			   sizeof(user_sseu)))
> +		return -EFAULT;
> +
> +	if (user_sseu.flags || user_sseu.rsvd)
> +		return -EINVAL;
> +
> +	engine = intel_engine_lookup_user(ctx->i915,
> +					  user_sseu.class,
> +					  user_sseu.instance);
> +	if (!engine)
> +		return -EINVAL;
> +
> +	/* Only use for mutex here is to serialize get_param and set_param. */
> +	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
> +	if (ret)
> +		return ret;
> +
> +	ce = to_intel_context(ctx, engine);
> +
> +	user_sseu.slice_mask = ce->sseu.slice_mask;
> +	user_sseu.subslice_mask = ce->sseu.subslice_mask;
> +	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
> +	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
> +
> +	mutex_unlock(&ctx->i915->drm.struct_mutex);
> +
> +	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
> +			 sizeof(user_sseu)))
> +		return -EFAULT;
> +
> +out:
> +	args->size = sizeof(user_sseu);
> +
> +	return 0;
> +}
> +
>   int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   				    struct drm_file *file)
>   {
> @@ -854,15 +916,17 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   	if (!ctx)
>   		return -ENOENT;
>   
> -	args->size = 0;
>   	switch (args->param) {
>   	case I915_CONTEXT_PARAM_BAN_PERIOD:
>   		ret = -EINVAL;
>   		break;
>   	case I915_CONTEXT_PARAM_NO_ZEROMAP:
> +		args->size = 0;
>   		args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
>   		break;
>   	case I915_CONTEXT_PARAM_GTT_SIZE:
> +		args->size = 0;
> +
>   		if (ctx->ppgtt)
>   			args->value = ctx->ppgtt->vm.total;
>   		else if (to_i915(dev)->mm.aliasing_ppgtt)
> @@ -871,14 +935,20 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   			args->value = to_i915(dev)->ggtt.vm.total;
>   		break;
>   	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
> +		args->size = 0;
>   		args->value = i915_gem_context_no_error_capture(ctx);
>   		break;
>   	case I915_CONTEXT_PARAM_BANNABLE:
> +		args->size = 0;
>   		args->value = i915_gem_context_is_bannable(ctx);
>   		break;
>   	case I915_CONTEXT_PARAM_PRIORITY:
> +		args->size = 0;
>   		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
>   		break;
> +	case I915_CONTEXT_PARAM_SSEU:
> +		ret = get_sseu(ctx, args);
> +		break;
>   	default:
>   		ret = -EINVAL;
>   		break;
> @@ -888,6 +958,271 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   	return ret;
>   }
>   
> +static int gen8_emit_rpcs_config(struct i915_request *rq,
> +				 struct intel_context *ce,
> +				 struct intel_sseu sseu)
> +{
> +	u64 offset;
> +	u32 *cs;
> +
> +	cs = intel_ring_begin(rq, 4);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	offset = ce->state->node.start +
> +		LRC_STATE_PN * PAGE_SIZE +
> +		(CTX_R_PWR_CLK_STATE + 1) * 4;
> +
> +	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> +	*cs++ = lower_32_bits(offset);
> +	*cs++ = upper_32_bits(offset);
> +	*cs++ = gen8_make_rpcs(rq->i915, &sseu);
> +
> +	intel_ring_advance(rq, cs);
> +
> +	return 0;
> +}
> +
> +static int
> +gen8_modify_rpcs_gpu(struct intel_context *ce,
> +		     struct intel_engine_cs *engine,
> +		     struct intel_sseu sseu)
> +{
> +	struct drm_i915_private *i915 = engine->i915;
> +	struct i915_request *rq, *prev;
> +	int ret;
> +
> +	GEM_BUG_ON(!ce->pin_count);
> +
> +	lockdep_assert_held(&i915->drm.struct_mutex);
> +
> +	/* Submitting requests etc needs the hw awake. */
> +	intel_runtime_pm_get(i915);
> +
> +	rq = i915_request_alloc(engine, i915->kernel_context);
> +	if (IS_ERR(rq)) {
> +		ret = PTR_ERR(rq);
> +		goto out_put;
> +	}
> +
> +	/* Queue this switch after all other activity by this context. */
> +	prev = i915_gem_active_raw(&ce->ring->timeline->last_request,
> +				   &i915->drm.struct_mutex);
> +	if (prev && !i915_request_completed(prev)) {
> +		ret = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
> +						       &prev->submit,
> +						       I915_FENCE_GFP);
> +		if (ret < 0)
> +			goto out_add;
> +	}
> +
> +	ret = gen8_emit_rpcs_config(rq, ce, sseu);
> +	if (ret)
> +		goto out_add;
> +
> +	/* Order all following requests to be after. */
> +	i915_timeline_set_barrier(ce->ring->timeline, rq);
> +
> +	/*
> +	 * Guarantee context image and the timeline remains pinned until the
> +	 * modifying request is retired by setting the ce activity tracker.
> +	 *
> +	 * But we only need to take one pin on the account of it. Or in other
> +	 * words transfer the pinned ce object to tracked active request.
> +	 */
> +	if (!i915_gem_active_isset(&ce->active_tracker))
> +		__intel_context_pin(ce);
> +	i915_gem_active_set(&ce->active_tracker, rq);
> +
> +out_add:
> +	i915_request_add(rq);
> +out_put:
> +	intel_runtime_pm_put(i915);
> +
> +	return ret;
> +}
> +
> +static int
> +i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
> +				  struct intel_engine_cs *engine,
> +				  struct intel_sseu sseu)
> +{
> +	struct intel_context *ce = to_intel_context(ctx, engine);
> +	int ret;
> +
> +	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
> +	GEM_BUG_ON(engine->id != RCS);
> +
> +	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
> +	if (ret)
> +		return ret;
> +
> +	/* Nothing to do if unmodified. */
> +	if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
> +		goto out;
> +
> +	/*
> +	 * If context is not idle we have to submit an ordered request to modify
> +	 * its context image via the kernel context. Pristine and idle contexts
> +	 * will be configured on pinning.
> +	 */
> +	if (ce->pin_count)
> +		ret = gen8_modify_rpcs_gpu(ce, engine, sseu);
> +
> +	if (!ret)
> +		ce->sseu = sseu;
> +
> +out:
> +	mutex_unlock(&ctx->i915->drm.struct_mutex);
> +
> +	return ret;
> +}
> +
> +static int
> +user_to_context_sseu(struct drm_i915_private *i915,
> +		     const struct drm_i915_gem_context_param_sseu *user,
> +		     struct intel_sseu *context)
> +{
> +	const struct sseu_dev_info *device = &RUNTIME_INFO(i915)->sseu;
> +
> +	/* No zeros in any field. */
> +	if (!user->slice_mask || !user->subslice_mask ||
> +	    !user->min_eus_per_subslice || !user->max_eus_per_subslice)
> +		return -EINVAL;
> +
> +	/* Max > min. */
> +	if (user->max_eus_per_subslice < user->min_eus_per_subslice)
> +		return -EINVAL;
> +
> +	/* Check validity against hardware. */
> +	if (user->slice_mask & ~device->slice_mask)
> +		return -EINVAL;
> +
> +	if (user->subslice_mask & ~device->subslice_mask[0])
> +		return -EINVAL;
> +
> +	if (user->max_eus_per_subslice > device->max_eus_per_subslice)
> +		return -EINVAL;
> +
> +	/*
> +	 * Some future proofing on the types since the uAPI is wider than the
> +	 * current internal implementation.
> +	 */
> +	if (WARN_ON((fls(user->slice_mask) >
> +		     sizeof(context->slice_mask) * BITS_PER_BYTE) ||
> +		    (fls(user->subslice_mask) >
> +		     sizeof(context->subslice_mask) * BITS_PER_BYTE) ||
> +		    overflows_type(user->min_eus_per_subslice,
> +				   context->min_eus_per_subslice) ||
> +		    overflows_type(user->max_eus_per_subslice,
> +				   context->max_eus_per_subslice)))
> +		return -EINVAL;
> +
> +	context->slice_mask = user->slice_mask;
> +	context->subslice_mask = user->subslice_mask;
> +	context->min_eus_per_subslice = user->min_eus_per_subslice;
> +	context->max_eus_per_subslice = user->max_eus_per_subslice;
> +
> +	/* Part specific restrictions. */
> +	if (IS_GEN(i915, 11)) {
> +		unsigned int hw_s = hweight8(device->slice_mask);
> +		unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
> +		unsigned int req_s = hweight8(context->slice_mask);
> +		unsigned int req_ss = hweight8(context->subslice_mask);
> +
> +		/*
> +		 * Only full subslice enablement is possible if more than one
> +		 * slice is turned on.
> +		 */
> +		if (req_s > 1 && req_ss != hw_ss_per_s)
> +			return -EINVAL;
> +
> +		/*
> +		 * If more than four (SScount bitfield limit) subslices are
> +		 * requested then the number has to be even.
> +		 */
> +		if (req_ss > 4 && (req_ss & 1))
> +			return -EINVAL;
> +
> +		/*
> +		 * If only one slice is enabled and subslice count is below the
> +		 * device full enablement, it must be at most half of the all
> +		 * available subslices.
> +		 */
> +		if (req_s == 1 && req_ss < hw_ss_per_s &&
> +		    req_ss > (hw_ss_per_s / 2))
> +			return -EINVAL;
> +
> +		/* ABI restriction - VME use case only. */
> +
> +		/* All slices or one slice only. */
> +		if (req_s != 1 && req_s != hw_s)
> +			return -EINVAL;
> +
> +		/*
> +		 * Half subslices or full enablement only when one slice is
> +		 * enabled.
> +		 */
> +		if (req_s == 1 &&
> +		    (req_ss != hw_ss_per_s && req_ss != (hw_ss_per_s / 2)))
> +			return -EINVAL;
> +
> +		/* No EU configuration changes. */
> +		if ((user->min_eus_per_subslice !=
> +		     device->max_eus_per_subslice) ||
> +		    (user->max_eus_per_subslice !=
> +		     device->max_eus_per_subslice))
> +			return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static int set_sseu(struct i915_gem_context *ctx,
> +		    struct drm_i915_gem_context_param *args)
> +{
> +	struct drm_i915_private *i915 = ctx->i915;
> +	struct drm_i915_gem_context_param_sseu user_sseu;
> +	struct intel_engine_cs *engine;
> +	struct intel_sseu sseu;
> +	int ret;
> +
> +	if (args->size < sizeof(user_sseu))
> +		return -EINVAL;
> +
> +	if (!IS_GEN(i915, 11))
> +		return -ENODEV;
> +
> +	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
> +			   sizeof(user_sseu)))
> +		return -EFAULT;
> +
> +	if (user_sseu.flags || user_sseu.rsvd)
> +		return -EINVAL;
> +
> +	engine = intel_engine_lookup_user(i915,
> +					  user_sseu.class,
> +					  user_sseu.instance);
> +	if (!engine)
> +		return -EINVAL;
> +
> +	/* Only render engine supports RPCS configuration. */
> +	if (engine->class != RENDER_CLASS)
> +		return -ENODEV;
> +
> +	ret = user_to_context_sseu(i915, &user_sseu, &sseu);
> +	if (ret)
> +		return ret;
> +
> +	ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
> +	if (ret)
> +		return ret;
> +
> +	args->size = sizeof(user_sseu);
> +
> +	return 0;
> +}
> +
>   int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   				    struct drm_file *file)
>   {
> @@ -950,7 +1285,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   					I915_USER_PRIORITY(priority);
>   		}
>   		break;
> -
> +	case I915_CONTEXT_PARAM_SSEU:
> +		ret = set_sseu(ctx, args);
> +		break;
>   	default:
>   		ret = -EINVAL;
>   		break;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index ef04e422cf9a..2ded4b8c9b9f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -171,6 +171,12 @@ struct i915_gem_context {
>   		u64 lrc_desc;
>   		int pin_count;
>   
> +		/**
> +		 * active_tracker: Active tracker for the external rq activity
> +		 * on this intel_context object.
> +		 */
> +		struct i915_gem_active active_tracker;
> +
>   		const struct intel_context_ops *ops;
>   
>   		/** sseu: Control eu/slice partitioning */
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index bca09a497f27..e9c6876a5897 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2392,7 +2392,9 @@ u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
>   	 * subslices are enabled, or a count between one and four on the first
>   	 * slice.
>   	 */
> -	if (IS_GEN(i915, 11) && slices == 1 && subslices >= 4) {
> +	if (IS_GEN(i915, 11) &&
> +	    slices == 1 &&
> +	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
>   		GEM_BUG_ON(subslices & 1);
>   
>   		subslice_pg = false;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 298b2e197744..55dcbbf54cb9 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1486,9 +1486,73 @@ struct drm_i915_gem_context_param {
>   #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
>   #define   I915_CONTEXT_DEFAULT_PRIORITY		0
>   #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
> +	/*
> +	 * When using the following param, value should be a pointer to
> +	 * drm_i915_gem_context_param_sseu.
> +	 */
> +#define I915_CONTEXT_PARAM_SSEU		0x7
>   	__u64 value;
>   };
>   
> +/**
> + * Context SSEU programming
> + *
> + * It may be necessary for either functional or performance reason to configure
> + * a context to run with a reduced number of SSEU (where SSEU stands for Slice/
> + * Sub-slice/EU).
> + *
> + * This is done by configuring SSEU configuration using the below
> + * @struct drm_i915_gem_context_param_sseu for every supported engine which
> + * userspace intends to use.
> + *
> + * Not all GPUs or engines support this functionality in which case an error
> + * code -ENODEV will be returned.
> + *
> + * Also, flexibility of possible SSEU configuration permutations varies between
> + * GPU generations and software imposed limitations. Requesting such a
> + * combination will return an error code of -EINVAL.
> + *
> + * NOTE: When perf/OA is active the context's SSEU configuration is ignored in
> + * favour of a single global setting.
> + */
> +struct drm_i915_gem_context_param_sseu {
> +	/*
> +	 * Engine class & instance to be configured or queried.
> +	 */
> +	__u16 class;

I forgot about one request to rename this field to engine_class to avoid 
C++ reserved keyword.

Any complaints against a respin to this effect?

It then makes sense to rename instance to engine_instance as well.

Regards,

Tvrtko

> +	__u16 instance;
> +
> +	/*
> +	 * Unused for now. Must be cleared to zero.
> +	 */
> +	__u32 flags;
> +
> +	/*
> +	 * Mask of slices to enable for the context. Valid values are a subset
> +	 * of the bitmask value returned for I915_PARAM_SLICE_MASK.
> +	 */
> +	__u64 slice_mask;
> +
> +	/*
> +	 * Mask of subslices to enable for the context. Valid values are a
> +	 * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK.
> +	 */
> +	__u64 subslice_mask;
> +
> +	/*
> +	 * Minimum/Maximum number of EUs to enable per subslice for the
> +	 * context. min_eus_per_subslice must be inferior or equal to
> +	 * max_eus_per_subslice.
> +	 */
> +	__u16 min_eus_per_subslice;
> +	__u16 max_eus_per_subslice;
> +
> +	/*
> +	 * Unused for now. Must be cleared to zero.
> +	 */
> +	__u32 rsvd;
> +};
> +
>   enum drm_i915_oa_format {
>   	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
>   	I915_OA_FORMAT_A29,	    /* HSW only */
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH v25 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)
  2019-01-08 16:29   ` [PATCH v24 " Tvrtko Ursulin
  2019-01-11 14:22     ` Tvrtko Ursulin
@ 2019-01-14  9:39     ` Tvrtko Ursulin
  1 sibling, 0 replies; 25+ messages in thread
From: Tvrtko Ursulin @ 2019-01-14  9:39 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We want to allow userspace to reconfigure the subslice configuration on a
per context basis.

This is required for the functional requirement of shutting down non-VME
enabled sub-slices on Gen11 parts.

To do so, we expose a context parameter to allow adjustment of the RPCS
register stored within the context image (and currently not accessible via
LRI).

If the context is adjusted before first use or whilst idle, the adjustment
is for "free"; otherwise if the context is active we queue a request to do
so (using the kernel context), following all other activity by that
context, which is also marked as barrier for all following submission
against the same context.

Since the overhead of device re-configuration during context switching can
be significant, especially in multi-context workloads, we limit this new
uAPI to only support the Gen11 VME use case. In this use case either the
device is fully enabled, and exactly one slice and half of the subslices
are enabled.

Example usage:

	struct drm_i915_gem_context_param_sseu sseu = { };
	struct drm_i915_gem_context_param arg =
		{ .param = I915_CONTEXT_PARAM_SSEU,
		  .ctx_id = gem_context_create(fd),
		  .size = sizeof(sseu),
		  .value = to_user_pointer(&sseu)
		};

	/* Query device defaults. */
	gem_context_get_param(fd, &arg);

	/* Set VME configuration on a 1x6x8 part. */
	sseu.slice_mask = 0x1;
	sseu.subslice_mask = 0xe0;
	gem_context_set_param(fd, &arg);

v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)

v3: Add ability to program this per engine (Chris)

v4: Move most get_sseu() into i915_gem_context.c (Lionel)

v5: Validate sseu configuration against the device's capabilities (Lionel)

v6: Change context powergating settings through MI_SDM on kernel context (Chris)

v7: Synchronize the requests following a powergating setting change using a global
    dependency (Chris)
    Iterate timelines through dev_priv.gt.active_rings (Tvrtko)
    Disable RPCS configuration setting for non capable users (Lionel/Tvrtko)

v8: s/union intel_sseu/struct intel_sseu/ (Lionel)
    s/dev_priv/i915/ (Tvrtko)
    Change uapi class/instance fields to u16 (Tvrtko)
    Bump mask fields to 64bits (Lionel)
    Don't return EPERM when dynamic sseu is disabled (Tvrtko)

v9: Import context image into kernel context's ppgtt only when
    reconfiguring powergated slice/subslices (Chris)
    Use aliasing ppgtt when needed (Michel)

Tvrtko Ursulin:

v10:
 * Update for upstream changes.
 * Request submit needs a RPM reference.
 * Reject on !FULL_PPGTT for simplicity.
 * Pull out get/set param to helpers for readability and less indent.
 * Use i915_request_await_dma_fence in add_global_barrier to skip waits
   on the same timeline and avoid GEM_BUG_ON.
 * No need to explicitly assign a NULL pointer to engine in legacy mode.
 * No need to move gen8_make_rpcs up.
 * Factored out global barrier as prep patch.
 * Allow to only CAP_SYS_ADMIN if !Gen11.

v11:
 * Remove engine vfunc in favour of local helper. (Chris Wilson)
 * Stop retiring requests before updates since it is not needed
   (Chris Wilson)
 * Implement direct CPU update path for idle contexts. (Chris Wilson)
 * Left side dependency needs only be on the same context timeline.
   (Chris Wilson)
 * It is sufficient to order the timeline. (Chris Wilson)
 * Reject !RCS configuration attempts with -ENODEV for now.

v12:
 * Rebase for make_rpcs.

v13:
 * Centralize SSEU normalization to make_rpcs.
 * Type width checking (uAPI <-> implementation).
 * Gen11 restrictions uAPI checks.
 * Gen11 subslice count differences handling.
 Chris Wilson:
 * args->size handling fixes.
 * Update context image from GGTT.
 * Postpone context image update to pinning.
 * Use i915_gem_active_raw instead of last_request_on_engine.

v14:
 * Add activity tracker on intel_context to fix the lifetime issues
   and simplify the code. (Chris Wilson)

v15:
 * Fix context pin leak if no space in ring by simplifying the
   context pinning sequence.

v16:
 * Rebase for context get/set param locking changes.
 * Just -ENODEV on !Gen11. (Joonas)

v17:
 * Fix one Gen11 subslice enablement rule.
 * Handle error from i915_sw_fence_await_sw_fence_gfp. (Chris Wilson)

v18:
 * Update commit message. (Joonas)
 * Restrict uAPI to VME use case. (Joonas)

v19:
 * Rebase.

v20:
 * Rebase for ce->active_tracker.

v21:
 * Rebase for IS_GEN changes.

v22:
 * Reserve uAPI for flags straight away. (Chris Wilson)

v23:
 * Rebase for RUNTIME_INFO.

v24:
 * Added some headline docs for the uapi usage. (Joonas/Chris)

v25:
 * Renamed class/instance to engine_class/engine_instance to avoid clash
   with C++ keyword. (Tony Ye)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107634
Issue: https://github.com/intel/media-driver/issues/267
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Zhipeng Gong <zhipeng.gong@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tony Ye <tony.ye@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> # v21
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 341 +++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context.h |   6 +
 drivers/gpu/drm/i915/intel_lrc.c        |   4 +-
 include/uapi/drm/i915_drm.h             |  64 +++++
 4 files changed, 412 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index a565643e9a26..ccb89384f247 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,6 +90,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include "intel_lrc_reg.h"
 #include "intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -322,6 +323,15 @@ static u32 default_desc_template(const struct drm_i915_private *i915,
 	return desc;
 }
 
+static void intel_context_retire(struct i915_gem_active *active,
+				 struct i915_request *rq)
+{
+	struct intel_context *ce =
+		container_of(active, typeof(*ce), active_tracker);
+
+	intel_context_unpin(ce);
+}
+
 static struct i915_gem_context *
 __create_hw_context(struct drm_i915_private *dev_priv,
 		    struct drm_i915_file_private *file_priv)
@@ -345,6 +355,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 		ce->gem_context = ctx;
 		/* Use the whole device by default */
 		ce->sseu = intel_device_default_sseu(dev_priv);
+
+		init_request_active(&ce->active_tracker, intel_context_retire);
 	}
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
@@ -842,6 +854,56 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+static int get_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_engine_cs *engine;
+	struct intel_context *ce;
+	int ret;
+
+	if (args->size == 0)
+		goto out;
+	else if (args->size < sizeof(user_sseu))
+		return -EINVAL;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.flags || user_sseu.rsvd)
+		return -EINVAL;
+
+	engine = intel_engine_lookup_user(ctx->i915,
+					  user_sseu.engine_class,
+					  user_sseu.engine_instance);
+	if (!engine)
+		return -EINVAL;
+
+	/* Only use for mutex here is to serialize get_param and set_param. */
+	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	ce = to_intel_context(ctx, engine);
+
+	user_sseu.slice_mask = ce->sseu.slice_mask;
+	user_sseu.subslice_mask = ce->sseu.subslice_mask;
+	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
+	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
+
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
+			 sizeof(user_sseu)))
+		return -EFAULT;
+
+out:
+	args->size = sizeof(user_sseu);
+
+	return 0;
+}
+
 int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
@@ -854,15 +916,17 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	if (!ctx)
 		return -ENOENT;
 
-	args->size = 0;
 	switch (args->param) {
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 		ret = -EINVAL;
 		break;
 	case I915_CONTEXT_PARAM_NO_ZEROMAP:
+		args->size = 0;
 		args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
 		break;
 	case I915_CONTEXT_PARAM_GTT_SIZE:
+		args->size = 0;
+
 		if (ctx->ppgtt)
 			args->value = ctx->ppgtt->vm.total;
 		else if (to_i915(dev)->mm.aliasing_ppgtt)
@@ -871,14 +935,20 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 			args->value = to_i915(dev)->ggtt.vm.total;
 		break;
 	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+		args->size = 0;
 		args->value = i915_gem_context_no_error_capture(ctx);
 		break;
 	case I915_CONTEXT_PARAM_BANNABLE:
+		args->size = 0;
 		args->value = i915_gem_context_is_bannable(ctx);
 		break;
 	case I915_CONTEXT_PARAM_PRIORITY:
+		args->size = 0;
 		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
 		break;
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = get_sseu(ctx, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -888,6 +958,271 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+static int gen8_emit_rpcs_config(struct i915_request *rq,
+				 struct intel_context *ce,
+				 struct intel_sseu sseu)
+{
+	u64 offset;
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	offset = ce->state->node.start +
+		LRC_STATE_PN * PAGE_SIZE +
+		(CTX_R_PWR_CLK_STATE + 1) * 4;
+
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	*cs++ = lower_32_bits(offset);
+	*cs++ = upper_32_bits(offset);
+	*cs++ = gen8_make_rpcs(rq->i915, &sseu);
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int
+gen8_modify_rpcs_gpu(struct intel_context *ce,
+		     struct intel_engine_cs *engine,
+		     struct intel_sseu sseu)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct i915_request *rq, *prev;
+	int ret;
+
+	GEM_BUG_ON(!ce->pin_count);
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	/* Submitting requests etc needs the hw awake. */
+	intel_runtime_pm_get(i915);
+
+	rq = i915_request_alloc(engine, i915->kernel_context);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		goto out_put;
+	}
+
+	/* Queue this switch after all other activity by this context. */
+	prev = i915_gem_active_raw(&ce->ring->timeline->last_request,
+				   &i915->drm.struct_mutex);
+	if (prev && !i915_request_completed(prev)) {
+		ret = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+						       &prev->submit,
+						       I915_FENCE_GFP);
+		if (ret < 0)
+			goto out_add;
+	}
+
+	ret = gen8_emit_rpcs_config(rq, ce, sseu);
+	if (ret)
+		goto out_add;
+
+	/* Order all following requests to be after. */
+	i915_timeline_set_barrier(ce->ring->timeline, rq);
+
+	/*
+	 * Guarantee context image and the timeline remains pinned until the
+	 * modifying request is retired by setting the ce activity tracker.
+	 *
+	 * But we only need to take one pin on the account of it. Or in other
+	 * words transfer the pinned ce object to tracked active request.
+	 */
+	if (!i915_gem_active_isset(&ce->active_tracker))
+		__intel_context_pin(ce);
+	i915_gem_active_set(&ce->active_tracker, rq);
+
+out_add:
+	i915_request_add(rq);
+out_put:
+	intel_runtime_pm_put(i915);
+
+	return ret;
+}
+
+static int
+i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
+				  struct intel_engine_cs *engine,
+				  struct intel_sseu sseu)
+{
+	struct intel_context *ce = to_intel_context(ctx, engine);
+	int ret;
+
+	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
+	GEM_BUG_ON(engine->id != RCS);
+
+	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	/* Nothing to do if unmodified. */
+	if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
+		goto out;
+
+	/*
+	 * If context is not idle we have to submit an ordered request to modify
+	 * its context image via the kernel context. Pristine and idle contexts
+	 * will be configured on pinning.
+	 */
+	if (ce->pin_count)
+		ret = gen8_modify_rpcs_gpu(ce, engine, sseu);
+
+	if (!ret)
+		ce->sseu = sseu;
+
+out:
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	return ret;
+}
+
+static int
+user_to_context_sseu(struct drm_i915_private *i915,
+		     const struct drm_i915_gem_context_param_sseu *user,
+		     struct intel_sseu *context)
+{
+	const struct sseu_dev_info *device = &RUNTIME_INFO(i915)->sseu;
+
+	/* No zeros in any field. */
+	if (!user->slice_mask || !user->subslice_mask ||
+	    !user->min_eus_per_subslice || !user->max_eus_per_subslice)
+		return -EINVAL;
+
+	/* Max > min. */
+	if (user->max_eus_per_subslice < user->min_eus_per_subslice)
+		return -EINVAL;
+
+	/* Check validity against hardware. */
+	if (user->slice_mask & ~device->slice_mask)
+		return -EINVAL;
+
+	if (user->subslice_mask & ~device->subslice_mask[0])
+		return -EINVAL;
+
+	if (user->max_eus_per_subslice > device->max_eus_per_subslice)
+		return -EINVAL;
+
+	/*
+	 * Some future proofing on the types since the uAPI is wider than the
+	 * current internal implementation.
+	 */
+	if (WARN_ON((fls(user->slice_mask) >
+		     sizeof(context->slice_mask) * BITS_PER_BYTE) ||
+		    (fls(user->subslice_mask) >
+		     sizeof(context->subslice_mask) * BITS_PER_BYTE) ||
+		    overflows_type(user->min_eus_per_subslice,
+				   context->min_eus_per_subslice) ||
+		    overflows_type(user->max_eus_per_subslice,
+				   context->max_eus_per_subslice)))
+		return -EINVAL;
+
+	context->slice_mask = user->slice_mask;
+	context->subslice_mask = user->subslice_mask;
+	context->min_eus_per_subslice = user->min_eus_per_subslice;
+	context->max_eus_per_subslice = user->max_eus_per_subslice;
+
+	/* Part specific restrictions. */
+	if (IS_GEN(i915, 11)) {
+		unsigned int hw_s = hweight8(device->slice_mask);
+		unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
+		unsigned int req_s = hweight8(context->slice_mask);
+		unsigned int req_ss = hweight8(context->subslice_mask);
+
+		/*
+		 * Only full subslice enablement is possible if more than one
+		 * slice is turned on.
+		 */
+		if (req_s > 1 && req_ss != hw_ss_per_s)
+			return -EINVAL;
+
+		/*
+		 * If more than four (SScount bitfield limit) subslices are
+		 * requested then the number has to be even.
+		 */
+		if (req_ss > 4 && (req_ss & 1))
+			return -EINVAL;
+
+		/*
+		 * If only one slice is enabled and subslice count is below the
+		 * device full enablement, it must be at most half of the all
+		 * available subslices.
+		 */
+		if (req_s == 1 && req_ss < hw_ss_per_s &&
+		    req_ss > (hw_ss_per_s / 2))
+			return -EINVAL;
+
+		/* ABI restriction - VME use case only. */
+
+		/* All slices or one slice only. */
+		if (req_s != 1 && req_s != hw_s)
+			return -EINVAL;
+
+		/*
+		 * Half subslices or full enablement only when one slice is
+		 * enabled.
+		 */
+		if (req_s == 1 &&
+		    (req_ss != hw_ss_per_s && req_ss != (hw_ss_per_s / 2)))
+			return -EINVAL;
+
+		/* No EU configuration changes. */
+		if ((user->min_eus_per_subslice !=
+		     device->max_eus_per_subslice) ||
+		    (user->max_eus_per_subslice !=
+		     device->max_eus_per_subslice))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int set_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_engine_cs *engine;
+	struct intel_sseu sseu;
+	int ret;
+
+	if (args->size < sizeof(user_sseu))
+		return -EINVAL;
+
+	if (!IS_GEN(i915, 11))
+		return -ENODEV;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.flags || user_sseu.rsvd)
+		return -EINVAL;
+
+	engine = intel_engine_lookup_user(i915,
+					  user_sseu.engine_class,
+					  user_sseu.engine_instance);
+	if (!engine)
+		return -EINVAL;
+
+	/* Only render engine supports RPCS configuration. */
+	if (engine->class != RENDER_CLASS)
+		return -ENODEV;
+
+	ret = user_to_context_sseu(i915, &user_sseu, &sseu);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	if (ret)
+		return ret;
+
+	args->size = sizeof(user_sseu);
+
+	return 0;
+}
+
 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
@@ -950,7 +1285,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 					I915_USER_PRIORITY(priority);
 		}
 		break;
-
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = set_sseu(ctx, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index ef04e422cf9a..2ded4b8c9b9f 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -171,6 +171,12 @@ struct i915_gem_context {
 		u64 lrc_desc;
 		int pin_count;
 
+		/**
+		 * active_tracker: Active tracker for the external rq activity
+		 * on this intel_context object.
+		 */
+		struct i915_gem_active active_tracker;
+
 		const struct intel_context_ops *ops;
 
 		/** sseu: Control eu/slice partitioning */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index bca09a497f27..e9c6876a5897 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2392,7 +2392,9 @@ u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
 	 * subslices are enabled, or a count between one and four on the first
 	 * slice.
 	 */
-	if (IS_GEN(i915, 11) && slices == 1 && subslices >= 4) {
+	if (IS_GEN(i915, 11) &&
+	    slices == 1 &&
+	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
 		GEM_BUG_ON(subslices & 1);
 
 		subslice_pg = false;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 298b2e197744..397810fa2d33 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1486,9 +1486,73 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+	/*
+	 * When using the following param, value should be a pointer to
+	 * drm_i915_gem_context_param_sseu.
+	 */
+#define I915_CONTEXT_PARAM_SSEU		0x7
 	__u64 value;
 };
 
+/**
+ * Context SSEU programming
+ *
+ * It may be necessary for either functional or performance reason to configure
+ * a context to run with a reduced number of SSEU (where SSEU stands for Slice/
+ * Sub-slice/EU).
+ *
+ * This is done by configuring SSEU configuration using the below
+ * @struct drm_i915_gem_context_param_sseu for every supported engine which
+ * userspace intends to use.
+ *
+ * Not all GPUs or engines support this functionality in which case an error
+ * code -ENODEV will be returned.
+ *
+ * Also, flexibility of possible SSEU configuration permutations varies between
+ * GPU generations and software imposed limitations. Requesting such a
+ * combination will return an error code of -EINVAL.
+ *
+ * NOTE: When perf/OA is active the context's SSEU configuration is ignored in
+ * favour of a single global setting.
+ */
+struct drm_i915_gem_context_param_sseu {
+	/*
+	 * Engine class & instance to be configured or queried.
+	 */
+	__u16 engine_class;
+	__u16 engine_instance;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 flags;
+
+	/*
+	 * Mask of slices to enable for the context. Valid values are a subset
+	 * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+	 */
+	__u64 slice_mask;
+
+	/*
+	 * Mask of subslices to enable for the context. Valid values are a
+	 * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK.
+	 */
+	__u64 subslice_mask;
+
+	/*
+	 * Minimum/Maximum number of EUs to enable per subslice for the
+	 * context. min_eus_per_subslice must be inferior or equal to
+	 * max_eus_per_subslice.
+	 */
+	__u16 min_eus_per_subslice;
+	__u16 max_eus_per_subslice;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd;
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* ✗ Fi.CI.BAT: failure for Per context dynamic (sub)slice power-gating (rev13)
  2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (12 preceding siblings ...)
  2019-01-09  8:12 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2019-01-14 12:32 ` Patchwork
  13 siblings, 0 replies; 25+ messages in thread
From: Patchwork @ 2019-01-14 12:32 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: Per context dynamic (sub)slice power-gating (rev13)
URL   : https://patchwork.freedesktop.org/series/48194/
State : failure

== Summary ==

Applying: drm/i915/execlists: Move RPCS setup to context pin
Applying: drm/i915: Record the sseu configuration per-context & engine
Applying: drm/i915/perf: lock powergating configuration to default when active
Applying: drm/i915: Add timeline barrier support
Using index info to reconstruct a base tree...
M	drivers/gpu/drm/i915/i915_request.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/i915_request.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/i915_request.c
error: Failed to merge in the changes.
hint: Use 'git am --show-current-patch' to see the failed patch
Patch failed at 0004 drm/i915: Add timeline barrier support
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/6] drm/i915: Add timeline barrier support
  2019-01-24 11:42 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin
@ 2019-01-24 13:27   ` Chris Wilson
  0 siblings, 0 replies; 25+ messages in thread
From: Chris Wilson @ 2019-01-24 13:27 UTC (permalink / raw)
  To: Intel-gfx, Tvrtko Ursulin

Quoting Tvrtko Ursulin (2019-01-24 11:42:01)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Timeline barrier allows serialization between different timelines.
> 
> After calling i915_timeline_set_barrier with a request, all following
> submissions on this timeline will be set up as depending on this request,
> or barrier. Once the barrier has been completed it automatically gets
> cleared and things continue as normal.
> 
> This facility will be used by the upcoming context SSEU code.
> 
> v2:
>  * Assert barrier has been retired on timeline_fini. (Chris Wilson)
>  * Fix mock_timeline.
> 
> v3:
>  * Improved comment language. (Chris Wilson)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

I don't think it makes a difference right away, but we should be pulling
the timeline barrier into i915_gem_switch_to_kernel_context().
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 4/6] drm/i915: Add timeline barrier support
  2019-01-24 11:41 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2019-01-24 11:42 ` Tvrtko Ursulin
  2019-01-24 13:27   ` Chris Wilson
  0 siblings, 1 reply; 25+ messages in thread
From: Tvrtko Ursulin @ 2019-01-24 11:42 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Timeline barrier allows serialization between different timelines.

After calling i915_timeline_set_barrier with a request, all following
submissions on this timeline will be set up as depending on this request,
or barrier. Once the barrier has been completed it automatically gets
cleared and things continue as normal.

This facility will be used by the upcoming context SSEU code.

v2:
 * Assert barrier has been retired on timeline_fini. (Chris Wilson)
 * Fix mock_timeline.

v3:
 * Improved comment language. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c           | 13 +++++++++
 drivers/gpu/drm/i915/i915_timeline.c          |  3 +++
 drivers/gpu/drm/i915/i915_timeline.h          | 27 +++++++++++++++++++
 .../gpu/drm/i915/selftests/mock_timeline.c    |  2 ++
 4 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index f941e40fd373..ea659c620461 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -517,6 +517,15 @@ i915_request_alloc_slow(struct intel_context *ce)
 	return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL);
 }
 
+static int add_timeline_barrier(struct i915_request *rq)
+{
+	struct i915_request *barrier =
+		i915_gem_active_raw(&rq->timeline->barrier,
+				    &rq->i915->drm.struct_mutex);
+
+	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
 /**
  * i915_request_alloc - allocate a request structure
  *
@@ -660,6 +669,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
+	ret = add_timeline_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
 	ret = engine->request_alloc(rq);
 	if (ret)
 		goto err_unwind;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 4667cc08c416..5a87c5bd5154 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -37,6 +37,8 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
+
+	init_request_active(&timeline->barrier, NULL);
 }
 
 /**
@@ -69,6 +71,7 @@ void i915_timelines_park(struct drm_i915_private *i915)
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 38c1e15e927a..af6c05333d76 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -64,6 +64,16 @@ struct i915_timeline {
 	 */
 	struct i915_syncmap *sync;
 
+	/**
+	 * Barrier provides the ability to serialize ordering between different
+	 * timelines.
+	 *
+	 * Users can call i915_timeline_set_barrier which will make all
+	 * subsequent submissions to this timeline be executed only after the
+	 * barrier has been completed.
+	 */
+	struct i915_gem_active barrier;
+
 	struct list_head link;
 	const char *name;
 
@@ -136,4 +146,21 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 
 void i915_timelines_park(struct drm_i915_private *i915);
 
+/**
+ * i915_timeline_set_barrier - orders submission between different timelines
+ * @timeline: timeline to set the barrier on
+ * @rq: request after which new submissions can proceed
+ *
+ * Sets the passed in request as the serialization point for all subsequent
+ * submissions on @timeline. Subsequent requests will not be submitted to GPU
+ * until the barrier has been completed.
+ */
+static inline void
+i915_timeline_set_barrier(struct i915_timeline *timeline,
+			  struct i915_request *rq)
+{
+	GEM_BUG_ON(timeline->fence_context == rq->timeline->fence_context);
+	i915_gem_active_set(&timeline->barrier, rq);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index dcf3b16f5a07..a718b64c988e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -19,6 +19,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 
 	i915_syncmap_init(&timeline->sync);
 
+	init_request_active(&timeline->barrier, NULL);
+
 	INIT_LIST_HEAD(&timeline->link);
 }
 
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 4/6] drm/i915: Add timeline barrier support
  2019-01-15 14:47 [PATCH 0/6] Add uAPI to support ICL VME hardware for new media-driver Joonas Lahtinen
@ 2019-01-15 14:47 ` Joonas Lahtinen
  0 siblings, 0 replies; 25+ messages in thread
From: Joonas Lahtinen @ 2019-01-15 14:47 UTC (permalink / raw)
  To: Intel graphics driver community testing & development
  Cc: Jani Nikula, Takashi Iwai, Timo Aaltonen, Carl Zhang, Stephane Marchesin

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Timeline barrier allows serialization between different timelines.

After calling i915_timeline_set_barrier with a request, all following
submissions on this timeline will be set up as depending on this request,
or barrier. Once the barrier has been completed it automatically gets
cleared and things continue as normal.

This facility will be used by the upcoming context SSEU code.

v2:
 * Assert barrier has been retired on timeline_fini. (Chris Wilson)
 * Fix mock_timeline.

v3:
 * Improved comment language. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c           | 13 +++++++++
 drivers/gpu/drm/i915/i915_timeline.c          |  3 +++
 drivers/gpu/drm/i915/i915_timeline.h          | 27 +++++++++++++++++++
 .../gpu/drm/i915/selftests/mock_timeline.c    |  2 ++
 4 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d1355154886a..496217305a00 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -509,6 +509,15 @@ i915_request_alloc_slow(struct intel_context *ce)
 	return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL);
 }
 
+static int add_timeline_barrier(struct i915_request *rq)
+{
+	struct i915_request *barrier =
+		i915_gem_active_raw(&rq->timeline->barrier,
+				    &rq->i915->drm.struct_mutex);
+
+	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
 /**
  * i915_request_alloc - allocate a request structure
  *
@@ -652,6 +661,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
+	ret = add_timeline_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
 	ret = engine->request_alloc(rq);
 	if (ret)
 		goto err_unwind;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 4667cc08c416..5a87c5bd5154 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -37,6 +37,8 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
+
+	init_request_active(&timeline->barrier, NULL);
 }
 
 /**
@@ -69,6 +71,7 @@ void i915_timelines_park(struct drm_i915_private *i915)
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 38c1e15e927a..af6c05333d76 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -64,6 +64,16 @@ struct i915_timeline {
 	 */
 	struct i915_syncmap *sync;
 
+	/**
+	 * Barrier provides the ability to serialize ordering between different
+	 * timelines.
+	 *
+	 * Users can call i915_timeline_set_barrier which will make all
+	 * subsequent submissions to this timeline be executed only after the
+	 * barrier has been completed.
+	 */
+	struct i915_gem_active barrier;
+
 	struct list_head link;
 	const char *name;
 
@@ -136,4 +146,21 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 
 void i915_timelines_park(struct drm_i915_private *i915);
 
+/**
+ * i915_timeline_set_barrier - orders submission between different timelines
+ * @timeline: timeline to set the barrier on
+ * @rq: request after which new submissions can proceed
+ *
+ * Sets the passed in request as the serialization point for all subsequent
+ * submissions on @timeline. Subsequent requests will not be submitted to GPU
+ * until the barrier has been completed.
+ */
+static inline void
+i915_timeline_set_barrier(struct i915_timeline *timeline,
+			  struct i915_request *rq)
+{
+	GEM_BUG_ON(timeline->fence_context == rq->timeline->fence_context);
+	i915_gem_active_set(&timeline->barrier, rq);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index dcf3b16f5a07..a718b64c988e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -19,6 +19,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 
 	i915_syncmap_init(&timeline->sync);
 
+	init_request_active(&timeline->barrier, NULL);
+
 	INIT_LIST_HEAD(&timeline->link);
 }
 
-- 
2.17.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 4/6] drm/i915: Add timeline barrier support
  2019-01-14 13:57 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2019-01-14 13:57 ` Tvrtko Ursulin
  0 siblings, 0 replies; 25+ messages in thread
From: Tvrtko Ursulin @ 2019-01-14 13:57 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Timeline barrier allows serialization between different timelines.

After calling i915_timeline_set_barrier with a request, all following
submissions on this timeline will be set up as depending on this request,
or barrier. Once the barrier has been completed it automatically gets
cleared and things continue as normal.

This facility will be used by the upcoming context SSEU code.

v2:
 * Assert barrier has been retired on timeline_fini. (Chris Wilson)
 * Fix mock_timeline.

v3:
 * Improved comment language. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c           | 13 +++++++++
 drivers/gpu/drm/i915/i915_timeline.c          |  3 +++
 drivers/gpu/drm/i915/i915_timeline.h          | 27 +++++++++++++++++++
 .../gpu/drm/i915/selftests/mock_timeline.c    |  2 ++
 4 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d1355154886a..496217305a00 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -509,6 +509,15 @@ i915_request_alloc_slow(struct intel_context *ce)
 	return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL);
 }
 
+static int add_timeline_barrier(struct i915_request *rq)
+{
+	struct i915_request *barrier =
+		i915_gem_active_raw(&rq->timeline->barrier,
+				    &rq->i915->drm.struct_mutex);
+
+	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
 /**
  * i915_request_alloc - allocate a request structure
  *
@@ -652,6 +661,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
+	ret = add_timeline_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
 	ret = engine->request_alloc(rq);
 	if (ret)
 		goto err_unwind;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 4667cc08c416..5a87c5bd5154 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -37,6 +37,8 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
+
+	init_request_active(&timeline->barrier, NULL);
 }
 
 /**
@@ -69,6 +71,7 @@ void i915_timelines_park(struct drm_i915_private *i915)
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 38c1e15e927a..af6c05333d76 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -64,6 +64,16 @@ struct i915_timeline {
 	 */
 	struct i915_syncmap *sync;
 
+	/**
+	 * Barrier provides the ability to serialize ordering between different
+	 * timelines.
+	 *
+	 * Users can call i915_timeline_set_barrier which will make all
+	 * subsequent submissions to this timeline be executed only after the
+	 * barrier has been completed.
+	 */
+	struct i915_gem_active barrier;
+
 	struct list_head link;
 	const char *name;
 
@@ -136,4 +146,21 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 
 void i915_timelines_park(struct drm_i915_private *i915);
 
+/**
+ * i915_timeline_set_barrier - orders submission between different timelines
+ * @timeline: timeline to set the barrier on
+ * @rq: request after which new submissions can proceed
+ *
+ * Sets the passed in request as the serialization point for all subsequent
+ * submissions on @timeline. Subsequent requests will not be submitted to GPU
+ * until the barrier has been completed.
+ */
+static inline void
+i915_timeline_set_barrier(struct i915_timeline *timeline,
+			  struct i915_request *rq)
+{
+	GEM_BUG_ON(timeline->fence_context == rq->timeline->fence_context);
+	i915_gem_active_set(&timeline->barrier, rq);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index dcf3b16f5a07..a718b64c988e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -19,6 +19,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 
 	i915_syncmap_init(&timeline->sync);
 
+	init_request_active(&timeline->barrier, NULL);
+
 	INIT_LIST_HEAD(&timeline->link);
 }
 
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 4/6] drm/i915: Add timeline barrier support
  2018-11-13 14:35 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2018-11-13 14:35 ` Tvrtko Ursulin
  0 siblings, 0 replies; 25+ messages in thread
From: Tvrtko Ursulin @ 2018-11-13 14:35 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Timeline barrier allows serialization between different timelines.

After calling i915_timeline_set_barrier with a request, all following
submissions on this timeline will be set up as depending on this request,
or barrier. Once the barrier has been completed it automatically gets
cleared and things continue as normal.

This facility will be used by the upcoming context SSEU code.

v2:
 * Assert barrier has been retired on timeline_fini. (Chris Wilson)
 * Fix mock_timeline.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c           | 13 +++++++++
 drivers/gpu/drm/i915/i915_timeline.c          |  3 +++
 drivers/gpu/drm/i915/i915_timeline.h          | 27 +++++++++++++++++++
 .../gpu/drm/i915/selftests/mock_timeline.c    |  2 ++
 4 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 71107540581d..d1b2ebfc0ff3 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -563,6 +563,15 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
+static int add_timeline_barrier(struct i915_request *rq)
+{
+	struct i915_request *barrier =
+		i915_gem_active_raw(&rq->timeline->barrier,
+				    &rq->i915->drm.struct_mutex);
+
+	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
 /**
  * i915_request_alloc - allocate a request structure
  *
@@ -716,6 +725,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
+	ret = add_timeline_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
 	/* Unconditionally invalidate GPU caches and TLBs. */
 	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 4667cc08c416..5a87c5bd5154 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -37,6 +37,8 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
+
+	init_request_active(&timeline->barrier, NULL);
 }
 
 /**
@@ -69,6 +71,7 @@ void i915_timelines_park(struct drm_i915_private *i915)
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index a2c2c3ab5fb0..c8526ab44dbc 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -72,6 +72,16 @@ struct i915_timeline {
 	 */
 	u32 global_sync[I915_NUM_ENGINES];
 
+	/**
+	 * Barrier provides the ability to serialize ordering between different
+	 * timelines.
+	 *
+	 * Users can call i915_timeline_set_barrier which will make all
+	 * subsequent submissions be executed only after this barrier has been
+	 * completed.
+	 */
+	struct i915_gem_active barrier;
+
 	struct list_head link;
 	const char *name;
 
@@ -125,4 +135,21 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 
 void i915_timelines_park(struct drm_i915_private *i915);
 
+/**
+ * i915_timeline_set_barrier - orders submission between different timelines
+ * @timeline: timeline to set the barrier on
+ * @rq: request after which new submissions can proceed
+ *
+ * Sets the passed in request as the serialization point for all subsequent
+ * submissions on @timeline. Subsequent requests will not be submitted to GPU
+ * until the barrier has been completed.
+ */
+static inline void
+i915_timeline_set_barrier(struct i915_timeline *timeline,
+			  struct i915_request *rq)
+{
+	GEM_BUG_ON(timeline->fence_context == rq->timeline->fence_context);
+	i915_gem_active_set(&timeline->barrier, rq);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index dcf3b16f5a07..a718b64c988e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -19,6 +19,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 
 	i915_syncmap_init(&timeline->sync);
 
+	init_request_active(&timeline->barrier, NULL);
+
 	INIT_LIST_HEAD(&timeline->link);
 }
 
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 4/6] drm/i915: Add timeline barrier support
  2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2018-09-17 11:30 ` Tvrtko Ursulin
  0 siblings, 0 replies; 25+ messages in thread
From: Tvrtko Ursulin @ 2018-09-17 11:30 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Timeline barrier allows serialization between different timelines.

After calling i915_timeline_set_barrier with a request, all following
submissions on this timeline will be set up as depending on this request,
or barrier. Once the barrier has been completed it automatically gets
cleared and things continue as normal.

This facility will be used by the upcoming context SSEU code.

v2:
 * Assert barrier has been retired on timeline_fini. (Chris Wilson)
 * Fix mock_timeline.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c           | 13 +++++++++
 drivers/gpu/drm/i915/i915_timeline.c          |  3 +++
 drivers/gpu/drm/i915/i915_timeline.h          | 27 +++++++++++++++++++
 .../gpu/drm/i915/selftests/mock_timeline.c    |  2 ++
 4 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a492385b2089..76fc80330c85 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -644,6 +644,15 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
+static int add_timeline_barrier(struct i915_request *rq)
+{
+	struct i915_request *barrier =
+		i915_gem_active_raw(&rq->timeline->barrier,
+				    &rq->i915->drm.struct_mutex);
+
+	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
 /**
  * i915_request_alloc - allocate a request structure
  *
@@ -808,6 +817,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
+	ret = add_timeline_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
 	/* Unconditionally invalidate GPU caches and TLBs. */
 	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 4667cc08c416..5a87c5bd5154 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -37,6 +37,8 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
+
+	init_request_active(&timeline->barrier, NULL);
 }
 
 /**
@@ -69,6 +71,7 @@ void i915_timelines_park(struct drm_i915_private *i915)
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index a2c2c3ab5fb0..c8526ab44dbc 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -72,6 +72,16 @@ struct i915_timeline {
 	 */
 	u32 global_sync[I915_NUM_ENGINES];
 
+	/**
+	 * Barrier provides the ability to serialize ordering between different
+	 * timelines.
+	 *
+	 * Users can call i915_timeline_set_barrier which will make all
+	 * subsequent submissions be executed only after this barrier has been
+	 * completed.
+	 */
+	struct i915_gem_active barrier;
+
 	struct list_head link;
 	const char *name;
 
@@ -125,4 +135,21 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 
 void i915_timelines_park(struct drm_i915_private *i915);
 
+/**
+ * i915_timeline_set_barrier - orders submission between different timelines
+ * @timeline: timeline to set the barrier on
+ * @rq: request after which new submissions can proceed
+ *
+ * Sets the passed in request as the serialization point for all subsequent
+ * submissions on @timeline. Subsequent requests will not be submitted to GPU
+ * until the barrier has been completed.
+ */
+static inline void
+i915_timeline_set_barrier(struct i915_timeline *timeline,
+			  struct i915_request *rq)
+{
+	GEM_BUG_ON(timeline->fence_context == rq->timeline->fence_context);
+	i915_gem_active_set(&timeline->barrier, rq);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index dcf3b16f5a07..a718b64c988e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -19,6 +19,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 
 	i915_syncmap_init(&timeline->sync);
 
+	init_request_active(&timeline->barrier, NULL);
+
 	INIT_LIST_HEAD(&timeline->link);
 }
 
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 4/6] drm/i915: Add timeline barrier support
  2018-09-14 16:09 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2018-09-14 16:09 ` Tvrtko Ursulin
  0 siblings, 0 replies; 25+ messages in thread
From: Tvrtko Ursulin @ 2018-09-14 16:09 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Timeline barrier allows serialization between different timelines.

After calling i915_timeline_set_barrier with a request, all following
submissions on this timeline will be set up as depending on this request,
or barrier. Once the barrier has been completed it automatically gets
cleared and things continue as normal.

This facility will be used by the upcoming context SSEU code.

v2:
 * Assert barrier has been retired on timeline_fini. (Chris Wilson)
 * Fix mock_timeline.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c           | 13 +++++++++
 drivers/gpu/drm/i915/i915_timeline.c          |  3 +++
 drivers/gpu/drm/i915/i915_timeline.h          | 27 +++++++++++++++++++
 .../gpu/drm/i915/selftests/mock_timeline.c    |  2 ++
 4 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a492385b2089..76fc80330c85 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -644,6 +644,15 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
+static int add_timeline_barrier(struct i915_request *rq)
+{
+	struct i915_request *barrier =
+		i915_gem_active_raw(&rq->timeline->barrier,
+				    &rq->i915->drm.struct_mutex);
+
+	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
 /**
  * i915_request_alloc - allocate a request structure
  *
@@ -808,6 +817,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
+	ret = add_timeline_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
 	/* Unconditionally invalidate GPU caches and TLBs. */
 	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 4667cc08c416..5a87c5bd5154 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -37,6 +37,8 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
+
+	init_request_active(&timeline->barrier, NULL);
 }
 
 /**
@@ -69,6 +71,7 @@ void i915_timelines_park(struct drm_i915_private *i915)
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index a2c2c3ab5fb0..c8526ab44dbc 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -72,6 +72,16 @@ struct i915_timeline {
 	 */
 	u32 global_sync[I915_NUM_ENGINES];
 
+	/**
+	 * Barrier provides the ability to serialize ordering between different
+	 * timelines.
+	 *
+	 * Users can call i915_timeline_set_barrier which will make all
+	 * subsequent submissions be executed only after this barrier has been
+	 * completed.
+	 */
+	struct i915_gem_active barrier;
+
 	struct list_head link;
 	const char *name;
 
@@ -125,4 +135,21 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 
 void i915_timelines_park(struct drm_i915_private *i915);
 
+/**
+ * i915_timeline_set_barrier - orders submission between different timelines
+ * @timeline: timeline to set the barrier on
+ * @rq: request after which new submissions can proceed
+ *
+ * Sets the passed in request as the serialization point for all subsequent
+ * submissions on @timeline. Subsequent requests will not be submitted to GPU
+ * until the barrier has been completed.
+ */
+static inline void
+i915_timeline_set_barrier(struct i915_timeline *timeline,
+			  struct i915_request *rq)
+{
+	GEM_BUG_ON(timeline->fence_context == rq->timeline->fence_context);
+	i915_gem_active_set(&timeline->barrier, rq);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index dcf3b16f5a07..a718b64c988e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -19,6 +19,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 
 	i915_syncmap_init(&timeline->sync);
 
+	init_request_active(&timeline->barrier, NULL);
+
 	INIT_LIST_HEAD(&timeline->link);
 }
 
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2019-01-24 13:27 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-08 15:12 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2019-01-08 15:12 ` [PATCH 1/6] drm/i915/execlists: Move RPCS setup to context pin Tvrtko Ursulin
2019-01-08 15:12 ` [PATCH 2/6] drm/i915: Record the sseu configuration per-context & engine Tvrtko Ursulin
2019-01-08 15:12 ` [PATCH 3/6] drm/i915/perf: lock powergating configuration to default when active Tvrtko Ursulin
2019-01-08 15:12 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin
2019-01-08 15:12 ` [PATCH 5/6] drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only) Tvrtko Ursulin
2019-01-08 16:29   ` [PATCH v24 " Tvrtko Ursulin
2019-01-11 14:22     ` Tvrtko Ursulin
2019-01-14  9:39     ` [PATCH v25 " Tvrtko Ursulin
2019-01-08 15:12 ` [PATCH 6/6] drm/i915/selftests: Context SSEU reconfiguration tests Tvrtko Ursulin
2019-01-08 15:37 ` ✗ Fi.CI.CHECKPATCH: warning for Per context dynamic (sub)slice power-gating (rev11) Patchwork
2019-01-08 15:40 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-01-08 16:13 ` ✓ Fi.CI.BAT: success " Patchwork
2019-01-08 18:38 ` ✗ Fi.CI.CHECKPATCH: warning for Per context dynamic (sub)slice power-gating (rev12) Patchwork
2019-01-08 18:40 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-01-08 19:00 ` ✓ Fi.CI.BAT: success " Patchwork
2019-01-09  8:12 ` ✗ Fi.CI.IGT: failure " Patchwork
2019-01-14 12:32 ` ✗ Fi.CI.BAT: failure for Per context dynamic (sub)slice power-gating (rev13) Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2019-01-24 11:41 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2019-01-24 11:42 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin
2019-01-24 13:27   ` Chris Wilson
2019-01-15 14:47 [PATCH 0/6] Add uAPI to support ICL VME hardware for new media-driver Joonas Lahtinen
2019-01-15 14:47 ` [PATCH 4/6] drm/i915: Add timeline barrier support Joonas Lahtinen
2019-01-14 13:57 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2019-01-14 13:57 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin
2018-11-13 14:35 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2018-11-13 14:35 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin
2018-09-17 11:30 [PATCH v13 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2018-09-17 11:30 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin
2018-09-14 16:09 [PATCH 0/6] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2018-09-14 16:09 ` [PATCH 4/6] drm/i915: Add timeline barrier support Tvrtko Ursulin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.