All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 0/4] drm/i915: enable userspace to program slice/subslice programming
@ 2017-09-01 17:12 Lionel Landwerlin
  2017-09-01 17:12 ` [RFC PATCH 1/4] drm/i915: Record both min/max eu_per_subslice in sseu_dev_info Lionel Landwerlin
                   ` (5 more replies)
  0 siblings, 6 replies; 10+ messages in thread
From: Lionel Landwerlin @ 2017-09-01 17:12 UTC (permalink / raw)
  To: intel-gfx

Hi all,

This is a respin of a series from Chris. Actually got around testing
it a bit. The main reason for this feature is to allow media workloads
to tweak the number of slices powered on. It seems to have significant
performance gains.

Cheers,

Chris Wilson (4):
  drm/i915: Record both min/max eu_per_subslice in sseu_dev_info
  drm/i915: Program RPCS for Broadwell
  drm/i915: Record the sseu configuration per-context & engine
  drm/i915: Expose RPCS (SSEU) configuration to userspace

 drivers/gpu/drm/i915/i915_debugfs.c      | 36 ++++++++----
 drivers/gpu/drm/i915/i915_drv.h          | 18 ------
 drivers/gpu/drm/i915/i915_gem_context.c  | 17 ++++++
 drivers/gpu/drm/i915/i915_gem_context.h  | 21 +++++++
 drivers/gpu/drm/i915/intel_device_info.c | 32 +++++++----
 drivers/gpu/drm/i915/intel_lrc.c         | 99 +++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_lrc.h         |  2 +
 include/uapi/drm/i915_drm.h              | 11 ++++
 8 files changed, 175 insertions(+), 61 deletions(-)

--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [RFC PATCH 1/4] drm/i915: Record both min/max eu_per_subslice in sseu_dev_info
  2017-09-01 17:12 [RFC PATCH 0/4] drm/i915: enable userspace to program slice/subslice programming Lionel Landwerlin
@ 2017-09-01 17:12 ` Lionel Landwerlin
  2017-09-01 17:12 ` [RFC PATCH 2/4] drm/i915: Program RPCS for Broadwell Lionel Landwerlin
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Lionel Landwerlin @ 2017-09-01 17:12 UTC (permalink / raw)
  To: intel-gfx

From: Chris Wilson <chris@chris-wilson.co.uk>

When we query the available eu on each subslice, we currently only
report the max. It would also be useful to report the minimum found as
well.

When we set RPCS (power gating over the EU), we can also specify both
the min and max number of eu to configure on each slice; currently we
just set it to a single value, but the flexibility may be beneficial in
future.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c      | 36 +++++++++++++++++++++++---------
 drivers/gpu/drm/i915/i915_drv.h          |  3 ++-
 drivers/gpu/drm/i915/intel_device_info.c | 32 +++++++++++++++++-----------
 drivers/gpu/drm/i915/intel_lrc.c         |  4 ++--
 4 files changed, 50 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 48572b157222..ac3749f5a7a0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4491,6 +4491,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_cache_sharing_fops,
 static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
 					  struct sseu_dev_info *sseu)
 {
+	unsigned int min_eu_per_subslice, max_eu_per_subslice;
 	int ss_max = 2;
 	int ss;
 	u32 sig1[ss_max], sig2[ss_max];
@@ -4500,6 +4501,9 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
 	sig2[0] = I915_READ(CHV_POWER_SS0_SIG2);
 	sig2[1] = I915_READ(CHV_POWER_SS1_SIG2);
 
+	min_eu_per_subslice = ~0u;
+	max_eu_per_subslice = 0;
+
 	for (ss = 0; ss < ss_max; ss++) {
 		unsigned int eu_cnt;
 
@@ -4514,14 +4518,18 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
 			 ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
 			 ((sig2[ss] & CHV_EU311_PG_ENABLE) ? 0 : 2);
 		sseu->eu_total += eu_cnt;
-		sseu->eu_per_subslice = max_t(unsigned int,
-					      sseu->eu_per_subslice, eu_cnt);
+		min_eu_per_subslice = min(min_eu_per_subslice, eu_cnt);
+		max_eu_per_subslice = max(max_eu_per_subslice, eu_cnt);
 	}
+
+	sseu->min_eu_per_subslice = min_eu_per_subslice;
+	sseu->max_eu_per_subslice = max_eu_per_subslice;
 }
 
 static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 				    struct sseu_dev_info *sseu)
 {
+	unsigned int min_eu_per_subslice, max_eu_per_subslice;
 	int s_max = 3, ss_max = 4;
 	int s, ss;
 	u32 s_reg[s_max], eu_reg[2*s_max], eu_mask[2];
@@ -4547,6 +4555,9 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 		     GEN9_PGCTL_SSB_EU210_ACK |
 		     GEN9_PGCTL_SSB_EU311_ACK;
 
+	min_eu_per_subslice = ~0u;
+	max_eu_per_subslice = 0;
+
 	for (s = 0; s < s_max; s++) {
 		if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
 			/* skip disabled slice */
@@ -4572,11 +4583,14 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 			eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
 					       eu_mask[ss%2]);
 			sseu->eu_total += eu_cnt;
-			sseu->eu_per_subslice = max_t(unsigned int,
-						      sseu->eu_per_subslice,
-						      eu_cnt);
+
+			min_eu_per_subslice = min(min_eu_per_subslice, eu_cnt);
+			max_eu_per_subslice = max(max_eu_per_subslice, eu_cnt);
 		}
 	}
+
+	sseu->min_eu_per_subslice = min_eu_per_subslice;
+	sseu->max_eu_per_subslice = max_eu_per_subslice;
 }
 
 static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
@@ -4589,9 +4603,11 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
 
 	if (sseu->slice_mask) {
 		sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
-		sseu->eu_per_subslice =
-				INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
-		sseu->eu_total = sseu->eu_per_subslice *
+		sseu->min_eu_per_subslice =
+			INTEL_INFO(dev_priv)->sseu.min_eu_per_subslice;
+		sseu->max_eu_per_subslice =
+			INTEL_INFO(dev_priv)->sseu.max_eu_per_subslice;
+		sseu->eu_total = sseu->max_eu_per_subslice *
 				 sseu_subslice_total(sseu);
 
 		/* subtract fused off EU(s) from enabled slice(s) */
@@ -4622,8 +4638,8 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
 		   hweight8(sseu->subslice_mask));
 	seq_printf(m, "  %s EU Total: %u\n", type,
 		   sseu->eu_total);
-	seq_printf(m, "  %s EU Per Subslice: %u\n", type,
-		   sseu->eu_per_subslice);
+	seq_printf(m, "  %s EU Per Subslice: [%u, %u]\n", type,
+		   sseu->min_eu_per_subslice, sseu->max_eu_per_subslice);
 
 	if (!is_available_info)
 		return;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cfe86d78ffd2..4eea89751608 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -804,7 +804,8 @@ struct sseu_dev_info {
 	u8 slice_mask;
 	u8 subslice_mask;
 	u8 eu_total;
-	u8 eu_per_subslice;
+	u8 min_eu_per_subslice;
+	u8 max_eu_per_subslice;
 	u8 min_eu_in_pool;
 	/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
 	u8 subslice_7eu[3];
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 5f91ddc78c7a..05ef5e1b1750 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -85,6 +85,7 @@ void intel_device_info_dump(struct drm_i915_private *dev_priv)
 static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
+	unsigned int eu_per_subslice;
 	u32 fuse, eu_dis;
 
 	fuse = I915_READ(CHV_FUSE_GT);
@@ -109,9 +110,10 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
 	 * CHV expected to always have a uniform distribution of EU
 	 * across subslices.
 	*/
-	sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
-				sseu->eu_total / sseu_subslice_total(sseu) :
-				0;
+	eu_per_subslice = sseu_subslice_total(sseu) ?
+		sseu->eu_total / sseu_subslice_total(sseu) : 0;
+	sseu->min_eu_per_subslice = eu_per_subslice;
+	sseu->max_eu_per_subslice = eu_per_subslice;
 	/*
 	 * CHV supports subslice power gating on devices with more than
 	 * one subslice, and supports EU power gating on devices with
@@ -119,13 +121,14 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
 	*/
 	sseu->has_slice_pg = 0;
 	sseu->has_subslice_pg = sseu_subslice_total(sseu) > 1;
-	sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
+	sseu->has_eu_pg = eu_per_subslice > 2;
 }
 
 static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct intel_device_info *info = mkwrite_device_info(dev_priv);
 	struct sseu_dev_info *sseu = &info->sseu;
+	unsigned int eu_per_subslice;
 	int s_max = 3, ss_max = 4, eu_max = 8;
 	int s, ss;
 	u32 fuse2, eu_disable;
@@ -181,9 +184,10 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 	 * recovery. BXT is expected to be perfectly uniform in EU
 	 * distribution.
 	*/
-	sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
-				DIV_ROUND_UP(sseu->eu_total,
-					     sseu_subslice_total(sseu)) : 0;
+	eu_per_subslice = sseu_subslice_total(sseu) ?
+		DIV_ROUND_UP(sseu->eu_total, sseu_subslice_total(sseu)) : 0;
+	sseu->min_eu_per_subslice = eu_per_subslice;
+	sseu->max_eu_per_subslice = eu_per_subslice;
 	/*
 	 * SKL+ supports slice power gating on devices with more than
 	 * one slice, and supports EU power gating on devices with
@@ -196,7 +200,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 		!IS_GEN9_LP(dev_priv) && hweight8(sseu->slice_mask) > 1;
 	sseu->has_subslice_pg =
 		IS_GEN9_LP(dev_priv) && sseu_subslice_total(sseu) > 1;
-	sseu->has_eu_pg = sseu->eu_per_subslice > 2;
+	sseu->has_eu_pg = eu_per_subslice > 2;
 
 	if (IS_GEN9_LP(dev_priv)) {
 #define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask & BIT(ss)))
@@ -229,6 +233,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
 	const int s_max = 3, ss_max = 3, eu_max = 8;
+	unsigned int eu_per_subslice;
 	int s, ss;
 	u32 fuse2, eu_disable[3]; /* s_max */
 
@@ -283,9 +288,10 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 	 * subslices with the exception that any one EU in any one subslice may
 	 * be fused off for die recovery.
 	 */
-	sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
-				DIV_ROUND_UP(sseu->eu_total,
-					     sseu_subslice_total(sseu)) : 0;
+	eu_per_subslice = sseu_subslice_total(sseu) ?
+		DIV_ROUND_UP(sseu->eu_total, sseu_subslice_total(sseu)) : 0;
+	sseu->min_eu_per_subslice = eu_per_subslice;
+	sseu->max_eu_per_subslice = eu_per_subslice;
 
 	/*
 	 * BDW supports slice power gating on devices with more than
@@ -422,7 +428,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 	DRM_DEBUG_DRIVER("subslice per slice: %u\n",
 			 hweight8(info->sseu.subslice_mask));
 	DRM_DEBUG_DRIVER("EU total: %u\n", info->sseu.eu_total);
-	DRM_DEBUG_DRIVER("EU per subslice: %u\n", info->sseu.eu_per_subslice);
+	DRM_DEBUG_DRIVER("EU per subslice: [%u, %u]\n",
+			 info->sseu.min_eu_per_subslice,
+			 info->sseu.max_eu_per_subslice);
 	DRM_DEBUG_DRIVER("has slice power gating: %s\n",
 			 info->sseu.has_slice_pg ? "y" : "n");
 	DRM_DEBUG_DRIVER("has subslice power gating: %s\n",
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d89e1b8e1cc5..7fa0d654a4c2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1883,9 +1883,9 @@ make_rpcs(struct drm_i915_private *dev_priv)
 	}
 
 	if (INTEL_INFO(dev_priv)->sseu.has_eu_pg) {
-		rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice <<
+		rpcs |= INTEL_INFO(dev_priv)->sseu.min_eu_per_subslice <<
 			GEN8_RPCS_EU_MIN_SHIFT;
-		rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice <<
+		rpcs |= INTEL_INFO(dev_priv)->sseu.max_eu_per_subslice <<
 			GEN8_RPCS_EU_MAX_SHIFT;
 		rpcs |= GEN8_RPCS_ENABLE;
 	}
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [RFC PATCH 2/4] drm/i915: Program RPCS for Broadwell
  2017-09-01 17:12 [RFC PATCH 0/4] drm/i915: enable userspace to program slice/subslice programming Lionel Landwerlin
  2017-09-01 17:12 ` [RFC PATCH 1/4] drm/i915: Record both min/max eu_per_subslice in sseu_dev_info Lionel Landwerlin
@ 2017-09-01 17:12 ` Lionel Landwerlin
  2017-09-01 17:12 ` [RFC PATCH 3/4] drm/i915: Record the sseu configuration per-context & engine Lionel Landwerlin
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Lionel Landwerlin @ 2017-09-01 17:12 UTC (permalink / raw)
  To: intel-gfx

From: Chris Wilson <chris@chris-wilson.co.uk>

Currently we only configure the power gating for Skylake and above, but
the configuration should equally apply to Broadwell and Braswell. Even
though, there is not as much variation as for later generations, we want
to expose control over the configuration to userspace and may want to
opt out of the "always-enabled" setting.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7fa0d654a4c2..8d1649899f12 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1855,13 +1855,6 @@ make_rpcs(struct drm_i915_private *dev_priv)
 {
 	u32 rpcs = 0;
 
-	/*
-	 * No explicit RPCS request is needed to ensure full
-	 * slice/subslice/EU enablement prior to Gen9.
-	*/
-	if (INTEL_GEN(dev_priv) < 9)
-		return 0;
-
 	/*
 	 * Starting in Gen9, render power gating can leave
 	 * slice/subslice/EU in a partially enabled state. We
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [RFC PATCH 3/4] drm/i915: Record the sseu configuration per-context & engine
  2017-09-01 17:12 [RFC PATCH 0/4] drm/i915: enable userspace to program slice/subslice programming Lionel Landwerlin
  2017-09-01 17:12 ` [RFC PATCH 1/4] drm/i915: Record both min/max eu_per_subslice in sseu_dev_info Lionel Landwerlin
  2017-09-01 17:12 ` [RFC PATCH 2/4] drm/i915: Program RPCS for Broadwell Lionel Landwerlin
@ 2017-09-01 17:12 ` Lionel Landwerlin
  2017-09-01 17:12 ` [RFC PATCH 4/4] drm/i915: Expose RPCS (SSEU) configuration to userspace Lionel Landwerlin
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Lionel Landwerlin @ 2017-09-01 17:12 UTC (permalink / raw)
  To: intel-gfx

From: Chris Wilson <chris@chris-wilson.co.uk>

We want to expose the ability to reconfigure the slices, subslice and
eu per context and per engine. To facilitate that, store the current
configuration on the context for each engine, which is initially set
to the device default upon creation.

v2: record sseu configuration per context & engine (Chris)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         | 19 -------------------
 drivers/gpu/drm/i915/i915_gem_context.c |  6 ++++++
 drivers/gpu/drm/i915/i915_gem_context.h | 21 +++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c        | 23 +++++++++--------------
 4 files changed, 36 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4eea89751608..43d83ffae2d3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -800,25 +800,6 @@ struct intel_csr {
 	func(overlay_needs_physical); \
 	func(supports_tv);
 
-struct sseu_dev_info {
-	u8 slice_mask;
-	u8 subslice_mask;
-	u8 eu_total;
-	u8 min_eu_per_subslice;
-	u8 max_eu_per_subslice;
-	u8 min_eu_in_pool;
-	/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
-	u8 subslice_7eu[3];
-	u8 has_slice_pg:1;
-	u8 has_subslice_pg:1;
-	u8 has_eu_pg:1;
-};
-
-static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
-{
-	return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
-}
-
 /* Keep in gen based order, and chronological order within a gen */
 enum intel_platform {
 	INTEL_PLATFORM_UNINITIALIZED = 0,
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 58a2a44f88bd..97fcb01d70eb 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -257,6 +257,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 		    struct drm_i915_file_private *file_priv)
 {
 	struct i915_gem_context *ctx;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -305,6 +307,10 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	 * is no remap info, it will be a NOP. */
 	ctx->remap_slice = ALL_L3_SLICES(dev_priv);
 
+	/* On all engines, use the whole device by default */
+	for_each_engine(engine, dev_priv, id)
+		ctx->engine[id].sseu = INTEL_INFO(dev_priv)->sseu;
+
 	i915_gem_context_set_bannable(ctx);
 	ctx->ring_size = 4 * PAGE_SIZE;
 	ctx->desc_template =
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 44688e22a5c2..727b3b5bced1 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -40,6 +40,25 @@ struct i915_hw_ppgtt;
 struct i915_vma;
 struct intel_ring;
 
+struct sseu_dev_info {
+	u8 slice_mask;
+	u8 subslice_mask;
+	u8 eu_total;
+	u8 min_eu_per_subslice;
+	u8 max_eu_per_subslice;
+	u8 min_eu_in_pool;
+	/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
+	u8 subslice_7eu[3];
+	u8 has_slice_pg:1;
+	u8 has_subslice_pg:1;
+	u8 has_eu_pg:1;
+};
+
+static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
+{
+	return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
+}
+
 #define DEFAULT_CONTEXT_HANDLE 0
 
 /**
@@ -158,6 +177,8 @@ struct i915_gem_context {
 		u64 lrc_desc;
 		int pin_count;
 		bool initialised;
+		/** sseu: Control eu/slice partitioning */
+		struct sseu_dev_info sseu;
 	} engine[I915_NUM_ENGINES];
 
 	/** ring_size: size for allocating the per-engine ring buffer */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8d1649899f12..1693fd9d279b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1850,8 +1850,7 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine)
 	return logical_ring_init(engine);
 }
 
-static u32
-make_rpcs(struct drm_i915_private *dev_priv)
+static u32 make_rpcs(const struct sseu_dev_info *sseu)
 {
 	u32 rpcs = 0;
 
@@ -1861,25 +1860,21 @@ make_rpcs(struct drm_i915_private *dev_priv)
 	 * must make an explicit request through RPCS for full
 	 * enablement.
 	*/
-	if (INTEL_INFO(dev_priv)->sseu.has_slice_pg) {
+	if (sseu->has_slice_pg) {
 		rpcs |= GEN8_RPCS_S_CNT_ENABLE;
-		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask) <<
-			GEN8_RPCS_S_CNT_SHIFT;
+		rpcs |= hweight8(sseu->slice_mask) << GEN8_RPCS_S_CNT_SHIFT;
 		rpcs |= GEN8_RPCS_ENABLE;
 	}
 
-	if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
+	if (sseu->has_subslice_pg) {
 		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
-		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
-			GEN8_RPCS_SS_CNT_SHIFT;
+		rpcs |= hweight8(sseu->subslice_mask) << GEN8_RPCS_SS_CNT_SHIFT;
 		rpcs |= GEN8_RPCS_ENABLE;
 	}
 
-	if (INTEL_INFO(dev_priv)->sseu.has_eu_pg) {
-		rpcs |= INTEL_INFO(dev_priv)->sseu.min_eu_per_subslice <<
-			GEN8_RPCS_EU_MIN_SHIFT;
-		rpcs |= INTEL_INFO(dev_priv)->sseu.max_eu_per_subslice <<
-			GEN8_RPCS_EU_MAX_SHIFT;
+	if (sseu->has_eu_pg) {
+		rpcs |= sseu->min_eu_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
+		rpcs |= sseu->max_eu_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
 		rpcs |= GEN8_RPCS_ENABLE;
 	}
 
@@ -1993,7 +1988,7 @@ static void execlists_init_reg_state(u32 *regs,
 	if (rcs) {
 		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
 		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
-			make_rpcs(dev_priv));
+			make_rpcs(&ctx->engine[engine->id].sseu));
 
 		i915_oa_init_reg_state(engine, ctx, regs);
 	}
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [RFC PATCH 4/4] drm/i915: Expose RPCS (SSEU) configuration to userspace
  2017-09-01 17:12 [RFC PATCH 0/4] drm/i915: enable userspace to program slice/subslice programming Lionel Landwerlin
                   ` (2 preceding siblings ...)
  2017-09-01 17:12 ` [RFC PATCH 3/4] drm/i915: Record the sseu configuration per-context & engine Lionel Landwerlin
@ 2017-09-01 17:12 ` Lionel Landwerlin
  2017-09-01 18:58   ` Chris Wilson
  2017-09-04  6:49   ` Joonas Lahtinen
  2017-09-01 17:44 ` ✓ Fi.CI.BAT: success for drm/i915: enable userspace to program slice/subslice programming Patchwork
  2017-09-01 22:21 ` ✗ Fi.CI.IGT: failure " Patchwork
  5 siblings, 2 replies; 10+ messages in thread
From: Lionel Landwerlin @ 2017-09-01 17:12 UTC (permalink / raw)
  To: intel-gfx

From: Chris Wilson <chris@chris-wilson.co.uk>

We want to allow userspace to reconfigure the subslice configuration for
its own use case. To do so, we expose a context parameter to allow
adjustment of the RPCS register stored within the context image (and
currently not accessible via LRI). If the context is adjusted before
first use, the adjustment is for "free"; otherwise if the context is
active we flush the context off the GPU (stalling all users) and forcing
the GPU to save the context to memory where we can modify it and so
ensure that the register is reloaded on next execution.

The overhead of managing additional EU subslices can be significant,
especially in multi-context workloads. Non-GPGPU contexts should
preferably disable the subslices it is not using, and others should
fine-tune the number to match their workload.

We expose complete control over the RPCS register, allowing
configuration of slice/subslice, via masks packed into a u64 for
simplicity. For example,

	struct drm_i915_gem_context_param arg;

	memset(&arg, 0, sizeof(arg));
	arg.ctx_id = ctx;
	arg.param = I915_CONTEXT_PARAM_SSEU;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg) == 0) {
		union drm_i915_gem_context_param_sseu *sseu = &arg.value;

		sseu->packed.subslice_mask = 0;

		drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
	}

could be used to disable all subslices where supported.

v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
CC: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
CC: Zhipeng Gong <zhipeng.gong@intel.com>
CC: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 11 ++++++
 drivers/gpu/drm/i915/intel_lrc.c        | 69 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.h        |  2 +
 include/uapi/drm/i915_drm.h             | 11 ++++++
 4 files changed, 93 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 97fcb01d70eb..d399b03f452c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1042,6 +1042,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_CONTEXT_PARAM_BANNABLE:
 		args->value = i915_gem_context_is_bannable(ctx);
 		break;
+	case I915_CONTEXT_PARAM_SSEU:
+		args->value = intel_lr_context_get_sseu(ctx);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -1097,6 +1100,14 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		else
 			i915_gem_context_clear_bannable(ctx);
 		break;
+	case I915_CONTEXT_PARAM_SSEU:
+		if (args->size)
+			ret = -EINVAL;
+		else if (!i915.enable_execlists)
+			ret = -ENODEV;
+		else
+			ret = intel_lr_context_set_sseu(ctx, args->value);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1693fd9d279b..c063b84911d5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2122,3 +2122,72 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 		}
 	}
 }
+
+int intel_lr_context_set_sseu(struct i915_gem_context *ctx, u64 value)
+{
+	union drm_i915_gem_context_param_sseu user = { .value = value };
+	struct drm_i915_private *i915 = ctx->i915;
+	struct intel_context *ce = &ctx->engine[RCS];
+	struct sseu_dev_info sseu = ctx->engine[RCS].sseu;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int ret;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	sseu.slice_mask =
+		user.packed.slice_mask & INTEL_INFO(i915)->sseu.slice_mask;
+	sseu.subslice_mask =
+		user.packed.subslice_mask & INTEL_INFO(i915)->sseu.subslice_mask;
+	sseu.min_eu_per_subslice =
+		max(user.packed.min_eu_per_subslice,
+		    INTEL_INFO(i915)->sseu.min_eu_per_subslice);
+	sseu.max_eu_per_subslice =
+		min(user.packed.max_eu_per_subslice,
+		    INTEL_INFO(i915)->sseu.max_eu_per_subslice);
+
+	if (memcmp(&sseu, &ctx->engine[RCS].sseu, sizeof(sseu)) == 0)
+		return 0;
+
+	if (ce->pin_count) { /* Assume that the context is active! */
+		ret = i915_gem_switch_to_kernel_context(i915);
+		if (ret)
+			return ret;
+
+		ret = i915_gem_wait_for_idle(i915,
+					     I915_WAIT_INTERRUPTIBLE |
+					     I915_WAIT_LOCKED);
+		if (ret)
+			return ret;
+	}
+
+	if (ce->state) {
+		u32 *regs;
+
+		regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB) +
+			LRC_STATE_PN * PAGE_SIZE;
+		if (IS_ERR(regs))
+			return PTR_ERR(regs);
+
+		regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(&sseu);
+		i915_gem_object_unpin_map(ce->state->obj);
+	}
+
+	for_each_engine(engine, i915, id)
+		ctx->engine[id].sseu = sseu;
+
+	return 0;
+}
+
+u64 intel_lr_context_get_sseu(struct i915_gem_context *ctx)
+{
+	union drm_i915_gem_context_param_sseu user;
+	const struct sseu_dev_info *sseu = &ctx->engine[RCS].sseu;
+
+	user.packed.slice_mask = sseu->slice_mask;
+	user.packed.subslice_mask = sseu->subslice_mask;
+	user.packed.min_eu_per_subslice = sseu->min_eu_per_subslice;
+	user.packed.max_eu_per_subslice = sseu->max_eu_per_subslice;
+
+	return user.value;
+}
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 57ef5833c427..4ef6a6143f5d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -80,6 +80,8 @@ struct i915_gem_context;
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 				     struct intel_engine_cs *engine);
+int intel_lr_context_set_sseu(struct i915_gem_context *ctx, u64 value);
+u64 intel_lr_context_get_sseu(struct i915_gem_context *ctx);
 
 /* Execlists */
 int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 6598fb76d2c2..f1e4d0c8c63b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1358,6 +1358,17 @@ struct drm_i915_gem_context_param {
 #define I915_CONTEXT_PARAM_GTT_SIZE	0x3
 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE	0x4
 #define I915_CONTEXT_PARAM_BANNABLE	0x5
+#define I915_CONTEXT_PARAM_SSEU		0x6
+	__u64 value;
+};
+
+union drm_i915_gem_context_param_sseu {
+	struct {
+		__u8 slice_mask;
+		__u8 subslice_mask;
+		__u8 min_eu_per_subslice;
+		__u8 max_eu_per_subslice;
+	} packed;
 	__u64 value;
 };
 
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915: enable userspace to program slice/subslice programming
  2017-09-01 17:12 [RFC PATCH 0/4] drm/i915: enable userspace to program slice/subslice programming Lionel Landwerlin
                   ` (3 preceding siblings ...)
  2017-09-01 17:12 ` [RFC PATCH 4/4] drm/i915: Expose RPCS (SSEU) configuration to userspace Lionel Landwerlin
@ 2017-09-01 17:44 ` Patchwork
  2017-09-01 22:21 ` ✗ Fi.CI.IGT: failure " Patchwork
  5 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2017-09-01 17:44 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: enable userspace to program slice/subslice programming
URL   : https://patchwork.freedesktop.org/series/29715/
State : success

== Summary ==

Series 29715v1 drm/i915: enable userspace to program slice/subslice programming
https://patchwork.freedesktop.org/api/1.0/series/29715/revisions/1/mbox/

Test kms_cursor_legacy:
        Subgroup basic-busy-flip-before-cursor-atomic:
                pass       -> FAIL       (fi-snb-2600) fdo#100215 +1
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                dmesg-warn -> PASS       (fi-byt-j1900) fdo#101705

fdo#100215 https://bugs.freedesktop.org/show_bug.cgi?id=100215
fdo#101705 https://bugs.freedesktop.org/show_bug.cgi?id=101705

fi-bdw-5557u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:456s
fi-bdw-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:441s
fi-blb-e6850     total:288  pass:224  dwarn:1   dfail:0   fail:0   skip:63  time:359s
fi-bsw-n3050     total:288  pass:243  dwarn:0   dfail:0   fail:0   skip:45  time:553s
fi-bwr-2160      total:288  pass:184  dwarn:0   dfail:0   fail:0   skip:104 time:253s
fi-bxt-j4205     total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:532s
fi-byt-j1900     total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:520s
fi-byt-n2820     total:288  pass:250  dwarn:1   dfail:0   fail:0   skip:37  time:518s
fi-elk-e7500     total:288  pass:230  dwarn:0   dfail:0   fail:0   skip:58  time:436s
fi-glk-2a        total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:617s
fi-hsw-4770      total:288  pass:263  dwarn:0   dfail:0   fail:0   skip:25  time:445s
fi-hsw-4770r     total:288  pass:263  dwarn:0   dfail:0   fail:0   skip:25  time:424s
fi-ilk-650       total:288  pass:229  dwarn:0   dfail:0   fail:0   skip:59  time:424s
fi-ivb-3520m     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:506s
fi-ivb-3770      total:288  pass:260  dwarn:0   dfail:0   fail:1   skip:27  time:477s
fi-kbl-7500u     total:288  pass:264  dwarn:1   dfail:0   fail:0   skip:23  time:526s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:599s
fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:588s
fi-pnv-d510      total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:520s
fi-skl-6260u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:476s
fi-skl-6700k     total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:528s
fi-skl-6770hq    total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:490s
fi-skl-gvtdvm    total:288  pass:266  dwarn:0   dfail:0   fail:0   skip:22  time:445s
fi-skl-x1585l    total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:514s
fi-snb-2520m     total:288  pass:251  dwarn:0   dfail:0   fail:0   skip:37  time:556s
fi-snb-2600      total:288  pass:249  dwarn:0   dfail:0   fail:1   skip:38  time:402s

5dafa147cc44e79db01227901d4503d2cb7beca5 drm-tip: 2017y-09m-01d-16h-49m-20s UTC integration manifest
f58923254518 drm/i915: Expose RPCS (SSEU) configuration to userspace
aa69bc43caf4 drm/i915: Record the sseu configuration per-context & engine
67b33ada02aa drm/i915: Program RPCS for Broadwell
118570ddc0d4 drm/i915: Record both min/max eu_per_subslice in sseu_dev_info

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_5569/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH 4/4] drm/i915: Expose RPCS (SSEU) configuration to userspace
  2017-09-01 17:12 ` [RFC PATCH 4/4] drm/i915: Expose RPCS (SSEU) configuration to userspace Lionel Landwerlin
@ 2017-09-01 18:58   ` Chris Wilson
  2017-09-21 17:16     ` Lionel Landwerlin
  2017-09-04  6:49   ` Joonas Lahtinen
  1 sibling, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2017-09-01 18:58 UTC (permalink / raw)
  To: Lionel Landwerlin, intel-gfx

Quoting Lionel Landwerlin (2017-09-01 18:12:30)
> From: Chris Wilson <chris@chris-wilson.co.uk>
> 
> We want to allow userspace to reconfigure the subslice configuration for
> its own use case. To do so, we expose a context parameter to allow
> adjustment of the RPCS register stored within the context image (and
> currently not accessible via LRI). If the context is adjusted before
> first use, the adjustment is for "free"; otherwise if the context is
> active we flush the context off the GPU (stalling all users) and forcing
> the GPU to save the context to memory where we can modify it and so
> ensure that the register is reloaded on next execution.
> 
> The overhead of managing additional EU subslices can be significant,
> especially in multi-context workloads. Non-GPGPU contexts should
> preferably disable the subslices it is not using, and others should
> fine-tune the number to match their workload.
> 
> We expose complete control over the RPCS register, allowing
> configuration of slice/subslice, via masks packed into a u64 for
> simplicity. For example,
> 
>         struct drm_i915_gem_context_param arg;
> 
>         memset(&arg, 0, sizeof(arg));
>         arg.ctx_id = ctx;
>         arg.param = I915_CONTEXT_PARAM_SSEU;
>         if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg) == 0) {
>                 union drm_i915_gem_context_param_sseu *sseu = &arg.value;
> 
>                 sseu->packed.subslice_mask = 0;
> 
>                 drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
>         }
> 
> could be used to disable all subslices where supported.
> 
> v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> CC: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> CC: Zhipeng Gong <zhipeng.gong@intel.com>
> CC: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_context.c | 11 ++++++
>  drivers/gpu/drm/i915/intel_lrc.c        | 69 +++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_lrc.h        |  2 +
>  include/uapi/drm/i915_drm.h             | 11 ++++++
>  4 files changed, 93 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 97fcb01d70eb..d399b03f452c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -1042,6 +1042,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>         case I915_CONTEXT_PARAM_BANNABLE:
>                 args->value = i915_gem_context_is_bannable(ctx);
>                 break;
> +       case I915_CONTEXT_PARAM_SSEU:
> +               args->value = intel_lr_context_get_sseu(ctx);
> +               break;
>         default:
>                 ret = -EINVAL;
>                 break;
> @@ -1097,6 +1100,14 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>                 else
>                         i915_gem_context_clear_bannable(ctx);
>                 break;
> +       case I915_CONTEXT_PARAM_SSEU:
> +               if (args->size)
> +                       ret = -EINVAL;
> +               else if (!i915.enable_execlists)
> +                       ret = -ENODEV;
> +               else
> +                       ret = intel_lr_context_set_sseu(ctx, args->value);
> +               break;
>         default:
>                 ret = -EINVAL;
>                 break;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 1693fd9d279b..c063b84911d5 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2122,3 +2122,72 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
>                 }
>         }
>  }
> +
> +int intel_lr_context_set_sseu(struct i915_gem_context *ctx, u64 value)
> +{
> +       union drm_i915_gem_context_param_sseu user = { .value = value };
> +       struct drm_i915_private *i915 = ctx->i915;
> +       struct intel_context *ce = &ctx->engine[RCS];
> +       struct sseu_dev_info sseu = ctx->engine[RCS].sseu;
> +       struct intel_engine_cs *engine;
> +       enum intel_engine_id id;
> +       int ret;

I have a note saying that we want to pass in (engine, class), and so
forgo the packed value and switch to an array of structs.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* ✗ Fi.CI.IGT: failure for drm/i915: enable userspace to program slice/subslice programming
  2017-09-01 17:12 [RFC PATCH 0/4] drm/i915: enable userspace to program slice/subslice programming Lionel Landwerlin
                   ` (4 preceding siblings ...)
  2017-09-01 17:44 ` ✓ Fi.CI.BAT: success for drm/i915: enable userspace to program slice/subslice programming Patchwork
@ 2017-09-01 22:21 ` Patchwork
  5 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2017-09-01 22:21 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: enable userspace to program slice/subslice programming
URL   : https://patchwork.freedesktop.org/series/29715/
State : failure

== Summary ==

Test perf:
        Subgroup blocking:
                fail       -> PASS       (shard-hsw) fdo#102252
Test gem_ctx_param:
        Subgroup invalid-param-get:
                pass       -> FAIL       (shard-hsw)
        Subgroup invalid-param-set:
                pass       -> FAIL       (shard-hsw)

fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252

shard-hsw        total:2263 pass:1232 dwarn:0   dfail:0   fail:15  skip:1015 time:9461s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_5569/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH 4/4] drm/i915: Expose RPCS (SSEU) configuration to userspace
  2017-09-01 17:12 ` [RFC PATCH 4/4] drm/i915: Expose RPCS (SSEU) configuration to userspace Lionel Landwerlin
  2017-09-01 18:58   ` Chris Wilson
@ 2017-09-04  6:49   ` Joonas Lahtinen
  1 sibling, 0 replies; 10+ messages in thread
From: Joonas Lahtinen @ 2017-09-04  6:49 UTC (permalink / raw)
  To: Lionel Landwerlin, intel-gfx

On Fri, 2017-09-01 at 18:12 +0100, Lionel Landwerlin wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
> 
> We want to allow userspace to reconfigure the subslice configuration for
> its own use case. To do so, we expose a context parameter to allow
> adjustment of the RPCS register stored within the context image (and
> currently not accessible via LRI). If the context is adjusted before
> first use, the adjustment is for "free"; otherwise if the context is
> active we flush the context off the GPU (stalling all users) and forcing
> the GPU to save the context to memory where we can modify it and so
> ensure that the register is reloaded on next execution.
> 
> The overhead of managing additional EU subslices can be significant,
> especially in multi-context workloads. Non-GPGPU contexts should
> preferably disable the subslices it is not using, and others should
> fine-tune the number to match their workload.
> 
> We expose complete control over the RPCS register, allowing
> configuration of slice/subslice, via masks packed into a u64 for
> simplicity. For example,
> 
> 	struct drm_i915_gem_context_param arg;
> 
> 	memset(&arg, 0, sizeof(arg));
> 	arg.ctx_id = ctx;
> 	arg.param = I915_CONTEXT_PARAM_SSEU;
> 	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg) == 0) {
> 		union drm_i915_gem_context_param_sseu *sseu = &arg.value;
> 
> 		sseu->packed.subslice_mask = 0;
> 
> 		drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
> 	}
> 
> could be used to disable all subslices where supported.
> 
> v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> CC: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> CC: Zhipeng Gong <zhipeng.gong@intel.com>
> CC: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Please do link to the userspace patches, will be easier to track them
that way.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH 4/4] drm/i915: Expose RPCS (SSEU) configuration to userspace
  2017-09-01 18:58   ` Chris Wilson
@ 2017-09-21 17:16     ` Lionel Landwerlin
  0 siblings, 0 replies; 10+ messages in thread
From: Lionel Landwerlin @ 2017-09-21 17:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 01/09/17 19:58, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2017-09-01 18:12:30)
>> From: Chris Wilson <chris@chris-wilson.co.uk>
>>
>> We want to allow userspace to reconfigure the subslice configuration for
>> its own use case. To do so, we expose a context parameter to allow
>> adjustment of the RPCS register stored within the context image (and
>> currently not accessible via LRI). If the context is adjusted before
>> first use, the adjustment is for "free"; otherwise if the context is
>> active we flush the context off the GPU (stalling all users) and forcing
>> the GPU to save the context to memory where we can modify it and so
>> ensure that the register is reloaded on next execution.
>>
>> The overhead of managing additional EU subslices can be significant,
>> especially in multi-context workloads. Non-GPGPU contexts should
>> preferably disable the subslices it is not using, and others should
>> fine-tune the number to match their workload.
>>
>> We expose complete control over the RPCS register, allowing
>> configuration of slice/subslice, via masks packed into a u64 for
>> simplicity. For example,
>>
>>          struct drm_i915_gem_context_param arg;
>>
>>          memset(&arg, 0, sizeof(arg));
>>          arg.ctx_id = ctx;
>>          arg.param = I915_CONTEXT_PARAM_SSEU;
>>          if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg) == 0) {
>>                  union drm_i915_gem_context_param_sseu *sseu = &arg.value;
>>
>>                  sseu->packed.subslice_mask = 0;
>>
>>                  drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
>>          }
>>
>> could be used to disable all subslices where supported.
>>
>> v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)
>>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
>> CC: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> CC: Zhipeng Gong <zhipeng.gong@intel.com>
>> CC: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem_context.c | 11 ++++++
>>   drivers/gpu/drm/i915/intel_lrc.c        | 69 +++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/intel_lrc.h        |  2 +
>>   include/uapi/drm/i915_drm.h             | 11 ++++++
>>   4 files changed, 93 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>> index 97fcb01d70eb..d399b03f452c 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -1042,6 +1042,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>>          case I915_CONTEXT_PARAM_BANNABLE:
>>                  args->value = i915_gem_context_is_bannable(ctx);
>>                  break;
>> +       case I915_CONTEXT_PARAM_SSEU:
>> +               args->value = intel_lr_context_get_sseu(ctx);
>> +               break;
>>          default:
>>                  ret = -EINVAL;
>>                  break;
>> @@ -1097,6 +1100,14 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>>                  else
>>                          i915_gem_context_clear_bannable(ctx);
>>                  break;
>> +       case I915_CONTEXT_PARAM_SSEU:
>> +               if (args->size)
>> +                       ret = -EINVAL;
>> +               else if (!i915.enable_execlists)
>> +                       ret = -ENODEV;
>> +               else
>> +                       ret = intel_lr_context_set_sseu(ctx, args->value);
>> +               break;
>>          default:
>>                  ret = -EINVAL;
>>                  break;
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index 1693fd9d279b..c063b84911d5 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -2122,3 +2122,72 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
>>                  }
>>          }
>>   }
>> +
>> +int intel_lr_context_set_sseu(struct i915_gem_context *ctx, u64 value)
>> +{
>> +       union drm_i915_gem_context_param_sseu user = { .value = value };
>> +       struct drm_i915_private *i915 = ctx->i915;
>> +       struct intel_context *ce = &ctx->engine[RCS];
>> +       struct sseu_dev_info sseu = ctx->engine[RCS].sseu;
>> +       struct intel_engine_cs *engine;
>> +       enum intel_engine_id id;
>> +       int ret;
> I have a note saying that we want to pass in (engine, class), and so
> forgo the packed value and switch to an array of structs.
> -Chris
>
Not sure what you meant by class. I've used the same flags as the 
execbuff2 field.

I'm programming only the RCS (since documentation says that's the only 
thing that is allowed).
But reading the R_PWR_CLK_STATE from other engine seems to give 
incoherent results...
Would you expect that?

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2017-09-21 17:16 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-01 17:12 [RFC PATCH 0/4] drm/i915: enable userspace to program slice/subslice programming Lionel Landwerlin
2017-09-01 17:12 ` [RFC PATCH 1/4] drm/i915: Record both min/max eu_per_subslice in sseu_dev_info Lionel Landwerlin
2017-09-01 17:12 ` [RFC PATCH 2/4] drm/i915: Program RPCS for Broadwell Lionel Landwerlin
2017-09-01 17:12 ` [RFC PATCH 3/4] drm/i915: Record the sseu configuration per-context & engine Lionel Landwerlin
2017-09-01 17:12 ` [RFC PATCH 4/4] drm/i915: Expose RPCS (SSEU) configuration to userspace Lionel Landwerlin
2017-09-01 18:58   ` Chris Wilson
2017-09-21 17:16     ` Lionel Landwerlin
2017-09-04  6:49   ` Joonas Lahtinen
2017-09-01 17:44 ` ✓ Fi.CI.BAT: success for drm/i915: enable userspace to program slice/subslice programming Patchwork
2017-09-01 22:21 ` ✗ Fi.CI.IGT: failure " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.