* [RFC PATCH 1/4] drm/i915: Record both min/max eu_per_subslice in sseu_dev_info
2017-09-01 17:12 [RFC PATCH 0/4] drm/i915: enable userspace to program slice/subslice programming Lionel Landwerlin
@ 2017-09-01 17:12 ` Lionel Landwerlin
2017-09-01 17:12 ` [RFC PATCH 2/4] drm/i915: Program RPCS for Broadwell Lionel Landwerlin
` (4 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Lionel Landwerlin @ 2017-09-01 17:12 UTC (permalink / raw)
To: intel-gfx
From: Chris Wilson <chris@chris-wilson.co.uk>
When we query the available EUs on each subslice, we currently only
report the maximum. It would be useful to report the minimum found as
well.
When we set RPCS (power gating over the EU), we can also specify both
the min and max number of EUs to configure on each subslice; currently we
just set it to a single value, but the flexibility may be beneficial in
the future.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 36 +++++++++++++++++++++++---------
drivers/gpu/drm/i915/i915_drv.h | 3 ++-
drivers/gpu/drm/i915/intel_device_info.c | 32 +++++++++++++++++-----------
drivers/gpu/drm/i915/intel_lrc.c | 4 ++--
4 files changed, 50 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 48572b157222..ac3749f5a7a0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4491,6 +4491,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_cache_sharing_fops,
static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
struct sseu_dev_info *sseu)
{
+ unsigned int min_eu_per_subslice, max_eu_per_subslice;
int ss_max = 2;
int ss;
u32 sig1[ss_max], sig2[ss_max];
@@ -4500,6 +4501,9 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
sig2[0] = I915_READ(CHV_POWER_SS0_SIG2);
sig2[1] = I915_READ(CHV_POWER_SS1_SIG2);
+ min_eu_per_subslice = ~0u;
+ max_eu_per_subslice = 0;
+
for (ss = 0; ss < ss_max; ss++) {
unsigned int eu_cnt;
@@ -4514,14 +4518,18 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
((sig2[ss] & CHV_EU311_PG_ENABLE) ? 0 : 2);
sseu->eu_total += eu_cnt;
- sseu->eu_per_subslice = max_t(unsigned int,
- sseu->eu_per_subslice, eu_cnt);
+ min_eu_per_subslice = min(min_eu_per_subslice, eu_cnt);
+ max_eu_per_subslice = max(max_eu_per_subslice, eu_cnt);
}
+
+ sseu->min_eu_per_subslice = min_eu_per_subslice;
+ sseu->max_eu_per_subslice = max_eu_per_subslice;
}
static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
struct sseu_dev_info *sseu)
{
+ unsigned int min_eu_per_subslice, max_eu_per_subslice;
int s_max = 3, ss_max = 4;
int s, ss;
u32 s_reg[s_max], eu_reg[2*s_max], eu_mask[2];
@@ -4547,6 +4555,9 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
GEN9_PGCTL_SSB_EU210_ACK |
GEN9_PGCTL_SSB_EU311_ACK;
+ min_eu_per_subslice = ~0u;
+ max_eu_per_subslice = 0;
+
for (s = 0; s < s_max; s++) {
if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
/* skip disabled slice */
@@ -4572,11 +4583,14 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
eu_mask[ss%2]);
sseu->eu_total += eu_cnt;
- sseu->eu_per_subslice = max_t(unsigned int,
- sseu->eu_per_subslice,
- eu_cnt);
+
+ min_eu_per_subslice = min(min_eu_per_subslice, eu_cnt);
+ max_eu_per_subslice = max(max_eu_per_subslice, eu_cnt);
}
}
+
+ sseu->min_eu_per_subslice = min_eu_per_subslice;
+ sseu->max_eu_per_subslice = max_eu_per_subslice;
}
static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
@@ -4589,9 +4603,11 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
if (sseu->slice_mask) {
sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
- sseu->eu_per_subslice =
- INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
- sseu->eu_total = sseu->eu_per_subslice *
+ sseu->min_eu_per_subslice =
+ INTEL_INFO(dev_priv)->sseu.min_eu_per_subslice;
+ sseu->max_eu_per_subslice =
+ INTEL_INFO(dev_priv)->sseu.max_eu_per_subslice;
+ sseu->eu_total = sseu->max_eu_per_subslice *
sseu_subslice_total(sseu);
/* subtract fused off EU(s) from enabled slice(s) */
@@ -4622,8 +4638,8 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
hweight8(sseu->subslice_mask));
seq_printf(m, " %s EU Total: %u\n", type,
sseu->eu_total);
- seq_printf(m, " %s EU Per Subslice: %u\n", type,
- sseu->eu_per_subslice);
+ seq_printf(m, " %s EU Per Subslice: [%u, %u]\n", type,
+ sseu->min_eu_per_subslice, sseu->max_eu_per_subslice);
if (!is_available_info)
return;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cfe86d78ffd2..4eea89751608 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -804,7 +804,8 @@ struct sseu_dev_info {
u8 slice_mask;
u8 subslice_mask;
u8 eu_total;
- u8 eu_per_subslice;
+ u8 min_eu_per_subslice;
+ u8 max_eu_per_subslice;
u8 min_eu_in_pool;
/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
u8 subslice_7eu[3];
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 5f91ddc78c7a..05ef5e1b1750 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -85,6 +85,7 @@ void intel_device_info_dump(struct drm_i915_private *dev_priv)
static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
{
struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
+ unsigned int eu_per_subslice;
u32 fuse, eu_dis;
fuse = I915_READ(CHV_FUSE_GT);
@@ -109,9 +110,10 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
* CHV expected to always have a uniform distribution of EU
* across subslices.
*/
- sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
- sseu->eu_total / sseu_subslice_total(sseu) :
- 0;
+ eu_per_subslice = sseu_subslice_total(sseu) ?
+ sseu->eu_total / sseu_subslice_total(sseu) : 0;
+ sseu->min_eu_per_subslice = eu_per_subslice;
+ sseu->max_eu_per_subslice = eu_per_subslice;
/*
* CHV supports subslice power gating on devices with more than
* one subslice, and supports EU power gating on devices with
@@ -119,13 +121,14 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
*/
sseu->has_slice_pg = 0;
sseu->has_subslice_pg = sseu_subslice_total(sseu) > 1;
- sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
+ sseu->has_eu_pg = eu_per_subslice > 2;
}
static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
{
struct intel_device_info *info = mkwrite_device_info(dev_priv);
struct sseu_dev_info *sseu = &info->sseu;
+ unsigned int eu_per_subslice;
int s_max = 3, ss_max = 4, eu_max = 8;
int s, ss;
u32 fuse2, eu_disable;
@@ -181,9 +184,10 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
* recovery. BXT is expected to be perfectly uniform in EU
* distribution.
*/
- sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
- DIV_ROUND_UP(sseu->eu_total,
- sseu_subslice_total(sseu)) : 0;
+ eu_per_subslice = sseu_subslice_total(sseu) ?
+ DIV_ROUND_UP(sseu->eu_total, sseu_subslice_total(sseu)) : 0;
+ sseu->min_eu_per_subslice = eu_per_subslice;
+ sseu->max_eu_per_subslice = eu_per_subslice;
/*
* SKL+ supports slice power gating on devices with more than
* one slice, and supports EU power gating on devices with
@@ -196,7 +200,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
!IS_GEN9_LP(dev_priv) && hweight8(sseu->slice_mask) > 1;
sseu->has_subslice_pg =
IS_GEN9_LP(dev_priv) && sseu_subslice_total(sseu) > 1;
- sseu->has_eu_pg = sseu->eu_per_subslice > 2;
+ sseu->has_eu_pg = eu_per_subslice > 2;
if (IS_GEN9_LP(dev_priv)) {
#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask & BIT(ss)))
@@ -229,6 +233,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
{
struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
const int s_max = 3, ss_max = 3, eu_max = 8;
+ unsigned int eu_per_subslice;
int s, ss;
u32 fuse2, eu_disable[3]; /* s_max */
@@ -283,9 +288,10 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
* subslices with the exception that any one EU in any one subslice may
* be fused off for die recovery.
*/
- sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
- DIV_ROUND_UP(sseu->eu_total,
- sseu_subslice_total(sseu)) : 0;
+ eu_per_subslice = sseu_subslice_total(sseu) ?
+ DIV_ROUND_UP(sseu->eu_total, sseu_subslice_total(sseu)) : 0;
+ sseu->min_eu_per_subslice = eu_per_subslice;
+ sseu->max_eu_per_subslice = eu_per_subslice;
/*
* BDW supports slice power gating on devices with more than
@@ -422,7 +428,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
DRM_DEBUG_DRIVER("subslice per slice: %u\n",
hweight8(info->sseu.subslice_mask));
DRM_DEBUG_DRIVER("EU total: %u\n", info->sseu.eu_total);
- DRM_DEBUG_DRIVER("EU per subslice: %u\n", info->sseu.eu_per_subslice);
+ DRM_DEBUG_DRIVER("EU per subslice: [%u, %u]\n",
+ info->sseu.min_eu_per_subslice,
+ info->sseu.max_eu_per_subslice);
DRM_DEBUG_DRIVER("has slice power gating: %s\n",
info->sseu.has_slice_pg ? "y" : "n");
DRM_DEBUG_DRIVER("has subslice power gating: %s\n",
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d89e1b8e1cc5..7fa0d654a4c2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1883,9 +1883,9 @@ make_rpcs(struct drm_i915_private *dev_priv)
}
if (INTEL_INFO(dev_priv)->sseu.has_eu_pg) {
- rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice <<
+ rpcs |= INTEL_INFO(dev_priv)->sseu.min_eu_per_subslice <<
GEN8_RPCS_EU_MIN_SHIFT;
- rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice <<
+ rpcs |= INTEL_INFO(dev_priv)->sseu.max_eu_per_subslice <<
GEN8_RPCS_EU_MAX_SHIFT;
rpcs |= GEN8_RPCS_ENABLE;
}
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [RFC PATCH 3/4] drm/i915: Record the sseu configuration per-context & engine
2017-09-01 17:12 [RFC PATCH 0/4] drm/i915: enable userspace to program slice/subslice programming Lionel Landwerlin
2017-09-01 17:12 ` [RFC PATCH 1/4] drm/i915: Record both min/max eu_per_subslice in sseu_dev_info Lionel Landwerlin
2017-09-01 17:12 ` [RFC PATCH 2/4] drm/i915: Program RPCS for Broadwell Lionel Landwerlin
@ 2017-09-01 17:12 ` Lionel Landwerlin
2017-09-01 17:12 ` [RFC PATCH 4/4] drm/i915: Expose RPCS (SSEU) configuration to userspace Lionel Landwerlin
` (2 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Lionel Landwerlin @ 2017-09-01 17:12 UTC (permalink / raw)
To: intel-gfx
From: Chris Wilson <chris@chris-wilson.co.uk>
We want to expose the ability to reconfigure the slices, subslices and
EUs per context and per engine. To facilitate that, store the current
configuration on the context for each engine, which is initially set
to the device default upon creation.
v2: record sseu configuration per context & engine (Chris)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 19 -------------------
drivers/gpu/drm/i915/i915_gem_context.c | 6 ++++++
drivers/gpu/drm/i915/i915_gem_context.h | 21 +++++++++++++++++++++
drivers/gpu/drm/i915/intel_lrc.c | 23 +++++++++--------------
4 files changed, 36 insertions(+), 33 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4eea89751608..43d83ffae2d3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -800,25 +800,6 @@ struct intel_csr {
func(overlay_needs_physical); \
func(supports_tv);
-struct sseu_dev_info {
- u8 slice_mask;
- u8 subslice_mask;
- u8 eu_total;
- u8 min_eu_per_subslice;
- u8 max_eu_per_subslice;
- u8 min_eu_in_pool;
- /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
- u8 subslice_7eu[3];
- u8 has_slice_pg:1;
- u8 has_subslice_pg:1;
- u8 has_eu_pg:1;
-};
-
-static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
-{
- return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
-}
-
/* Keep in gen based order, and chronological order within a gen */
enum intel_platform {
INTEL_PLATFORM_UNINITIALIZED = 0,
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 58a2a44f88bd..97fcb01d70eb 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -257,6 +257,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
struct drm_i915_file_private *file_priv)
{
struct i915_gem_context *ctx;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
int ret;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -305,6 +307,10 @@ __create_hw_context(struct drm_i915_private *dev_priv,
* is no remap info, it will be a NOP. */
ctx->remap_slice = ALL_L3_SLICES(dev_priv);
+ /* On all engines, use the whole device by default */
+ for_each_engine(engine, dev_priv, id)
+ ctx->engine[id].sseu = INTEL_INFO(dev_priv)->sseu;
+
i915_gem_context_set_bannable(ctx);
ctx->ring_size = 4 * PAGE_SIZE;
ctx->desc_template =
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 44688e22a5c2..727b3b5bced1 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -40,6 +40,25 @@ struct i915_hw_ppgtt;
struct i915_vma;
struct intel_ring;
+struct sseu_dev_info {
+ u8 slice_mask;
+ u8 subslice_mask;
+ u8 eu_total;
+ u8 min_eu_per_subslice;
+ u8 max_eu_per_subslice;
+ u8 min_eu_in_pool;
+ /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
+ u8 subslice_7eu[3];
+ u8 has_slice_pg:1;
+ u8 has_subslice_pg:1;
+ u8 has_eu_pg:1;
+};
+
+static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
+{
+ return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
+}
+
#define DEFAULT_CONTEXT_HANDLE 0
/**
@@ -158,6 +177,8 @@ struct i915_gem_context {
u64 lrc_desc;
int pin_count;
bool initialised;
+ /** sseu: Control eu/slice partitioning */
+ struct sseu_dev_info sseu;
} engine[I915_NUM_ENGINES];
/** ring_size: size for allocating the per-engine ring buffer */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8d1649899f12..1693fd9d279b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1850,8 +1850,7 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine)
return logical_ring_init(engine);
}
-static u32
-make_rpcs(struct drm_i915_private *dev_priv)
+static u32 make_rpcs(const struct sseu_dev_info *sseu)
{
u32 rpcs = 0;
@@ -1861,25 +1860,21 @@ make_rpcs(struct drm_i915_private *dev_priv)
* must make an explicit request through RPCS for full
* enablement.
*/
- if (INTEL_INFO(dev_priv)->sseu.has_slice_pg) {
+ if (sseu->has_slice_pg) {
rpcs |= GEN8_RPCS_S_CNT_ENABLE;
- rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask) <<
- GEN8_RPCS_S_CNT_SHIFT;
+ rpcs |= hweight8(sseu->slice_mask) << GEN8_RPCS_S_CNT_SHIFT;
rpcs |= GEN8_RPCS_ENABLE;
}
- if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
+ if (sseu->has_subslice_pg) {
rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
- rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
- GEN8_RPCS_SS_CNT_SHIFT;
+ rpcs |= hweight8(sseu->subslice_mask) << GEN8_RPCS_SS_CNT_SHIFT;
rpcs |= GEN8_RPCS_ENABLE;
}
- if (INTEL_INFO(dev_priv)->sseu.has_eu_pg) {
- rpcs |= INTEL_INFO(dev_priv)->sseu.min_eu_per_subslice <<
- GEN8_RPCS_EU_MIN_SHIFT;
- rpcs |= INTEL_INFO(dev_priv)->sseu.max_eu_per_subslice <<
- GEN8_RPCS_EU_MAX_SHIFT;
+ if (sseu->has_eu_pg) {
+ rpcs |= sseu->min_eu_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
+ rpcs |= sseu->max_eu_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
rpcs |= GEN8_RPCS_ENABLE;
}
@@ -1993,7 +1988,7 @@ static void execlists_init_reg_state(u32 *regs,
if (rcs) {
regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
- make_rpcs(dev_priv));
+ make_rpcs(&ctx->engine[engine->id].sseu));
i915_oa_init_reg_state(engine, ctx, regs);
}
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [RFC PATCH 4/4] drm/i915: Expose RPCS (SSEU) configuration to userspace
2017-09-01 17:12 [RFC PATCH 0/4] drm/i915: enable userspace to program slice/subslice programming Lionel Landwerlin
` (2 preceding siblings ...)
2017-09-01 17:12 ` [RFC PATCH 3/4] drm/i915: Record the sseu configuration per-context & engine Lionel Landwerlin
@ 2017-09-01 17:12 ` Lionel Landwerlin
2017-09-01 18:58 ` Chris Wilson
2017-09-04 6:49 ` Joonas Lahtinen
2017-09-01 17:44 ` ✓ Fi.CI.BAT: success for drm/i915: enable userspace to program slice/subslice programming Patchwork
2017-09-01 22:21 ` ✗ Fi.CI.IGT: failure " Patchwork
5 siblings, 2 replies; 10+ messages in thread
From: Lionel Landwerlin @ 2017-09-01 17:12 UTC (permalink / raw)
To: intel-gfx
From: Chris Wilson <chris@chris-wilson.co.uk>
We want to allow userspace to reconfigure the subslice configuration for
its own use case. To do so, we expose a context parameter to allow
adjustment of the RPCS register stored within the context image (and
currently not accessible via LRI). If the context is adjusted before
first use, the adjustment is for "free"; otherwise, if the context is
active, we flush the context off the GPU (stalling all users), forcing
the GPU to save the context to memory where we can modify it and so
ensure that the register is reloaded on next execution.
The overhead of managing additional EU subslices can be significant,
especially in multi-context workloads. Non-GPGPU contexts should
preferably disable the subslices they are not using, and others should
fine-tune the number to match their workload.
We expose complete control over the RPCS register, allowing
configuration of slice/subslice, via masks packed into a u64 for
simplicity. For example,
struct drm_i915_gem_context_param arg;
memset(&arg, 0, sizeof(arg));
arg.ctx_id = ctx;
arg.param = I915_CONTEXT_PARAM_SSEU;
if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg) == 0) {
union drm_i915_gem_context_param_sseu *sseu = &arg.value;
sseu->packed.subslice_mask = 0;
drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
}
could be used to disable all subslices where supported.
v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
CC: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
CC: Zhipeng Gong <zhipeng.gong@intel.com>
CC: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
drivers/gpu/drm/i915/i915_gem_context.c | 11 ++++++
drivers/gpu/drm/i915/intel_lrc.c | 69 +++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/intel_lrc.h | 2 +
include/uapi/drm/i915_drm.h | 11 ++++++
4 files changed, 93 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 97fcb01d70eb..d399b03f452c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1042,6 +1042,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
case I915_CONTEXT_PARAM_BANNABLE:
args->value = i915_gem_context_is_bannable(ctx);
break;
+ case I915_CONTEXT_PARAM_SSEU:
+ args->value = intel_lr_context_get_sseu(ctx);
+ break;
default:
ret = -EINVAL;
break;
@@ -1097,6 +1100,14 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
else
i915_gem_context_clear_bannable(ctx);
break;
+ case I915_CONTEXT_PARAM_SSEU:
+ if (args->size)
+ ret = -EINVAL;
+ else if (!i915.enable_execlists)
+ ret = -ENODEV;
+ else
+ ret = intel_lr_context_set_sseu(ctx, args->value);
+ break;
default:
ret = -EINVAL;
break;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1693fd9d279b..c063b84911d5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2122,3 +2122,72 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
}
}
}
+
+int intel_lr_context_set_sseu(struct i915_gem_context *ctx, u64 value)
+{
+ union drm_i915_gem_context_param_sseu user = { .value = value };
+ struct drm_i915_private *i915 = ctx->i915;
+ struct intel_context *ce = &ctx->engine[RCS];
+ struct sseu_dev_info sseu = ctx->engine[RCS].sseu;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int ret;
+
+ lockdep_assert_held(&i915->drm.struct_mutex);
+
+ sseu.slice_mask =
+ user.packed.slice_mask & INTEL_INFO(i915)->sseu.slice_mask;
+ sseu.subslice_mask =
+ user.packed.subslice_mask & INTEL_INFO(i915)->sseu.subslice_mask;
+ sseu.min_eu_per_subslice =
+ max(user.packed.min_eu_per_subslice,
+ INTEL_INFO(i915)->sseu.min_eu_per_subslice);
+ sseu.max_eu_per_subslice =
+ min(user.packed.max_eu_per_subslice,
+ INTEL_INFO(i915)->sseu.max_eu_per_subslice);
+
+ if (memcmp(&sseu, &ctx->engine[RCS].sseu, sizeof(sseu)) == 0)
+ return 0;
+
+ if (ce->pin_count) { /* Assume that the context is active! */
+ ret = i915_gem_switch_to_kernel_context(i915);
+ if (ret)
+ return ret;
+
+ ret = i915_gem_wait_for_idle(i915,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED);
+ if (ret)
+ return ret;
+ }
+
+ if (ce->state) {
+ u32 *regs;
+
+ regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB) +
+ LRC_STATE_PN * PAGE_SIZE;
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);
+
+ regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(&sseu);
+ i915_gem_object_unpin_map(ce->state->obj);
+ }
+
+ for_each_engine(engine, i915, id)
+ ctx->engine[id].sseu = sseu;
+
+ return 0;
+}
+
+u64 intel_lr_context_get_sseu(struct i915_gem_context *ctx)
+{
+ union drm_i915_gem_context_param_sseu user;
+ const struct sseu_dev_info *sseu = &ctx->engine[RCS].sseu;
+
+ user.packed.slice_mask = sseu->slice_mask;
+ user.packed.subslice_mask = sseu->subslice_mask;
+ user.packed.min_eu_per_subslice = sseu->min_eu_per_subslice;
+ user.packed.max_eu_per_subslice = sseu->max_eu_per_subslice;
+
+ return user.value;
+}
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 57ef5833c427..4ef6a6143f5d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -80,6 +80,8 @@ struct i915_gem_context;
void intel_lr_context_resume(struct drm_i915_private *dev_priv);
uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
struct intel_engine_cs *engine);
+int intel_lr_context_set_sseu(struct i915_gem_context *ctx, u64 value);
+u64 intel_lr_context_get_sseu(struct i915_gem_context *ctx);
/* Execlists */
int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 6598fb76d2c2..f1e4d0c8c63b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1358,6 +1358,17 @@ struct drm_i915_gem_context_param {
#define I915_CONTEXT_PARAM_GTT_SIZE 0x3
#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4
#define I915_CONTEXT_PARAM_BANNABLE 0x5
+#define I915_CONTEXT_PARAM_SSEU 0x6
+ __u64 value;
+};
+
+union drm_i915_gem_context_param_sseu {
+ struct {
+ __u8 slice_mask;
+ __u8 subslice_mask;
+ __u8 min_eu_per_subslice;
+ __u8 max_eu_per_subslice;
+ } packed;
__u64 value;
};
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 10+ messages in thread