All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v11 0/6] drm/i915: expose RCS topology to userspace
@ 2018-01-22  8:21 Lionel Landwerlin
  2018-01-22  8:21 ` [PATCH v11 1/6] drm/i915: store all subslice masks Lionel Landwerlin
                   ` (7 more replies)
  0 siblings, 8 replies; 16+ messages in thread
From: Lionel Landwerlin @ 2018-01-22  8:21 UTC (permalink / raw)
  To: intel-gfx

Hi all,

Just another update with only one change to the last patch (fixing a
comment).

Cheers,

Lionel Landwerlin (6):
  drm/i915: store all subslice masks
  drm/i915/debugfs: reuse max slice/subslices already stored in sseu
  drm/i915/debugfs: add rcs topology entry
  drm/i915: add rcs topology to error state
  drm/i915: add query uAPI
  drm/i915: expose rcs topology through query uAPI

 drivers/gpu/drm/i915/Makefile            |   1 +
 drivers/gpu/drm/i915/i915_debugfs.c      |  63 +++++----
 drivers/gpu/drm/i915/i915_drv.c          |   3 +-
 drivers/gpu/drm/i915/i915_drv.h          |   3 +
 drivers/gpu/drm/i915/i915_gpu_error.c    |   1 +
 drivers/gpu/drm/i915/i915_query.c        | 177 ++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_device_info.c | 226 ++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_device_info.h |  49 ++++++-
 drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |   2 +-
 include/uapi/drm/i915_drm.h              | 112 +++++++++++++++
 11 files changed, 560 insertions(+), 79 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_query.c

--
2.15.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v11 1/6] drm/i915: store all subslice masks
  2018-01-22  8:21 [PATCH v11 0/6] drm/i915: expose RCS topology to userspace Lionel Landwerlin
@ 2018-01-22  8:21 ` Lionel Landwerlin
  2018-01-22  8:21 ` [PATCH v11 2/6] drm/i915/debugfs: reuse max slice/subslices already stored in sseu Lionel Landwerlin
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 16+ messages in thread
From: Lionel Landwerlin @ 2018-01-22  8:21 UTC (permalink / raw)
  To: intel-gfx

Up to now, subslice mask was assumed to be uniform across slices. But
starting with Cannonlake, slices can be asymmetric (for example slice0
has different number of subslices as slice1+). This change stores all
subslices masks for all slices rather than having a single mask that
applies to all slices.

v2: Rework how we store total numbers in sseu_dev_info (Tvrtko)
    Fix CHV eu masks, was reading disabled as enabled (Tvrtko)
    Readability changes (Tvrtko)
    Add EU index helper (Tvrtko)

v3: Turn ALIGN(v, 8) / 8 into DIV_ROUND_UP(v, BITS_PER_BYTE) (Tvrtko)
    Reuse sseu_eu_idx() for setting eu_mask on CHV (Tvrtko)
    Reformat debug prints for subslices (Tvrtko)

v4: Change eu_mask helper into sseu_set_eus() (Tvrtko)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  25 ++--
 drivers/gpu/drm/i915/i915_drv.c          |   2 +-
 drivers/gpu/drm/i915/intel_device_info.c | 201 +++++++++++++++++++++++--------
 drivers/gpu/drm/i915/intel_device_info.h |  47 +++++++-
 drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |   2 +-
 6 files changed, 216 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 80dc679c0f01..a9d0f58ce18b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4289,7 +4289,7 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
 			continue;
 
 		sseu->slice_mask = BIT(0);
-		sseu->subslice_mask |= BIT(ss);
+		sseu->subslice_mask[0] |= BIT(ss);
 		eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
 			 ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
 			 ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
@@ -4336,7 +4336,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
 			continue;
 
 		sseu->slice_mask |= BIT(s);
-		sseu->subslice_mask = info->sseu.subslice_mask;
+		sseu->subslice_mask[s] = info->sseu.subslice_mask[s];
 
 		for (ss = 0; ss < ss_max; ss++) {
 			unsigned int eu_cnt;
@@ -4391,8 +4391,8 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 		sseu->slice_mask |= BIT(s);
 
 		if (IS_GEN9_BC(dev_priv))
-			sseu->subslice_mask =
-				INTEL_INFO(dev_priv)->sseu.subslice_mask;
+			sseu->subslice_mask[s] =
+				INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
 
 		for (ss = 0; ss < ss_max; ss++) {
 			unsigned int eu_cnt;
@@ -4402,7 +4402,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 					/* skip disabled subslice */
 					continue;
 
-				sseu->subslice_mask |= BIT(ss);
+				sseu->subslice_mask[s] |= BIT(ss);
 			}
 
 			eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
@@ -4424,9 +4424,12 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
 	sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
 
 	if (sseu->slice_mask) {
-		sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
 		sseu->eu_per_subslice =
 				INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
+		for (s = 0; s < fls(sseu->slice_mask); s++) {
+			sseu->subslice_mask[s] =
+				INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
+		}
 		sseu->eu_total = sseu->eu_per_subslice *
 				 sseu_subslice_total(sseu);
 
@@ -4445,6 +4448,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	const char *type = is_available_info ? "Available" : "Enabled";
+	int s;
 
 	seq_printf(m, "  %s Slice Mask: %04x\n", type,
 		   sseu->slice_mask);
@@ -4452,10 +4456,11 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
 		   hweight8(sseu->slice_mask));
 	seq_printf(m, "  %s Subslice Total: %u\n", type,
 		   sseu_subslice_total(sseu));
-	seq_printf(m, "  %s Subslice Mask: %04x\n", type,
-		   sseu->subslice_mask);
-	seq_printf(m, "  %s Subslice Per Slice: %u\n", type,
-		   hweight8(sseu->subslice_mask));
+	for (s = 0; s < fls(sseu->slice_mask); s++) {
+		seq_printf(m, "  %s Slice%i %u subslices, mask=%04x\n", type,
+			   s, hweight8(sseu->subslice_mask[s]),
+			   sseu->subslice_mask[s]);
+	}
 	seq_printf(m, "  %s EU Total: %u\n", type,
 		   sseu->eu_total);
 	seq_printf(m, "  %s EU Per Subslice: %u\n", type,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1fbe37889d92..6d34bc1a7fff 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -418,7 +418,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 			return -ENODEV;
 		break;
 	case I915_PARAM_SUBSLICE_MASK:
-		value = INTEL_INFO(dev_priv)->sseu.subslice_mask;
+		value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0];
 		if (!value)
 			return -ENODEV;
 		break;
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index a2c16140169f..80cf72069bae 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -81,12 +81,16 @@ void intel_device_info_dump_flags(const struct intel_device_info *info,
 
 static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
 {
+	int s;
+
 	drm_printf(p, "slice mask: %04x\n", sseu->slice_mask);
 	drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask));
 	drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu));
-	drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask);
-	drm_printf(p, "subslice per slice: %u\n",
-		   hweight8(sseu->subslice_mask));
+	for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) {
+		drm_printf(p, "slice%d %u subslices mask=%04x\n",
+			   s, hweight8(sseu->subslice_mask[s]),
+			   sseu->subslice_mask[s]);
+	}
 	drm_printf(p, "EU total: %u\n", sseu->eu_total);
 	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
 	drm_printf(p, "has slice power gating: %s\n",
@@ -120,22 +124,87 @@ void intel_device_info_dump(const struct intel_device_info *info,
 	intel_device_info_dump_flags(info, p);
 }
 
+static u16 compute_eu_total(const struct sseu_dev_info *sseu)
+{
+	u16 i, total = 0;
+
+	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
+		total += hweight8(sseu->eu_mask[i]);
+
+	return total;
+}
+
+static u16 compute_subslice_total(const struct sseu_dev_info *sseu)
+{
+	u16 i, total = 0;
+
+	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
+		total += hweight8(sseu->subslice_mask[i]);
+
+	return total;
+}
+
 static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
 	const u32 fuse2 = I915_READ(GEN8_FUSE2);
+	int s, ss;
+	const int eu_mask = 0xff;
+	u32 subslice_mask, eu_en;
 
 	sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
 			    GEN10_F2_S_ENA_SHIFT;
-	sseu->subslice_mask = (1 << 4) - 1;
-	sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
-				 GEN10_F2_SS_DIS_SHIFT);
+	sseu->max_slices = 6;
+	sseu->max_subslices = 4;
+	sseu->max_eus_per_subslice = 8;
 
-	sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0));
-	sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1));
-	sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2));
-	sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) &
-				     GEN10_EU_DIS_SS_MASK));
+	subslice_mask = (1 << 4) - 1;
+	subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
+			   GEN10_F2_SS_DIS_SHIFT);
+
+	/*
+	 * Slice0 can have up to 3 subslices, but there are only 2 in
+	 * slice1/2.
+	 */
+	sseu->subslice_mask[0] = subslice_mask;
+	for (s = 1; s < sseu->max_slices; s++)
+		sseu->subslice_mask[s] = subslice_mask & 0x3;
+
+	/* Slice0 */
+	eu_en = ~I915_READ(GEN8_EU_DISABLE0);
+	for (ss = 0; ss < sseu->max_subslices; ss++)
+		sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask);
+	/* Slice1 */
+	sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask);
+	eu_en = ~I915_READ(GEN8_EU_DISABLE1);
+	sseu_set_eus(sseu, 1, 1, eu_en & eu_mask);
+	/* Slice2 */
+	sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask);
+	sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask);
+	/* Slice3 */
+	sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask);
+	eu_en = ~I915_READ(GEN8_EU_DISABLE2);
+	sseu_set_eus(sseu, 3, 1, eu_en & eu_mask);
+	/* Slice4 */
+	sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask);
+	sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask);
+	/* Slice5 */
+	sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask);
+	eu_en = ~I915_READ(GEN10_EU_DISABLE3);
+	sseu_set_eus(sseu, 5, 1, eu_en & eu_mask);
+
+	/* Do a second pass where we mark the subslices disabled if all their
+	 * eus are off.
+	 */
+	for (s = 0; s < sseu->max_slices; s++) {
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
+			if (sseu_get_eus(sseu, s, ss) == 0)
+				sseu->subslice_mask[s] &= ~BIT(ss);
+		}
+	}
+
+	sseu->subslice_total = compute_subslice_total(sseu);
+	sseu->eu_total = compute_eu_total(sseu);
 
 	/*
 	 * CNL is expected to always have a uniform distribution
@@ -156,26 +225,40 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
 static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
-	u32 fuse, eu_dis;
+	u32 fuse;
 
 	fuse = I915_READ(CHV_FUSE_GT);
 
 	sseu->slice_mask = BIT(0);
+	sseu->max_slices = 1;
+	sseu->max_subslices = 2;
+	sseu->max_eus_per_subslice = 8;
 
 	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
-		sseu->subslice_mask |= BIT(0);
-		eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
-				 CHV_FGT_EU_DIS_SS0_R1_MASK);
-		sseu->eu_total += 8 - hweight32(eu_dis);
+		u8 disabled_mask =
+			((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
+			 CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
+			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
+			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
+
+		sseu->subslice_mask[0] |= BIT(0);
+		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
 	}
 
 	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
-		sseu->subslice_mask |= BIT(1);
-		eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
-				 CHV_FGT_EU_DIS_SS1_R1_MASK);
-		sseu->eu_total += 8 - hweight32(eu_dis);
+		u8 disabled_mask =
+			((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
+			 CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
+			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
+			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
+
+		sseu->subslice_mask[0] |= BIT(1);
+		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
 	}
 
+	sseu->subslice_total = compute_subslice_total(sseu);
+	sseu->eu_total = compute_eu_total(sseu);
+
 	/*
 	 * CHV expected to always have a uniform distribution of EU
 	 * across subslices.
@@ -197,41 +280,52 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct intel_device_info *info = mkwrite_device_info(dev_priv);
 	struct sseu_dev_info *sseu = &info->sseu;
-	int s_max = 3, ss_max = 4, eu_max = 8;
 	int s, ss;
-	u32 fuse2, eu_disable;
-	u8 eu_mask = 0xff;
+	u32 fuse2, eu_disable, subslice_mask;
+	const u8 eu_mask = 0xff;
 
 	fuse2 = I915_READ(GEN8_FUSE2);
 	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
 
+	/* BXT has a single slice and at most 3 subslices. */
+	sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
+	sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
+	sseu->max_eus_per_subslice = 8;
+
 	/*
 	 * The subslice disable field is global, i.e. it applies
 	 * to each of the enabled slices.
 	*/
-	sseu->subslice_mask = (1 << ss_max) - 1;
-	sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
-				 GEN9_F2_SS_DIS_SHIFT);
+	subslice_mask = (1 << sseu->max_subslices) - 1;
+	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
+			   GEN9_F2_SS_DIS_SHIFT);
 
 	/*
 	 * Iterate through enabled slices and subslices to
 	 * count the total enabled EU.
 	*/
-	for (s = 0; s < s_max; s++) {
+	for (s = 0; s < sseu->max_slices; s++) {
 		if (!(sseu->slice_mask & BIT(s)))
 			/* skip disabled slice */
 			continue;
 
+		sseu->subslice_mask[s] = subslice_mask;
+
 		eu_disable = I915_READ(GEN9_EU_DISABLE(s));
-		for (ss = 0; ss < ss_max; ss++) {
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			int eu_per_ss;
+			u8 eu_disabled_mask;
 
-			if (!(sseu->subslice_mask & BIT(ss)))
+			if (!(sseu->subslice_mask[s] & BIT(ss)))
 				/* skip disabled subslice */
 				continue;
 
-			eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) &
-						      eu_mask);
+			eu_disabled_mask = (eu_disable >> (ss*8)) & eu_mask;
+
+			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
+
+			eu_per_ss = sseu->max_eus_per_subslice -
+				hweight8(eu_disabled_mask);
 
 			/*
 			 * Record which subslice(s) has(have) 7 EUs. we
@@ -240,11 +334,12 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 			 */
 			if (eu_per_ss == 7)
 				sseu->subslice_7eu[s] |= BIT(ss);
-
-			sseu->eu_total += eu_per_ss;
 		}
 	}
 
+	sseu->subslice_total = compute_subslice_total(sseu);
+	sseu->eu_total = compute_eu_total(sseu);
+
 	/*
 	 * SKL is expected to always have a uniform distribution
 	 * of EU across subslices with the exception that any one
@@ -270,8 +365,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 	sseu->has_eu_pg = sseu->eu_per_subslice > 2;
 
 	if (IS_GEN9_LP(dev_priv)) {
-#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask & BIT(ss)))
-		info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3;
+#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
+		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;
 
 		sseu->min_eu_in_pool = 0;
 		if (info->has_pooled_eu) {
@@ -289,19 +384,22 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
-	const int s_max = 3, ss_max = 3, eu_max = 8;
 	int s, ss;
-	u32 fuse2, eu_disable[3]; /* s_max */
+	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
 
 	fuse2 = I915_READ(GEN8_FUSE2);
 	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
+	sseu->max_slices = 3;
+	sseu->max_subslices = 3;
+	sseu->max_eus_per_subslice = 8;
+
 	/*
 	 * The subslice disable field is global, i.e. it applies
 	 * to each of the enabled slices.
 	 */
-	sseu->subslice_mask = GENMASK(ss_max - 1, 0);
-	sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
-				 GEN8_F2_SS_DIS_SHIFT);
+	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
+	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
+			   GEN8_F2_SS_DIS_SHIFT);
 
 	eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK;
 	eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) |
@@ -315,30 +413,39 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 	 * Iterate through enabled slices and subslices to
 	 * count the total enabled EU.
 	 */
-	for (s = 0; s < s_max; s++) {
+	for (s = 0; s < sseu->max_slices; s++) {
 		if (!(sseu->slice_mask & BIT(s)))
 			/* skip disabled slice */
 			continue;
 
-		for (ss = 0; ss < ss_max; ss++) {
+		sseu->subslice_mask[s] = subslice_mask;
+
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
+			u8 eu_disabled_mask;
 			u32 n_disabled;
 
-			if (!(sseu->subslice_mask & BIT(ss)))
+			if (!(sseu->subslice_mask[ss] & BIT(ss)))
 				/* skip disabled subslice */
 				continue;
 
-			n_disabled = hweight8(eu_disable[s] >> (ss * eu_max));
+			eu_disabled_mask =
+				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
+
+			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
+
+			n_disabled = hweight8(eu_disabled_mask);
 
 			/*
 			 * Record which subslices have 7 EUs.
 			 */
-			if (eu_max - n_disabled == 7)
+			if (sseu->max_eus_per_subslice - n_disabled == 7)
 				sseu->subslice_7eu[s] |= 1 << ss;
-
-			sseu->eu_total += eu_max - n_disabled;
 		}
 	}
 
+	sseu->subslice_total = compute_subslice_total(sseu);
+	sseu->eu_total = compute_eu_total(sseu);
+
 	/*
 	 * BDW is expected to always have a uniform distribution of EU across
 	 * subslices with the exception that any one EU in any one subslice may
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 9542018d11d0..213db0ca6252 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -112,10 +112,14 @@ enum intel_platform {
 	func(supports_tv); \
 	func(has_ipc);
 
+#define GEN_MAX_SLICES		(6) /* CNL upper bound */
+#define GEN_MAX_SUBSLICES	(7)
+
 struct sseu_dev_info {
 	u8 slice_mask;
-	u8 subslice_mask;
-	u8 eu_total;
+	u8 subslice_mask[GEN_MAX_SUBSLICES];
+	u16 subslice_total;
+	u16 eu_total;
 	u8 eu_per_subslice;
 	u8 min_eu_in_pool;
 	/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
@@ -123,6 +127,17 @@ struct sseu_dev_info {
 	u8 has_slice_pg:1;
 	u8 has_subslice_pg:1;
 	u8 has_eu_pg:1;
+
+	/* Topology fields */
+	u8 max_slices;
+	u8 max_subslices;
+	u8 max_eus_per_subslice;
+
+	/* We don't have more than 8 eus per subslice at the moment and as we
+	 * store eus enabled using bits, no need to multiply by eus per
+	 * subslice.
+	 */
+	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
 };
 
 struct intel_device_info {
@@ -169,7 +184,33 @@ struct intel_device_info {
 
 static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
 {
-	return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
+	return sseu->subslice_total;
+}
+
+static inline int sseu_eu_idx(const struct sseu_dev_info *sseu,
+			      int slice, int subslice)
+{
+	int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice,
+					   BITS_PER_BYTE);
+	int slice_stride = sseu->max_subslices * subslice_stride;
+
+	return slice * slice_stride + subslice * subslice_stride;
+}
+
+/*
+ * The following functions prototypes should be updated with a larger type
+ * than u8 if we ever have more than 8 EUs per subslice.
+ */
+static inline u8 sseu_get_eus(const struct sseu_dev_info *sseu,
+			      int slice, int subslice)
+{
+	return sseu->eu_mask[sseu_eu_idx(sseu, slice, subslice)];
+}
+
+static inline void sseu_set_eus(struct sseu_dev_info *sseu,
+				int slice, int subslice, u8 eu_mask)
+{
+	sseu->eu_mask[sseu_eu_idx(sseu, slice, subslice)] = eu_mask;
 }
 
 const char *intel_platform_name(enum intel_platform platform);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ff25f209d0a5..ac7896031b8d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2098,7 +2098,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
 
 	if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
 		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
-		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
+		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) <<
 			GEN8_RPCS_SS_CNT_SHIFT;
 		rpcs |= GEN8_RPCS_ENABLE;
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c5ff203e42d6..23ae9a957fab 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -90,7 +90,7 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
 
 #define instdone_subslice_mask(dev_priv__) \
 	(INTEL_GEN(dev_priv__) == 7 ? \
-	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
+	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0])
 
 #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
 	for ((slice__) = 0, (subslice__) = 0; \
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v11 2/6] drm/i915/debugfs: reuse max slice/subslices already stored in sseu
  2018-01-22  8:21 [PATCH v11 0/6] drm/i915: expose RCS topology to userspace Lionel Landwerlin
  2018-01-22  8:21 ` [PATCH v11 1/6] drm/i915: store all subslice masks Lionel Landwerlin
@ 2018-01-22  8:21 ` Lionel Landwerlin
  2018-01-22  8:21 ` [PATCH v11 3/6] drm/i915/debugfs: add rcs topology entry Lionel Landwerlin
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 16+ messages in thread
From: Lionel Landwerlin @ 2018-01-22  8:21 UTC (permalink / raw)
  To: intel-gfx

Now that we have that information in topology fields, let's just reused it.

v2: Style tweaks (Tvrtko)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a9d0f58ce18b..5bb28d89dfe9 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4304,11 +4304,11 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
 				     struct sseu_dev_info *sseu)
 {
 	const struct intel_device_info *info = INTEL_INFO(dev_priv);
-	int s_max = 6, ss_max = 4;
 	int s, ss;
-	u32 s_reg[s_max], eu_reg[2 * s_max], eu_mask[2];
+	u32 s_reg[info->sseu.max_slices];
+	u32 eu_reg[2 * info->sseu.max_subslices], eu_mask[2];
 
-	for (s = 0; s < s_max; s++) {
+	for (s = 0; s < info->sseu.max_slices; s++) {
 		/*
 		 * FIXME: Valid SS Mask respects the spec and read
 		 * only valid bits for those registers, excluding reserverd
@@ -4330,7 +4330,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
 		     GEN9_PGCTL_SSB_EU210_ACK |
 		     GEN9_PGCTL_SSB_EU311_ACK;
 
-	for (s = 0; s < s_max; s++) {
+	for (s = 0; s < info->sseu.max_slices; s++) {
 		if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
 			/* skip disabled slice */
 			continue;
@@ -4338,7 +4338,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
 		sseu->slice_mask |= BIT(s);
 		sseu->subslice_mask[s] = info->sseu.subslice_mask[s];
 
-		for (ss = 0; ss < ss_max; ss++) {
+		for (ss = 0; ss < info->sseu.max_subslices; ss++) {
 			unsigned int eu_cnt;
 
 			if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
@@ -4358,17 +4358,12 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
 static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 				    struct sseu_dev_info *sseu)
 {
-	int s_max = 3, ss_max = 4;
+	const struct intel_device_info *info = INTEL_INFO(dev_priv);
 	int s, ss;
-	u32 s_reg[s_max], eu_reg[2*s_max], eu_mask[2];
-
-	/* BXT has a single slice and at most 3 subslices. */
-	if (IS_GEN9_LP(dev_priv)) {
-		s_max = 1;
-		ss_max = 3;
-	}
+	u32 s_reg[info->sseu.max_slices];
+	u32 eu_reg[2 * info->sseu.max_subslices], eu_mask[2];
 
-	for (s = 0; s < s_max; s++) {
+	for (s = 0; s < info->sseu.max_slices; s++) {
 		s_reg[s] = I915_READ(GEN9_SLICE_PGCTL_ACK(s));
 		eu_reg[2*s] = I915_READ(GEN9_SS01_EU_PGCTL_ACK(s));
 		eu_reg[2*s + 1] = I915_READ(GEN9_SS23_EU_PGCTL_ACK(s));
@@ -4383,7 +4378,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 		     GEN9_PGCTL_SSB_EU210_ACK |
 		     GEN9_PGCTL_SSB_EU311_ACK;
 
-	for (s = 0; s < s_max; s++) {
+	for (s = 0; s < info->sseu.max_slices; s++) {
 		if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
 			/* skip disabled slice */
 			continue;
@@ -4394,7 +4389,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 			sseu->subslice_mask[s] =
 				INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
 
-		for (ss = 0; ss < ss_max; ss++) {
+		for (ss = 0; ss < info->sseu.max_subslices; ss++) {
 			unsigned int eu_cnt;
 
 			if (IS_GEN9_LP(dev_priv)) {
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v11 3/6] drm/i915/debugfs: add rcs topology entry
  2018-01-22  8:21 [PATCH v11 0/6] drm/i915: expose RCS topology to userspace Lionel Landwerlin
  2018-01-22  8:21 ` [PATCH v11 1/6] drm/i915: store all subslice masks Lionel Landwerlin
  2018-01-22  8:21 ` [PATCH v11 2/6] drm/i915/debugfs: reuse max slice/subslices already stored in sseu Lionel Landwerlin
@ 2018-01-22  8:21 ` Lionel Landwerlin
  2018-01-22  8:21 ` [PATCH v11 4/6] drm/i915: add rcs topology to error state Lionel Landwerlin
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 16+ messages in thread
From: Lionel Landwerlin @ 2018-01-22  8:21 UTC (permalink / raw)
  To: intel-gfx

While the end goal is to make this information available to userspace
through a new ioctl, there is no reason we can't display it in a human
readable fashion through debugfs.

slice0: 3 subslice(s) (0x7):
	subslice0: 8 EUs (0xff)
	subslice1: 8 EUs (0xff)
	subslice2: 8 EUs (0xff)
	subslice3: 0 EUs (0x0)
slice1: 3 subslice(s) (0x7):
	subslice0: 8 EUs (0xff)
	subslice1: 8 EUs (0xff)
	subslice2: 8 EUs (0xff)
	subslice3: 0 EUs (0x0)
slice2: 3 subslice(s) (0x7):
	subslice0: 8 EUs (0xff)
	subslice1: 8 EUs (0xff)
	subslice2: 8 EUs (0xff)
	subslice3: 0 EUs (0x0)

v2: Reformat debugfs printing (Tvrtko)
    Use the new EU mask helper (Tvrtko)

v3: Move printing code to intel_device_info.c to be shared with error
    state (Michal)

Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c      | 11 +++++++++++
 drivers/gpu/drm/i915/intel_device_info.c | 25 +++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_device_info.h |  2 ++
 3 files changed, 38 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5bb28d89dfe9..3e8e36120e5a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3166,6 +3166,16 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 	return 0;
 }
 
+static int i915_rcs_topology(struct seq_file *m, void *unused)
+{
+	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	intel_device_info_dump_topology(&INTEL_INFO(dev_priv)->sseu, &p);
+
+	return 0;
+}
+
 static int i915_shrinker_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *i915 = node_to_i915(m->private);
@@ -4696,6 +4706,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_dmc_info", i915_dmc_info, 0},
 	{"i915_display_info", i915_display_info, 0},
 	{"i915_engine_info", i915_engine_info, 0},
+	{"i915_rcs_topology", i915_rcs_topology, 0},
 	{"i915_shrinker_info", i915_shrinker_info, 0},
 	{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
 	{"i915_dp_mst_info", i915_dp_mst_info, 0},
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 80cf72069bae..04d00b73523b 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -124,6 +124,31 @@ void intel_device_info_dump(const struct intel_device_info *info,
 	intel_device_info_dump_flags(info, p);
 }
 
+void intel_device_info_dump_topology(const struct sseu_dev_info *sseu,
+				     struct drm_printer *p)
+{
+	int s, ss;
+
+	if (sseu->max_slices == 0) {
+		drm_printf(p, "Unavailable\n");
+		return;
+	}
+
+	for (s = 0; s < sseu->max_slices; s++) {
+		drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n",
+			   s, hweight8(sseu->subslice_mask[s]),
+			   sseu->subslice_mask[s]);
+
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
+			u8 enabled_eus = sseu_get_eus(sseu, s, ss);
+
+			drm_printf(p, "\tsubslice%d: %u EUs (0x%hhx)\n",
+				   ss, hweight8(enabled_eus), enabled_eus);
+		}
+	}
+}
+
+
 static u16 compute_eu_total(const struct sseu_dev_info *sseu)
 {
 	u16 i, total = 0;
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 213db0ca6252..5086adf85824 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -222,5 +222,7 @@ void intel_device_info_dump_flags(const struct intel_device_info *info,
 				  struct drm_printer *p);
 void intel_device_info_dump_runtime(const struct intel_device_info *info,
 				    struct drm_printer *p);
+void intel_device_info_dump_topology(const struct sseu_dev_info *sseu,
+				     struct drm_printer *p);
 
 #endif
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v11 4/6] drm/i915: add rcs topology to error state
  2018-01-22  8:21 [PATCH v11 0/6] drm/i915: expose RCS topology to userspace Lionel Landwerlin
                   ` (2 preceding siblings ...)
  2018-01-22  8:21 ` [PATCH v11 3/6] drm/i915/debugfs: add rcs topology entry Lionel Landwerlin
@ 2018-01-22  8:21 ` Lionel Landwerlin
  2018-01-22  8:21 ` [PATCH v11 5/6] drm/i915: add query uAPI Lionel Landwerlin
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 16+ messages in thread
From: Lionel Landwerlin @ 2018-01-22  8:21 UTC (permalink / raw)
  To: intel-gfx

This might be useful information for developers looking at an error
state.

v2: Place topology towards the end of the error state (Chris)

v3: Reuse common printing code (Michal)

v4: Make this a one-liner (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a81351d9e3a6..499a346923ec 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -578,6 +578,7 @@ static void err_print_capabilities(struct drm_i915_error_state_buf *m,
 	struct drm_printer p = i915_error_printer(m);
 
 	intel_device_info_dump_flags(info, &p);
+	intel_device_info_dump_topology(&info->sseu, &p);
 }
 
 static void err_print_params(struct drm_i915_error_state_buf *m,
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v11 5/6] drm/i915: add query uAPI
  2018-01-22  8:21 [PATCH v11 0/6] drm/i915: expose RCS topology to userspace Lionel Landwerlin
                   ` (3 preceding siblings ...)
  2018-01-22  8:21 ` [PATCH v11 4/6] drm/i915: add rcs topology to error state Lionel Landwerlin
@ 2018-01-22  8:21 ` Lionel Landwerlin
  2018-01-23 14:17   ` Lionel Landwerlin
  2018-01-22  8:21 ` [PATCH v11 6/6] drm/i915: expose rcs topology through " Lionel Landwerlin
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 16+ messages in thread
From: Lionel Landwerlin @ 2018-01-22  8:21 UTC (permalink / raw)
  To: intel-gfx

There are a number of information that are readable from hardware
registers and that we would like to make accessible to userspace. One
particular example is the topology of the execution units (how are
execution units grouped in subslices and slices and also which ones
have been fused off for die recovery).

At the moment the GET_PARAM ioctl covers some basic needs, but
generally is only able to return a single value for each defined
parameter. This is a bit problematic with topology descriptions which
are array/maps of available units.

This change introduces a new ioctl that can deal with requests to fill
structures of potentially variable lengths. The user is expected fill
a query with length fields set at 0 on the first call, the kernel then
sets the length fields to the their expected values. A second call to
the kernel with length fields at their expected values will trigger a
copy of the data to the pointed memory locations.

The scope of this uAPI is only to provide information to userspace,
not to allow configuration of the device.

v2: Simplify dispatcher code iteration (Tvrtko)
    Tweak uapi drm_i915_query_item structure (Tvrtko)

v3: Rename pad fields into flags (Chris)
    Return error on flags field != 0 (Chris)
    Only copy length back to userspace in drm_i915_query_item (Chris)

v4: Use array of functions instead of switch (Chris)

v5: More comments in uapi (Tvrtko)
    Return query item errors in length field (All)

v6: Tweak uapi comments style to match the coding style (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/Makefile     |  1 +
 drivers/gpu/drm/i915/i915_drv.c   |  1 +
 drivers/gpu/drm/i915/i915_drv.h   |  3 ++
 drivers/gpu/drm/i915/i915_query.c | 67 +++++++++++++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h       | 41 ++++++++++++++++++++++++
 5 files changed, 113 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_query.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 3bddd8a06806..b0415a3e2d59 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -69,6 +69,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_gem_timeline.o \
 	  i915_gem_userptr.o \
 	  i915_gemfs.o \
+	  i915_query.o \
 	  i915_trace_points.o \
 	  i915_vma.o \
 	  intel_breadcrumbs.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 6d34bc1a7fff..c59be22f19ab 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2830,6 +2830,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8333692dac5a..50a7040af71e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3627,6 +3627,9 @@ extern void i915_perf_fini(struct drm_i915_private *dev_priv);
 extern void i915_perf_register(struct drm_i915_private *dev_priv);
 extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
 
+/* i915_query.c */
+int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
 /* i915_suspend.c */
 extern int i915_save_state(struct drm_i915_private *dev_priv);
 extern int i915_restore_state(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
new file mode 100644
index 000000000000..5aa886313cf9
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_drv.h"
+#include <uapi/drm/i915_drm.h>
+
+static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
+					struct drm_i915_query_item *query_item) = {
+};
+
+int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_query *args = data;
+	struct drm_i915_query_item __user *user_item_ptr =
+		u64_to_user_ptr(args->items_ptr);
+	u32 i;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	for (i = 0; i < args->num_items; i++, user_item_ptr++) {
+		struct drm_i915_query_item item;
+		u64 func_idx;
+		int ret;
+
+		if (copy_from_user(&item, user_item_ptr, sizeof(item)))
+			return -EFAULT;
+
+		if (item.query_id == 0)
+			return -EINVAL;
+
+		func_idx = item.query_id - 1;
+
+		if (func_idx >= ARRAY_SIZE(i915_query_funcs))
+			return -EINVAL;
+
+		ret = i915_query_funcs[func_idx](dev_priv, &item);
+
+		/* Only write the length back to userspace if they differ. */
+		if (ret != item.length && put_user(ret, &user_item_ptr->length))
+			return -EFAULT;
+	}
+
+	return 0;
+}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 536ee4febd74..03f031652d86 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -318,6 +318,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_PERF_OPEN		0x36
 #define DRM_I915_PERF_ADD_CONFIG	0x37
 #define DRM_I915_PERF_REMOVE_CONFIG	0x38
+#define DRM_I915_QUERY			0x39
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -375,6 +376,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_PERF_OPEN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
 #define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
 #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
+#define DRM_IOCTL_I915_QUERY			DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1613,6 +1615,45 @@ struct drm_i915_perf_oa_config {
 	__u64 flex_regs_ptr;
 };
 
+
+struct drm_i915_query_item {
+	__u64 query_id;
+
+	/*
+	 * When set to zero by userspace, this is filled with the size of the
+	 * data to be written at the data_ptr pointer. The kernel set this
+	 * value to a negative value to signal an error on a particular query
+	 * item.
+	 */
+	__s32 length;
+
+	/*
+	 * Unused for now.
+	 */
+	__u32 flags;
+
+	/*
+	 * Data will be written at the location pointed by data_ptr when the
+	 * value of length matches the length of the data to be written by the
+	 * kernel.
+	 */
+	__u64 data_ptr;
+};
+
+struct drm_i915_query {
+	__u32 num_items;
+
+	/*
+	 * Unused for now.
+	 */
+	__u32 flags;
+
+	/*
+	 * This point to an array of num_items drm_i915_query_item structures.
+	 */
+	__u64 items_ptr;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v11 6/6] drm/i915: expose rcs topology through query uAPI
  2018-01-22  8:21 [PATCH v11 0/6] drm/i915: expose RCS topology to userspace Lionel Landwerlin
                   ` (4 preceding siblings ...)
  2018-01-22  8:21 ` [PATCH v11 5/6] drm/i915: add query uAPI Lionel Landwerlin
@ 2018-01-22  8:21 ` Lionel Landwerlin
  2018-01-22  9:44   ` Tvrtko Ursulin
  2018-01-22  8:55 ` ✓ Fi.CI.BAT: success for drm/i915: expose RCS topology to userspace Patchwork
  2018-01-22 11:48 ` ✗ Fi.CI.IGT: warning " Patchwork
  7 siblings, 1 reply; 16+ messages in thread
From: Lionel Landwerlin @ 2018-01-22  8:21 UTC (permalink / raw)
  To: intel-gfx

With the introduction of asymmetric slices in CNL, we cannot rely on
the previous SUBSLICE_MASK getparam to tell userspace what subslices
are available. Here we introduce a more detailed way of querying the
Gen's GPU topology that doesn't aggregate numbers.

This is essential for monitoring parts of the GPU with the OA unit,
because counters need to be normalized to the number of
EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do
not gives us sufficient information.

As a bonus we can draw representations of the GPU :

        https://imgur.com/a/vuqpa

v2: Rename uapi struct s/_mask/_info/ (Tvrtko)
    Report max_slice/subslice/eus_per_subslice rather than strides (Tvrtko)
    Add uapi macros to read data from *_info structs (Tvrtko)

v3: Use !!(v & DRM_I915_BIT()) for uapi macros instead of custom shifts (Tvrtko)

v4: factorize query item writting (Tvrtko)
    tweak uapi struct/define names (Tvrtko)

v5: Replace ALIGN() macro (Chris)

v6: Updated uapi comments (Tvrtko)
    Moved flags != 0 checks into vfuncs (Tvrtko)

v7: Use access_ok() before copying anything, to avoid overflows (Chris)
    Switch BUG_ON() to GEM_WARN_ON() (Tvrtko)

v8: Tweak uapi comments style to match the coding style (Lionel)

v9: Fix error in comment about computation of enabled subslice (Tvrtko)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_query.c | 110 ++++++++++++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h       |  71 ++++++++++++++++++++++++
 2 files changed, 181 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index 5aa886313cf9..ff87ec8a321a 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -25,8 +25,118 @@
 #include "i915_drv.h"
 #include <uapi/drm/i915_drm.h>
 
+static int copy_query_data(struct drm_i915_query_item *query_item,
+			   const void *item_ptr, u32 item_length,
+			   const void *data_ptr, u32 data_length)
+{
+	u32 total_length = item_length + data_length;
+
+	if (GEM_WARN_ON(add_overflows(item_length, data_length)))
+		return -EINVAL;
+
+	if (query_item->length == 0)
+		return total_length;
+
+	if (query_item->length < total_length)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, u64_to_user_ptr(query_item->data_ptr),
+		       total_length))
+		return -EFAULT;
+
+	if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr),
+			   item_ptr, item_length))
+		return -EFAULT;
+
+	if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr + item_length),
+			   data_ptr, data_length))
+		return -EFAULT;
+
+	return total_length;
+}
+
+static int query_slice_info(struct drm_i915_private *dev_priv,
+			    struct drm_i915_query_item *query_item)
+{
+	const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
+	struct drm_i915_query_slice_info slice_info;
+
+	if (query_item->flags != 0)
+		return -EINVAL;
+
+	if (sseu->max_slices == 0)
+		return -ENODEV;
+
+	/*
+	 * If we ever change the internal slice mask data type, we'll need to
+	 * update this function.
+	 */
+	BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
+
+	memset(&slice_info, 0, sizeof(slice_info));
+	slice_info.max_slices = sseu->max_slices;
+
+	return copy_query_data(query_item, &slice_info, sizeof(slice_info),
+			       &sseu->slice_mask, sizeof(sseu->slice_mask));
+}
+
+static int query_subslice_info(struct drm_i915_private *dev_priv,
+			       struct drm_i915_query_item *query_item)
+{
+	const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
+	struct drm_i915_query_subslice_info subslice_info;
+	u32 data_length;
+
+	if (query_item->flags != 0)
+		return -EINVAL;
+
+	if (sseu->max_slices == 0)
+		return -ENODEV;
+
+	memset(&subslice_info, 0, sizeof(subslice_info));
+	subslice_info.max_slices = sseu->max_slices;
+	subslice_info.max_subslices = sseu->max_subslices;
+
+	data_length = subslice_info.max_slices *
+		DIV_ROUND_UP(subslice_info.max_subslices,
+			     sizeof(sseu->subslice_mask[0]) * BITS_PER_BYTE);
+
+	return copy_query_data(query_item,
+			       &subslice_info, sizeof(subslice_info),
+			       sseu->subslice_mask, data_length);
+}
+
+static int query_eu_info(struct drm_i915_private *dev_priv,
+			 struct drm_i915_query_item *query_item)
+{
+	const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
+	struct drm_i915_query_eu_info eu_info;
+	u32 data_length;
+
+	if (query_item->flags != 0)
+		return -EINVAL;
+
+	if (sseu->max_slices == 0)
+		return -ENODEV;
+
+	memset(&eu_info, 0, sizeof(eu_info));
+	eu_info.max_slices = sseu->max_slices;
+	eu_info.max_subslices = sseu->max_subslices;
+	eu_info.max_eus_per_subslice = sseu->max_eus_per_subslice;
+
+	data_length = eu_info.max_slices * eu_info.max_subslices *
+		DIV_ROUND_UP(eu_info.max_eus_per_subslice, BITS_PER_BYTE);
+
+	return copy_query_data(query_item,
+			       &eu_info, sizeof(eu_info),
+			       sseu->eu_mask, data_length);
+}
+
 static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
 					struct drm_i915_query_item *query_item) = {
+	query_slice_info,
+	query_subslice_info,
+	query_eu_info,
 };
 
 int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 03f031652d86..7ae8e9aa0206 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1618,6 +1618,9 @@ struct drm_i915_perf_oa_config {
 
 struct drm_i915_query_item {
 	__u64 query_id;
+#define DRM_I915_QUERY_SLICE_INFO	0x01
+#define DRM_I915_QUERY_SUBSLICE_INFO	0x02
+#define DRM_I915_QUERY_EU_INFO		0x03
 
 	/*
 	 * When set to zero by userspace, this is filled with the size of the
@@ -1654,6 +1657,74 @@ struct drm_i915_query {
 	__u64 items_ptr;
 };
 
+#define DRM_I915_BIT(bit) ((__u32)1 << (bit))
+#define DRM_I915_DIV_ROUND_UP(val, div) (((val) + (div) - 1) / (div))
+
+/*
+ * Data written by the kernel with query DRM_I915_QUERY_SLICE_INFO :
+ *
+ * data: each bit indicates whether a slice is available (1) or fused off (0).
+ *       Formula to tell if slice X is available :
+ *
+ *            (data[X / 8] >> (X % 8)) & 1
+ *
+ *       Alternatively, use DRM_I915_QUERY_SLICE_AVAILABLE() to query a
+ *       given subslice's availability.
+ */
+struct drm_i915_query_slice_info {
+	__u32 max_slices;
+
+#define DRM_I915_QUERY_SLICE_AVAILABLE(info, slice) \
+	!!((info)->data[(slice) / 8] & DRM_I915_BIT((slice) % 8))
+	__u8 data[];
+};
+
+/*
+ * Data written by the kernel with query DRM_I915_QUERY_SUBSLICE_INFO :
+ *
+ * data: each bit indicates whether a subslice is available (1) or fused off
+ *       (0). Formula to tell if slice X subslice Y is available :
+ *
+ *            (data[(X * ALIGN(max_subslices, 8) / 8) + Y / 8] >> (Y % 8)) & 1
+ *
+ *       Alternatively, use DRM_I915_QUERY_SUBSLICE_AVAILABLE() to query a
+ *       given subslice's availability.
+ */
+struct drm_i915_query_subslice_info {
+	__u32 max_slices;
+	__u32 max_subslices;
+
+#define DRM_I915_QUERY_SUBSLICE_AVAILABLE(info, slice, subslice) \
+	!!((info)->data[(slice) * DRM_I915_DIV_ROUND_UP((info)->max_subslices, 8) + \
+			(subslice) / 8] & DRM_I915_BIT((subslice) % 8))
+	__u8 data[];
+};
+
+/*
+ * Data written by the kernel with query DRM_I915_QUERY_EU_INFO :
+ *
+ * data: Each bit indicates whether an EU is available (1) or fused off (0).
+ *       Formula to tell if slice X subslice Y eu Z is available :
+ *
+ *           (data[X * max_subslices * ALIGN(max_eus_per_subslice, 8) / 8 +
+ *                 Y * ALIGN(max_eus_per_subslice, 8) / 8 +
+ *                 Z / 8] >> (Z % 8)) & 1
+ *
+ *       Alternatively, use DRM_I915_QUERY_EU_AVAILABLE() to query a given
+ *       EU's availability.
+ */
+struct drm_i915_query_eu_info {
+	__u32 max_slices;
+	__u32 max_subslices;
+	__u32 max_eus_per_subslice;
+
+#define DRM_I915_QUERY_EU_AVAILABLE(info, slice, subslice, eu) \
+	!!((info)->data[(slice) * DRM_I915_DIV_ROUND_UP((info)->max_eus_per_subslice, 8) * (info)->max_subslices + \
+			(subslice) * DRM_I915_DIV_ROUND_UP((info)->max_eus_per_subslice, 8) + \
+			(eu) / 8] & DRM_I915_BIT((eu) % 8))
+	__u8 data[];
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915: expose RCS topology to userspace
  2018-01-22  8:21 [PATCH v11 0/6] drm/i915: expose RCS topology to userspace Lionel Landwerlin
                   ` (5 preceding siblings ...)
  2018-01-22  8:21 ` [PATCH v11 6/6] drm/i915: expose rcs topology through " Lionel Landwerlin
@ 2018-01-22  8:55 ` Patchwork
  2018-01-22 11:48 ` ✗ Fi.CI.IGT: warning " Patchwork
  7 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2018-01-22  8:55 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: expose RCS topology to userspace
URL   : https://patchwork.freedesktop.org/series/36874/
State : success

== Summary ==

Series 36874v1 drm/i915: expose RCS topology to userspace
https://patchwork.freedesktop.org/api/1.0/series/36874/revisions/1/mbox/

Test gem_ringfill:
        Subgroup basic-default:
                skip       -> PASS       (fi-bsw-n3050)
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                incomplete -> PASS       (fi-snb-2520m) fdo#103713

fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:436s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:440s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:388s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:514s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:294s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:504s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:501s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:494s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:482s
fi-elk-e7500     total:224  pass:168  dwarn:10  dfail:0   fail:0   skip:45 
fi-gdg-551       total:288  pass:179  dwarn:0   dfail:0   fail:1   skip:108 time:305s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:524s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:396s
fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:404s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:422s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:466s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:422s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:466s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:504s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:455s
fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:507s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:633s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:434s
fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:518s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:534s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:489s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:504s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:440s
fi-snb-2520m     total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:537s
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:408s
Blacklisted hosts:
fi-cfl-s2        total:288  pass:256  dwarn:0   dfail:0   fail:3   skip:26  time:558s
fi-glk-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:481s
fi-skl-guc       total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:422s

a1f34f9415a41d5a4d219bcbbe5cf39a0b07cc22 drm-tip: 2018y-01m-20d-19h-27m-32s UTC integration manifest
74683d847ca1 drm/i915: expose rcs topology through query uAPI
264b00831263 drm/i915: add query uAPI
789a3cce76be drm/i915: add rcs topology to error state
474bc6818880 drm/i915/debugfs: add rcs topology entry
e12a463e0997 drm/i915/debugfs: reuse max slice/subslices already stored in sseu
3242318490b2 drm/i915: store all subslice masks

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7734/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v11 6/6] drm/i915: expose rcs topology through query uAPI
  2018-01-22  8:21 ` [PATCH v11 6/6] drm/i915: expose rcs topology through " Lionel Landwerlin
@ 2018-01-22  9:44   ` Tvrtko Ursulin
  0 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-01-22  9:44 UTC (permalink / raw)
  To: Lionel Landwerlin, intel-gfx


On 22/01/2018 08:21, Lionel Landwerlin wrote:
> With the introduction of asymmetric slices in CNL, we cannot rely on
> the previous SUBSLICE_MASK getparam to tell userspace what subslices
> are available. Here we introduce a more detailed way of querying the
> Gen's GPU topology that doesn't aggregate numbers.
> 
> This is essential for monitoring parts of the GPU with the OA unit,
> because counters need to be normalized to the number of
> EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do
> not gives us sufficient information.
> 
> As a bonus we can draw representations of the GPU :
> 
>          https://imgur.com/a/vuqpa
> 
> v2: Rename uapi struct s/_mask/_info/ (Tvrtko)
>      Report max_slice/subslice/eus_per_subslice rather than strides (Tvrtko)
>      Add uapi macros to read data from *_info structs (Tvrtko)
> 
> v3: Use !!(v & DRM_I915_BIT()) for uapi macros instead of custom shifts (Tvrtko)
> 
> v4: factorize query item writting (Tvrtko)
>      tweak uapi struct/define names (Tvrtko)
> 
> v5: Replace ALIGN() macro (Chris)
> 
> v6: Updated uapi comments (Tvrtko)
>      Moved flags != 0 checks into vfuncs (Tvrtko)
> 
> v7: Use access_ok() before copying anything, to avoid overflows (Chris)
>      Switch BUG_ON() to GEM_WARN_ON() (Tvrtko)
> 
> v8: Tweak uapi comments style to match the coding style (Lionel)
> 
> v9: Fix error in comment about computation of enabled subslice (Tvrtko)
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_query.c | 110 ++++++++++++++++++++++++++++++++++++++
>   include/uapi/drm/i915_drm.h       |  71 ++++++++++++++++++++++++
>   2 files changed, 181 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
> index 5aa886313cf9..ff87ec8a321a 100644
> --- a/drivers/gpu/drm/i915/i915_query.c
> +++ b/drivers/gpu/drm/i915/i915_query.c
> @@ -25,8 +25,118 @@
>   #include "i915_drv.h"
>   #include <uapi/drm/i915_drm.h>
>   
> +static int copy_query_data(struct drm_i915_query_item *query_item,
> +			   const void *item_ptr, u32 item_length,
> +			   const void *data_ptr, u32 data_length)
> +{
> +	u32 total_length = item_length + data_length;
> +
> +	if (GEM_WARN_ON(add_overflows(item_length, data_length)))
> +		return -EINVAL;
> +
> +	if (query_item->length == 0)
> +		return total_length;
> +
> +	if (query_item->length < total_length)
> +		return -EINVAL;
> +
> +	if (!access_ok(VERIFY_WRITE, u64_to_user_ptr(query_item->data_ptr),
> +		       total_length))
> +		return -EFAULT;
> +
> +	if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr),
> +			   item_ptr, item_length))
> +		return -EFAULT;
> +
> +	if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr + item_length),
> +			   data_ptr, data_length))
> +		return -EFAULT;
> +
> +	return total_length;
> +}
> +
> +static int query_slice_info(struct drm_i915_private *dev_priv,
> +			    struct drm_i915_query_item *query_item)
> +{
> +	const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
> +	struct drm_i915_query_slice_info slice_info;
> +
> +	if (query_item->flags != 0)
> +		return -EINVAL;
> +
> +	if (sseu->max_slices == 0)
> +		return -ENODEV;
> +
> +	/*
> +	 * If we ever change the internal slice mask data type, we'll need to
> +	 * update this function.
> +	 */
> +	BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
> +
> +	memset(&slice_info, 0, sizeof(slice_info));
> +	slice_info.max_slices = sseu->max_slices;
> +
> +	return copy_query_data(query_item, &slice_info, sizeof(slice_info),
> +			       &sseu->slice_mask, sizeof(sseu->slice_mask));
> +}
> +
> +static int query_subslice_info(struct drm_i915_private *dev_priv,
> +			       struct drm_i915_query_item *query_item)
> +{
> +	const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
> +	struct drm_i915_query_subslice_info subslice_info;
> +	u32 data_length;
> +
> +	if (query_item->flags != 0)
> +		return -EINVAL;
> +
> +	if (sseu->max_slices == 0)
> +		return -ENODEV;
> +
> +	memset(&subslice_info, 0, sizeof(subslice_info));
> +	subslice_info.max_slices = sseu->max_slices;
> +	subslice_info.max_subslices = sseu->max_subslices;
> +
> +	data_length = subslice_info.max_slices *
> +		DIV_ROUND_UP(subslice_info.max_subslices,
> +			     sizeof(sseu->subslice_mask[0]) * BITS_PER_BYTE);
> +
> +	return copy_query_data(query_item,
> +			       &subslice_info, sizeof(subslice_info),
> +			       sseu->subslice_mask, data_length);
> +}
> +
> +static int query_eu_info(struct drm_i915_private *dev_priv,
> +			 struct drm_i915_query_item *query_item)
> +{
> +	const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
> +	struct drm_i915_query_eu_info eu_info;
> +	u32 data_length;
> +
> +	if (query_item->flags != 0)
> +		return -EINVAL;
> +
> +	if (sseu->max_slices == 0)
> +		return -ENODEV;
> +
> +	memset(&eu_info, 0, sizeof(eu_info));
> +	eu_info.max_slices = sseu->max_slices;
> +	eu_info.max_subslices = sseu->max_subslices;
> +	eu_info.max_eus_per_subslice = sseu->max_eus_per_subslice;
> +
> +	data_length = eu_info.max_slices * eu_info.max_subslices *
> +		DIV_ROUND_UP(eu_info.max_eus_per_subslice, BITS_PER_BYTE);
> +
> +	return copy_query_data(query_item,
> +			       &eu_info, sizeof(eu_info),
> +			       sseu->eu_mask, data_length);
> +}
> +
>   static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
>   					struct drm_i915_query_item *query_item) = {
> +	query_slice_info,
> +	query_subslice_info,
> +	query_eu_info,
>   };
>   
>   int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 03f031652d86..7ae8e9aa0206 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1618,6 +1618,9 @@ struct drm_i915_perf_oa_config {
>   
>   struct drm_i915_query_item {
>   	__u64 query_id;
> +#define DRM_I915_QUERY_SLICE_INFO	0x01
> +#define DRM_I915_QUERY_SUBSLICE_INFO	0x02
> +#define DRM_I915_QUERY_EU_INFO		0x03
>   
>   	/*
>   	 * When set to zero by userspace, this is filled with the size of the
> @@ -1654,6 +1657,74 @@ struct drm_i915_query {
>   	__u64 items_ptr;
>   };
>   
> +#define DRM_I915_BIT(bit) ((__u32)1 << (bit))
> +#define DRM_I915_DIV_ROUND_UP(val, div) (((val) + (div) - 1) / (div))
> +
> +/*
> + * Data written by the kernel with query DRM_I915_QUERY_SLICE_INFO :
> + *
> + * data: each bit indicates whether a slice is available (1) or fused off (0).
> + *       Formula to tell if slice X is available :
> + *
> + *            (data[X / 8] >> (X % 8)) & 1
> + *
> + *       Alternatively, use DRM_I915_QUERY_SLICE_AVAILABLE() to query a
> + *       given subslice's availability.
> + */
> +struct drm_i915_query_slice_info {
> +	__u32 max_slices;
> +
> +#define DRM_I915_QUERY_SLICE_AVAILABLE(info, slice) \
> +	!!((info)->data[(slice) / 8] & DRM_I915_BIT((slice) % 8))
> +	__u8 data[];
> +};
> +
> +/*
> + * Data written by the kernel with query DRM_I915_QUERY_SUBSLICE_INFO :
> + *
> + * data: each bit indicates whether a subslice is available (1) or fused off
> + *       (0). Formula to tell if slice X subslice Y is available :
> + *
> + *            (data[(X * ALIGN(max_subslices, 8) / 8) + Y / 8] >> (Y % 8)) & 1
> + *
> + *       Alternatively, use DRM_I915_QUERY_SUBSLICE_AVAILABLE() to query a
> + *       given subslice's availability.
> + */
> +struct drm_i915_query_subslice_info {
> +	__u32 max_slices;
> +	__u32 max_subslices;
> +
> +#define DRM_I915_QUERY_SUBSLICE_AVAILABLE(info, slice, subslice) \
> +	!!((info)->data[(slice) * DRM_I915_DIV_ROUND_UP((info)->max_subslices, 8) + \
> +			(subslice) / 8] & DRM_I915_BIT((subslice) % 8))
> +	__u8 data[];
> +};
> +
> +/*
> + * Data written by the kernel with query DRM_I915_QUERY_EU_INFO :
> + *
> + * data: Each bit indicates whether an EU is available (1) or fused off (0).
> + *       Formula to tell if slice X subslice Y eu Z is available :
> + *
> + *           (data[X * max_subslices * ALIGN(max_eus_per_subslice, 8) / 8 +
> + *                 Y * ALIGN(max_eus_per_subslice, 8) / 8 +
> + *                 Z / 8] >> (Z % 8)) & 1
> + *
> + *       Alternatively, use DRM_I915_QUERY_EU_AVAILABLE() to query a given
> + *       EU's availability.
> + */
> +struct drm_i915_query_eu_info {
> +	__u32 max_slices;
> +	__u32 max_subslices;
> +	__u32 max_eus_per_subslice;
> +
> +#define DRM_I915_QUERY_EU_AVAILABLE(info, slice, subslice, eu) \
> +	!!((info)->data[(slice) * DRM_I915_DIV_ROUND_UP((info)->max_eus_per_subslice, 8) * (info)->max_subslices + \
> +			(subslice) * DRM_I915_DIV_ROUND_UP((info)->max_eus_per_subslice, 8) + \
> +			(eu) / 8] & DRM_I915_BIT((eu) % 8))
> +	__u8 data[];
> +};
> +
>   #if defined(__cplusplus)
>   }
>   #endif
> 

Looks OK to me.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* ✗ Fi.CI.IGT: warning for drm/i915: expose RCS topology to userspace
  2018-01-22  8:21 [PATCH v11 0/6] drm/i915: expose RCS topology to userspace Lionel Landwerlin
                   ` (6 preceding siblings ...)
  2018-01-22  8:55 ` ✓ Fi.CI.BAT: success for drm/i915: expose RCS topology to userspace Patchwork
@ 2018-01-22 11:48 ` Patchwork
  7 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2018-01-22 11:48 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: expose RCS topology to userspace
URL   : https://patchwork.freedesktop.org/series/36874/
State : warning

== Summary ==

Test kms_flip:
        Subgroup flip-vs-expired-vblank-interruptible:
                fail       -> PASS       (shard-apl) fdo#102887
        Subgroup vblank-vs-suspend-interruptible:
                incomplete -> PASS       (shard-hsw) fdo#100368
        Subgroup rcs-wf_vblank-vs-dpms-interruptible:
                dmesg-warn -> PASS       (shard-hsw) fdo#102614
        Subgroup dpms-vs-vblank-race:
                pass       -> FAIL       (shard-hsw) fdo#103060
        Subgroup vblank-vs-modeset-suspend-interruptible:
                pass       -> INCOMPLETE (shard-hsw) fdo#103540
Test kms_sysfs_edid_timing:
                warn       -> PASS       (shard-apl) fdo#100047
Test perf:
        Subgroup blocking:
                fail       -> PASS       (shard-hsw) fdo#102252
        Subgroup buffer-fill:
                pass       -> FAIL       (shard-apl) fdo#103755
Test perf_pmu:
        Subgroup busy-check-all-vecs0:
                pass       -> SKIP       (shard-apl)
Test pm_sseu:
        Subgroup full-enable:
                pass       -> FAIL       (shard-apl) fdo#104651
Test kms_frontbuffer_tracking:
        Subgroup fbc-1p-primscrn-pri-indfb-draw-render:
                fail       -> PASS       (shard-apl) fdo#101623 +1
Test drv_suspend:
        Subgroup forcewake:
                skip       -> PASS       (shard-hsw)

fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#103540 https://bugs.freedesktop.org/show_bug.cgi?id=103540
fdo#100047 https://bugs.freedesktop.org/show_bug.cgi?id=100047
fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
fdo#103755 https://bugs.freedesktop.org/show_bug.cgi?id=103755
fdo#104651 https://bugs.freedesktop.org/show_bug.cgi?id=104651
fdo#101623 https://bugs.freedesktop.org/show_bug.cgi?id=101623

shard-apl        total:2780 pass:1712 dwarn:1   dfail:0   fail:26  skip:1041 time:14738s
shard-hsw        total:2713 pass:1676 dwarn:1   dfail:0   fail:14  skip:1020 time:14906s
shard-snb        total:2780 pass:1316 dwarn:1   dfail:0   fail:13  skip:1450 time:8086s
Blacklisted hosts:
shard-kbl        total:2776 pass:1831 dwarn:1   dfail:0   fail:26  skip:917 time:10545s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7734/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v11 5/6] drm/i915: add query uAPI
  2018-01-22  8:21 ` [PATCH v11 5/6] drm/i915: add query uAPI Lionel Landwerlin
@ 2018-01-23 14:17   ` Lionel Landwerlin
  2018-01-24 12:03     ` Tvrtko Ursulin
  0 siblings, 1 reply; 16+ messages in thread
From: Lionel Landwerlin @ 2018-01-23 14:17 UTC (permalink / raw)
  To: intel-gfx, Daniel Vetter, Tvrtko Ursulin, Joonas Lahtinen, Chris Wilson

Hi all,

I've been trying to expose some information to userspace about the fused 
parts of the GPU.
This is the 4th attempt at getting this upstream, here are the previous 
ones :
     https://patchwork.freedesktop.org/patch/185959/
     https://patchwork.freedesktop.org/series/33436/
     https://patchwork.freedesktop.org/series/33950/

This last iteration was based upon some direction by Daniel : 
https://lists.freedesktop.org/archives/dri-devel/2017-December/160162.html
There was, I think, a fair point about having this working in 
environments where sysfs (mechanism used in the 3rd iteration) is not 
available (containers).

Following some discussion on IRC, it seems Joonas would like this 
rewritten in a such way that we essentially drop the generic mechanism 
introduced in this patch, and instead go for an additional ioctl() on 
the drm fd just for querying the state of a fused part of 
slice/subslice/eus.
The proposal is to have a single struct like :

struct drm_i915_topology {
    /* All field are in/out */
    int slice;
    int subslice;
    int eu;

    int enabled;
};

You would let the slice field to -1 and then the kernel would fill it 
with the max slice value. Same for subslice (with a valid slice value) 
and eu.
When querying with slice = 0, and all other fields to -1, the kernel 
would fill the enabled value with 0 or 1.
Essentially that would mean that an application wanting to query the 
state of all of the EUs would have to go through them one by one (which 
would be about ~100 ioctl() on SKL GT4 for example).

Apart from the fact that we'll probably end up adding another ioctl() 
for engine discovery, I don't have any problem with what Joonas is 
proposing.
It's just a bit annoying this comes up on the 4th rewrite.
I really wouldn't like to rewrite this one more time and get turned down 
because this isn't to the taste of one of the reviewer.
So my question is : Is everybody happy with what Joonas is proposing?
Anybody in favor of having a generic mechanism?

Thanks a lot,

-
Lionel



On 22/01/18 08:21, Lionel Landwerlin wrote:
> There are a number of information that are readable from hardware
> registers and that we would like to make accessible to userspace. One
> particular example is the topology of the execution units (how are
> execution units grouped in subslices and slices and also which ones
> have been fused off for die recovery).
>
> At the moment the GET_PARAM ioctl covers some basic needs, but
> generally is only able to return a single value for each defined
> parameter. This is a bit problematic with topology descriptions which
> are array/maps of available units.
>
> This change introduces a new ioctl that can deal with requests to fill
> structures of potentially variable lengths. The user is expected fill
> a query with length fields set at 0 on the first call, the kernel then
> sets the length fields to the their expected values. A second call to
> the kernel with length fields at their expected values will trigger a
> copy of the data to the pointed memory locations.
>
> The scope of this uAPI is only to provide information to userspace,
> not to allow configuration of the device.
>
> v2: Simplify dispatcher code iteration (Tvrtko)
>      Tweak uapi drm_i915_query_item structure (Tvrtko)
>
> v3: Rename pad fields into flags (Chris)
>      Return error on flags field != 0 (Chris)
>      Only copy length back to userspace in drm_i915_query_item (Chris)
>
> v4: Use array of functions instead of switch (Chris)
>
> v5: More comments in uapi (Tvrtko)
>      Return query item errors in length field (All)
>
> v6: Tweak uapi comments style to match the coding style (Lionel)
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/Makefile     |  1 +
>   drivers/gpu/drm/i915/i915_drv.c   |  1 +
>   drivers/gpu/drm/i915/i915_drv.h   |  3 ++
>   drivers/gpu/drm/i915/i915_query.c | 67 +++++++++++++++++++++++++++++++++++++++
>   include/uapi/drm/i915_drm.h       | 41 ++++++++++++++++++++++++
>   5 files changed, 113 insertions(+)
>   create mode 100644 drivers/gpu/drm/i915/i915_query.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 3bddd8a06806..b0415a3e2d59 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -69,6 +69,7 @@ i915-y += i915_cmd_parser.o \
>   	  i915_gem_timeline.o \
>   	  i915_gem_userptr.o \
>   	  i915_gemfs.o \
> +	  i915_query.o \
>   	  i915_trace_points.o \
>   	  i915_vma.o \
>   	  intel_breadcrumbs.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 6d34bc1a7fff..c59be22f19ab 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -2830,6 +2830,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
>   	DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   };
>   
>   static struct drm_driver driver = {
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 8333692dac5a..50a7040af71e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3627,6 +3627,9 @@ extern void i915_perf_fini(struct drm_i915_private *dev_priv);
>   extern void i915_perf_register(struct drm_i915_private *dev_priv);
>   extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
>   
> +/* i915_query.c */
> +int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
> +
>   /* i915_suspend.c */
>   extern int i915_save_state(struct drm_i915_private *dev_priv);
>   extern int i915_restore_state(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
> new file mode 100644
> index 000000000000..5aa886313cf9
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_query.c
> @@ -0,0 +1,67 @@
> +/*
> + * Copyright © 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "i915_drv.h"
> +#include <uapi/drm/i915_drm.h>
> +
> +static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
> +					struct drm_i915_query_item *query_item) = {
> +};
> +
> +int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct drm_i915_query *args = data;
> +	struct drm_i915_query_item __user *user_item_ptr =
> +		u64_to_user_ptr(args->items_ptr);
> +	u32 i;
> +
> +	if (args->flags != 0)
> +		return -EINVAL;
> +
> +	for (i = 0; i < args->num_items; i++, user_item_ptr++) {
> +		struct drm_i915_query_item item;
> +		u64 func_idx;
> +		int ret;
> +
> +		if (copy_from_user(&item, user_item_ptr, sizeof(item)))
> +			return -EFAULT;
> +
> +		if (item.query_id == 0)
> +			return -EINVAL;
> +
> +		func_idx = item.query_id - 1;
> +
> +		if (func_idx >= ARRAY_SIZE(i915_query_funcs))
> +			return -EINVAL;
> +
> +		ret = i915_query_funcs[func_idx](dev_priv, &item);
> +
> +		/* Only write the length back to userspace if they differ. */
> +		if (ret != item.length && put_user(ret, &user_item_ptr->length))
> +			return -EFAULT;
> +	}
> +
> +	return 0;
> +}
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 536ee4febd74..03f031652d86 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -318,6 +318,7 @@ typedef struct _drm_i915_sarea {
>   #define DRM_I915_PERF_OPEN		0x36
>   #define DRM_I915_PERF_ADD_CONFIG	0x37
>   #define DRM_I915_PERF_REMOVE_CONFIG	0x38
> +#define DRM_I915_QUERY			0x39
>   
>   #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>   #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -375,6 +376,7 @@ typedef struct _drm_i915_sarea {
>   #define DRM_IOCTL_I915_PERF_OPEN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
>   #define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
>   #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
> +#define DRM_IOCTL_I915_QUERY			DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
>   
>   /* Allow drivers to submit batchbuffers directly to hardware, relying
>    * on the security mechanisms provided by hardware.
> @@ -1613,6 +1615,45 @@ struct drm_i915_perf_oa_config {
>   	__u64 flex_regs_ptr;
>   };
>   
> +
> +struct drm_i915_query_item {
> +	__u64 query_id;
> +
> +	/*
> +	 * When set to zero by userspace, this is filled with the size of the
> +	 * data to be written at the data_ptr pointer. The kernel set this
> +	 * value to a negative value to signal an error on a particular query
> +	 * item.
> +	 */
> +	__s32 length;
> +
> +	/*
> +	 * Unused for now.
> +	 */
> +	__u32 flags;
> +
> +	/*
> +	 * Data will be written at the location pointed by data_ptr when the
> +	 * value of length matches the length of the data to be written by the
> +	 * kernel.
> +	 */
> +	__u64 data_ptr;
> +};
> +
> +struct drm_i915_query {
> +	__u32 num_items;
> +
> +	/*
> +	 * Unused for now.
> +	 */
> +	__u32 flags;
> +
> +	/*
> +	 * This point to an array of num_items drm_i915_query_item structures.
> +	 */
> +	__u64 items_ptr;
> +};
> +
>   #if defined(__cplusplus)
>   }
>   #endif


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v11 5/6] drm/i915: add query uAPI
  2018-01-23 14:17   ` Lionel Landwerlin
@ 2018-01-24 12:03     ` Tvrtko Ursulin
  2018-01-24 15:14       ` Chris Wilson
  0 siblings, 1 reply; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-01-24 12:03 UTC (permalink / raw)
  To: Lionel Landwerlin, intel-gfx, Daniel Vetter, Joonas Lahtinen,
	Chris Wilson


On 23/01/2018 14:17, Lionel Landwerlin wrote:
> Hi all,
> 
> I've been trying to expose some information to userspace about the fused 
> parts of the GPU.
> This is the 4th attempt at getting this upstream, here are the previous 
> ones :
>      https://patchwork.freedesktop.org/patch/185959/
>      https://patchwork.freedesktop.org/series/33436/
>      https://patchwork.freedesktop.org/series/33950/
> 
> This last iteration was based upon some direction by Daniel : 
> https://lists.freedesktop.org/archives/dri-devel/2017-December/160162.html
> There was, I think, a fair point about having this working in 
> environments where sysfs (mechanism used in the 3rd iteration) is not 
> available (containers).
> 
> Following some discussion on IRC, it seems Joonas would like this 
> rewritten in a such way that we essentially drop the generic mechanism 
> introduced in this patch, and instead go for an additional ioctl() on 
> the drm fd just for querying the state of a fused part of 
> slice/subslice/eus.
> The proposal is to have a single struct like :
> 
> struct drm_i915_topology {
>     /* All field are in/out */
>     int slice;
>     int subslice;
>     int eu;
> 
>     int enabled;
> };
> 
> You would let the slice field to -1 and then the kernel would fill it 
> with the max slice value. Same for subslice (with a valid slice value) 
> and eu.
> When querying with slice = 0, and all other fields to -1, the kernel 
> would fill the enabled value with 0 or 1.
> Essentially that would mean that an application wanting to query the 
> state of all of the EUs would have to go through them one by one (which 
> would be about ~100 ioctl() on SKL GT4 for example).
> 
> Apart from the fact that we'll probably end up adding another ioctl() 
> for engine discovery, I don't have any problem with what Joonas is 
> proposing.
> It's just a bit annoying this comes up on the 4th rewrite.
> I really wouldn't like to rewrite this one more time and get turned down 
> because this isn't to the taste of one of the reviewer.
> So my question is : Is everybody happy with what Joonas is proposing?
> Anybody in favor of having a generic mechanism?

I am not very keen on this counter-proposal for two reasons.

First is that I think is a bit inelegant to have to query so many times 
just to get the full topology out. If this ends in some library, we may 
end up running this on every trivial client startup.

Secondly, it is kind of dispatcher in it's own right. Since the 
operation mode will depend on the combination of field values. As such a 
generic, or at least a more explicit, dispatcher, like the proposed 
i915_query_ioctl sounds cleaner to me.

I take the point we can't guess how many other users we will have for it 
in the future. So there is a little bit of a risk of adding something 
generic which won't be used a lot in the future.

Because apart from the three queries Lionel needs, I would be adding an 
engine info query and potentially, depending on userspace interest, 
engine queue depths. So that would be a maximum of five queries I am 
aware of would use the generic framework. Maybe too little, or maybe 
good enough for a start?

Alternative would be to go with dedicated ioctls for all of these. It 
would work for my needs, but I am not sure if Lionel can massage the 
GPU/EU topology into one, or at least fewer than three, nicer ioctls 
(Not the kind which needs to be called hundred times to get the complete 
state.).

Regards,

Tvrtko

> 
> On 22/01/18 08:21, Lionel Landwerlin wrote:
>> There are a number of information that are readable from hardware
>> registers and that we would like to make accessible to userspace. One
>> particular example is the topology of the execution units (how are
>> execution units grouped in subslices and slices and also which ones
>> have been fused off for die recovery).
>>
>> At the moment the GET_PARAM ioctl covers some basic needs, but
>> generally is only able to return a single value for each defined
>> parameter. This is a bit problematic with topology descriptions which
>> are array/maps of available units.
>>
>> This change introduces a new ioctl that can deal with requests to fill
>> structures of potentially variable lengths. The user is expected fill
>> a query with length fields set at 0 on the first call, the kernel then
>> sets the length fields to the their expected values. A second call to
>> the kernel with length fields at their expected values will trigger a
>> copy of the data to the pointed memory locations.
>>
>> The scope of this uAPI is only to provide information to userspace,
>> not to allow configuration of the device.
>>
>> v2: Simplify dispatcher code iteration (Tvrtko)
>>      Tweak uapi drm_i915_query_item structure (Tvrtko)
>>
>> v3: Rename pad fields into flags (Chris)
>>      Return error on flags field != 0 (Chris)
>>      Only copy length back to userspace in drm_i915_query_item (Chris)
>>
>> v4: Use array of functions instead of switch (Chris)
>>
>> v5: More comments in uapi (Tvrtko)
>>      Return query item errors in length field (All)
>>
>> v6: Tweak uapi comments style to match the coding style (Lionel)
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   drivers/gpu/drm/i915/Makefile     |  1 +
>>   drivers/gpu/drm/i915/i915_drv.c   |  1 +
>>   drivers/gpu/drm/i915/i915_drv.h   |  3 ++
>>   drivers/gpu/drm/i915/i915_query.c | 67 
>> +++++++++++++++++++++++++++++++++++++++
>>   include/uapi/drm/i915_drm.h       | 41 ++++++++++++++++++++++++
>>   5 files changed, 113 insertions(+)
>>   create mode 100644 drivers/gpu/drm/i915/i915_query.c
>>
>> diff --git a/drivers/gpu/drm/i915/Makefile 
>> b/drivers/gpu/drm/i915/Makefile
>> index 3bddd8a06806..b0415a3e2d59 100644
>> --- a/drivers/gpu/drm/i915/Makefile
>> +++ b/drivers/gpu/drm/i915/Makefile
>> @@ -69,6 +69,7 @@ i915-y += i915_cmd_parser.o \
>>         i915_gem_timeline.o \
>>         i915_gem_userptr.o \
>>         i915_gemfs.o \
>> +      i915_query.o \
>>         i915_trace_points.o \
>>         i915_vma.o \
>>         intel_breadcrumbs.o \
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
>> b/drivers/gpu/drm/i915/i915_drv.c
>> index 6d34bc1a7fff..c59be22f19ab 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -2830,6 +2830,7 @@ static const struct drm_ioctl_desc i915_ioctls[] 
>> = {
>>       DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, 
>> DRM_RENDER_ALLOW),
>>       DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, 
>> i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>       DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, 
>> i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>> +    DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, 
>> DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>   };
>>   static struct drm_driver driver = {
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 8333692dac5a..50a7040af71e 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -3627,6 +3627,9 @@ extern void i915_perf_fini(struct 
>> drm_i915_private *dev_priv);
>>   extern void i915_perf_register(struct drm_i915_private *dev_priv);
>>   extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
>> +/* i915_query.c */
>> +int i915_query_ioctl(struct drm_device *dev, void *data, struct 
>> drm_file *file);
>> +
>>   /* i915_suspend.c */
>>   extern int i915_save_state(struct drm_i915_private *dev_priv);
>>   extern int i915_restore_state(struct drm_i915_private *dev_priv);
>> diff --git a/drivers/gpu/drm/i915/i915_query.c 
>> b/drivers/gpu/drm/i915/i915_query.c
>> new file mode 100644
>> index 000000000000..5aa886313cf9
>> --- /dev/null
>> +++ b/drivers/gpu/drm/i915/i915_query.c
>> @@ -0,0 +1,67 @@
>> +/*
>> + * Copyright © 2017 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person 
>> obtaining a
>> + * copy of this software and associated documentation files (the 
>> "Software"),
>> + * to deal in the Software without restriction, including without 
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, 
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including 
>> the next
>> + * paragraph) shall be included in all copies or substantial portions 
>> of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT 
>> SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 
>> OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
>> ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
>> OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#include "i915_drv.h"
>> +#include <uapi/drm/i915_drm.h>
>> +
>> +static int (* const i915_query_funcs[])(struct drm_i915_private 
>> *dev_priv,
>> +                    struct drm_i915_query_item *query_item) = {
>> +};
>> +
>> +int i915_query_ioctl(struct drm_device *dev, void *data, struct 
>> drm_file *file)
>> +{
>> +    struct drm_i915_private *dev_priv = to_i915(dev);
>> +    struct drm_i915_query *args = data;
>> +    struct drm_i915_query_item __user *user_item_ptr =
>> +        u64_to_user_ptr(args->items_ptr);
>> +    u32 i;
>> +
>> +    if (args->flags != 0)
>> +        return -EINVAL;
>> +
>> +    for (i = 0; i < args->num_items; i++, user_item_ptr++) {
>> +        struct drm_i915_query_item item;
>> +        u64 func_idx;
>> +        int ret;
>> +
>> +        if (copy_from_user(&item, user_item_ptr, sizeof(item)))
>> +            return -EFAULT;
>> +
>> +        if (item.query_id == 0)
>> +            return -EINVAL;
>> +
>> +        func_idx = item.query_id - 1;
>> +
>> +        if (func_idx >= ARRAY_SIZE(i915_query_funcs))
>> +            return -EINVAL;
>> +
>> +        ret = i915_query_funcs[func_idx](dev_priv, &item);
>> +
>> +        /* Only write the length back to userspace if they differ. */
>> +        if (ret != item.length && put_user(ret, &user_item_ptr->length))
>> +            return -EFAULT;
>> +    }
>> +
>> +    return 0;
>> +}
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 536ee4febd74..03f031652d86 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -318,6 +318,7 @@ typedef struct _drm_i915_sarea {
>>   #define DRM_I915_PERF_OPEN        0x36
>>   #define DRM_I915_PERF_ADD_CONFIG    0x37
>>   #define DRM_I915_PERF_REMOVE_CONFIG    0x38
>> +#define DRM_I915_QUERY            0x39
>>   #define DRM_IOCTL_I915_INIT        DRM_IOW( DRM_COMMAND_BASE + 
>> DRM_I915_INIT, drm_i915_init_t)
>>   #define DRM_IOCTL_I915_FLUSH        DRM_IO ( DRM_COMMAND_BASE + 
>> DRM_I915_FLUSH)
>> @@ -375,6 +376,7 @@ typedef struct _drm_i915_sarea {
>>   #define DRM_IOCTL_I915_PERF_OPEN    DRM_IOW(DRM_COMMAND_BASE + 
>> DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
>>   #define DRM_IOCTL_I915_PERF_ADD_CONFIG    DRM_IOW(DRM_COMMAND_BASE + 
>> DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
>>   #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG    
>> DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
>> +#define DRM_IOCTL_I915_QUERY            DRM_IOWR(DRM_COMMAND_BASE + 
>> DRM_I915_QUERY, struct drm_i915_query)
>>   /* Allow drivers to submit batchbuffers directly to hardware, relying
>>    * on the security mechanisms provided by hardware.
>> @@ -1613,6 +1615,45 @@ struct drm_i915_perf_oa_config {
>>       __u64 flex_regs_ptr;
>>   };
>> +
>> +struct drm_i915_query_item {
>> +    __u64 query_id;
>> +
>> +    /*
>> +     * When set to zero by userspace, this is filled with the size of 
>> the
>> +     * data to be written at the data_ptr pointer. The kernel set this
>> +     * value to a negative value to signal an error on a particular 
>> query
>> +     * item.
>> +     */
>> +    __s32 length;
>> +
>> +    /*
>> +     * Unused for now.
>> +     */
>> +    __u32 flags;
>> +
>> +    /*
>> +     * Data will be written at the location pointed by data_ptr when the
>> +     * value of length matches the length of the data to be written 
>> by the
>> +     * kernel.
>> +     */
>> +    __u64 data_ptr;
>> +};
>> +
>> +struct drm_i915_query {
>> +    __u32 num_items;
>> +
>> +    /*
>> +     * Unused for now.
>> +     */
>> +    __u32 flags;
>> +
>> +    /*
>> +     * This point to an array of num_items drm_i915_query_item 
>> structures.
>> +     */
>> +    __u64 items_ptr;
>> +};
>> +
>>   #if defined(__cplusplus)
>>   }
>>   #endif
> 
> 
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v11 5/6] drm/i915: add query uAPI
  2018-01-24 12:03     ` Tvrtko Ursulin
@ 2018-01-24 15:14       ` Chris Wilson
  2018-01-29 16:24         ` Lionel Landwerlin
  0 siblings, 1 reply; 16+ messages in thread
From: Chris Wilson @ 2018-01-24 15:14 UTC (permalink / raw)
  To: Tvrtko Ursulin, Lionel Landwerlin, intel-gfx, Daniel Vetter,
	Joonas Lahtinen

Quoting Tvrtko Ursulin (2018-01-24 12:03:46)
> 
> On 23/01/2018 14:17, Lionel Landwerlin wrote:
> > Hi all,
> > 
> > I've been trying to expose some information to userspace about the fused 
> > parts of the GPU.
> > This is the 4th attempt at getting this upstream, here are the previous 
> > ones :
> >      https://patchwork.freedesktop.org/patch/185959/
> >      https://patchwork.freedesktop.org/series/33436/
> >      https://patchwork.freedesktop.org/series/33950/
> > 
> > This last iteration was based upon some direction by Daniel : 
> > https://lists.freedesktop.org/archives/dri-devel/2017-December/160162.html
> > There was, I think, a fair point about having this working in 
> > environments where sysfs (mechanism used in the 3rd iteration) is not 
> > available (containers).
> > 
> > Following some discussion on IRC, it seems Joonas would like this 
> > rewritten in a such way that we essentially drop the generic mechanism 
> > introduced in this patch, and instead go for an additional ioctl() on 
> > the drm fd just for querying the state of a fused part of 
> > slice/subslice/eus.
> > The proposal is to have a single struct like :
> > 
> > struct drm_i915_topology {
> >     /* All field are in/out */
> >     int slice;
> >     int subslice;
> >     int eu;
> > 
> >     int enabled;
> > };
> > 
> > You would let the slice field to -1 and then the kernel would fill it 
> > with the max slice value. Same for subslice (with a valid slice value) 
> > and eu.
> > When querying with slice = 0, and all other fields to -1, the kernel 
> > would fill the enabled value with 0 or 1.
> > Essentially that would mean that an application wanting to query the 
> > state of all of the EUs would have to go through them one by one (which 
> > would be about ~100 ioctl() on SKL GT4 for example).
> > 
> > Apart from the fact that we'll probably end up adding another ioctl() 
> > for engine discovery, I don't have any problem with what Joonas is 
> > proposing.
> > It's just a bit annoying this comes up on the 4th rewrite.
> > I really wouldn't like to rewrite this one more time and get turned down 
> > because this isn't to the taste of one of the reviewer.
> > So my question is : Is everybody happy with what Joonas is proposing?
> > Anybody in favor of having a generic mechanism?
> 
> I am not very keen on this counter-proposal for two reasons.
> 
> First is that I think is a bit inelegant to have to query so many times 
> just to get the full topology out. If this ends in some library, we may 
> end up running this on every trivial client startup.
> 
> Secondly, it is kind of dispatcher in it's own right. Since the 
> operation mode will depend on the combination of field values. As such a 
> generic, or at least a more explicit, dispatcher, like the proposed 
> i915_query_ioctl sounds cleaner to me.
> 
> I take the point we can't guess how many other users we will have for it 
> in the future. So there is a little bit of a risk of adding something 
> generic which won't be used a lot in the future.
> 
> Because apart from the three queries Lionel needs, I would be adding an 
> engine info query and potentially, depending on userspace interest, 
> engine queue depths. So that would be a maximum of five queries I am 
> aware of would use the generic framework. Maybe too little, or maybe 
> good enough for a start?

Another use case would be a single shot method to gather all GETPARAMs.

There's a lot of too'ing and fro'ing at the start of mesa trying to
determine all of the kernel's capabilities, which more or less come down
to a long series of parsing GETPARAM results. Bundling all of those up
into a single ioctl seems attractive to me (bonus for it being properly
defined and not a compat nightmare).
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v11 5/6] drm/i915: add query uAPI
  2018-01-24 15:14       ` Chris Wilson
@ 2018-01-29 16:24         ` Lionel Landwerlin
  2018-01-30 15:01           ` Lahtinen, Joonas
  0 siblings, 1 reply; 16+ messages in thread
From: Lionel Landwerlin @ 2018-01-29 16:24 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, intel-gfx, Daniel Vetter, Joonas Lahtinen

On 24/01/18 15:14, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-01-24 12:03:46)
>> On 23/01/2018 14:17, Lionel Landwerlin wrote:
>>> Hi all,
>>>
>>> I've been trying to expose some information to userspace about the fused
>>> parts of the GPU.
>>> This is the 4th attempt at getting this upstream, here are the previous
>>> ones :
>>>       https://patchwork.freedesktop.org/patch/185959/
>>>       https://patchwork.freedesktop.org/series/33436/
>>>       https://patchwork.freedesktop.org/series/33950/
>>>
>>> This last iteration was based upon some direction by Daniel :
>>> https://lists.freedesktop.org/archives/dri-devel/2017-December/160162.html
>>> There was, I think, a fair point about having this working in
>>> environments where sysfs (mechanism used in the 3rd iteration) is not
>>> available (containers).
>>>
>>> Following some discussion on IRC, it seems Joonas would like this
>>> rewritten in a such way that we essentially drop the generic mechanism
>>> introduced in this patch, and instead go for an additional ioctl() on
>>> the drm fd just for querying the state of a fused part of
>>> slice/subslice/eus.
>>> The proposal is to have a single struct like :
>>>
>>> struct drm_i915_topology {
>>>      /* All field are in/out */
>>>      int slice;
>>>      int subslice;
>>>      int eu;
>>>
>>>      int enabled;
>>> };
>>>
>>> You would let the slice field to -1 and then the kernel would fill it
>>> with the max slice value. Same for subslice (with a valid slice value)
>>> and eu.
>>> When querying with slice = 0, and all other fields to -1, the kernel
>>> would fill the enabled value with 0 or 1.
>>> Essentially that would mean that an application wanting to query the
>>> state of all of the EUs would have to go through them one by one (which
>>> would be about ~100 ioctl() on SKL GT4 for example).
>>>
>>> Apart from the fact that we'll probably end up adding another ioctl()
>>> for engine discovery, I don't have any problem with what Joonas is
>>> proposing.
>>> It's just a bit annoying this comes up on the 4th rewrite.
>>> I really wouldn't like to rewrite this one more time and get turned down
>>> because this isn't to the taste of one of the reviewer.
>>> So my question is : Is everybody happy with what Joonas is proposing?
>>> Anybody in favor of having a generic mechanism?
>> I am not very keen on this counter-proposal for two reasons.
>>
>> First is that I think is a bit inelegant to have to query so many times
>> just to get the full topology out. If this ends in some library, we may
>> end up running this on every trivial client startup.
>>
>> Secondly, it is kind of dispatcher in it's own right. Since the
>> operation mode will depend on the combination of field values. As such a
>> generic, or at least a more explicit, dispatcher, like the proposed
>> i915_query_ioctl sounds cleaner to me.
>>
>> I take the point we can't guess how many other users we will have for it
>> in the future. So there is a little bit of a risk of adding something
>> generic which won't be used a lot in the future.
>>
>> Because apart from the three queries Lionel needs, I would be adding an
>> engine info query and potentially, depending on userspace interest,
>> engine queue depths. So that would be a maximum of five queries I am
>> aware of would use the generic framework. Maybe too little, or maybe
>> good enough for a start?
> Another use case would be a single shot method to gather all GETPARAMs.
>
> There's a lot of too'ing and fro'ing at the start of mesa trying to
> determine all of the kernel's capabilities, which more or less come down
> to a long series of parsing GETPARAM results. Bundling all of those up
> into a single ioctl seems attractive to me (bonus for it being properly
> defined and not a compat nightmare).
> -Chris
>
Thanks all,

I don't really read much opposition to the current patch series. If 
anything we could actually want to do more it seems.
It would be good to have the green light and land that.
I've played quickly with a Chris' idea and will add a couple of RFC 
patches for discussion.

Cheers,

-
Lionel

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v11 5/6] drm/i915: add query uAPI
  2018-01-29 16:24         ` Lionel Landwerlin
@ 2018-01-30 15:01           ` Lahtinen, Joonas
  2018-01-30 15:50             ` Lionel Landwerlin
  0 siblings, 1 reply; 16+ messages in thread
From: Lahtinen, Joonas @ 2018-01-30 15:01 UTC (permalink / raw)
  To: intel-gfx, Landwerlin, Lionel G, daniel.vetter, chris, tvrtko.ursulin

On Mon, 2018-01-29 at 16:24 +0000, Lionel Landwerlin wrote:
>
> Thanks all,
> 
> I don't really read much opposition to the current patch series. If 
> anything we could actually want to do more it seems.
> It would be good to have the green light and land that.
> I've played quickly with a Chris' idea and will add a couple of RFC 
> patches for discussion.

Where is the latest iteration of the less generic IOCTL?

I'm still not sold on this, the resulting uAPI is quite horrendous with
the macros to interpret the results.

Regards, Joonas
---------------------------------------------------------------------
Intel Finland Oy
Registered Address: PL 281, 00181 Helsinki 
Business Identity Code: 0357606 - 4 
Domiciled in Helsinki 

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v11 5/6] drm/i915: add query uAPI
  2018-01-30 15:01           ` Lahtinen, Joonas
@ 2018-01-30 15:50             ` Lionel Landwerlin
  0 siblings, 0 replies; 16+ messages in thread
From: Lionel Landwerlin @ 2018-01-30 15:50 UTC (permalink / raw)
  To: Lahtinen, Joonas, intel-gfx, daniel.vetter, chris, tvrtko.ursulin

On 30/01/18 15:01, Lahtinen, Joonas wrote:
> On Mon, 2018-01-29 at 16:24 +0000, Lionel Landwerlin wrote:
>> Thanks all,
>>
>> I don't really read much opposition to the current patch series. If
>> anything we could actually want to do more it seems.
>> It would be good to have the green light and land that.
>> I've played quickly with a Chris' idea and will add a couple of RFC
>> patches for discussion.
> Where is the latest iteration of the less generic IOCTL?
>
> I'm still not sold on this, the resulting uAPI is quite horrendous with
> the macros to interpret the results.
>
> Regards, Joonas
Not sure what is the less generic one. Here are the past attempts :

https://patchwork.freedesktop.org/patch/185959/
https://patchwork.freedesktop.org/series/33436/
https://patchwork.freedesktop.org/series/33950/

One of them was based off Tvrtko's stab at exposing the engines.

-
Lionel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2018-01-30 15:50 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-22  8:21 [PATCH v11 0/6] drm/i915: expose RCS topology to userspace Lionel Landwerlin
2018-01-22  8:21 ` [PATCH v11 1/6] drm/i915: store all subslice masks Lionel Landwerlin
2018-01-22  8:21 ` [PATCH v11 2/6] drm/i915/debugfs: reuse max slice/subslices already stored in sseu Lionel Landwerlin
2018-01-22  8:21 ` [PATCH v11 3/6] drm/i915/debugfs: add rcs topology entry Lionel Landwerlin
2018-01-22  8:21 ` [PATCH v11 4/6] drm/i915: add rcs topology to error state Lionel Landwerlin
2018-01-22  8:21 ` [PATCH v11 5/6] drm/i915: add query uAPI Lionel Landwerlin
2018-01-23 14:17   ` Lionel Landwerlin
2018-01-24 12:03     ` Tvrtko Ursulin
2018-01-24 15:14       ` Chris Wilson
2018-01-29 16:24         ` Lionel Landwerlin
2018-01-30 15:01           ` Lahtinen, Joonas
2018-01-30 15:50             ` Lionel Landwerlin
2018-01-22  8:21 ` [PATCH v11 6/6] drm/i915: expose rcs topology through " Lionel Landwerlin
2018-01-22  9:44   ` Tvrtko Ursulin
2018-01-22  8:55 ` ✓ Fi.CI.BAT: success for drm/i915: expose RCS topology to userspace Patchwork
2018-01-22 11:48 ` ✗ Fi.CI.IGT: warning " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.