All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lucas De Marchi <lucas.demarchi@intel.com>
To: Matt Roper <matthew.d.roper@intel.com>
Cc: intel-gfx@lists.freedesktop.org,
	Stuart Summers <stuart.summers@intel.com>,
	Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>,
	Steve Hampson <steven.t.hampson@intel.com>
Subject: Re: [Intel-gfx] [PATCH v5 4/9] drm/i915/xehpsdv: Add compute DSS type
Date: Thu, 5 Aug 2021 10:26:19 -0700	[thread overview]
Message-ID: <20210805172619.a6a3jjgr6srmpdla@ldmartin-desk2> (raw)
In-Reply-To: <20210805163647.801064-5-matthew.d.roper@intel.com>

On Thu, Aug 05, 2021 at 09:36:42AM -0700, Matt Roper wrote:
>From: Stuart Summers <stuart.summers@intel.com>
>
>Starting in XeHP, the concept of slice has been removed in favor of
>DSS (Dual-Subslice) masks for various workload types. These workloads have
>been divided into those enabled for geometry and those enabled for compute.
>
>i915 currently maintains a single set of S/SS/EU masks for the device.
>The goal of this patch set is to minimize the amount of impact to prior
>generations while still giving the user maximum flexibility.
>
>v2:
> - Generalize a comment about uapi access to geometry/compute masks; the
>   proposed uapi has changed since the comment was first written, and
>   will show up in a future series once the userspace code is published.
>   (Lucas)
>
>Bspec: 33117, 33118, 20376
>Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>Cc: Matt Roper <matthew.d.roper@intel.com>
>Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>Signed-off-by: Stuart Summers <stuart.summers@intel.com>
>Signed-off-by: Steve Hampson <steven.t.hampson@intel.com>
>Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
>---
> drivers/gpu/drm/i915/gt/intel_sseu.c | 66 +++++++++++++++++++++-------
> drivers/gpu/drm/i915/gt/intel_sseu.h |  5 ++-
> drivers/gpu/drm/i915/i915_reg.h      |  3 +-
> include/uapi/drm/i915_drm.h          |  3 --
> 4 files changed, 55 insertions(+), 22 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
>index bbd272943c3f..9cf157a2454f 100644
>--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
>+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
>@@ -46,11 +46,11 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
> }
>
> void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
>-			      u32 ss_mask)
>+			      u8 *subslice_mask, u32 ss_mask)
> {
> 	int offset = slice * sseu->ss_stride;
>
>-	memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride);
>+	memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride);
> }
>
> unsigned int
>@@ -100,14 +100,24 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu)
> 	return total;
> }
>
>-static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
>-				    u8 s_en, u32 ss_en, u16 eu_en)
>+static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
>+{
>+	u32 ss_mask;
>+
>+	ss_mask = ss_en >> (s * sseu->max_subslices);
>+	ss_mask &= GENMASK(sseu->max_subslices - 1, 0);
>+
>+	return ss_mask;
>+}
>+
>+static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
>+				    u32 g_ss_en, u32 c_ss_en, u16 eu_en)
> {
> 	int s, ss;
>
>-	/* ss_en represents entire subslice mask across all slices */
>+	/* g_ss_en/c_ss_en represent entire subslice mask across all slices */
> 	GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
>-		   sizeof(ss_en) * BITS_PER_BYTE);
>+		   sizeof(g_ss_en) * BITS_PER_BYTE);
>
> 	for (s = 0; s < sseu->max_slices; s++) {
> 		if ((s_en & BIT(s)) == 0)
>@@ -115,7 +125,22 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
>
> 		sseu->slice_mask |= BIT(s);
>
>-		intel_sseu_set_subslices(sseu, s, ss_en);
>+		/*
>+		 * XeHP introduces the concept of compute vs geometry DSS. To
>+		 * reduce variation between GENs around subslice usage, store a
>+		 * mask for both the geometry and compute enabled masks since
>+		 * userspace will need to be able to query these masks
>+		 * independently.  Also compute a total enabled subslice count
>+		 * for the purposes of selecting subslices to use in a
>+		 * particular GEM context.
>+		 */
>+		intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
>+					 get_ss_stride_mask(sseu, s, c_ss_en));
>+		intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
>+					 get_ss_stride_mask(sseu, s, g_ss_en));
>+		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
>+					 get_ss_stride_mask(sseu, s,
>+							    g_ss_en | c_ss_en));
>
> 		for (ss = 0; ss < sseu->max_subslices; ss++)
> 			if (intel_sseu_has_subslice(sseu, s, ss))
>@@ -129,7 +154,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
> {
> 	struct sseu_dev_info *sseu = &gt->info.sseu;
> 	struct intel_uncore *uncore = gt->uncore;
>-	u32 dss_en;
>+	u32 g_dss_en, c_dss_en = 0;
> 	u16 eu_en = 0;
> 	u8 eu_en_fuse;
> 	u8 s_en;
>@@ -145,10 +170,12 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
> 	 * across the entire device. Then calculate out the DSS for each
> 	 * workload type within that software slice.
> 	 */
>-	if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915))
>+	if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915)) {
> 		intel_sseu_set_info(sseu, 1, 32, 16);
>-	else
>+		sseu->has_compute_dss = 1;
>+	} else {
> 		intel_sseu_set_info(sseu, 1, 6, 16);
>+	}

sseu->has_compute_dss doesn't make much sense. It will only be used
locally.

Even after the UAPI change to add a query, it still doesn't make sense
as the query could just be checking the compute mask != 0 to decide if
it has compute dss or not.

So, I'd ditch this sseu->has_compute_dss and replace it with a local
variable.

>
> 	/*
> 	 * As mentioned above, Xe_HP does not have the concept of a slice.
>@@ -160,7 +187,9 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
> 		s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
> 		       GEN11_GT_S_ENA_MASK;
>
>-	dss_en = intel_uncore_read(uncore, GEN12_GT_DSS_ENABLE);
>+	g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
>+	if (sseu->has_compute_dss)
>+		c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);
>
> 	/* one bit per pair of EUs */
> 	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
>@@ -173,7 +202,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
> 		if (eu_en_fuse & BIT(eu))
> 			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
>
>-	gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);
>+	gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);
>
> 	/* TGL only supports slice-level power gating */
> 	sseu->has_slice_pg = 1;
>@@ -199,7 +228,7 @@ static void gen11_sseu_info_init(struct intel_gt *gt)
> 	eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
> 		  GEN11_EU_DIS_MASK);
>
>-	gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);
>+	gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en);
>
> 	/* ICL has no power gating restrictions. */
> 	sseu->has_slice_pg = 1;
>@@ -240,7 +269,7 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
> 		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
> 	}
>
>-	intel_sseu_set_subslices(sseu, 0, subslice_mask);
>+	intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask);
>
> 	sseu->eu_total = compute_eu_total(sseu);
>
>@@ -296,7 +325,8 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
> 			/* skip disabled slice */
> 			continue;
>
>-		intel_sseu_set_subslices(sseu, s, subslice_mask);
>+		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
>+					 subslice_mask);
>
> 		eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
> 		for (ss = 0; ss < sseu->max_subslices; ss++) {
>@@ -408,7 +438,8 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
> 			/* skip disabled slice */
> 			continue;
>
>-		intel_sseu_set_subslices(sseu, s, subslice_mask);
>+		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
>+					 subslice_mask);
>
> 		for (ss = 0; ss < sseu->max_subslices; ss++) {
> 			u8 eu_disabled_mask;
>@@ -506,7 +537,8 @@ static void hsw_sseu_info_init(struct intel_gt *gt)
> 			    sseu->eu_per_subslice);
>
> 	for (s = 0; s < sseu->max_slices; s++) {
>-		intel_sseu_set_subslices(sseu, s, subslice_mask);
>+		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
>+					 subslice_mask);
>
> 		for (ss = 0; ss < sseu->max_subslices; ss++) {
> 			sseu_set_eus(sseu, s, ss,
>diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
>index 0270acdcc157..61944829d069 100644
>--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
>+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
>@@ -32,6 +32,8 @@ struct drm_printer;
> struct sseu_dev_info {
> 	u8 slice_mask;
> 	u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
>+	u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
>+	u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
> 	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
> 	u16 eu_total;
> 	u8 eu_per_subslice;
>@@ -41,6 +43,7 @@ struct sseu_dev_info {
> 	u8 has_slice_pg:1;
> 	u8 has_subslice_pg:1;
> 	u8 has_eu_pg:1;
>+	u8 has_compute_dss:1;

as mentioned above, this should be gone

>
> 	/* Topology fields */
> 	u8 max_slices;
>@@ -104,7 +107,7 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
> u32  intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
>
> void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
>-			      u32 ss_mask);
>+			      u8 *subslice_mask, u32 ss_mask);
>
> void intel_sseu_info_init(struct intel_gt *gt);
>
>diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>index 8bfd646fc403..f8d3cd11eced 100644
>--- a/drivers/gpu/drm/i915/i915_reg.h
>+++ b/drivers/gpu/drm/i915/i915_reg.h
>@@ -3151,7 +3151,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>
> #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C)
>
>-#define GEN12_GT_DSS_ENABLE _MMIO(0x913C)
>+#define GEN12_GT_GEOMETRY_DSS_ENABLE _MMIO(0x913C)
>+#define GEN12_GT_COMPUTE_DSS_ENABLE _MMIO(0x9144)
>
> #define XEHP_EU_ENABLE			_MMIO(0x9134)
> #define XEHP_EU_ENA_MASK		0xFF
>diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>index bde5860b3686..fb36e897cc86 100644
>--- a/include/uapi/drm/i915_drm.h
>+++ b/include/uapi/drm/i915_drm.h
>@@ -2609,9 +2609,6 @@ struct drm_i915_query {
>  *                 Z / 8] >> (Z % 8)) & 1
>  */
> struct drm_i915_query_topology_info {
>-	/*
>-	 * Unused for now. Must be cleared to zero.
>-	 */

without uapi changes we shouldn't change the uapi headers.

thanks
Lucas De Marchi

> 	__u16 flags;
>
> 	__u16 max_slices;
>-- 
>2.25.4
>

  reply	other threads:[~2021-08-05 17:26 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-05 16:36 [Intel-gfx] [PATCH v5 0/9] Begin enabling Xe_HP SDV and DG2 platforms Matt Roper
2021-08-05 16:36 ` [Intel-gfx] [PATCH v5 1/9] drm/i915/dg2: Add support for new DG2-G11 revid 0x5 Matt Roper
2021-08-05 16:48   ` Lucas De Marchi
2021-08-05 16:36 ` [Intel-gfx] [PATCH v5 2/9] drm/i915/xehp: Loop over all gslices for INSTDONE processing Matt Roper
2021-08-11  0:18   ` Lucas De Marchi
2021-08-05 16:36 ` [Intel-gfx] [PATCH v5 3/9] drm/i915/dg2: Report INSTDONE_GEOM values in error state Matt Roper
2021-08-05 16:36 ` [Intel-gfx] [PATCH v5 4/9] drm/i915/xehpsdv: Add compute DSS type Matt Roper
2021-08-05 17:26   ` Lucas De Marchi [this message]
2021-08-06 17:29     ` [Intel-gfx] [PATCH v5.1 " Matt Roper
2021-08-11  0:17       ` Lucas De Marchi
2021-08-05 16:36 ` [Intel-gfx] [PATCH v5 5/9] drm/i915/xehpsdv: factor out function to read RP_STATE_CAP Matt Roper
2021-08-12 22:49   ` Souza, Jose
2021-08-12 23:14     ` Lucas De Marchi
2021-08-12 23:18       ` Lucas De Marchi
2021-08-05 16:36 ` [Intel-gfx] [PATCH v5 6/9] drm/i915/xehpsdv: Read correct RP_STATE_CAP register Matt Roper
2021-08-05 16:36 ` [Intel-gfx] [PATCH v5 7/9] drm/i915/dg2: Add new LRI reg offsets Matt Roper
2021-08-25  0:03   ` Yokoyama, Caz
2021-08-05 16:36 ` [Intel-gfx] [PATCH v5 8/9] drm/i915/dg2: Maintain backward-compatible nested batch behavior Matt Roper
2021-08-18 21:56   ` Srivatsa, Anusha
2021-08-23  9:26   ` Tvrtko Ursulin
2021-08-24  4:06     ` Matt Roper
2021-08-05 16:36 ` [Intel-gfx] [PATCH v5 9/9] drm/i915/dg2: Configure PCON in DP pre-enable path Matt Roper
2021-08-10 21:51   ` Souza, Jose
2021-08-05 17:42 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Begin enabling Xe_HP SDV and DG2 platforms (rev9) Patchwork
2021-08-05 17:44 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2021-08-05 18:11 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2021-08-06  7:48 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
2021-08-06 18:13 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Begin enabling Xe_HP SDV and DG2 platforms (rev10) Patchwork
2021-08-06 18:15 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2021-08-06 18:42 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2021-08-06 23:49 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210805172619.a6a3jjgr6srmpdla@ldmartin-desk2 \
    --to=lucas.demarchi@intel.com \
    --cc=daniele.ceraolospurio@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=matthew.d.roper@intel.com \
    --cc=steven.t.hampson@intel.com \
    --cc=stuart.summers@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.