All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
To: Stuart Summers <stuart.summers@intel.com>,
	intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 5/5] drm/i915: Expand subslice mask
Date: Tue, 7 May 2019 12:00:55 -0700	[thread overview]
Message-ID: <ea334454-c3af-0deb-b5a2-a744549a7249@intel.com> (raw)
In-Reply-To: <20190503213020.25628-6-stuart.summers@intel.com>



On 5/3/19 2:30 PM, Stuart Summers wrote:
> Currently, the subslice_mask runtime parameter is stored as an
> array of subslices per slice. Expand the subslice mask array to
> better match what is presented to userspace through the
> I915_QUERY_TOPOLOGY_INFO ioctl. The index into this array is
> then calculated:
>    slice * subslice stride + subslice index / 8
> 
> v2: fix spacing in set_sseu_info args
>      use set_sseu_info to initialize sseu data when building
>      device status in debugfs
>      rename variables in intel_engine_types.h to avoid checkpatch
>      warnings
> v3: update headers in intel_sseu.h
> v4: add const to some sseu_dev_info variables
>      use sseu->eu_stride for EU stride calculations
> v5: address review comments from Tvrtko and Daniele
> 
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Signed-off-by: Stuart Summers <stuart.summers@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c    |  24 ++-
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |  30 ++--
>   drivers/gpu/drm/i915/gt/intel_hangcheck.c    |   3 +-
>   drivers/gpu/drm/i915/gt/intel_sseu.c         |  43 ++++-
>   drivers/gpu/drm/i915/gt/intel_sseu.h         |  28 +++-
>   drivers/gpu/drm/i915/gt/intel_workarounds.c  |   2 +-
>   drivers/gpu/drm/i915/i915_debugfs.c          |  40 ++---
>   drivers/gpu/drm/i915/i915_drv.c              |   6 +-
>   drivers/gpu/drm/i915/i915_gpu_error.c        |   5 +-
>   drivers/gpu/drm/i915/i915_query.c            |  10 +-
>   drivers/gpu/drm/i915/intel_device_info.c     | 155 ++++++++++---------
>   11 files changed, 227 insertions(+), 119 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 5907a9613641..290bda5cc82b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -909,12 +909,30 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
>   	}
>   }
>   
> +static inline u32
> +intel_sseu_fls_subslice(const struct sseu_dev_info *sseu, u32 slice)
> +{
> +	u32 subslice;
> +	int i;
> +
> +	for (i = sseu->ss_stride - 1; i >= 0; i--) {
> +		subslice = fls(sseu->subslice_mask[slice * sseu->ss_stride +
> +						   i]);
> +		if (subslice) {
> +			subslice += i * BITS_PER_BYTE;
> +			break;
> +		}
> +	}
> +
> +	return subslice;
> +}
> +
>   u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv)
>   {
>   	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
>   	u32 mcr_s_ss_select;
>   	u32 slice = fls(sseu->slice_mask);
> -	u32 subslice = fls(sseu->subslice_mask[slice]);
> +	u32 subslice = intel_sseu_fls_subslice(sseu, slice);
>   
>   	if (IS_GEN(dev_priv, 10))
>   		mcr_s_ss_select = GEN8_MCR_SLICE(slice) |
> @@ -990,6 +1008,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
>   			       struct intel_instdone *instdone)
>   {
>   	struct drm_i915_private *dev_priv = engine->i915;
> +	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
>   	struct intel_uncore *uncore = engine->uncore;
>   	u32 mmio_base = engine->mmio_base;
>   	int slice;
> @@ -1007,7 +1026,8 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
>   
>   		instdone->slice_common =
>   			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
> -		for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
> +		for_each_instdone_slice_subslice(dev_priv, sseu, slice,
> +						 subslice) {
>   			instdone->sampler[slice][subslice] =
>   				read_subslice_reg(dev_priv, slice, subslice,
>   						  GEN7_SAMPLER_INSTDONE);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index c0ab11b12e14..582340b55144 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -535,20 +535,20 @@ intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
>   	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
>   }
>   
> -#define instdone_slice_mask(dev_priv__) \
> -	(IS_GEN(dev_priv__, 7) ? \
> -	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
> -
> -#define instdone_subslice_mask(dev_priv__) \
> -	(IS_GEN(dev_priv__, 7) ? \
> -	 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])
> -
> -#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
> -	for ((slice__) = 0, (subslice__) = 0; \
> -	     (slice__) < I915_MAX_SLICES; \
> -	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
> -	       (slice__) += ((subslice__) == 0)) \
> -		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
> -			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
> +#define instdone_has_slice(dev_priv___, sseu___, slice___) \
> +	((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & \
> +	BIT(slice___))
> +
> +#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
> +	(IS_GEN(dev_priv__, 7) ? 1 : \

This will return true for all ss on gen7, while the original code 
returned 1 as the subslice mask (i.e. only has ss == 0).

> +	 intel_sseu_has_subslice(sseu__, slice__, subslice__))
> +
> +#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \
> +	for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \
> +	     (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \
> +	     (slice_) += ((subslice_) == 0)) \
> +		for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \
> +			    (instdone_has_subslice(dev_priv_, sseu_, slice_, \
> +						    subslice_)))
>   
>   #endif /* __INTEL_ENGINE_TYPES_H__ */
> diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> index 721ab74a382f..10e032c9ab10 100644
> --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> @@ -51,6 +51,7 @@ static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
>   static bool subunits_stuck(struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_private *dev_priv = engine->i915;
> +	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
>   	struct intel_instdone instdone;
>   	struct intel_instdone *accu_instdone = &engine->hangcheck.instdone;
>   	bool stuck;
> @@ -72,7 +73,7 @@ static bool subunits_stuck(struct intel_engine_cs *engine)
>   	stuck &= instdone_unchanged(instdone.slice_common,
>   				    &accu_instdone->slice_common);
>   
> -	for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
> +	for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) {
>   		stuck &= instdone_unchanged(instdone.sampler[slice][subslice],
>   					    &accu_instdone->sampler[slice][subslice]);
>   		stuck &= instdone_unchanged(instdone.row[slice][subslice],
> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
> index a0756f006f5f..a8b98b0266b7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_sseu.c
> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
> @@ -8,6 +8,17 @@
>   #include "intel_lrc_reg.h"
>   #include "intel_sseu.h"
>   
> +void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
> +			 u8 max_subslices, u8 max_eus_per_subslice)
> +{
> +	sseu->max_slices = max_slices;
> +	sseu->max_subslices = max_subslices;
> +	sseu->max_eus_per_subslice = max_eus_per_subslice;
> +
> +	sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
> +	sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
> +}
> +
>   unsigned int
>   intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
>   {
> @@ -19,10 +30,40 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
>   	return total;
>   }
>   
> +void intel_sseu_copy_subslices(const struct sseu_dev_info *sseu, int slice,
> +			       u8 *to_mask)
> +{
> +	int offset = slice * sseu->ss_stride;
> +
> +	memcpy(&to_mask[offset], &sseu->subslice_mask[offset], sseu->ss_stride);
> +}
> +
> +u32  intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)

nitpick: extra space

> +{
> +	int i, offset = slice * sseu->ss_stride;
> +	u32 mask;
> +

GEM_BUG_ON(sseu->ss_stride > size(u32)) to catch the unlikely case we go 
over and need to update the mask size?

> +	for (i = 0; i < sseu->ss_stride; i++)
> +		mask |= (u32)sseu->subslice_mask[offset + i] <<
> +			i * BITS_PER_BYTE;
> +
> +	return mask;
> +}
> +
> +void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
> +			      u32 ss_mask)
> +{
> +	int i, offset = slice * sseu->ss_stride;
> +
> +	for (i = 0; i < sseu->ss_stride; i++)
> +		sseu->subslice_mask[offset + i] =
> +			(ss_mask >> (BITS_PER_BYTE * i)) & 0xff;
> +}
> +
>   unsigned int
>   intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
>   {
> -	return hweight8(sseu->subslice_mask[slice]);
> +	return hweight32(intel_sseu_get_subslices(sseu, slice));
>   }
>   
>   u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,

<snip>

> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -84,17 +84,46 @@ void intel_device_info_dump_flags(const struct intel_device_info *info,
>   #undef PRINT_FLAG
>   }
>   
> +#define SS_STR_MAX_SIZE (GEN_MAX_SUBSLICE_STRIDE * 2 + 1)
> +
> +static char *
> +subslice_per_slice_str(char *buf, u8 size, const struct sseu_dev_info *sseu,
> +		       u8 slice)
> +{
> +	int i;
> +	u8 ss_offset = slice * sseu->ss_stride;
> +
> +	GEM_BUG_ON(slice >= sseu->max_slices);
> +
> +	/* Two ASCII character hex plus null terminator */
> +	GEM_BUG_ON(size < sseu->ss_stride * 2 + 1);
> +
> +	memset(buf, 0, size);
> +
> +	/*
> +	 * Print subslice information in reverse order to match
> +	 * userspace expectations.
> +	 */
> +	for (i = 0; i < sseu->ss_stride; i++)
> +		sprintf(&buf[i * 2], "%02x",
> +			sseu->subslice_mask[ss_offset + sseu->ss_stride -
> +					    (i + 1)]);
> +
> +	return buf;
> +}
> +
>   static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
>   {
>   	int s;
> +	char buf[SS_STR_MAX_SIZE];
>   
>   	drm_printf(p, "slice total: %u, mask=%04x\n",
>   		   hweight8(sseu->slice_mask), sseu->slice_mask);
>   	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
>   	for (s = 0; s < sseu->max_slices; s++) {
> -		drm_printf(p, "slice%d: %u subslices, mask=%04x\n",
> +		drm_printf(p, "slice%d: %u subslices, mask=%s\n",
>   			   s, intel_sseu_subslices_per_slice(sseu, s),
> -			   sseu->subslice_mask[s]);
> +			   subslice_per_slice_str(buf, ARRAY_SIZE(buf), sseu, s));

Now that we have intel_sseu_get_subslices() can't we just print the 
return from that instead of using the buffer?


>   	}
>   	drm_printf(p, "EU total: %u\n", sseu->eu_total);
>   	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);

<snip>

> @@ -555,6 +570,7 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
>   	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
>   	u32 fuse1;
>   	int s, ss;
> +	u32 subslice_mask;
>   
>   	/*
>   	 * There isn't a register to tell us how many slices/subslices. We
> @@ -566,22 +582,18 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
>   		/* fall through */
>   	case 1:
>   		sseu->slice_mask = BIT(0);
> -		sseu->subslice_mask[0] = BIT(0);
> +		subslice_mask = BIT(0);
>   		break;
>   	case 2:
>   		sseu->slice_mask = BIT(0);
> -		sseu->subslice_mask[0] = BIT(0) | BIT(1);
> +		subslice_mask = BIT(0) | BIT(1);
>   		break;
>   	case 3:
>   		sseu->slice_mask = BIT(0) | BIT(1);
> -		sseu->subslice_mask[0] = BIT(0) | BIT(1);
> -		sseu->subslice_mask[1] = BIT(0) | BIT(1);
> +		subslice_mask = BIT(0) | BIT(1);
>   		break;
>   	}
>   
> -	sseu->max_slices = hweight8(sseu->slice_mask);
> -	sseu->max_subslices = hweight8(sseu->subslice_mask[0]);
> -
>   	fuse1 = I915_READ(HSW_PAVP_FUSE1);
>   	switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) {
>   	default:
> @@ -598,9 +610,14 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
>   		sseu->eu_per_subslice = 6;
>   		break;
>   	}
> -	sseu->max_eus_per_subslice = sseu->eu_per_subslice;
> +
> +	intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
> +			    hweight8(subslice_mask),
> +			    sseu->eu_per_subslice);

I'd still prefer this to use a local variable so that we always only set 
sseu->eu_per_subslice from within intel_sseu_set_info.

Daniele

>   
>   	for (s = 0; s < sseu->max_slices; s++) {
> +		intel_sseu_set_subslices(sseu, s, subslice_mask);
> +
>   		for (ss = 0; ss < sseu->max_subslices; ss++) {
>   			intel_sseu_set_eus(sseu, s, ss,
>   					   (1UL << sseu->eu_per_subslice) - 1);
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2019-05-07 19:00 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-03 21:30 [PATCH 0/5] Refactor to expand subslice mask Stuart Summers
2019-05-03 21:30 ` [PATCH 1/5] drm/i915: Use local variable for SSEU info in GETPARAM ioctl Stuart Summers
2019-05-03 21:30 ` [PATCH 2/5] drm/i915: Add macro for SSEU stride calculation Stuart Summers
2019-05-03 21:30 ` [PATCH 3/5] drm/i915: Move calculation of subslices per slice to new function Stuart Summers
2019-05-03 21:30 ` [PATCH 4/5] drm/i915: Refactor sseu helper functions Stuart Summers
2019-05-07 18:12   ` Daniele Ceraolo Spurio
2019-05-07 18:21     ` Summers, Stuart
2019-05-03 21:30 ` [PATCH 5/5] drm/i915: Expand subslice mask Stuart Summers
2019-05-07 19:00   ` Daniele Ceraolo Spurio [this message]
2019-05-07 20:48     ` Summers, Stuart
2019-05-07 21:16       ` Daniele Ceraolo Spurio
2019-05-07 21:19         ` Summers, Stuart
2019-05-03 21:56 ` ✗ Fi.CI.CHECKPATCH: warning for Refactor to expand subslice mask (rev8) Patchwork
2019-05-03 21:59 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-05-03 22:44 ` ✓ Fi.CI.BAT: success " Patchwork
2019-05-04  4:28 ` ✓ Fi.CI.IGT: " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2019-05-13 20:56 [PATCH 0/5] Refactor to expand subslice mask Stuart Summers
2019-05-13 20:56 ` [PATCH 5/5] drm/i915: Expand " Stuart Summers
2019-05-16 22:40   ` Daniele Ceraolo Spurio
2019-05-21 15:52     ` Summers, Stuart
2019-04-29 15:51 [PATCH 0/5] Refactor to expand " Stuart Summers
2019-04-29 15:51 ` [PATCH 5/5] drm/i915: Expand " Stuart Summers
2019-04-30  9:03   ` Jani Nikula
2019-04-30 14:16     ` Summers, Stuart
2019-04-26 20:24 [PATCH 0/5] Refactor to expand " Stuart Summers
2019-04-26 20:24 ` [PATCH 5/5] drm/i915: Expand " Stuart Summers
2019-04-25 22:24 [PATCH 0/5] Refactor to expand " Stuart Summers
2019-04-25 22:24 ` [PATCH 5/5] drm/i915: Expand " Stuart Summers
2019-04-19  0:00 [PATCH 0/4] Refactor to expand " Stuart Summers
2019-04-25 16:28 ` [PATCH 0/5] " Stuart Summers
2019-04-25 16:29   ` [PATCH 5/5] drm/i915: Expand " Stuart Summers

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ea334454-c3af-0deb-b5a2-a744549a7249@intel.com \
    --to=daniele.ceraolospurio@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=stuart.summers@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.