Re: [PATCH] drm/i915: Flip guc_id allocation partition

From: Michal Wajdeczko <michal.wajdeczko@intel.com>
To: Matthew Brost <matthew.brost@intel.com>,
	intel-gfx@lists.freedesktop.org,
	 dri-devel@lists.freedesktop.org
Subject: Re: [PATCH] drm/i915: Flip guc_id allocation partition
Date: Wed, 2 Feb 2022 23:15:00 +0100	[thread overview]
Message-ID: <42aa9c4d-8a09-d02a-c205-491438a52305@intel.com> (raw)
In-Reply-To: <20220113162747.4017-1-matthew.brost@intel.com>

On 13.01.2022 17:27, Matthew Brost wrote:
> Move the multi-lrc guc_id from the lower allocation partition (0 to
> number of multi-lrc guc_ids) to upper allocation partition (number of
> single-lrc to max guc_ids).
> 
> This will help when a native driver transitions to a PF after driver
> load time. If the perma-pin guc_ids (kernel contexts) are in a low range
> it is easy reduce total number of guc_ids as the allocated slrc are in a
> valid range the mlrc range moves to an unused range. Assuming no mlrc
> are allocated and few slrc are used the native to PF transition is
> seamless for the guc_id resource.
> 
> v2:
>  (Michal / Tvrtko)
>   - Add an explaination to commit message of why this patch is needed
>  (Michal / Piotr)
>   - Replace marcos with functions
>  (Michal)
>   - Rework logic flow in new_mlrc_guc_id
>   - Unconditionally call bitmap_free
> v3:
>  (Michal)
>   - Move allocation of mlrc bitmap back submission init
>  (CI)
>   - Resend for CI
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 77 ++++++++++++++-----
>  1 file changed, 56 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 23a40f10d376d..fce58365b3ff8 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -138,17 +138,6 @@ guc_create_parallel(struct intel_engine_cs **engines,
>  
>  #define GUC_REQUEST_SIZE 64 /* bytes */
>  
> -/*
> - * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
> - * per the GuC submission interface. A different allocation algorithm is used
> - * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
> - * partition the guc_id space. We believe the number of multi-lrc contexts in
> - * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
> - * multi-lrc.
> - */
> -#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
> -	((guc)->submission_state.num_guc_ids / 16)
> -
>  /*
>   * Below is a set of functions which control the GuC scheduling state which
>   * require a lock.
> @@ -1746,6 +1735,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
>  }
>  
>  static void destroyed_worker_func(struct work_struct *w);
> +static int number_mlrc_guc_id(struct intel_guc *guc);
>  
>  /*
>   * Set up the memory resources to be shared with the GuC (via the GGTT)
> @@ -1778,7 +1768,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
>  		  destroyed_worker_func);
>  
>  	guc->submission_state.guc_ids_bitmap =
> -		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
> +		bitmap_zalloc(number_mlrc_guc_id(guc), GFP_KERNEL);

to fully benefit from the id partition flip we likely will have to
allocate bitmap 'just-in-time' when first mlrc id is needed

so something like you had in early rev but abandon to avoid alloc inside
spinlock - but I'm wondering why we can't alloc bitmap for mlrc case,
while we allow allocation for slrc (as ida_simple_get may alloc, no?

>  	if (!guc->submission_state.guc_ids_bitmap)
>  		return -ENOMEM;
>  
> @@ -1864,6 +1854,57 @@ static void guc_submit_request(struct i915_request *rq)
>  	spin_unlock_irqrestore(&sched_engine->lock, flags);
>  }
>  
> +/*
> + * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
> + * per the GuC submission interface. A different allocation algorithm is used
> + * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
> + * partition the guc_id space. We believe the number of multi-lrc contexts in
> + * use should be low and 1/16 should be sufficient.

do we have any other numbers as guideline ?

while it is easy assumption that 1/16 from 64K contexts may be
sufficient, what about 1/16 of 1K contexts ? will that work too ?

also, do we have to make hard split ? what if there will be no users for
mlrc but more slrc contexts would be beneficial ? or the opposite ?

> + */
> +#define MLRC_GUC_ID_RATIO	16
> +
> +static int number_mlrc_guc_id(struct intel_guc *guc)
> +{
> +	return guc->submission_state.num_guc_ids / MLRC_GUC_ID_RATIO;
> +}
> +
> +static int number_slrc_guc_id(struct intel_guc *guc)
> +{
> +	return guc->submission_state.num_guc_ids - number_mlrc_guc_id(guc);
> +}
> +
> +static int mlrc_guc_id_base(struct intel_guc *guc)
> +{
> +	return number_slrc_guc_id(guc);
> +}
> +
> +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
> +{
> +	int ret;
> +
> +	GEM_BUG_ON(!intel_context_is_parent(ce));
> +	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
> +
> +	ret =  bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
> +				       number_mlrc_guc_id(guc),
> +				       order_base_2(ce->parallel.number_children
> +						    + 1));
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	return ret + mlrc_guc_id_base(guc);
> +}
> +
> +static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
> +{
> +	GEM_BUG_ON(intel_context_is_parent(ce));
> +
> +	return ida_simple_get(&guc->submission_state.guc_ids,
> +			      0, number_slrc_guc_id(guc),
> +			      GFP_KERNEL | __GFP_RETRY_MAYFAIL |
> +			      __GFP_NOWARN);

are you sure that these gfp flags are safe for use under spinlock ?

-Michal

> +}
> +
>  static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
>  {
>  	int ret;
> @@ -1871,16 +1912,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
>  	GEM_BUG_ON(intel_context_is_child(ce));
>  
>  	if (intel_context_is_parent(ce))
> -		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
> -					      NUMBER_MULTI_LRC_GUC_ID(guc),
> -					      order_base_2(ce->parallel.number_children
> -							   + 1));
> +		ret = new_mlrc_guc_id(guc, ce);
>  	else
> -		ret = ida_simple_get(&guc->submission_state.guc_ids,
> -				     NUMBER_MULTI_LRC_GUC_ID(guc),
> -				     guc->submission_state.num_guc_ids,
> -				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
> -				     __GFP_NOWARN);
> +		ret = new_slrc_guc_id(guc, ce);
> +
>  	if (unlikely(ret < 0))
>  		return ret;
>