From: John Harrison <john.c.harrison@intel.com>
To: Matthew Brost <matthew.brost@intel.com>,
	<intel-gfx@lists.freedesktop.org>,
	<dri-devel@lists.freedesktop.org>
Cc: daniele.ceraolospurio@intel.com, tvrtko.ursulin@intel.com
Subject: Re: [PATCH 2/4] drm/i915/guc: Cancel requests immediately
Date: Wed, 26 Jan 2022 10:58:46 -0800	[thread overview]
Message-ID: <5c436bf7-0ed6-83b6-6061-fdc73d4cc081@intel.com> (raw)
In-Reply-To: <20220124150157.15758-3-matthew.brost@intel.com>

On 1/24/2022 07:01, Matthew Brost wrote:
> Change the preemption timeout to the smallest possible value (1 us) when
> disabling scheduling to cancel a request and restore it after
> cancellation. This not only cancels the request as fast as possible, it
> fixes a bug where the preemption timeout is 0 which results in the
> schedule disable hanging forever.
Shouldn't there be an 'if' in the above statement? The pre-emption 
timeout is not normally zero.

>
> Reported-by: Jani Saarinen <jani.saarinen@intel.com>
> Fixes: 62eaf0ae217d4 ("drm/i915/guc: Support request cancellation")
> Link: https://gitlab.freedesktop.org/drm/intel/-/issues/4960
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_context_types.h |  5 ++
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 46 +++++++++++--------
>   2 files changed, 31 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index 30cd81ad8911a..730998823dbea 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -198,6 +198,11 @@ struct intel_context {
>   		 * each priority bucket
>   		 */
>   		u32 prio_count[GUC_CLIENT_PRIORITY_NUM];
> +		/**
> +		 * @preemption_timeout: preemption timeout of the context, used
> +		 * to restore this value after request cancellation
> +		 */
> +		u32 preemption_timeout;
>   	} guc_state;
>   
>   	struct {
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 3918f1be114fa..966947c450253 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -2147,7 +2147,8 @@ static inline u32 get_children_join_value(struct intel_context *ce,
>   	return __get_parent_scratch(ce)->join[child_index].semaphore;
>   }
>   
> -static void guc_context_policy_init(struct intel_engine_cs *engine,
> +static void guc_context_policy_init(struct intel_context *ce,
> +				    struct intel_engine_cs *engine,
>   				    struct guc_lrc_desc *desc)
Shouldn't engine be before ce? The more general structure usually goes 
first.

John.

>   {
>   	desc->policy_flags = 0;
> @@ -2157,7 +2158,8 @@ static void guc_context_policy_init(struct intel_engine_cs *engine,
>   
>   	/* NB: For both of these, zero means disabled. */
>   	desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
> -	desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
> +	ce->guc_state.preemption_timeout = engine->props.preempt_timeout_ms * 1000;
> +	desc->preemption_timeout = ce->guc_state.preemption_timeout;
>   }
>   
>   static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
> @@ -2193,7 +2195,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
>   	desc->hw_context_desc = ce->lrc.lrca;
>   	desc->priority = ce->guc_state.prio;
>   	desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
> -	guc_context_policy_init(engine, desc);
> +	guc_context_policy_init(ce, engine, desc);
>   
>   	/*
>   	 * If context is a parent, we need to register a process descriptor
> @@ -2226,7 +2228,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
>   			desc->hw_context_desc = child->lrc.lrca;
>   			desc->priority = ce->guc_state.prio;
>   			desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
> -			guc_context_policy_init(engine, desc);
> +			guc_context_policy_init(child, engine, desc);
>   		}
>   
>   		clear_children_join_go_memory(ce);
> @@ -2409,6 +2411,19 @@ static u16 prep_context_pending_disable(struct intel_context *ce)
>   	return ce->guc_id.id;
>   }
>   
> +static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
> +						 u16 guc_id,
> +						 u32 preemption_timeout)
> +{
> +	u32 action[] = {
> +		INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT,
> +		guc_id,
> +		preemption_timeout
> +	};
> +
> +	intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
> +}
> +
>   static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
>   {
>   	struct intel_guc *guc = ce_to_guc(ce);
> @@ -2442,8 +2457,10 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
>   
>   	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
>   
> -	with_intel_runtime_pm(runtime_pm, wakeref)
> +	with_intel_runtime_pm(runtime_pm, wakeref) {
> +		__guc_context_set_preemption_timeout(guc, guc_id, 1);
>   		__guc_context_sched_disable(guc, ce, guc_id);
> +	}
>   
>   	return &ce->guc_state.blocked;
>   }
> @@ -2492,8 +2509,10 @@ static void guc_context_unblock(struct intel_context *ce)
>   
>   	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
>   
> -	if (enable) {
> -		with_intel_runtime_pm(runtime_pm, wakeref)
> +	with_intel_runtime_pm(runtime_pm, wakeref) {
> +		__guc_context_set_preemption_timeout(guc, ce->guc_id.id,
> +						     ce->guc_state.preemption_timeout);
> +		if (enable)
>   			__guc_context_sched_enable(guc, ce);
>   	}
>   }
> @@ -2521,19 +2540,6 @@ static void guc_context_cancel_request(struct intel_context *ce,
>   	}
>   }
>   
> -static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
> -						 u16 guc_id,
> -						 u32 preemption_timeout)
> -{
> -	u32 action[] = {
> -		INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT,
> -		guc_id,
> -		preemption_timeout
> -	};
> -
> -	intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
> -}
> -
>   static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
>   {
>   	struct intel_guc *guc = ce_to_guc(ce);
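
For anyone skimming the thread, the net effect of the patch is the
save / override / restore sequence below. This is only a condensed,
untested sketch pulled from the hunks quoted above ('...' marks elided
context), not the full change:

	/* Remember the configured timeout when the policy is set up. */
	static void guc_context_policy_init(struct intel_context *ce,
					    struct intel_engine_cs *engine,
					    struct guc_lrc_desc *desc)
	{
		...
		ce->guc_state.preemption_timeout =
			engine->props.preempt_timeout_ms * 1000;
		desc->preemption_timeout = ce->guc_state.preemption_timeout;
	}

	/* Force a 1 us timeout around sched disable so cancellation cannot
	 * hang on a zero (i.e. disabled) preemption timeout ... */
	static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
	{
		...
		with_intel_runtime_pm(runtime_pm, wakeref) {
			__guc_context_set_preemption_timeout(guc, guc_id, 1);
			__guc_context_sched_disable(guc, ce, guc_id);
		}
		...
	}

	/* ... and restore the saved value when the context is unblocked. */
	static void guc_context_unblock(struct intel_context *ce)
	{
		...
		with_intel_runtime_pm(runtime_pm, wakeref) {
			__guc_context_set_preemption_timeout(guc, ce->guc_id.id,
							     ce->guc_state.preemption_timeout);
			if (enable)
				__guc_context_sched_enable(guc, ce);
		}
	}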

