All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH v2 5/7] drm/i915/execlists: Direct submit onto idle engines
Date: Tue, 8 May 2018 11:23:09 +0100	[thread overview]
Message-ID: <19dcc3f6-333a-db5a-9b9f-3dd1a892b0a6@linux.intel.com> (raw)
In-Reply-To: <20180507135731.10587-5-chris@chris-wilson.co.uk>


On 07/05/2018 14:57, Chris Wilson wrote:
> Bypass using the tasklet to submit the first request to HW, as the
> tasklet may be deferred unto ksoftirqd and at a minimum will add in
> excess of 10us (and maybe tens of milliseconds) to our execution
> latency. This latency reduction is most notable when execution flows
> between engines.
> 
> v2: Beware handling preemption completion from the direct submit path as
> well.
> 
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_guc_submission.c | 12 +++-
>   drivers/gpu/drm/i915/intel_lrc.c            | 66 +++++++++++++++++----
>   drivers/gpu/drm/i915/intel_ringbuffer.h     |  7 +++
>   3 files changed, 69 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 2feb65096966..6bfe30af7826 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -754,14 +754,20 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>   
>   static void guc_dequeue(struct intel_engine_cs *engine)
>   {
> -	unsigned long flags;
> +	unsigned long uninitialized_var(flags);
>   	bool submit;
>   
>   	local_irq_save(flags);
>   
> -	spin_lock(&engine->timeline.lock);
> +	GEM_BUG_ON(!test_bit(TASKLET_STATE_RUN,
> +			     &engine->execlists.tasklet.state));

Soon it will be time for i915_tasklet. :)

> +	if (!intel_engine_direct_submit(engine))
> +		spin_lock(&engine->timeline.lock);

A bit ugly, both the conditional locking and the use of engine->flags
for transient purposes.

Since you lock the tasklet and own it completely (and open-code the call)
when calling directly, you could just as well cheat and call a different
function?

> +
>   	submit = __guc_dequeue(engine);
> -	spin_unlock(&engine->timeline.lock);
> +
> +	if (!intel_engine_direct_submit(engine))
> +		spin_unlock(&engine->timeline.lock);
>   
>   	if (submit)
>   		guc_submit(engine);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 15c373ea5b7e..ac7c5edee4ee 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -357,13 +357,16 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
>   {
>   	struct intel_engine_cs *engine =
>   		container_of(execlists, typeof(*engine), execlists);
> -	unsigned long flags;
> +	unsigned long uninitialized_var(flags);
>   
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	GEM_BUG_ON(!test_bit(TASKLET_STATE_RUN, &execlists->tasklet.state));
> +	if (!intel_engine_direct_submit(engine))
> +		spin_lock_irqsave(&engine->timeline.lock, flags);
>   
>   	__unwind_incomplete_requests(engine);
>   
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	if (!intel_engine_direct_submit(engine))
> +		spin_unlock_irqrestore(&engine->timeline.lock, flags);

Hm ok yes, this one would be a problem..

Maybe at least use some bit under execlists state instead of engine flags?

Regards,

Tvrtko

>   }
>   
>   static inline void
> @@ -602,6 +605,8 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
>   		 */
>   		GEM_BUG_ON(!execlists_is_active(execlists,
>   						EXECLISTS_ACTIVE_USER));
> +		GEM_BUG_ON(execlists_is_active(execlists,
> +					       EXECLISTS_ACTIVE_PREEMPT));
>   		GEM_BUG_ON(!port_count(&port[0]));
>   		if (port_count(&port[0]) > 1)
>   			return false;
> @@ -758,12 +763,17 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
>   static void execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> -	unsigned long flags;
> +	unsigned long uninitialized_var(flags);
>   	bool submit;
>   
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	GEM_BUG_ON(!test_bit(TASKLET_STATE_RUN, &execlists->tasklet.state));
> +	if (!intel_engine_direct_submit(engine))
> +		spin_lock_irqsave(&engine->timeline.lock, flags);
> +
>   	submit = __execlists_dequeue(engine);
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +
> +	if (!intel_engine_direct_submit(engine))
> +		spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   
>   	if (submit)
>   		execlists_submit_ports(engine);
> @@ -1163,16 +1173,45 @@ static void queue_request(struct intel_engine_cs *engine,
>   		      &lookup_priolist(engine, node, prio)->requests);
>   }
>   
> -static void __submit_queue(struct intel_engine_cs *engine, int prio)
> +static void __wakeup_queue(struct intel_engine_cs *engine, int prio)
>   {
>   	engine->execlists.queue_priority = prio;
> +}
> +
> +static void __schedule_queue(struct intel_engine_cs *engine)
> +{
>   	tasklet_hi_schedule(&engine->execlists.tasklet);
>   }
>   
> +static void __submit_queue(struct intel_engine_cs *engine)
> +{
> +	struct intel_engine_execlists * const execlists = &engine->execlists;
> +	struct tasklet_struct * const t = &execlists->tasklet;
> +
> +	GEM_BUG_ON(!engine->i915->gt.awake);
> +
> +	/* If inside GPU reset, the tasklet will be queued later. */
> +	if (unlikely(atomic_read(&t->count)))
> +		return;
> +
> +	/* Directly submit the first request to reduce the initial latency */
> +	if (!port_isset(execlists->port) && tasklet_trylock(t)) {
> +		engine->flags |= I915_ENGINE_DIRECT_SUBMIT;
> +		t->func(t->data);
> +		engine->flags &= ~I915_ENGINE_DIRECT_SUBMIT;
> +		tasklet_unlock(t);
> +		return;
> +	}
> +
> +	__schedule_queue(engine);
> +}
> +
>   static void submit_queue(struct intel_engine_cs *engine, int prio)
>   {
> -	if (prio > engine->execlists.queue_priority)
> -		__submit_queue(engine, prio);
> +	if (prio > engine->execlists.queue_priority) {
> +		__wakeup_queue(engine, prio);
> +		__submit_queue(engine);
> +	}
>   }
>   
>   static void execlists_submit_request(struct i915_request *request)
> @@ -1184,10 +1223,9 @@ static void execlists_submit_request(struct i915_request *request)
>   	spin_lock_irqsave(&engine->timeline.lock, flags);
>   
>   	queue_request(engine, &request->sched, rq_prio(request));
> -	submit_queue(engine, rq_prio(request));
> -
>   	GEM_BUG_ON(!engine->execlists.first);
>   	GEM_BUG_ON(list_empty(&request->sched.link));
> +	submit_queue(engine, rq_prio(request));
>   
>   	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }
> @@ -1309,8 +1347,10 @@ static void execlists_schedule(struct i915_request *request,
>   		}
>   
>   		if (prio > engine->execlists.queue_priority &&
> -		    i915_sw_fence_done(&sched_to_request(node)->submit))
> -			__submit_queue(engine, prio);
> +		    i915_sw_fence_done(&sched_to_request(node)->submit)) {
> +			__wakeup_queue(engine, prio);
> +			__schedule_queue(engine);
> +		}
>   	}
>   
>   	spin_unlock_irq(&engine->timeline.lock);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 010750e8ee44..f5545391d76a 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -569,6 +569,7 @@ struct intel_engine_cs {
>   #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
>   #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
>   #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
> +#define I915_ENGINE_DIRECT_SUBMIT    BIT(3)
>   	unsigned int flags;
>   
>   	/*
> @@ -646,6 +647,12 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine)
>   	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
>   }
>   
> +static inline bool
> +intel_engine_direct_submit(const struct intel_engine_cs *engine)
> +{
> +	return engine->flags & I915_ENGINE_DIRECT_SUBMIT;
> +}
> +
>   static inline bool __execlists_need_preempt(int prio, int last)
>   {
>   	return prio > max(0, last);
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2018-05-08 10:23 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-07 13:57 [PATCH v2 1/7] drm/i915: Flush submission tasklet after bumping priority Chris Wilson
2018-05-07 13:57 ` [PATCH v2 2/7] drm/i915: Disable tasklet scheduling across initial scheduling Chris Wilson
2018-05-08 10:02   ` Tvrtko Ursulin
2018-05-08 10:31     ` Chris Wilson
2018-05-07 13:57 ` [PATCH v2 3/7] drm/i915/execlists: Make submission tasklet hardirq safe Chris Wilson
2018-05-08 10:10   ` Tvrtko Ursulin
2018-05-08 10:24     ` Chris Wilson
2018-05-08 10:56       ` Tvrtko Ursulin
2018-05-08 11:05         ` Chris Wilson
2018-05-08 11:38           ` Tvrtko Ursulin
2018-05-08 11:43             ` Chris Wilson
2018-05-08 17:38   ` Tvrtko Ursulin
2018-05-08 17:45   ` Tvrtko Ursulin
2018-05-08 20:59     ` Chris Wilson
2018-05-09  9:23       ` Chris Wilson
2018-05-07 13:57 ` [PATCH v2 4/7] drm/i915/guc: " Chris Wilson
2018-05-08 17:43   ` Tvrtko Ursulin
2018-05-07 13:57 ` [PATCH v2 5/7] drm/i915/execlists: Direct submit onto idle engines Chris Wilson
2018-05-08 10:23   ` Tvrtko Ursulin [this message]
2018-05-08 10:40     ` Chris Wilson
2018-05-08 11:00       ` Tvrtko Ursulin
2018-05-07 13:57 ` [PATCH v2 6/7] drm/i915/execlists: Direct submission from irq handler Chris Wilson
2018-05-08 10:54   ` Tvrtko Ursulin
2018-05-08 11:10     ` Chris Wilson
2018-05-08 11:53       ` Tvrtko Ursulin
2018-05-08 12:17   ` [PATCH] " Chris Wilson
2018-05-07 13:57 ` [PATCH v2 7/7] drm/i915: Speed up idle detection by kicking the tasklets Chris Wilson
2018-05-07 15:31 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2,1/7] drm/i915: Flush submission tasklet after bumping priority Patchwork
2018-05-07 15:32 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-05-07 15:46 ` ✓ Fi.CI.BAT: success " Patchwork
2018-05-07 17:56 ` ✓ Fi.CI.IGT: " Patchwork
2018-05-08  9:40 ` [PATCH v2 1/7] " Tvrtko Ursulin
2018-05-08  9:45   ` Chris Wilson
2018-05-08  9:57     ` Tvrtko Ursulin
2018-05-08 14:11 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2,1/7] drm/i915: Flush submission tasklet after bumping priority (rev2) Patchwork
2018-05-08 14:13 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-05-08 14:28 ` ✓ Fi.CI.BAT: success " Patchwork
2018-05-08 16:27 ` ✓ Fi.CI.IGT: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=19dcc3f6-333a-db5a-9b9f-3dd1a892b0a6@linux.intel.com \
    --to=tvrtko.ursulin@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.