All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 3/5] drm/i915/execlists: Direct submit onto idle engines
Date: Thu, 10 May 2018 17:09:14 +0100	[thread overview]
Message-ID: <52b631ff-5760-dc18-bc9f-e385fe8f10b9@linux.intel.com> (raw)
In-Reply-To: <20180509142801.28130-3-chris@chris-wilson.co.uk>


On 09/05/2018 15:27, Chris Wilson wrote:
> Bypass using the tasklet to submit the first request to HW, as the
> tasklet may be deferred unto ksoftirqd and at a minimum will add in
> excess of 10us (and maybe tens of milliseconds) to our execution
> latency. This latency reduction is most notable when execution flows
> between engines.
> 
> v2: Beware handling preemption completion from the direct submit path as
> well.
> v3: Make the abuse clear and track our extra state inside i915_tasklet.
> 
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_tasklet.h         | 24 +++++++
>   drivers/gpu/drm/i915/intel_guc_submission.c | 10 ++-
>   drivers/gpu/drm/i915/intel_lrc.c            | 71 +++++++++++++++++----
>   3 files changed, 89 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_tasklet.h b/drivers/gpu/drm/i915/i915_tasklet.h
> index 42b002b88edb..99e2fa2241ba 100644
> --- a/drivers/gpu/drm/i915/i915_tasklet.h
> +++ b/drivers/gpu/drm/i915/i915_tasklet.h
> @@ -8,8 +8,11 @@
>   #define _I915_TASKLET_H_
>   
>   #include <linux/atomic.h>
> +#include <linux/bitops.h>
>   #include <linux/interrupt.h>
>   
> +#include "i915_gem.h"
> +
>   /**
>    * struct i915_tasklet - wrapper around tasklet_struct
>    *
> @@ -19,6 +22,8 @@
>    */
>   struct i915_tasklet {
>   	struct tasklet_struct base;
> +	unsigned long flags;
> +#define I915_TASKLET_DIRECT_SUBMIT BIT(0)

I would suggest a more generic name for the bit since i915_tasklet is 
generic-ish. For instance simply I915_TASKLET_DIRECT would signify the 
callback has been invoked directly and not (necessarily) from softirq 
context. Then it is for each user to know what that means for them 
specifically.

>   };
>   
>   static inline void i915_tasklet_init(struct i915_tasklet *t,
> @@ -43,6 +48,14 @@ static inline bool i915_tasklet_is_enabled(const struct i915_tasklet *t)
>   	return likely(!atomic_read(&t->base.count));
>   }
>   
> +static inline bool i915_tasklet_is_direct_submit(const struct i915_tasklet *t)
> +{
> +	/* Only legal to be checked from inside the tasklet. */
> +	GEM_BUG_ON(!i915_tasklet_is_running(t));
> +
> +	return t->flags & I915_TASKLET_DIRECT_SUBMIT;
> +}

Or maybe i915_tasklet_direct_invocation?

> +
>   static inline void i915_tasklet_schedule(struct i915_tasklet *t)
>   {
>   	tasklet_hi_schedule(&t->base);
> @@ -75,4 +88,15 @@ static inline void i915_tasklet_set_func(struct i915_tasklet *t,
>   	i915_tasklet_unlock(t);
>   }
>   
> +static inline void __i915_tasklet_run(const struct i915_tasklet *t)
> +{
> +	t->base.func(t->base.data);
> +}
> +
> +static inline void i915_tasklet_run(const struct i915_tasklet *t)
> +{
> +	GEM_BUG_ON(!i915_tasklet_is_running(t));
> +	__i915_tasklet_run(t);
> +}
> +
>   #endif /* _I915_TASKLET_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index a7afc976c3b9..f2ded1796523 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -754,14 +754,18 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>   
>   static void guc_dequeue(struct intel_engine_cs *engine)
>   {
> -	unsigned long flags;
> +	unsigned long uninitialized_var(flags);
>   	bool submit;
>   
>   	local_irq_save(flags);
>   
> -	spin_lock(&engine->timeline.lock);
> +	if (!i915_tasklet_is_direct_submit(&engine->execlists.tasklet))
> +		spin_lock(&engine->timeline.lock);
> +
>   	submit = __guc_dequeue(engine);
> -	spin_unlock(&engine->timeline.lock);
> +
> +	if (!i915_tasklet_is_direct_submit(&engine->execlists.tasklet))
> +		spin_unlock(&engine->timeline.lock);
>   
>   	if (submit)
>   		guc_submit(engine);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 539fa03d7600..09fded9d409f 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -356,13 +356,15 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
>   {
>   	struct intel_engine_cs *engine =
>   		container_of(execlists, typeof(*engine), execlists);
> -	unsigned long flags;
> +	unsigned long uninitialized_var(flags);
>   
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	if (!i915_tasklet_is_direct_submit(&execlists->tasklet))
> +		spin_lock_irqsave(&engine->timeline.lock, flags);
>   
>   	__unwind_incomplete_requests(engine);
>   
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	if (!i915_tasklet_is_direct_submit(&execlists->tasklet))
> +		spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }
>   
>   static inline void
> @@ -601,6 +603,8 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
>   		 */
>   		GEM_BUG_ON(!execlists_is_active(execlists,
>   						EXECLISTS_ACTIVE_USER));
> +		GEM_BUG_ON(execlists_is_active(execlists,
> +					       EXECLISTS_ACTIVE_PREEMPT));
>   		GEM_BUG_ON(!port_count(&port[0]));
>   		if (port_count(&port[0]) > 1)
>   			return false;
> @@ -757,12 +761,16 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
>   static void execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> -	unsigned long flags;
> +	unsigned long uninitialized_var(flags);
>   	bool submit;
>   
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	if (!i915_tasklet_is_direct_submit(&execlists->tasklet))
> +		spin_lock_irqsave(&engine->timeline.lock, flags);
> +
>   	submit = __execlists_dequeue(engine);
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +
> +	if (!i915_tasklet_is_direct_submit(&execlists->tasklet))
> +		spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   
>   	if (submit)
>   		execlists_submit_ports(engine);
> @@ -1162,16 +1170,52 @@ static void queue_request(struct intel_engine_cs *engine,
>   		      &lookup_priolist(engine, prio)->requests);
>   }
>   
> -static void __submit_queue(struct intel_engine_cs *engine, int prio)
> +static void __wakeup_queue(struct intel_engine_cs *engine, int prio)
>   {
>   	engine->execlists.queue_priority = prio;
> +}

Why is this called wakeup? Plans to add something in it later?

> +
> +static void __schedule_queue(struct intel_engine_cs *engine)
> +{
>   	i915_tasklet_schedule(&engine->execlists.tasklet);
>   }
>   
> +static bool __direct_submit(struct intel_engine_execlists *const execlists)
> +{
> +	struct i915_tasklet * const t = &execlists->tasklet;
> +
> +	if (!tasklet_trylock(&t->base))
> +		return false;
> +
> +	t->flags |= I915_TASKLET_DIRECT_SUBMIT;
> +	i915_tasklet_run(t);
> +	t->flags &= ~I915_TASKLET_DIRECT_SUBMIT;
> +
> +	tasklet_unlock(&t->base);

Feels like this whole sequence belongs to i915_tasklet since it touches 
the internals. Maybe i915_tasklet_try_run, or i915_tasklet_run_or_schedule?

> +	return true;
> +}
> +
> +static void __submit_queue(struct intel_engine_cs *engine)
> +{
> +	struct intel_engine_execlists * const execlists = &engine->execlists;
> +
> +	GEM_BUG_ON(!engine->i915->gt.awake);
> +
> +	/* If inside GPU reset, the tasklet will be queued later. */
> +	if (!i915_tasklet_is_enabled(&execlists->tasklet))
> +		return;
> +
> +	/* Directly submit the first request to reduce the initial latency */
> +	if (port_isset(execlists->port) || !__direct_submit(execlists))
> +		__schedule_queue(engine);

Hmm a bit evil to maybe invoke in the condition. Would it be acceptable to:

if (!port_isset(...))
	i915_tasklet_run_or_schedule(...);
else
	i915_tasklet_schedule(...);

It's not ideal but maybe a bit better.

> +}
> +
>   static void submit_queue(struct intel_engine_cs *engine, int prio)
>   {
> -	if (prio > engine->execlists.queue_priority)
> -		__submit_queue(engine, prio);
> +	if (prio > engine->execlists.queue_priority) {
> +		__wakeup_queue(engine, prio);
> +		__submit_queue(engine);
> +	}
>   }
>   
>   static void execlists_submit_request(struct i915_request *request)
> @@ -1183,10 +1227,9 @@ static void execlists_submit_request(struct i915_request *request)
>   	spin_lock_irqsave(&engine->timeline.lock, flags);
>   
>   	queue_request(engine, &request->sched, rq_prio(request));
> -	submit_queue(engine, rq_prio(request));
> -
>   	GEM_BUG_ON(!engine->execlists.first);
>   	GEM_BUG_ON(list_empty(&request->sched.link));
> +	submit_queue(engine, rq_prio(request));
>   
>   	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }
> @@ -1314,8 +1357,10 @@ static void execlists_schedule(struct i915_request *request,
>   		}
>   
>   		if (prio > engine->execlists.queue_priority &&
> -		    i915_sw_fence_done(&sched_to_request(node)->submit))
> -			__submit_queue(engine, prio);
> +		    i915_sw_fence_done(&sched_to_request(node)->submit)) {
> +			__wakeup_queue(engine, prio);
> +			__schedule_queue(engine);
> +		}
>   	}
>   
>   	spin_unlock_irq(&engine->timeline.lock);
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2018-05-10 16:09 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-09 14:27 [PATCH 1/5] drm/i915: Remove tasklet flush before disable Chris Wilson
2018-05-09 14:27 ` [PATCH 2/5] drm/i915: Wrap tasklet_struct for abuse Chris Wilson
2018-05-09 14:58   ` Chris Wilson
2018-05-10 15:49   ` Tvrtko Ursulin
2018-05-10 16:03     ` Chris Wilson
2018-05-10 16:15       ` Tvrtko Ursulin
2018-05-10 16:19         ` Chris Wilson
2018-05-10 17:08           ` Tvrtko Ursulin
2018-05-09 14:27 ` [PATCH 3/5] drm/i915/execlists: Direct submit onto idle engines Chris Wilson
2018-05-10 16:09   ` Tvrtko Ursulin [this message]
2018-05-10 16:25     ` Chris Wilson
2018-05-10 17:26       ` Tvrtko Ursulin
2018-05-10 17:40         ` Chris Wilson
2018-05-11  8:25           ` Tvrtko Ursulin
2018-05-11  8:31             ` Chris Wilson
2018-05-11  8:48               ` Tvrtko Ursulin
2018-05-09 14:28 ` [PATCH 4/5] drm/i915/execlists: Direct submission from irq handler Chris Wilson
2018-05-10 12:02   ` Chris Wilson
2018-05-09 14:28 ` [PATCH 5/5] drm/i915: Speed up idle detection by kicking the tasklets Chris Wilson
2018-05-09 14:57 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/5] drm/i915: Remove tasklet flush before disable Patchwork
2018-05-09 14:59 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-05-09 15:14 ` ✓ Fi.CI.BAT: success " Patchwork
2018-05-09 17:50 ` ✓ Fi.CI.IGT: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=52b631ff-5760-dc18-bc9f-e385fe8f10b9@linux.intel.com \
    --to=tvrtko.ursulin@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.