All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: Kenneth Graunke <kenneth@whitecape.org>
Subject: Re: [Intel-gfx] [PATCH 02/10] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore
Date: Tue, 7 Apr 2020 10:07:11 +0100	[thread overview]
Message-ID: <063fc269-6491-9f3d-2cd3-a6dbe912694e@linux.intel.com> (raw)
In-Reply-To: <20200403091300.14734-2-chris@chris-wilson.co.uk>


On 03/04/2020 10:12, Chris Wilson wrote:
> If we find ourselves waiting on a MI_SEMAPHORE_WAIT, either within the
> user batch or in our own preamble, the engine raises a
> GT_WAIT_ON_SEMAPHORE interrupt. We can unmask that interrupt and so
> respond to a semaphore wait by yielding the timeslice, if we have
> another context to yield to!
> 
> The only real complication is that the interrupt is only generated for
> the start of the semaphore wait, and is asynchronous to our
> process_csb() -- that is, we may not have registered the timeslice before
> we see the interrupt. To ensure we don't miss a potential semaphore
> blocking forward progress (e.g. selftests/live_timeslice_preempt) we mark
> the interrupt and apply it to the next timeslice regardless of whether it
> was active at the time.
> 
> v2: We use semaphores in preempt-to-busy, within the timeslicing
> implementation itself! Ergo, when we do insert a preemption due to an
> expired timeslice, the new context may start with the missed semaphore
> flagged by the retired context and be yielded, ad infinitum. To avoid
> this, read the context id at the time of the semaphore interrupt and
> only yield if that context is still active.
> 
> Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Kenneth Graunke <kenneth@whitecape.org>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c    |  6 +++
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |  9 +++++
>   drivers/gpu/drm/i915/gt/intel_gt_irq.c       | 13 ++++++-
>   drivers/gpu/drm/i915/gt/intel_lrc.c          | 40 +++++++++++++++++---
>   drivers/gpu/drm/i915/gt/selftest_lrc.c       | 15 +++-----
>   drivers/gpu/drm/i915/i915_reg.h              |  1 +
>   6 files changed, 67 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 843cb6f2f696..04995040407d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -1313,6 +1313,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
>   
>   	if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
>   		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
> +	if (HAS_EXECLISTS(dev_priv)) {
> +		drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
> +			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
> +		drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
> +			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
> +	}
>   	drm_printf(m, "\tRING_START: 0x%08x\n",
>   		   ENGINE_READ(engine, RING_START));
>   	drm_printf(m, "\tRING_HEAD:  0x%08x\n",
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 80cdde712842..ac283ab5d89c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -156,6 +156,15 @@ struct intel_engine_execlists {
>   	 */
>   	struct i915_priolist default_priolist;
>   
> +	/**
> +	 * @yield: CCID at the time of the last semaphore-wait interrupt.
> +	 *
> +	 * Instead of leaving a semaphore busy-spinning on an engine, we would
> +	 * like to switch to another ready context, i.e. yielding the semaphore
> +	 * timeslice.
> +	 */
> +	u32 yield;
> +
>   	/**
>   	 * @error_interrupt: CS Master EIR
>   	 *
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> index f0e7fd95165a..875bd0392ffc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> @@ -39,6 +39,13 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
>   		}
>   	}
>   
> +	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
> +		WRITE_ONCE(engine->execlists.yield,
> +			   ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
> +		if (del_timer(&engine->execlists.timer))
> +			tasklet = true;
> +	}
> +
>   	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
>   		tasklet = true;
>   
> @@ -228,7 +235,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
>   	const u32 irqs =
>   		GT_CS_MASTER_ERROR_INTERRUPT |
>   		GT_RENDER_USER_INTERRUPT |
> -		GT_CONTEXT_SWITCH_INTERRUPT;
> +		GT_CONTEXT_SWITCH_INTERRUPT |
> +		GT_WAIT_SEMAPHORE_INTERRUPT;
>   	struct intel_uncore *uncore = gt->uncore;
>   	const u32 dmask = irqs << 16 | irqs;
>   	const u32 smask = irqs << 16;
> @@ -366,7 +374,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
>   	const u32 irqs =
>   		GT_CS_MASTER_ERROR_INTERRUPT |
>   		GT_RENDER_USER_INTERRUPT |
> -		GT_CONTEXT_SWITCH_INTERRUPT;
> +		GT_CONTEXT_SWITCH_INTERRUPT |
> +		GT_WAIT_SEMAPHORE_INTERRUPT;
>   	const u32 gt_interrupts[] = {
>   		irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
>   		irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index f028114714cd..55a58709590a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1768,7 +1768,8 @@ static void defer_active(struct intel_engine_cs *engine)
>   }
>   
>   static bool
> -need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
> +need_timeslice(const struct intel_engine_cs *engine,
> +	       const struct i915_request *rq)
>   {
>   	int hint;
>   
> @@ -1782,6 +1783,31 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
>   	return hint >= effective_prio(rq);
>   }
>   
> +static bool
> +timeslice_yield(const struct intel_engine_execlists *el,
> +		const struct i915_request *rq)
> +{
> +	/*
> +	 * Once bitten, forever smitten!
> +	 *
> +	 * If the active context ever busy-waited on a semaphore,
> +	 * it will be treated as a hog until the end of its timeslice.
> +	 * The HW only sends an interrupt on the first miss, and we
> +	 * do not know if that semaphore has been signaled, or even if it
> +	 * is now stuck on another semaphore. Play safe, yield if it
> +	 * might be stuck -- it will be given a fresh timeslice in
> +	 * the near future.
> +	 */
> +	return upper_32_bits(rq->context->lrc_desc) == READ_ONCE(el->yield);
> +}
> +
> +static bool
> +timeslice_expired(const struct intel_engine_execlists *el,
> +		  const struct i915_request *rq)
> +{
> +	return timer_expired(&el->timer) || timeslice_yield(el, rq);
> +}
> +
>   static int
>   switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
>   {
> @@ -1797,8 +1823,7 @@ timeslice(const struct intel_engine_cs *engine)
>   	return READ_ONCE(engine->props.timeslice_duration_ms);
>   }
>   
> -static unsigned long
> -active_timeslice(const struct intel_engine_cs *engine)
> +static unsigned long active_timeslice(const struct intel_engine_cs *engine)
>   {
>   	const struct intel_engine_execlists *execlists = &engine->execlists;
>   	const struct i915_request *rq = *execlists->active;
> @@ -1989,18 +2014,19 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   
>   			last = NULL;
>   		} else if (need_timeslice(engine, last) &&
> -			   timer_expired(&engine->execlists.timer)) {
> +			   timeslice_expired(execlists, last)) {
>   			if (i915_request_completed(last)) {
>   				tasklet_hi_schedule(&execlists->tasklet);
>   				return;
>   			}
>   
>   			ENGINE_TRACE(engine,
> -				     "expired last=%llx:%lld, prio=%d, hint=%d\n",
> +				     "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
>   				     last->fence.context,
>   				     last->fence.seqno,
>   				     last->sched.attr.priority,
> -				     execlists->queue_priority_hint);
> +				     execlists->queue_priority_hint,
> +				     yesno(timeslice_yield(execlists, last)));
>   
>   			ring_set_paused(engine, 1);
>   			defer_active(engine);
> @@ -2261,6 +2287,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		}
>   		clear_ports(port + 1, last_port - port);
>   
> +		WRITE_ONCE(execlists->yield, -1);
>   		execlists_submit_ports(engine);
>   		set_preempt_timeout(engine, *active);
>   	} else {
> @@ -4563,6 +4590,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
>   	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
>   	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
>   	engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
> +	engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
>   }
>   
>   static void rcs_submission_override(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 985d4041d929..8e8b0c0ddc76 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1071,15 +1071,12 @@ static int live_timeslice_rewind(void *arg)
>   		GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
>   
>   		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
> -		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
> -		GEM_BUG_ON(!i915_request_is_active(rq[A2]));
> -		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
> -
> -		/* Wait for the timeslice to kick in */
> -		del_timer(&engine->execlists.timer);
> -		tasklet_hi_schedule(&engine->execlists.tasklet);
> -		intel_engine_flush_submission(engine);
> -
> +		if (i915_request_is_active(rq[A2])) {
> +			/* Wait for the timeslice to kick in */
> +			del_timer(&engine->execlists.timer);
> +			tasklet_hi_schedule(&engine->execlists.tasklet);
> +			intel_engine_flush_submission(engine);
> +		}
>   		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
>   		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
>   		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 17484345cb80..f402a9f78969 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -3094,6 +3094,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>   #define GT_BSD_CS_ERROR_INTERRUPT		(1 << 15)
>   #define GT_BSD_USER_INTERRUPT			(1 << 12)
>   #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1	(1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
> +#define GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11) /* bdw+ */
>   #define GT_CONTEXT_SWITCH_INTERRUPT		(1 <<  8)
>   #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT	(1 <<  5) /* !snb */
>   #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	(1 <<  4)
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2020-04-07  9:07 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-03  9:12 [Intel-gfx] [PATCH 01/10] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
2020-04-03  9:12 ` [Intel-gfx] [PATCH 02/10] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore Chris Wilson
2020-04-07  9:07   ` Tvrtko Ursulin [this message]
2020-04-03  9:12 ` [Intel-gfx] [PATCH 03/10] dma-buf: Prettify typecasts for dma-fence-chain Chris Wilson
2020-04-03  9:12 ` [Intel-gfx] [PATCH 04/10] dma-buf: Report signaled links inside dma-fence-chain Chris Wilson
2020-04-08 19:46   ` Venkata Sandeep Dhanalakota
2020-04-08 20:00   ` Lionel Landwerlin
2020-04-09 10:52     ` Chris Wilson
2020-04-09 11:16       ` Lionel Landwerlin
2020-04-09 13:46         ` Chris Wilson
2020-04-03  9:12 ` [Intel-gfx] [PATCH 05/10] dma-buf: Exercise dma-fence-chain under selftests Chris Wilson
2020-04-08 19:49   ` Venkata Sandeep Dhanalakota
2020-04-10 16:11   ` Lionel Landwerlin
2020-04-03  9:12 ` [Intel-gfx] [PATCH 06/10] dma-buf: Proxy fence, an unsignaled fence placeholder Chris Wilson
2020-04-05 22:14   ` kbuild test robot
2020-04-05 22:14     ` kbuild test robot
2020-04-06 18:32     ` Nick Desaulniers
2020-04-06 18:32       ` Nick Desaulniers
2020-04-03  9:12 ` [Intel-gfx] [PATCH 07/10] drm/syncobj: Allow use of dma-fence-proxy Chris Wilson
2020-04-03  9:12 ` [Intel-gfx] [PATCH 08/10] drm/i915/gem: Teach execbuf how to wait on future syncobj Chris Wilson
2020-04-03  9:12 ` [Intel-gfx] [PATCH 09/10] drm/i915/gem: Allow combining submit-fences with syncobj Chris Wilson
2020-04-07 10:44   ` Tvrtko Ursulin
2020-04-07 10:51     ` Chris Wilson
2020-04-08  9:28       ` Tvrtko Ursulin
2020-04-03  9:13 ` [Intel-gfx] [PATCH 10/10] drm/i915/gt: Declare when we enabled timeslicing Chris Wilson
2020-04-07 10:50   ` Tvrtko Ursulin
2020-04-07 10:55     ` Chris Wilson
2020-04-03  9:32 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/10] drm/i915/selftests: Add request throughput measurement to perf Patchwork
2020-04-03  9:58 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-04-03 17:23 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2020-03-31 21:25 [Intel-gfx] [PATCH 01/10] " Chris Wilson
2020-03-31 21:25 ` [Intel-gfx] [PATCH 02/10] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=063fc269-6491-9f3d-2cd3-a6dbe912694e@linux.intel.com \
    --to=tvrtko.ursulin@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=kenneth@whitecape.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.