intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: Matthew Auld <matthew.auld@intel.com>
Subject: Re: [Intel-gfx] [PATCH 1/7] drm/i915/gt: Avoid resetting ring->head outside of its timeline mutex
Date: Tue, 11 Feb 2020 13:58:53 +0200	[thread overview]
Message-ID: <87imkd9vya.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20200210205722.794180-1-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> We manipulate ring->head while active in i915_request_retire underneath
> the timeline manipulation. We cannot rely on a stable ring->head outside
> of the timeline->mutex, in particular while setting up the context for
> resume and reset.

This solves the immediate problem of ring->head sampling in execlist
submission.

Future work considerations are to use WRITE_ONCE for writes to ring->head,
even though they happen under the timeline mutex, and then READ_ONCE in the
other, non-lockable places. Or at least to use the READ_ONCE notation
outside of the lock.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>


>
> Closes: https://gitlab.freedesktop.org/drm/intel/issues/1126
> Fixes: 0881954965e3 ("drm/i915: Introduce intel_context.pin_mutex for pin management")
> Fixes: e5dadff4b093 ("drm/i915: Protect request retirement with timeline->mutex")
> References: f3c0efc9fe7a ("drm/i915/execlists: Leave resetting ring to intel_ring")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Matthew Auld <matthew.auld@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c    | 36 ++++++++++++--------------
>  drivers/gpu/drm/i915/gt/selftest_lrc.c |  2 +-
>  2 files changed, 18 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 929be03bbe7e..70d91ad923ef 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -235,7 +235,8 @@ static void execlists_init_reg_state(u32 *reg_state,
>  				     bool close);
>  static void
>  __execlists_update_reg_state(const struct intel_context *ce,
> -			     const struct intel_engine_cs *engine);
> +			     const struct intel_engine_cs *engine,
> +			     u32 head);
>  
>  static void mark_eio(struct i915_request *rq)
>  {
> @@ -1184,12 +1185,11 @@ static void reset_active(struct i915_request *rq,
>  		head = rq->tail;
>  	else
>  		head = active_request(ce->timeline, rq)->head;
> -	ce->ring->head = intel_ring_wrap(ce->ring, head);
> -	intel_ring_update_space(ce->ring);
> +	head = intel_ring_wrap(ce->ring, head);
>  
>  	/* Scrub the context image to prevent replaying the previous batch */
>  	restore_default_state(ce, engine);
> -	__execlists_update_reg_state(ce, engine);
> +	__execlists_update_reg_state(ce, engine, head);
>  
>  	/* We've switched away, so this should be a no-op, but intent matters */
>  	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
> @@ -2878,16 +2878,17 @@ static void execlists_context_unpin(struct intel_context *ce)
>  
>  static void
>  __execlists_update_reg_state(const struct intel_context *ce,
> -			     const struct intel_engine_cs *engine)
> +			     const struct intel_engine_cs *engine,
> +			     u32 head)
>  {
>  	struct intel_ring *ring = ce->ring;
>  	u32 *regs = ce->lrc_reg_state;
>  
> -	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
> +	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
>  	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
>  
>  	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
> -	regs[CTX_RING_HEAD] = ring->head;
> +	regs[CTX_RING_HEAD] = head;
>  	regs[CTX_RING_TAIL] = ring->tail;
>  
>  	/* RPCS */
> @@ -2916,7 +2917,7 @@ __execlists_context_pin(struct intel_context *ce,
>  
>  	ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
>  	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
> -	__execlists_update_reg_state(ce, engine);
> +	__execlists_update_reg_state(ce, engine, ce->ring->tail);
>  
>  	return 0;
>  }
> @@ -2941,7 +2942,7 @@ static void execlists_context_reset(struct intel_context *ce)
>  	/* Scrub away the garbage */
>  	execlists_init_reg_state(ce->lrc_reg_state,
>  				 ce, ce->engine, ce->ring, true);
> -	__execlists_update_reg_state(ce, ce->engine);
> +	__execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
>  
>  	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
>  }
> @@ -3538,6 +3539,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
>  	struct intel_engine_execlists * const execlists = &engine->execlists;
>  	struct intel_context *ce;
>  	struct i915_request *rq;
> +	u32 head;
>  
>  	mb(); /* paranoia: read the CSB pointers from after the reset */
>  	clflush(execlists->csb_write);
> @@ -3565,15 +3567,15 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
>  
>  	if (i915_request_completed(rq)) {
>  		/* Idle context; tidy up the ring so we can restart afresh */
> -		ce->ring->head = intel_ring_wrap(ce->ring, rq->tail);
> +		head = intel_ring_wrap(ce->ring, rq->tail);
>  		goto out_replay;
>  	}
>  
>  	/* Context has requests still in-flight; it should not be idle! */
>  	GEM_BUG_ON(i915_active_is_idle(&ce->active));
>  	rq = active_request(ce->timeline, rq);
> -	ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
> -	GEM_BUG_ON(ce->ring->head == ce->ring->tail);
> +	head = intel_ring_wrap(ce->ring, rq->head);
> +	GEM_BUG_ON(head == ce->ring->tail);
>  
>  	/*
>  	 * If this request hasn't started yet, e.g. it is waiting on a
> @@ -3618,10 +3620,9 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
>  
>  out_replay:
>  	ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
> -		     ce->ring->head, ce->ring->tail);
> -	intel_ring_update_space(ce->ring);
> +		     head, ce->ring->tail);
>  	__execlists_reset_reg_state(ce, engine);
> -	__execlists_update_reg_state(ce, engine);
> +	__execlists_update_reg_state(ce, engine, head);
>  	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
>  
>  unwind:
> @@ -5265,10 +5266,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
>  		restore_default_state(ce, engine);
>  
>  	/* Rerun the request; its payload has been neutered (if guilty). */
> -	ce->ring->head = head;
> -	intel_ring_update_space(ce->ring);
> -
> -	__execlists_update_reg_state(ce, engine);
> +	__execlists_update_reg_state(ce, engine, head);
>  }
>  
>  bool
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 7ef68500b2bd..82fa0712808e 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -201,7 +201,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
>  		}
>  		GEM_BUG_ON(!ce[1]->ring->size);
>  		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
> -		__execlists_update_reg_state(ce[1], engine);
> +		__execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
>  
>  		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
>  		if (IS_ERR(rq[0])) {
> -- 
> 2.25.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

      parent reply	other threads:[~2020-02-11 11:59 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-10 20:57 [Intel-gfx] [PATCH 1/7] drm/i915/gt: Avoid resetting ring->head outside of its timeline mutex Chris Wilson
2020-02-10 20:57 ` [Intel-gfx] [PATCH 2/7] drm/i915/selftests: Exercise timeslice rewinding Chris Wilson
2020-02-11 14:50   ` Mika Kuoppala
2020-02-11 15:16     ` Chris Wilson
2020-02-10 20:57 ` [Intel-gfx] [PATCH 3/7] drm/i915/selftests: Relax timeout for error-interrupt reset processing Chris Wilson
2020-02-11 15:23   ` Mika Kuoppala
2020-02-11 15:33     ` Chris Wilson
2020-02-11 15:54       ` Mika Kuoppala
2020-02-11 16:00         ` Chris Wilson
2020-02-10 20:57 ` [Intel-gfx] [PATCH 4/7] drm/i915/gem: Don't leak non-persistent requests on changing engines Chris Wilson
2020-02-11 13:41   ` Tvrtko Ursulin
2020-02-11 14:15     ` Chris Wilson
2020-02-10 20:57 ` [Intel-gfx] [PATCH 5/7] drm/i915: Disable use of hwsp_cacheline for kernel_context Chris Wilson
2020-02-11 17:36   ` Mika Kuoppala
2020-02-10 20:57 ` [Intel-gfx] [PATCH 6/7] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore Chris Wilson
2020-02-10 20:57 ` [Intel-gfx] [PATCH 7/7] drm/i915/execlists: Remove preempt-to-busy roundtrip delay Chris Wilson
2020-02-12  1:08   ` Daniele Ceraolo Spurio
2020-02-14 10:10     ` Chris Wilson
2020-02-10 22:48 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] drm/i915/gt: Avoid resetting ring->head outside of its timeline mutex Patchwork
2020-02-10 23:14 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2020-02-11 11:49 ` [Intel-gfx] [PATCH 1/7] " Andi Shyti
2020-02-11 11:58 ` Mika Kuoppala [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87imkd9vya.fsf@gaia.fi.intel.com \
    --to=mika.kuoppala@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=matthew.auld@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).