All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [Intel-gfx] [PATCH 10/28] drm/i915/gem: Separate reloc validation into an earlier step
Date: Tue, 9 Jun 2020 08:47:00 +0100	[thread overview]
Message-ID: <ae303541-7df8-6966-95ea-ed46942acb06@linux.intel.com> (raw)
In-Reply-To: <20200607222108.14401-10-chris@chris-wilson.co.uk>


On 07/06/2020 23:20, Chris Wilson wrote:
> Over the next couple of patches, we will want to lock all the modified
> vma for relocation processing under a single ww_mutex. We neither want
> to have to include the vma that are skipped (due to no modifications
> required) nor do we want those to be marked as written too. So separate
> out the reloc validation into an early step, which we can use both to
> reject the execbuf before committing to making our changes, and to
> filter out the unmodified vma.
> 
> This does introduce a second pass through the reloc[], but only if we
> need to emit relocations.
> 
> v2: reuse the outer loop, not cut'n'paste.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 145 +++++++++++-------
>   1 file changed, 86 insertions(+), 59 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 23db79b806db..01ab1e15a142 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -911,9 +911,9 @@ static void eb_destroy(const struct i915_execbuffer *eb)
>   
>   static inline u64
>   relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
> -		  const struct i915_vma *target)
> +		  u64 target)
>   {
> -	return gen8_canonical_addr((int)reloc->delta + target->node.start);
> +	return gen8_canonical_addr((int)reloc->delta + target);
>   }
>   
>   static void reloc_cache_init(struct reloc_cache *cache,
> @@ -1292,26 +1292,11 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb,
>   	return 0;
>   }
>   
> -static u64
> -relocate_entry(struct i915_execbuffer *eb,
> -	       struct i915_vma *vma,
> -	       const struct drm_i915_gem_relocation_entry *reloc,
> -	       const struct i915_vma *target)
> -{
> -	u64 target_addr = relocation_target(reloc, target);
> -	int err;
> -
> -	err = __reloc_entry_gpu(eb, vma, reloc->offset, target_addr);
> -	if (err)
> -		return err;
> -
> -	return target->node.start | UPDATE;
> -}
> -
> -static u64
> -eb_relocate_entry(struct i915_execbuffer *eb,
> -		  struct eb_vma *ev,
> -		  const struct drm_i915_gem_relocation_entry *reloc)
> +static int
> +eb_reloc_prepare(struct i915_execbuffer *eb,
> +		 struct eb_vma *ev,
> +		 const struct drm_i915_gem_relocation_entry *reloc,
> +		 struct drm_i915_gem_relocation_entry __user *user)
>   {
>   	struct drm_i915_private *i915 = eb->i915;
>   	struct eb_vma *target;
> @@ -1389,6 +1374,32 @@ eb_relocate_entry(struct i915_execbuffer *eb,
>   		return -EINVAL;
>   	}
>   
> +	return 1;
> +}
> +
> +static int
> +eb_reloc_entry(struct i915_execbuffer *eb,
> +	       struct eb_vma *ev,
> +	       const struct drm_i915_gem_relocation_entry *reloc,
> +	       struct drm_i915_gem_relocation_entry __user *user)
> +{
> +	struct eb_vma *target;
> +	u64 offset;
> +	int err;
> +
> +	/* we've already hold a reference to all valid objects */
> +	target = eb_get_vma(eb, reloc->target_handle);
> +	if (unlikely(!target))
> +		return -ENOENT;
> +
> +	/*
> +	 * If the relocation already has the right value in it, no
> +	 * more work needs to be done.
> +	 */
> +	offset = gen8_canonical_addr(target->vma->node.start);
> +	if (offset == reloc->presumed_offset) > +		return 0;
> +

Haven't these reloc entries been removed from the list in the prepare phase?

Regards,

Tvrtko

>   	/*
>   	 * If we write into the object, we need to force the synchronisation
>   	 * barrier, either with an asynchronous clflush or if we executed the
> @@ -1399,11 +1410,41 @@ eb_relocate_entry(struct i915_execbuffer *eb,
>   	 */
>   	ev->flags &= ~EXEC_OBJECT_ASYNC;
>   
> -	/* and update the user's relocation entry */
> -	return relocate_entry(eb, ev->vma, reloc, target->vma);
> +	err = __reloc_entry_gpu(eb, ev->vma, reloc->offset,
> +				relocation_target(reloc, offset));
> +	if (err)
> +		return err;
> +
> +	/*
> +	 * Note that reporting an error now
> +	 * leaves everything in an inconsistent
> +	 * state as we have *already* changed
> +	 * the relocation value inside the
> +	 * object. As we have not changed the
> +	 * reloc.presumed_offset or will not
> +	 * change the execobject.offset, on the
> +	 * call we may not rewrite the value
> +	 * inside the object, leaving it
> +	 * dangling and causing a GPU hang. Unless
> +	 * userspace dynamically rebuilds the
> +	 * relocations on each execbuf rather than
> +	 * presume a static tree.
> +	 *
> +	 * We did previously check if the relocations
> +	 * were writable (access_ok), an error now
> +	 * would be a strange race with mprotect,
> +	 * having already demonstrated that we
> +	 * can read from this userspace address.
> +	 */
> +	__put_user(offset, &user->presumed_offset);
> +	return 0;
>   }
>   
> -static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
> +static long eb_reloc_vma(struct i915_execbuffer *eb, struct eb_vma *ev,
> +			 int (*fn)(struct i915_execbuffer *eb,
> +				   struct eb_vma *ev,
> +				   const struct drm_i915_gem_relocation_entry *reloc,
> +				   struct drm_i915_gem_relocation_entry __user *user))
>   {
>   #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
>   	struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
> @@ -1411,6 +1452,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
>   	struct drm_i915_gem_relocation_entry __user *urelocs =
>   		u64_to_user_ptr(entry->relocs_ptr);
>   	unsigned long remain = entry->relocation_count;
> +	int required = 0;
>   
>   	if (unlikely(remain > N_RELOC(ULONG_MAX)))
>   		return -EINVAL;
> @@ -1443,42 +1485,18 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
>   
>   		remain -= count;
>   		do {
> -			u64 offset = eb_relocate_entry(eb, ev, r);
> +			int ret;
>   
> -			if (likely(offset == 0)) {
> -			} else if ((s64)offset < 0) {
> -				return (int)offset;
> -			} else {
> -				/*
> -				 * Note that reporting an error now
> -				 * leaves everything in an inconsistent
> -				 * state as we have *already* changed
> -				 * the relocation value inside the
> -				 * object. As we have not changed the
> -				 * reloc.presumed_offset or will not
> -				 * change the execobject.offset, on the
> -				 * call we may not rewrite the value
> -				 * inside the object, leaving it
> -				 * dangling and causing a GPU hang. Unless
> -				 * userspace dynamically rebuilds the
> -				 * relocations on each execbuf rather than
> -				 * presume a static tree.
> -				 *
> -				 * We did previously check if the relocations
> -				 * were writable (access_ok), an error now
> -				 * would be a strange race with mprotect,
> -				 * having already demonstrated that we
> -				 * can read from this userspace address.
> -				 */
> -				offset = gen8_canonical_addr(offset & ~UPDATE);
> -				__put_user(offset,
> -					   &urelocs[r - stack].presumed_offset);
> -			}
> +			ret = fn(eb, ev, r, &urelocs[r - stack]);
> +			if (ret < 0)
> +				return ret;
> +
> +			required |= ret;
>   		} while (r++, --count);
>   		urelocs += ARRAY_SIZE(stack);
>   	} while (remain);
>   
> -	return 0;
> +	return required;
>   }
>   
>   static int eb_relocate(struct i915_execbuffer *eb)
> @@ -1497,12 +1515,21 @@ static int eb_relocate(struct i915_execbuffer *eb)
>   
>   	/* The objects are in their final locations, apply the relocations. */
>   	if (eb->args->flags & __EXEC_HAS_RELOC) {
> -		struct eb_vma *ev;
> +		struct eb_vma *ev, *en;
>   		int flush;
>   
> +		list_for_each_entry_safe(ev, en, &eb->relocs, reloc_link) {
> +			err = eb_reloc_vma(eb, ev, eb_reloc_prepare);
> +			if (err < 0)
> +				return err;
> +
> +			if (err == 0)
> +				list_del_init(&ev->reloc_link);
> +		}
> +
>   		list_for_each_entry(ev, &eb->relocs, reloc_link) {
> -			err = eb_relocate_vma(eb, ev);
> -			if (err)
> +			err = eb_reloc_vma(eb, ev, eb_reloc_entry);
> +			if (err < 0)
>   				break;
>   		}
>   
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2020-06-09  7:47 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-07 22:20 [Intel-gfx] [PATCH 01/28] drm/i915: Adjust the sentinel assert to match implementation Chris Wilson
2020-06-07 22:20 ` [Intel-gfx] [PATCH 02/28] drm/i915/selftests: Make the hanging request non-preemptible Chris Wilson
2020-06-08 20:58   ` Mika Kuoppala
2020-06-07 22:20 ` [Intel-gfx] [PATCH 03/28] drm/i915/selftests: Teach hang-self to target only itself Chris Wilson
2020-06-10 13:21   ` Mika Kuoppala
2020-06-07 22:20 ` [Intel-gfx] [PATCH 04/28] drm/i915/selftests: Remove live_suppress_wait_preempt Chris Wilson
2020-06-11 11:38   ` Tvrtko Ursulin
2020-06-07 22:20 ` [Intel-gfx] [PATCH 05/28] drm/i915/selftests: Trim execlists runtime Chris Wilson
2020-06-12 23:05   ` Andi Shyti
2020-06-07 22:20 ` [Intel-gfx] [PATCH 06/28] drm/i915/gt: Use virtual_engine during execlists_dequeue Chris Wilson
2020-06-07 22:20 ` [Intel-gfx] [PATCH 07/28] drm/i915/gt: Decouple inflight virtual engines Chris Wilson
2020-06-07 22:20 ` [Intel-gfx] [PATCH 08/28] drm/i915/gt: Resubmit the virtual engine on schedule-out Chris Wilson
2020-06-07 22:20 ` [Intel-gfx] [PATCH 09/28] drm/i915: Add list_for_each_entry_safe_continue_reverse Chris Wilson
2020-06-07 22:20 ` [Intel-gfx] [PATCH 10/28] drm/i915/gem: Separate reloc validation into an earlier step Chris Wilson
2020-06-09  7:47   ` Tvrtko Ursulin [this message]
2020-06-09 10:48     ` Chris Wilson
2020-06-07 22:20 ` [Intel-gfx] [PATCH 11/28] drm/i915/gem: Lift GPU relocation allocation Chris Wilson
2020-06-07 22:20 ` [Intel-gfx] [PATCH 12/28] drm/i915/gem: Build the reloc request first Chris Wilson
2020-06-07 22:20 ` [Intel-gfx] [PATCH 13/28] drm/i915/gem: Add all GPU reloc awaits/signals en masse Chris Wilson
2020-06-07 22:20 ` [Intel-gfx] [PATCH 14/28] dma-buf: Proxy fence, an unsignaled fence placeholder Chris Wilson
2020-06-07 22:20 ` [Intel-gfx] [PATCH 15/28] drm/i915: Lift waiter/signaler iterators Chris Wilson
2020-06-07 22:20 ` [Intel-gfx] [PATCH 16/28] drm/i915: Unpeel awaits on a proxy fence Chris Wilson
2020-06-07 22:20 ` [Intel-gfx] [PATCH 17/28] drm/i915/gem: Make relocations atomic within execbuf Chris Wilson
2020-06-07 22:20 ` [Intel-gfx] [PATCH 18/28] drm/i915: Strip out internal priorities Chris Wilson
2020-06-07 22:20 ` [Intel-gfx] [PATCH 19/28] drm/i915: Remove I915_USER_PRIORITY_SHIFT Chris Wilson
2020-06-07 22:21 ` [Intel-gfx] [PATCH 20/28] drm/i915: Replace engine->schedule() with a known request operation Chris Wilson
2020-06-07 22:21 ` [Intel-gfx] [PATCH 21/28] drm/i915/gt: Do not suspend bonded requests if one hangs Chris Wilson
2020-06-07 22:21 ` [Intel-gfx] [PATCH 22/28] drm/i915: Teach the i915_dependency to use a double-lock Chris Wilson
2020-06-07 22:21 ` [Intel-gfx] [PATCH 23/28] drm/i915: Restructure priority inheritance Chris Wilson
2020-06-07 22:21 ` [Intel-gfx] [PATCH 24/28] ipi-dag Chris Wilson
2020-06-07 22:21 ` [Intel-gfx] [PATCH 25/28] drm/i915/gt: Check for a completed last request once Chris Wilson
2020-06-07 22:21 ` [Intel-gfx] [PATCH 26/28] drm/i915: Fair low-latency scheduling Chris Wilson
2020-06-16  9:07   ` Thomas Hellström (Intel)
2020-06-16 10:12     ` Chris Wilson
2020-06-16 12:11       ` Thomas Hellström (Intel)
2020-06-16 12:44         ` Chris Wilson
2020-06-16 10:54     ` Chris Wilson
2020-06-07 22:21 ` [Intel-gfx] [PATCH 27/28] drm/i915/gt: Specify a deadline for the heartbeat Chris Wilson
2020-06-07 22:21 ` [Intel-gfx] [PATCH 28/28] drm/i915: Replace the priority boosting for the display with a deadline Chris Wilson
2020-06-07 22:49 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/28] drm/i915: Adjust the sentinel assert to match implementation Patchwork
2020-06-07 22:51 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2020-06-07 23:12 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-06-08  0:58 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
2020-06-08  7:44 ` [Intel-gfx] [PATCH 01/28] " Tvrtko Ursulin
2020-06-08  9:33   ` Chris Wilson
2020-06-09  6:59     ` Tvrtko Ursulin
2020-06-09 10:29       ` Chris Wilson
2020-06-09 10:39         ` Tvrtko Ursulin
2020-06-09 10:47           ` Chris Wilson
2020-06-09 11:45             ` Tvrtko Ursulin
2020-06-08 20:43 ` Mika Kuoppala

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ae303541-7df8-6966-95ea-ed46942acb06@linux.intel.com \
    --to=tvrtko.ursulin@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.