All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Vetter <daniel@ffwll.ch>
To: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org,
	Matthew Auld <matthew.auld@intel.com>,
	Chris Wilson <chris@chris-wilson.co.uk>
Subject: Re: [Intel-gfx] [PATCH 03/24] Revert "drm/i915/gem: Drop relocation slowpath".
Date: Tue, 11 Aug 2020 15:39:18 +0200	[thread overview]
Message-ID: <20200811133918.GD2352366@phenom.ffwll.local> (raw)
In-Reply-To: <20200810103103.303818-4-maarten.lankhorst@linux.intel.com>

On Mon, Aug 10, 2020 at 12:30:42PM +0200, Maarten Lankhorst wrote:
> This reverts commit 7dc8f1143778 ("drm/i915/gem: Drop relocation
> slowpath"). We need the slowpath relocation for taking ww-mutex
> inside the page fault handler, and we will take this mutex when
> pinning all objects.
> 
> With this, we have a proper working slowpath again, which
> will allow us to do fault handling with WW locks held.
> 
> [mlankhorst: Adjusted for reloc_gpu_flush() changes]
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Matthew Auld <matthew.auld@intel.com>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>

So from looking through this it also functionally reverts ef398881d27d
("drm/i915/gem: Limit struct_mutex to eb_reserve"), and well a tiny part
of 003d8b9143a6 ("drm/i915/gem: Only call eb_lookup_vma once during
execbuf ioctl"): The part that adds a flush_workqueue(userptr) in
eb_reserve, which the former patch then fixes up by adding the
dev->struct_mutex lock dropping around it.

The other part is adding a reloc_gpu_flush, comment about that below.

> ---
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 263 +++++++++++++++++-
>  1 file changed, 253 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 6acbd08f82f0..12a130f92e72 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -707,7 +707,7 @@ static int eb_reserve(struct i915_execbuffer *eb)
>  			if (err)
>  				break;
>  		}
> -		if (!(err == -ENOSPC || err == -EAGAIN))
> +		if (err != -ENOSPC)
>  			break;
>  
>  		/* Resort *all* the objects into priority order */
> @@ -738,13 +738,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
>  		}
>  		list_splice_tail(&last, &eb->unbound);
>  
> -		if (err == -EAGAIN) {
> -			mutex_unlock(&eb->i915->drm.struct_mutex);
> -			flush_workqueue(eb->i915->mm.userptr_wq);
> -			mutex_lock(&eb->i915->drm.struct_mutex);
> -			continue;
> -		}
> -
>  		switch (pass++) {
>  		case 0:
>  			break;
> @@ -1626,7 +1619,9 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
>  		 * we would try to acquire the struct mutex again. Obviously
>  		 * this is bad and so lockdep complains vehemently.
>  		 */
> -		copied = __copy_from_user(r, urelocs, count * sizeof(r[0]));
> +		pagefault_disable();
> +		copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
> +		pagefault_enable();
>  		if (unlikely(copied)) {
>  			remain = -EFAULT;
>  			goto out;
> @@ -1674,6 +1669,251 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
>  	return remain;
>  }
>  
> +static int
> +eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
> +{
> +	const struct drm_i915_gem_exec_object2 *entry = ev->exec;
> +	struct drm_i915_gem_relocation_entry *relocs =
> +		u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
> +	unsigned int i;
> +	int err;
> +
> +	for (i = 0; i < entry->relocation_count; i++) {
> +		u64 offset = eb_relocate_entry(eb, ev, &relocs[i]);
> +
> +		if ((s64)offset < 0) {
> +			err = (int)offset;
> +			goto err;
> +		}
> +	}
> +	err = 0;
> +err:
> +	reloc_cache_reset(&eb->reloc_cache);
> +	return err;
> +}
> +
> +static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
> +{
> +	const char __user *addr, *end;
> +	unsigned long size;
> +	char __maybe_unused c;
> +
> +	size = entry->relocation_count;
> +	if (size == 0)
> +		return 0;
> +
> +	if (size > N_RELOC(ULONG_MAX))
> +		return -EINVAL;
> +
> +	addr = u64_to_user_ptr(entry->relocs_ptr);
> +	size *= sizeof(struct drm_i915_gem_relocation_entry);
> +	if (!access_ok(addr, size))
> +		return -EFAULT;
> +
> +	end = addr + size;
> +	for (; addr < end; addr += PAGE_SIZE) {
> +		int err = __get_user(c, addr);
> +		if (err)
> +			return err;
> +	}
> +	return __get_user(c, end - 1);


__get_user and friends considered uncool, since access_ok is considered
uncool. But since this is a revert I guess it's ok.

But it might be good to go around and just throw out all the access_ok in
execbuf, and replace them with the preferred functions. But that's maybe for
a follow-up series, when we have time again.
> +}
> +
> +static int eb_copy_relocations(const struct i915_execbuffer *eb)
> +{
> +	struct drm_i915_gem_relocation_entry *relocs;
> +	const unsigned int count = eb->buffer_count;
> +	unsigned int i;
> +	int err;
> +
> +	for (i = 0; i < count; i++) {
> +		const unsigned int nreloc = eb->exec[i].relocation_count;
> +		struct drm_i915_gem_relocation_entry __user *urelocs;
> +		unsigned long size;
> +		unsigned long copied;
> +
> +		if (nreloc == 0)
> +			continue;
> +
> +		err = check_relocations(&eb->exec[i]);
> +		if (err)
> +			goto err;
> +
> +		urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
> +		size = nreloc * sizeof(*relocs);
> +
> +		relocs = kvmalloc_array(size, 1, GFP_KERNEL);
> +		if (!relocs) {
> +			err = -ENOMEM;
> +			goto err;
> +		}
> +
> +		/* copy_from_user is limited to < 4GiB */
> +		copied = 0;
> +		do {
> +			unsigned int len =
> +				min_t(u64, BIT_ULL(31), size - copied);
> +
> +			if (__copy_from_user((char *)relocs + copied,
> +					     (char __user *)urelocs + copied,
> +					     len))
> +				goto end;
> +
> +			copied += len;
> +		} while (copied < size);
> +
> +		/*
> +		 * As we do not update the known relocation offsets after
> +		 * relocating (due to the complexities in lock handling),
> +		 * we need to mark them as invalid now so that we force the
> +		 * relocation processing next time. Just in case the target
> +		 * object is evicted and then rebound into its old
> +		 * presumed_offset before the next execbuffer - if that
> +		 * happened we would make the mistake of assuming that the
> +		 * relocations were valid.
> +		 */
> +		if (!user_access_begin(urelocs, size))
> +			goto end;
> +
> +		for (copied = 0; copied < nreloc; copied++)
> +			unsafe_put_user(-1,
> +					&urelocs[copied].presumed_offset,
> +					end_user);
> +		user_access_end();
> +
> +		eb->exec[i].relocs_ptr = (uintptr_t)relocs;
> +	}
> +
> +	return 0;
> +
> +end_user:
> +	user_access_end();
> +end:
> +	kvfree(relocs);
> +	err = -EFAULT;
> +err:
> +	while (i--) {
> +		relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
> +		if (eb->exec[i].relocation_count)
> +			kvfree(relocs);
> +	}
> +	return err;
> +}
> +
> +static int eb_prefault_relocations(const struct i915_execbuffer *eb)
> +{
> +	const unsigned int count = eb->buffer_count;
> +	unsigned int i;
> +
> +	for (i = 0; i < count; i++) {
> +		int err;
> +
> +		err = check_relocations(&eb->exec[i]);
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
> +static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
> +{
> +	bool have_copy = false;
> +	struct eb_vma *ev;
> +	int err = 0;
> +
> +repeat:
> +	if (signal_pending(current)) {
> +		err = -ERESTARTSYS;
> +		goto out;
> +	}
> +
> +	/*
> +	 * We take 3 passes through the slowpatch.
> +	 *
> +	 * 1 - we try to just prefault all the user relocation entries and
> +	 * then attempt to reuse the atomic pagefault disabled fast path again.
> +	 *
> +	 * 2 - we copy the user entries to a local buffer here outside of the
> +	 * local and allow ourselves to wait upon any rendering before
> +	 * relocations
> +	 *
> +	 * 3 - we already have a local copy of the relocation entries, but
> +	 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
> +	 */
> +	if (!err) {
> +		err = eb_prefault_relocations(eb);
> +	} else if (!have_copy) {
> +		err = eb_copy_relocations(eb);
> +		have_copy = err == 0;
> +	} else {
> +		cond_resched();
> +		err = 0;
> +	}
> +
> +	flush_workqueue(eb->i915->mm.userptr_wq);
> +
> +	if (err)
> +		goto out;
> +
> +	err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
> +	if (err)
> +		goto out;
> +
> +	list_for_each_entry(ev, &eb->relocs, reloc_link) {
> +		if (!have_copy) {
> +			pagefault_disable();
> +			err = eb_relocate_vma(eb, ev);
> +			pagefault_enable();
> +			if (err)
> +				break;
> +		} else {
> +			err = eb_relocate_vma_slow(eb, ev);
> +			if (err)
> +				break;
> +		}
> +	}
> +
> +	reloc_gpu_flush(&eb->reloc_cache);

So maybe my baseline is all off, but I'm not understanding why this needs
to be added here. Both eb_relocate_vma_slow and eb_relocate_vma have a
reloc_gpu_flush at the end. So no idea why we need to flush once more here
...

Assuming this all works without this line here (which is also not
justified by the revert or the functional revert I spotted), and the
commit message fixed up:

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> +	mutex_unlock(&eb->i915->drm.struct_mutex);
> +	if (err && !have_copy)
> +		goto repeat;
> +
> +	if (err)
> +		goto err;
> +
> +	/*
> +	 * Leave the user relocations as are, this is the painfully slow path,
> +	 * and we want to avoid the complication of dropping the lock whilst
> +	 * having buffers reserved in the aperture and so causing spurious
> +	 * ENOSPC for random operations.
> +	 */
> +
> +err:
> +	if (err == -EAGAIN)
> +		goto repeat;
> +
> +out:
> +	if (have_copy) {
> +		const unsigned int count = eb->buffer_count;
> +		unsigned int i;
> +
> +		for (i = 0; i < count; i++) {
> +			const struct drm_i915_gem_exec_object2 *entry =
> +				&eb->exec[i];
> +			struct drm_i915_gem_relocation_entry *relocs;
> +
> +			if (!entry->relocation_count)
> +				continue;
> +
> +			relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
> +			kvfree(relocs);
> +		}
> +	}
> +
> +	return err;
> +}
> +
>  static int eb_relocate(struct i915_execbuffer *eb)
>  {
>  	int err;
> @@ -1695,8 +1935,11 @@ static int eb_relocate(struct i915_execbuffer *eb)
>  		list_for_each_entry(ev, &eb->relocs, reloc_link) {
>  			err = eb_relocate_vma(eb, ev);
>  			if (err)
> -				return err;
> +				break;
>  		}
> +
> +		if (err)
> +			return eb_relocate_slow(eb);
>  	}
>  
>  	return 0;
> -- 
> 2.28.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2020-08-11 13:39 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-10 10:30 [Intel-gfx] [PATCH 00/24] drm/i915: Correct the locking hierarchy in gem Maarten Lankhorst
2020-08-10 10:30 ` [Intel-gfx] [PATCH 01/24] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
2020-08-11  9:33   ` Daniel Vetter
2020-08-11 12:11   ` Daniel Vetter
2020-08-12  7:56   ` Chris Wilson
2020-08-10 10:30 ` [Intel-gfx] [PATCH 02/24] drm/i915: Revert relocation chaining commits Maarten Lankhorst
2020-08-11 12:41   ` Daniel Vetter
2020-08-10 10:30 ` [Intel-gfx] [PATCH 03/24] Revert "drm/i915/gem: Drop relocation slowpath" Maarten Lankhorst
2020-08-11 13:39   ` Daniel Vetter [this message]
2020-08-10 10:30 ` [Intel-gfx] [PATCH 04/24] Revert "drm/i915/gem: Split eb_vma into its own allocation" Maarten Lankhorst
2020-08-11 15:12   ` Daniel Vetter
2020-08-12 21:29   ` Thomas Hellström (Intel)
2020-08-10 10:30 ` [Intel-gfx] [PATCH 05/24] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2 Maarten Lankhorst
2020-08-10 10:30 ` [Intel-gfx] [PATCH 06/24] drm/i915: Remove locking from i915_gem_object_prepare_read/write Maarten Lankhorst
2020-08-10 17:41   ` Thomas Hellström (Intel)
2020-08-10 10:30 ` [Intel-gfx] [PATCH 07/24] drm/i915: Parse command buffer earlier in eb_relocate(slow) Maarten Lankhorst
2020-08-10 17:44   ` Thomas Hellström (Intel)
2020-08-10 10:30 ` [Intel-gfx] [PATCH 08/24] drm/i915: Use per object locking in execbuf, v12 Maarten Lankhorst
2020-08-12 20:59   ` Thomas Hellström (Intel)
2020-08-10 10:30 ` [Intel-gfx] [PATCH 09/24] drm/i915: make lockdep slightly happier about execbuf Maarten Lankhorst
2020-08-10 12:58   ` Maarten Lankhorst
2020-08-10 14:18   ` [Intel-gfx] [PATCH 1/1] dummy empty commit Maarten Lankhorst
2020-08-10 14:58   ` Maarten Lankhorst
2020-08-11  7:34   ` [Intel-gfx] [PATCH 09/24] drm/i915: make lockdep slightly happier about execbuf Thomas Hellström (Intel)
2020-08-11 11:56     ` Maarten Lankhorst
2020-08-10 10:30 ` [Intel-gfx] [PATCH 10/24] drm/i915: Use ww locking in intel_renderstate Maarten Lankhorst
2020-08-11  7:52   ` Thomas Hellström (Intel)
2020-08-10 10:30 ` [Intel-gfx] [PATCH 11/24] drm/i915: Add ww context handling to context_barrier_task Maarten Lankhorst
2020-08-11  8:09   ` Thomas Hellström (Intel)
2020-08-10 10:30 ` [Intel-gfx] [PATCH 12/24] drm/i915: Nuke arguments to eb_pin_engine Maarten Lankhorst
2020-08-11  8:12   ` Thomas Hellström (Intel)
2020-08-10 10:30 ` [Intel-gfx] [PATCH 13/24] drm/i915: Pin engine before pinning all objects, v5 Maarten Lankhorst
2020-08-12 19:01   ` Thomas Hellström (Intel)
2020-08-10 10:30 ` [Intel-gfx] [PATCH 14/24] drm/i915: Rework intel_context pinning to do everything outside of pin_mutex Maarten Lankhorst
2020-08-12 19:14   ` Thomas Hellström (Intel)
2020-08-19 10:38     ` Maarten Lankhorst
2020-08-10 10:30 ` [Intel-gfx] [PATCH 15/24] drm/i915: Make sure execbuffer always passes ww state to i915_vma_pin Maarten Lankhorst
2020-08-12 19:32   ` Thomas Hellström (Intel)
2020-08-12 20:28     ` Thomas Hellström (Intel)
2020-08-19 11:54     ` Maarten Lankhorst
2020-08-10 10:30 ` [Intel-gfx] [PATCH 16/24] drm/i915: Convert i915_gem_object/client_blt.c to use ww locking as well, v2 Maarten Lankhorst
2020-08-12 19:39   ` Thomas Hellström (Intel)
2020-08-10 10:30 ` [Intel-gfx] [PATCH 17/24] drm/i915: Kill last user of intel_context_create_request outside of selftests Maarten Lankhorst
2020-08-12 19:41   ` Thomas Hellström (Intel)
2020-08-10 10:30 ` [Intel-gfx] [PATCH 18/24] drm/i915: Convert i915_perf to ww locking as well Maarten Lankhorst
2020-08-12 19:53   ` Thomas Hellström (Intel)
2020-08-19 11:57     ` Maarten Lankhorst
2020-08-10 10:30 ` [Intel-gfx] [PATCH 19/24] drm/i915: Dirty hack to fix selftests locking inversion Maarten Lankhorst
2020-08-12 19:58   ` Thomas Hellström (Intel)
2020-08-10 10:30 ` [Intel-gfx] [PATCH 20/24] drm/i915/selftests: Fix locking inversion in lrc selftest Maarten Lankhorst
2020-08-12 19:59   ` Thomas Hellström (Intel)
2020-08-10 10:31 ` [Intel-gfx] [PATCH 21/24] drm/i915: Use ww pinning for intel_context_create_request() Maarten Lankhorst
2020-08-12 20:02   ` Thomas Hellström (Intel)
2020-08-10 10:31 ` [Intel-gfx] [PATCH 22/24] drm/i915: Move i915_vma_lock in the selftests to avoid lock inversion, v3 Maarten Lankhorst
2020-08-12 20:09   ` Thomas Hellström (Intel)
2020-08-10 10:31 ` [Intel-gfx] [PATCH 23/24] drm/i915: Add ww locking to vm_fault_gtt Maarten Lankhorst
2020-08-12 20:16   ` Thomas Hellström (Intel)
2020-08-10 10:31 ` [Intel-gfx] [PATCH 24/24] drm/i915: Add ww locking to pin_to_display_plane Maarten Lankhorst
2020-08-12 20:31   ` Thomas Hellström (Intel)
2020-08-10 10:48 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Correct the locking hierarchy in gem Patchwork
2020-08-10 10:49 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2020-08-10 11:03 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2020-08-11  8:10 ` [Intel-gfx] [PATCH 00/24] " Chris Wilson
  -- strict thread matches above, loose matches on Subject: below --
2020-04-21 10:46 [Intel-gfx] [PATCH 01/24] perf/core: Only copy-to-user after completely unlocking all locks, v3 Maarten Lankhorst
2020-04-21 10:46 ` [Intel-gfx] [PATCH 03/24] Revert "drm/i915/gem: Drop relocation slowpath" Maarten Lankhorst
2020-04-17 13:39 [Intel-gfx] [PATCH 01/24] perf/core: Only copy-to-user after completely unlocking all locks, v3 Maarten Lankhorst
2020-04-17 13:39 ` [Intel-gfx] [PATCH 03/24] Revert "drm/i915/gem: Drop relocation slowpath" Maarten Lankhorst

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200811133918.GD2352366@phenom.ffwll.local \
    --to=daniel@ffwll.ch \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=maarten.lankhorst@linux.intel.com \
    --cc=matthew.auld@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.