From: Matthew Auld <matthew.auld@intel.com>
To: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>,
	Intel-gfx@lists.freedesktop.org
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>,
	dri-devel@lists.freedesktop.org,
	Chris Wilson <chris@chris-wilson.co.uk>
Subject: Re: [PATCH 12/12] drm/i915/gem: Manage all set-domain waits explicitly
Date: Wed, 26 May 2021 15:30:57 +0100	[thread overview]
Message-ID: <5aaf24d3-9171-ee1f-5b89-fd1a501afc01@intel.com> (raw)
In-Reply-To: <20210526141456.2334192-13-tvrtko.ursulin@linux.intel.com>

On 26/05/2021 15:14, Tvrtko Ursulin wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Only perform the domain transition under the object lock, and push the
> required waits to outside the lock.
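
If I follow, the resulting pattern is (my sketch, mirroring the
gtt_set() hunk below rather than quoting it verbatim):

	i915_gem_object_lock(obj, NULL);
	i915_gem_object_set_to_gtt_domain(obj, true); /* transition only, no wait */
	i915_gem_object_unlock(obj);

	/* the blocking wait now happens outside the object lock */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_ALL |
				   I915_WAIT_INTERRUPTIBLE,
				   HZ / 2);
	if (err)
		return err;
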
> 
> v2 (Tvrtko):
>   * Rebase.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Matthew Auld <matthew.auld@intel.com> # v1
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_clflush.c   |   9 +-
>   drivers/gpu/drm/i915/gem/i915_gem_clflush.h   |   2 -
>   drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |   4 +-
>   drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 163 +++++-------------
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |   4 +-
>   drivers/gpu/drm/i915/gem/i915_gem_object.h    |  12 +-
>   .../gpu/drm/i915/gem/i915_gem_object_types.h  |   6 +
>   .../gpu/drm/i915/gem/selftests/huge_pages.c   |   8 -
>   .../i915/gem/selftests/i915_gem_coherency.c   |  31 +++-
>   .../drm/i915/gem/selftests/i915_gem_phys.c    |   8 +-
>   .../drm/i915/gem/selftests/igt_gem_utils.c    |   3 +
>   drivers/gpu/drm/i915/i915_gem.c               |   4 +-
>   12 files changed, 89 insertions(+), 165 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
> index daf9284ef1f5..e4c24558eaa8 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
> @@ -51,8 +51,6 @@ static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj)
>   {
>   	struct clflush *clflush;
>   
> -	GEM_BUG_ON(!obj->cache_dirty);
> -
>   	clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
>   	if (!clflush)
>   		return NULL;
> @@ -101,13 +99,10 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>   
>   	trace_i915_gem_object_clflush(obj);
>   
> -	clflush = NULL;
> -	if (!(flags & I915_CLFLUSH_SYNC))
> -		clflush = clflush_work_create(obj);
> +	clflush = clflush_work_create(obj);
>   	if (clflush) {
>   		i915_sw_fence_await_reservation(&clflush->base.chain,
> -						obj->base.resv, NULL, true,
> -						i915_fence_timeout(to_i915(obj->base.dev)),
> +						obj->base.resv, NULL, true, 0,
>   						I915_FENCE_GFP);
>   		dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma);
>   		dma_fence_work_commit(&clflush->base);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
> index e6c382973129..4cd5787d1507 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
> @@ -9,12 +9,10 @@
>   
>   #include <linux/types.h>
>   
> -struct drm_i915_private;
>   struct drm_i915_gem_object;
>   
>   bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>   			     unsigned int flags);
>   #define I915_CLFLUSH_FORCE BIT(0)
> -#define I915_CLFLUSH_SYNC BIT(1)
>   
>   #endif /* __I915_GEM_CLFLUSH_H__ */
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> index ccede73c6465..0926e0895ee6 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> @@ -132,7 +132,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
>   	if (!err)
>   		err = i915_gem_object_pin_pages(obj);
>   	if (!err) {
> -		err = i915_gem_object_set_to_cpu_domain(obj, write);
> +		i915_gem_object_set_to_cpu_domain(obj, write);
>   		i915_gem_object_unpin_pages(obj);
>   	}
>   	if (err == -EDEADLK) {
> @@ -156,7 +156,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct
>   	if (!err)
>   		err = i915_gem_object_pin_pages(obj);
>   	if (!err) {
> -		err = i915_gem_object_set_to_gtt_domain(obj, false);
> +		i915_gem_object_set_to_gtt_domain(obj, false);
>   		i915_gem_object_unpin_pages(obj);
>   	}
>   	if (err == -EDEADLK) {
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> index 073822100da7..39fda97c49a7 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> @@ -49,7 +49,7 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
>   		break;
>   
>   	case I915_GEM_DOMAIN_CPU:
> -		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
> +		i915_gem_clflush_object(obj, 0);
>   		break;
>   
>   	case I915_GEM_DOMAIN_RENDER:
> @@ -97,34 +97,13 @@ void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
>    * This function returns when the move is complete, including waiting on
>    * flushes to occur.
>    */
> -int
> +void
>   i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
>   {
> -	int ret;
> -
>   	assert_object_held(obj);
>   
> -	ret = i915_gem_object_wait(obj,
> -				   I915_WAIT_INTERRUPTIBLE |
> -				   (write ? I915_WAIT_ALL : 0),
> -				   MAX_SCHEDULE_TIMEOUT);
> -	if (ret)
> -		return ret;
> -
>   	if (obj->write_domain == I915_GEM_DOMAIN_WC)
> -		return 0;
> -
> -	/* Flush and acquire obj->pages so that we are coherent through
> -	 * direct access in memory with previous cached writes through
> -	 * shmemfs and that our cache domain tracking remains valid.
> -	 * For example, if the obj->filp was moved to swap without us
> -	 * being notified and releasing the pages, we would mistakenly
> -	 * continue to assume that the obj remained out of the CPU cached
> -	 * domain.
> -	 */
> -	ret = i915_gem_object_pin_pages(obj);
> -	if (ret)
> -		return ret;
> +		return;
>   
>   	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
>   
> @@ -145,9 +124,6 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
>   		obj->write_domain = I915_GEM_DOMAIN_WC;
>   		obj->mm.dirty = true;
>   	}
> -
> -	i915_gem_object_unpin_pages(obj);
> -	return 0;
>   }
>   
>   /**
> @@ -158,34 +134,13 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
>    * This function returns when the move is complete, including waiting on
>    * flushes to occur.
>    */
> -int
> +void
>   i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>   {
> -	int ret;
> -
>   	assert_object_held(obj);
>   
> -	ret = i915_gem_object_wait(obj,
> -				   I915_WAIT_INTERRUPTIBLE |
> -				   (write ? I915_WAIT_ALL : 0),
> -				   MAX_SCHEDULE_TIMEOUT);
> -	if (ret)
> -		return ret;
> -
>   	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
> -		return 0;
> -
> -	/* Flush and acquire obj->pages so that we are coherent through
> -	 * direct access in memory with previous cached writes through
> -	 * shmemfs and that our cache domain tracking remains valid.
> -	 * For example, if the obj->filp was moved to swap without us
> -	 * being notified and releasing the pages, we would mistakenly
> -	 * continue to assume that the obj remained out of the CPU cached
> -	 * domain.
> -	 */
> -	ret = i915_gem_object_pin_pages(obj);
> -	if (ret)
> -		return ret;
> +		return;
>   
>   	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
>   
> @@ -214,9 +169,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>   				i915_vma_set_ggtt_write(vma);
>   		spin_unlock(&obj->vma.lock);
>   	}
> -
> -	i915_gem_object_unpin_pages(obj);
> -	return 0;
>   }
>   
>   /**
> @@ -431,25 +383,23 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>    * This function returns when the move is complete, including waiting on
>    * flushes to occur.
>    */
> -int
> +void
>   i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
>   {
> -	int ret;
> -
>   	assert_object_held(obj);
>   
> -	ret = i915_gem_object_wait(obj,
> -				   I915_WAIT_INTERRUPTIBLE |
> -				   (write ? I915_WAIT_ALL : 0),
> -				   MAX_SCHEDULE_TIMEOUT);
> -	if (ret)
> -		return ret;
> -
>   	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
>   
>   	/* Flush the CPU cache if it's still invalid. */
>   	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
> -		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
> +		/*
> +		 * While we track when we write through the CPU cache
> +		 * (with obj->cache_dirty), this is only a guide as we do
> +		 * not know when the CPU may have speculatively populated
> +		 * the cache. We have to invalidate such speculative cachelines
> +		 * prior to reading writes by the GPU.
> +		 */
> +		i915_gem_clflush_object(obj, 0);
>   		obj->read_domains |= I915_GEM_DOMAIN_CPU;
>   	}
>   
> @@ -463,8 +413,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
>   	 */
>   	if (write)
>   		__start_cpu_write(obj);
> -
> -	return 0;
>   }
>   
>   /**
> @@ -502,32 +450,14 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>   	if (!obj)
>   		return -ENOENT;
>   
> -	/*
> -	 * Try to flush the object off the GPU without holding the lock.
> -	 * We will repeat the flush holding the lock in the normal manner
> -	 * to catch cases where we are gazumped.
> -	 */
> -	err = i915_gem_object_wait(obj,
> -				   I915_WAIT_INTERRUPTIBLE |
> -				   I915_WAIT_PRIORITY |
> -				   (write_domain ? I915_WAIT_ALL : 0),
> -				   MAX_SCHEDULE_TIMEOUT);
> -	if (err)
> -		goto out;
> -
>   	if (i915_gem_object_is_userptr(obj)) {
>   		/*
>   		 * Try to grab userptr pages, iris uses set_domain to check
>   		 * userptr validity
>   		 */
>   		err = i915_gem_object_userptr_validate(obj);
> -		if (!err)
> -			err = i915_gem_object_wait(obj,
> -						   I915_WAIT_INTERRUPTIBLE |
> -						   I915_WAIT_PRIORITY |
> -						   (write_domain ? I915_WAIT_ALL : 0),
> -						   MAX_SCHEDULE_TIMEOUT);
> -		goto out;
> +		if (err)
> +			goto out;
>   	}
>   
>   	/*
> @@ -572,11 +502,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>   		goto out_unpin;
>   
>   	if (read_domains & I915_GEM_DOMAIN_WC)
> -		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
> +		i915_gem_object_set_to_wc_domain(obj, write_domain);
>   	else if (read_domains & I915_GEM_DOMAIN_GTT)
> -		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
> +		i915_gem_object_set_to_gtt_domain(obj, write_domain);
>   	else
> -		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
> +		i915_gem_object_set_to_cpu_domain(obj, write_domain);
>   
>   out_unpin:
>   	i915_gem_object_unpin_pages(obj);
> @@ -584,6 +514,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>   out_unlock:
>   	i915_gem_object_unlock(obj);
>   
> +	err = i915_gem_object_wait(obj,
> +				   I915_WAIT_INTERRUPTIBLE |
> +				   I915_WAIT_PRIORITY |
> +				   (write_domain ? I915_WAIT_ALL : 0),
> +				   MAX_SCHEDULE_TIMEOUT);
>   	if (!err && write_domain)
>   		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
>   
> @@ -608,26 +543,21 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
>   
>   	assert_object_held(obj);
>   
> -	ret = i915_gem_object_wait(obj,
> -				   I915_WAIT_INTERRUPTIBLE,
> -				   MAX_SCHEDULE_TIMEOUT);
> -	if (ret)
> -		return ret;
> -
>   	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		return ret;
>   
>   	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
> -	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
> -		ret = i915_gem_object_set_to_cpu_domain(obj, false);
> -		if (ret)
> -			goto err_unpin;
> -		else
> -			goto out;
> -	}
> +	    !static_cpu_has(X86_FEATURE_CLFLUSH))
> +		i915_gem_object_set_to_cpu_domain(obj, false);
> +	else
> +		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
>   
> -	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
> +	ret = i915_gem_object_wait(obj,
> +				   I915_WAIT_INTERRUPTIBLE,
> +				   MAX_SCHEDULE_TIMEOUT);
> +	if (ret)
> +		goto err_unpin;
>   
>   	/* If we're not in the cpu read domain, set ourself into the gtt
>   	 * read domain and manually flush cachelines (if required). This
> @@ -638,7 +568,6 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
>   	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
>   		*needs_clflush = CLFLUSH_BEFORE;
>   
> -out:
>   	/* return with the pages pinned */
>   	return 0;
>   
> @@ -658,27 +587,22 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
>   
>   	assert_object_held(obj);
>   
> -	ret = i915_gem_object_wait(obj,
> -				   I915_WAIT_INTERRUPTIBLE |
> -				   I915_WAIT_ALL,
> -				   MAX_SCHEDULE_TIMEOUT);
> -	if (ret)
> -		return ret;
> -
>   	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		return ret;
>   
>   	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
> -	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
> -		ret = i915_gem_object_set_to_cpu_domain(obj, true);
> -		if (ret)
> -			goto err_unpin;
> -		else
> -			goto out;
> -	}
> +	    !static_cpu_has(X86_FEATURE_CLFLUSH))
> +		i915_gem_object_set_to_cpu_domain(obj, true);
> +	else
> +		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
>   
> -	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
> +	ret = i915_gem_object_wait(obj,
> +				   I915_WAIT_INTERRUPTIBLE |
> +				   I915_WAIT_ALL,
> +				   MAX_SCHEDULE_TIMEOUT);
> +	if (ret)
> +		goto err_unpin;
>   
>   	/* If we're not in the cpu write domain, set ourself into the
>   	 * gtt write domain and manually flush cachelines (as required).
> @@ -696,7 +620,6 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
>   			*needs_clflush |= CLFLUSH_BEFORE;
>   	}
>   
> -out:
>   	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
>   	obj->mm.dirty = true;
>   	/* return with the pages pinned */
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 297143511f99..40fda9e81a78 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -1212,9 +1212,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
>   		if (use_cpu_reloc(cache, obj))
>   			return NULL;
>   
> -		err = i915_gem_object_set_to_gtt_domain(obj, true);
> -		if (err)
> -			return ERR_PTR(err);
> +		i915_gem_object_set_to_gtt_domain(obj, true);
>   
>   		vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
>   						  PIN_MAPPABLE |
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index 2ebd79537aea..8bbc835e70ce 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -515,12 +515,12 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
>   void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
>   void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
>   
> -int __must_check
> -i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
> -int __must_check
> -i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
> -int __must_check
> -i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
> +void i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj,
> +				      bool write);
> +void i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
> +				       bool write);
> +void i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
> +				       bool write);
>   struct i915_vma * __must_check
>   i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>   				     struct i915_gem_ww_ctx *ww,
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index 0727d0c76aa0..b8f0413bc3b0 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -188,6 +188,12 @@ struct drm_i915_gem_object {
>   	unsigned int cache_coherent:2;
>   #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
>   #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
> +	/*
> +	 * Note cache_dirty is only a guide; we know when we have written
> +	 * through the CPU cache, but we do not know when the CPU may have
> +	 * speculatively populated the cache. Before a read via the cache
> +	 * of GPU written memory, we have to cautiously invalidate the cache.
> +	 */
>   	unsigned int cache_dirty:1;
>   
>   	/**
> diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> index 33dd4e2a1010..d85ca79ac433 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> @@ -972,14 +972,6 @@ static int gpu_write(struct intel_context *ce,
>   		     u32 dw,
>   		     u32 val)
>   {
> -	int err;
> -
> -	i915_gem_object_lock(vma->obj, NULL);
> -	err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
> -	i915_gem_object_unlock(vma->obj);
> -	if (err)
> -		return err;
> -
>   	return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32),
>   			       vma->size >> PAGE_SHIFT, val);
>   }
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> index e937b6629019..77ba6d1ef4e4 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> @@ -90,8 +90,13 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
>   	int err = 0;
>   
>   	i915_gem_object_lock(ctx->obj, NULL);
> -	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
> +	i915_gem_object_set_to_gtt_domain(ctx->obj, true);
>   	i915_gem_object_unlock(ctx->obj);
> +
> +	err = i915_gem_object_wait(ctx->obj,
> +				   I915_WAIT_ALL |
> +				   I915_WAIT_INTERRUPTIBLE,
> +				   HZ / 2);
>   	if (err)
>   		return err;
>   
> @@ -123,8 +128,12 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
>   	int err = 0;
>   
>   	i915_gem_object_lock(ctx->obj, NULL);
> -	err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
> +	i915_gem_object_set_to_gtt_domain(ctx->obj, false);
>   	i915_gem_object_unlock(ctx->obj);
> +
> +	err = i915_gem_object_wait(ctx->obj,
> +				   I915_WAIT_INTERRUPTIBLE,
> +				   HZ / 2);
>   	if (err)
>   		return err;
>   
> @@ -155,8 +164,13 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v)
>   	int err;
>   
>   	i915_gem_object_lock(ctx->obj, NULL);
> -	err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
> +	i915_gem_object_set_to_wc_domain(ctx->obj, true);
>   	i915_gem_object_unlock(ctx->obj);
> +
> +	err = i915_gem_object_wait(ctx->obj,
> +				   I915_WAIT_ALL |
> +				   I915_WAIT_INTERRUPTIBLE,
> +				   HZ / 2);
>   	if (err)
>   		return err;
>   
> @@ -178,8 +192,12 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
>   	int err;
>   
>   	i915_gem_object_lock(ctx->obj, NULL);
> -	err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
> +	i915_gem_object_set_to_wc_domain(ctx->obj, false);
>   	i915_gem_object_unlock(ctx->obj);
> +
> +	err = i915_gem_object_wait(ctx->obj,
> +				   I915_WAIT_INTERRUPTIBLE,
> +				   HZ / 2);
>   	if (err)
>   		return err;
>   
> @@ -205,9 +223,7 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
>   		return PTR_ERR(vma);
>   
>   	i915_gem_object_lock(ctx->obj, NULL);
> -	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
> -	if (err)
> -		goto out_unlock;
> +	i915_gem_object_set_to_gtt_domain(ctx->obj, false);

IIRC Daniel pointed out that this looks odd, since this now becomes 
write=false for no obvious reason. I think we should keep this as 
write=true, since that does look like what gpu_set() wants.
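
i.e. something like (untested sketch against this patch):

	i915_gem_object_lock(ctx->obj, NULL);
	i915_gem_object_set_to_gtt_domain(ctx->obj, true); /* keep write=true */
	i915_gem_object_unlock(ctx->obj);

with the explicit wait then handled as in the other selftest hunks.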

