All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Vetter <daniel@ffwll.ch>
To: Chris Wilson <chris@chris-wilson.co.uk>
Cc: intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 06/38] drm/i915: Pad GTT views of exec objects up to user specified size
Date: Wed, 8 Jun 2016 11:41:01 +0200	[thread overview]
Message-ID: <20160608094101.GT3363@phenom.ffwll.local> (raw)
In-Reply-To: <1464972953-2726-7-git-send-email-chris@chris-wilson.co.uk>

On Fri, Jun 03, 2016 at 05:55:21PM +0100, Chris Wilson wrote:
> Our GPUs impose certain requirements upon buffers that depend upon how
> exactly they are used. Typically this is expressed as that they require
> a larger surface than would be naively computed by pitch * height.
> Normally such requirements are hidden away in the userspace driver, but
> when we accept pointers from strangers and later impose extra conditions
> on them, the original client allocator has no idea about the
> monstrosities in the GPU and we require the userspace driver to inform
> the kernel how many padding pages are required beyond the client
> allocation.
> 
> v2: Long time, no see
> v3: Try an anonymous union for uapi struct compatability
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Hm, where's the userspace for this? Commit message should elaborate imo a
bit more on what's going on here ...
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  6 ++-
>  drivers/gpu/drm/i915/i915_gem.c            | 82 +++++++++++++++---------------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++-
>  include/uapi/drm/i915_drm.h                |  8 ++-
>  4 files changed, 65 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a065325580d8..9520adba33f6 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2945,11 +2945,13 @@ void i915_gem_free_object(struct drm_gem_object *obj);
>  int __must_check
>  i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  		    struct i915_address_space *vm,
> +		    uint64_t size,
>  		    uint32_t alignment,
>  		    uint64_t flags);
>  int __must_check
>  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  			 const struct i915_ggtt_view *view,
> +			 uint64_t size,
>  			 uint32_t alignment,
>  			 uint64_t flags);
>  
> @@ -3209,8 +3211,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
>  	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>  	struct i915_ggtt *ggtt = &dev_priv->ggtt;
>  
> -	return i915_gem_object_pin(obj, &ggtt->base,
> -				   alignment, flags | PIN_GLOBAL);
> +	return i915_gem_object_pin(obj, &ggtt->base, 0, alignment,
> +				   flags | PIN_GLOBAL);
>  }
>  
>  void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 19b8d2ea7698..0f0101300b2b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1438,7 +1438,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	}
>  
>  	/* Now pin it into the GTT if needed */
> -	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
> +	ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
>  	if (ret)
>  		goto unlock;
>  
> @@ -2678,21 +2678,20 @@ static struct i915_vma *
>  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  			   struct i915_address_space *vm,
>  			   const struct i915_ggtt_view *ggtt_view,
> +			   uint64_t size,
>  			   unsigned alignment,
>  			   uint64_t flags)
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	struct drm_i915_private *dev_priv = to_i915(dev);
> -	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> -	u32 fence_alignment, unfenced_alignment;
> -	u32 search_flag, alloc_flag;
>  	u64 start, end;
> -	u64 size, fence_size;
> +	u32 search_flag, alloc_flag;
>  	struct i915_vma *vma;
>  	int ret;
>  
>  	if (i915_is_ggtt(vm)) {
> -		u32 view_size;
> +		u32 fence_size, fence_alignment, unfenced_alignment;
> +		u64 view_size;
>  
>  		if (WARN_ON(!ggtt_view))
>  			return ERR_PTR(-EINVAL);
> @@ -2710,48 +2709,39 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  								view_size,
>  								obj->tiling_mode,
>  								false);
> -		size = flags & PIN_MAPPABLE ? fence_size : view_size;
> +		size = max(size, view_size);
> +		if (flags & PIN_MAPPABLE)
> +			size = max_t(u64, size, fence_size);
> +
> +		if (alignment == 0)
> +			alignment = flags & PIN_MAPPABLE ? fence_alignment :
> +				unfenced_alignment;
> +		if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
> +			DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
> +				  ggtt_view ? ggtt_view->type : 0,
> +				  alignment);
> +			return ERR_PTR(-EINVAL);
> +		}
>  	} else {
> -		fence_size = i915_gem_get_gtt_size(dev,
> -						   obj->base.size,
> -						   obj->tiling_mode);
> -		fence_alignment = i915_gem_get_gtt_alignment(dev,
> -							     obj->base.size,
> -							     obj->tiling_mode,
> -							     true);
> -		unfenced_alignment =
> -			i915_gem_get_gtt_alignment(dev,
> -						   obj->base.size,
> -						   obj->tiling_mode,
> -						   false);
> -		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
> +		size = max_t(u64, size, obj->base.size);
> +		alignment = 4096;
>  	}
>  
>  	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
>  	end = vm->total;
>  	if (flags & PIN_MAPPABLE)
> -		end = min_t(u64, end, ggtt->mappable_end);
> +		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
>  	if (flags & PIN_ZONE_4G)
>  		end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
>  
> -	if (alignment == 0)
> -		alignment = flags & PIN_MAPPABLE ? fence_alignment :
> -						unfenced_alignment;
> -	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
> -		DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
> -			  ggtt_view ? ggtt_view->type : 0,
> -			  alignment);
> -		return ERR_PTR(-EINVAL);
> -	}
> -
>  	/* If binding the object/GGTT view requires more space than the entire
>  	 * aperture has, reject it early before evicting everything in a vain
>  	 * attempt to find space.
>  	 */
>  	if (size > end) {
> -		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
> +		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
>  			  ggtt_view ? ggtt_view->type : 0,
> -			  size,
> +			  size, obj->base.size,
>  			  flags & PIN_MAPPABLE ? "mappable" : "total",
>  			  end);
>  		return ERR_PTR(-E2BIG);
> @@ -3243,7 +3233,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	 * (e.g. libkms for the bootup splash), we have to ensure that we
>  	 * always use map_and_fenceable for all scanout buffers.
>  	 */
> -	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
> +	ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
>  				       view->type == I915_GGTT_VIEW_NORMAL ?
>  				       PIN_MAPPABLE : 0);
>  	if (ret)
> @@ -3393,12 +3383,17 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>  }
>  
>  static bool
> -i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
> +i915_vma_misplaced(struct i915_vma *vma,
> +		   uint64_t size,
> +		   uint32_t alignment,
> +		   uint64_t flags)
>  {
>  	struct drm_i915_gem_object *obj = vma->obj;
>  
> -	if (alignment &&
> -	    vma->node.start & (alignment - 1))
> +	if (vma->node.size < size)
> +		return true;
> +
> +	if (alignment && vma->node.start & (alignment - 1))
>  		return true;
>  
>  	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
> @@ -3442,6 +3437,7 @@ static int
>  i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  		       struct i915_address_space *vm,
>  		       const struct i915_ggtt_view *ggtt_view,
> +		       uint64_t size,
>  		       uint32_t alignment,
>  		       uint64_t flags)
>  {
> @@ -3469,7 +3465,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
>  			return -EBUSY;
>  
> -		if (i915_vma_misplaced(vma, alignment, flags)) {
> +		if (i915_vma_misplaced(vma, size, alignment, flags)) {
>  			WARN(vma->pin_count,
>  			     "bo is already pinned in %s with incorrect alignment:"
>  			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
> @@ -3490,8 +3486,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  
>  	bound = vma ? vma->bound : 0;
>  	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
> -		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
> -						 flags);
> +		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
> +						 size, alignment, flags);
>  		if (IS_ERR(vma))
>  			return PTR_ERR(vma);
>  	} else {
> @@ -3513,17 +3509,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  int
>  i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  		    struct i915_address_space *vm,
> +		    uint64_t size,
>  		    uint32_t alignment,
>  		    uint64_t flags)
>  {
>  	return i915_gem_object_do_pin(obj, vm,
>  				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
> -				      alignment, flags);
> +				      size, alignment, flags);
>  }
>  
>  int
>  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  			 const struct i915_ggtt_view *view,
> +			 uint64_t size,
>  			 uint32_t alignment,
>  			 uint64_t flags)
>  {
> @@ -3534,7 +3532,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  	BUG_ON(!view);
>  
>  	return i915_gem_object_do_pin(obj, &ggtt->base, view,
> -				      alignment, flags | PIN_GLOBAL);
> +				      size, alignment, flags | PIN_GLOBAL);
>  }
>  
>  void
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 40937a09855d..c1e7ee212e7e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -652,10 +652,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  			flags |= PIN_HIGH;
>  	}
>  
> -	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
> +	ret = i915_gem_object_pin(obj, vma->vm,
> +				  entry->pad_to_size,
> +				  entry->alignment,
> +				  flags);
>  	if ((ret == -ENOSPC  || ret == -E2BIG) &&
>  	    only_mappable_for_reloc(entry->flags))
>  		ret = i915_gem_object_pin(obj, vma->vm,
> +					  entry->pad_to_size,
>  					  entry->alignment,
>  					  flags & ~PIN_MAPPABLE);
>  	if (ret)
> @@ -718,6 +722,9 @@ eb_vma_misplaced(struct i915_vma *vma)
>  	    vma->node.start & (entry->alignment - 1))
>  		return true;
>  
> +	if (vma->node.size < entry->pad_to_size)
> +		return true;
> +
>  	if (entry->flags & EXEC_OBJECT_PINNED &&
>  	    vma->node.start != entry->offset)
>  		return true;
> @@ -1058,6 +1065,13 @@ validate_exec_list(struct drm_device *dev,
>  		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
>  			return -EINVAL;
>  
> +		/* pad_to_size was once a reserved field, so sanitize it */
> +		if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
> +			if (offset_in_page(exec[i].pad_to_size))
> +				return -EINVAL;
> +		} else
> +			exec[i].pad_to_size = 0;
> +
>  		/* First check for malicious input causing overflow in
>  		 * the worst case where we need to allocate the entire
>  		 * relocation tree as a single array.
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index d6c668e58426..3b861746ba7a 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -701,10 +701,14 @@ struct drm_i915_gem_exec_object2 {
>  #define EXEC_OBJECT_WRITE	(1<<2)
>  #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
>  #define EXEC_OBJECT_PINNED	(1<<4)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
> +#define EXEC_OBJECT_PAD_TO_SIZE	(1<<5)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
>  	__u64 flags;
>  
> -	__u64 rsvd1;
> +	union {
> +		__u64 rsvd1;
> +		__u64 pad_to_size;
> +	};
>  	__u64 rsvd2;
>  };
>  
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2016-06-08  9:41 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-03 16:55 Tracking VMA Chris Wilson
2016-06-03 16:55 ` [PATCH 01/38] drm/i915: Combine loops within i915_gem_evict_something Chris Wilson
2016-06-03 16:55 ` [PATCH 02/38] drm/i915: Remove surplus drm_device parameter to i915_gem_evict_something() Chris Wilson
2016-06-03 16:55 ` [PATCH 03/38] drm/i915: Double check the active status on the batch pool Chris Wilson
2016-06-03 16:55 ` [PATCH 04/38] drm/i915: Remove request retirement before each batch Chris Wilson
2016-06-06 13:40   ` Mika Kuoppala
2016-06-03 16:55 ` [PATCH 05/38] drm/i915: Remove i915_gem_execbuffer_retire_commands() Chris Wilson
2016-06-06 14:26   ` Mika Kuoppala
2016-06-03 16:55 ` [PATCH 06/38] drm/i915: Pad GTT views of exec objects up to user specified size Chris Wilson
2016-06-08  9:41   ` Daniel Vetter [this message]
2016-06-08 10:08     ` Chris Wilson
2016-06-03 16:55 ` [PATCH 07/38] drm/i915: Split insertion/binding of an object into the VM Chris Wilson
2016-06-03 16:55 ` [PATCH 08/38] drm/i915: Record allocated vma size Chris Wilson
2016-06-03 16:55 ` [PATCH 09/38] drm/i915: Start passing around i915_vma from execbuffer Chris Wilson
2016-06-03 16:55 ` [PATCH 10/38] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin() Chris Wilson
2016-06-08  9:43   ` Daniel Vetter
2016-06-08 12:58     ` Chris Wilson
2016-06-03 16:55 ` [PATCH 11/38] drm/i915: Make fb_tracking.lock a spinlock Chris Wilson
2016-06-03 16:55 ` [PATCH 12/38] drm/i915: Use atomics to manipulate obj->frontbuffer_bits Chris Wilson
2016-06-03 16:55 ` [PATCH 13/38] drm/i915: Move obj->active:5 to obj->flags Chris Wilson
2016-06-08  9:53   ` Daniel Vetter
2016-06-03 16:55 ` [PATCH 14/38] drm/i915: Move i915_gem_object_wait_rendering() Chris Wilson
2016-06-03 16:55 ` [PATCH 15/38] drm/i915: Mark all current requests as complete before resetting them Chris Wilson
2016-06-03 16:55 ` [PATCH 16/38] drm/i915: Enable lockless lookup of request tracking via RCU Chris Wilson
2016-06-03 16:55 ` [PATCH 17/38] drm/i915: Introduce i915_gem_active_wait_unlocked() Chris Wilson
2016-06-03 16:55 ` [PATCH 18/38] drm/i915: Convert non-blocking waits for requests over to using RCU Chris Wilson
2016-06-03 16:55 ` [PATCH 19/38] drm/i915: Convert non-blocking userptr " Chris Wilson
2016-06-03 16:55 ` [PATCH 20/38] drm/i915/userptr: Remove superfluous interruptible=false on waiting Chris Wilson
2016-06-03 16:55 ` [PATCH 21/38] drm/i915: Avoid requiring struct_mutex during suspend Chris Wilson
2016-06-03 16:55 ` [PATCH 22/38] drm/gem/shrinker: Wait before acquiring struct_mutex under oom Chris Wilson
2016-06-08  9:57   ` Daniel Vetter
2016-06-08 10:04     ` Chris Wilson
2016-06-03 16:55 ` [PATCH 23/38] suspend Chris Wilson
2016-06-03 16:55 ` [PATCH 24/38] drm/i915: Do a nonblocking wait first in pread/pwrite Chris Wilson
2016-06-03 16:55 ` [PATCH 25/38] drm/i915: Remove (struct_mutex) locking for wait-ioctl Chris Wilson
2016-06-03 16:55 ` [PATCH 26/38] drm/i915: Remove (struct_mutex) locking for busy-ioctl Chris Wilson
2016-06-03 16:55 ` [PATCH 27/38] drm/i915: Reduce locking inside swfinish ioctl Chris Wilson
2016-06-08  9:59   ` Daniel Vetter
2016-06-08 10:03     ` Chris Wilson
2016-06-03 16:55 ` [PATCH 28/38] drm/i915: Remove pinned check from madvise ioctl Chris Wilson
2016-06-08 10:01   ` Daniel Vetter
2016-06-03 16:55 ` [PATCH 29/38] drm/i915: Remove locking for get_tiling Chris Wilson
2016-06-08 10:02   ` Daniel Vetter
2016-06-08 10:11     ` Chris Wilson
2016-06-13 14:19       ` Daniel Vetter
2016-06-03 16:55 ` [PATCH 30/38] drm/i915: Assert that the request hasn't been retired Chris Wilson
2016-06-03 16:55 ` [PATCH 31/38] drm/i915: Reduce amount of duplicate buffer information captured on error Chris Wilson
2016-06-03 16:55 ` [PATCH 32/38] drm/i915: Stop the machine whilst capturing the GPU crash dump Chris Wilson
2016-06-08 10:06   ` Daniel Vetter
2016-06-08 11:37     ` Chris Wilson
2016-06-03 16:55 ` [PATCH 33/38] drm/i915: Scan GGTT active list for context object Chris Wilson
2016-06-03 16:55 ` [PATCH 34/38] drm/i915: Move setting of request->batch into its single callsite Chris Wilson
2016-06-03 16:55 ` [PATCH 35/38] drm/i915: Mark unmappable GGTT entries as PIN_HIGH Chris Wilson
2016-06-03 16:55 ` [PATCH 36/38] drm/i915: Track pinned vma inside guc Chris Wilson
2016-06-03 16:55 ` [PATCH 37/38] drm/i915: Track pinned VMA Chris Wilson
2016-06-08 10:08   ` Daniel Vetter
2016-06-03 16:55 ` [PATCH 38/38] drm/i915/overlay: Use VMA as the primary tracker for images Chris Wilson
2016-06-06 10:42 ` ✗ Ro.CI.BAT: failure for series starting with [01/38] drm/i915: Combine loops within i915_gem_evict_something Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160608094101.GT3363@phenom.ffwll.local \
    --to=daniel@ffwll.ch \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.