All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Add soft-pinning API for execbuffer
@ 2015-03-06  9:44 Chris Wilson
  2015-03-11  6:41 ` [Intel-gfx] " Zhenyu Wang
                   ` (2 more replies)
  0 siblings, 3 replies; 32+ messages in thread
From: Chris Wilson @ 2015-03-06  9:44 UTC (permalink / raw)
  To: intel-gfx

Userspace can pass in an offset that it presumes the object is located
at. The kernel will then do its utmost to fit the object into that
location. The assumption is that userspace is handling its own object
locations (for example along with full-ppgtt) and that the kernel will
rarely have to make space for the user's requests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |  5 +++
 drivers/gpu/drm/i915/i915_gem.c            | 53 ++++++++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_evict.c      | 52 +++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  9 ++++-
 include/uapi/drm/i915_drm.h                |  3 +-
 5 files changed, 106 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 38ee0811f53f..5ac96e2adc6c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2590,7 +2590,9 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
 #define PIN_NONBLOCK 0x2
 #define PIN_GLOBAL 0x4
 #define PIN_OFFSET_BIAS 0x8
+#define PIN_OFFSET_FIXED 0x10
 #define PIN_OFFSET_MASK (~4095)
+
 int __must_check i915_gem_object_pin_view(struct drm_i915_gem_object *obj,
 					  struct i915_address_space *vm,
 					  uint32_t size,
@@ -2942,6 +2944,9 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
 					  unsigned long start,
 					  unsigned long end,
 					  unsigned flags);
+int __must_check
+i915_gem_evict_range(struct drm_device *dev, struct i915_address_space *vm,
+		     unsigned long start, unsigned long end);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 int i915_gem_evict_everything(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9d0df4d85693..b266b31690e4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3592,22 +3592,43 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		goto err_unpin;
 
+	if (flags & PIN_OFFSET_FIXED) {
+		uint64_t offset = flags & PIN_OFFSET_MASK;
+		if (offset & (alignment - 1)) {
+			vma = ERR_PTR(-EINVAL);
+			goto err_free_vma;
+		}
+		vma->node.start = offset;
+		vma->node.size = size;
+		vma->node.color = obj->cache_level;
+		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		if (ret) {
+			ret = i915_gem_evict_range(dev, vm, start, end);
+			if (ret == 0)
+				ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		}
+		if (ret) {
+			vma = ERR_PTR(ret);
+			goto err_free_vma;
+		}
+	} else {
 search_free:
-	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
-						  size, alignment,
-						  obj->cache_level,
-						  start, end,
-						  DRM_MM_SEARCH_DEFAULT,
-						  DRM_MM_CREATE_DEFAULT);
-	if (ret) {
-		ret = i915_gem_evict_something(dev, vm, size, alignment,
-					       obj->cache_level,
-					       start, end,
-					       flags);
-		if (ret == 0)
-			goto search_free;
+		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+							  size, alignment,
+							  obj->cache_level,
+							  start, end,
+							  DRM_MM_SEARCH_DEFAULT,
+							  DRM_MM_CREATE_DEFAULT);
+		if (ret) {
+			ret = i915_gem_evict_something(dev, vm, size, alignment,
+						       obj->cache_level,
+						       start, end,
+						       flags);
+			if (ret == 0)
+				goto search_free;
 
-		goto err_free_vma;
+			goto err_free_vma;
+		}
 	}
 	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
 		ret = -EINVAL;
@@ -4187,6 +4208,10 @@ i915_vma_misplaced(struct i915_vma *vma,
 	    vma->node.start < (flags & PIN_OFFSET_MASK))
 		return true;
 
+	if (flags & PIN_OFFSET_FIXED &&
+	    vma->node.start != (flags & PIN_OFFSET_MASK))
+		return true;
+
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index e3a49d94da3a..c4b2ead0d805 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -195,6 +195,58 @@ found:
 	return ret;
 }
 
+int
+i915_gem_evict_range(struct drm_device *dev, struct i915_address_space *vm,
+		     unsigned long start, unsigned long end)
+{
+	struct drm_mm_node *node;
+	struct list_head eviction_list;
+	int ret = 0;
+
+	INIT_LIST_HEAD(&eviction_list);
+	drm_mm_for_each_node(node, &vm->mm) {
+		struct i915_vma *vma;
+
+		if (node->start + node->size <= start)
+			continue;
+		if (node->start >= end)
+			break;
+
+		vma = container_of(node, typeof(*vma), node);
+		if (vma->pin_count) {
+			ret = -EBUSY;
+			break;
+		}
+
+		if (WARN_ON(!list_empty(&vma->exec_list))) {
+			ret = -EINVAL;
+			break;
+		}
+
+		drm_gem_object_reference(&vma->obj->base);
+		list_add(&vma->exec_list, &eviction_list);
+	}
+
+	while (!list_empty(&eviction_list)) {
+		struct i915_vma *vma;
+		struct drm_gem_object *obj;
+
+		vma = list_first_entry(&eviction_list,
+				       struct i915_vma,
+				       exec_list);
+
+		obj = &vma->obj->base;
+
+		list_del_init(&vma->exec_list);
+		if (ret == 0)
+			ret = i915_vma_unbind(vma);
+
+		drm_gem_object_unreference(obj);
+	}
+
+	return ret;
+}
+
 /**
  * i915_gem_evict_vm - Evict all idle vmas from a vm
  * @vm: Address space to cleanse
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 183f2914486e..8129a8c75a21 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -590,6 +590,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 			flags |= PIN_GLOBAL;
 		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
 			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+		if (entry->flags & EXEC_OBJECT_PINNED)
+			flags |= entry->offset | PIN_OFFSET_FIXED;
 	}
 
 	ret = i915_gem_object_pin(obj, vma->vm,
@@ -666,6 +668,10 @@ eb_vma_misplaced(struct i915_vma *vma)
 	if (vma->node.size < entry->pad_to_size)
 		return true;
 
+	if (entry->flags & EXEC_OBJECT_PINNED &&
+	    vma->node.start != entry->offset)
+		return true;
+
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
 	    vma->node.start < BATCH_OFFSET_BIAS)
 		return true;
@@ -1392,7 +1398,8 @@ eb_get_batch(struct eb_vmas *eb)
 	 * Note that actual hangs have only been observed on gen7, but for
 	 * paranoia do it everywhere.
 	 */
-	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
 
 	return vma->obj;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index cb3417ca406f..ddef37e970d3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -679,7 +679,8 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
 #define EXEC_OBJECT_WRITE	(1<<2)
 #define EXEC_OBJECT_PAD_TO_SIZE	(1<<3)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
+#define EXEC_OBJECT_PINNED	(1<<4)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
 	__u64 flags;
 
 	__u64 pad_to_size;
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Add soft-pinning API for execbuffer
  2015-03-06  9:44 [PATCH] drm/i915: Add soft-pinning API for execbuffer Chris Wilson
@ 2015-03-11  6:41 ` Zhenyu Wang
  2015-04-29 13:28 ` Daniel, Thomas
  2015-06-30 14:13 ` [PATCH v4] " Thomas Daniel
  2 siblings, 0 replies; 32+ messages in thread
From: Zhenyu Wang @ 2015-03-11  6:41 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, beignet


[-- Attachment #1.1: Type: text/plain, Size: 619 bytes --]

On 2015.03.06 09:44:07 +0000, Chris Wilson wrote:
> Userspace can pass in an offset that it presumes the object is located
> at. The kernel will then do its utmost to fit the object into that
> location. The assumption is that userspace is handling its own object
> locations (for example along with full-ppgtt) and that the kernel will
> rarely have to make space for the user's requests.
> 

Chris, would you add libdrm support for this? e.g beignet doesn't
handle exec object itself but use libdrm.

-- 
Open Source Technology Center, Intel ltd.

$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827

[-- Attachment #1.2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] drm/i915: Add soft-pinning API for execbuffer
  2015-03-06  9:44 [PATCH] drm/i915: Add soft-pinning API for execbuffer Chris Wilson
  2015-03-11  6:41 ` [Intel-gfx] " Zhenyu Wang
@ 2015-04-29 13:28 ` Daniel, Thomas
  2015-04-29 14:01   ` Chris Wilson
  2015-06-30 14:13 ` [PATCH v4] " Thomas Daniel
  2 siblings, 1 reply; 32+ messages in thread
From: Daniel, Thomas @ 2015-04-29 13:28 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

> -----Original Message-----
> From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf Of
> Chris Wilson
> Sent: Friday, March 6, 2015 9:44 AM
> To: intel-gfx@lists.freedesktop.org
> Subject: [Intel-gfx] [PATCH] drm/i915: Add soft-pinning API for execbuffer
> 
> Userspace can pass in an offset that it presumes the object is located
> at. The kernel will then do its utmost to fit the object into that
> location. The assumption is that userspace is handling its own object
> locations (for example along with full-ppgtt) and that the kernel will
> rarely have to make space for the user's requests.	
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  5 +++
>  drivers/gpu/drm/i915/i915_gem.c            | 53 ++++++++++++++++++++++--------
>  drivers/gpu/drm/i915/i915_gem_evict.c      | 52
> +++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  9 ++++-
>  include/uapi/drm/i915_drm.h                |  3 +-
>  5 files changed, 106 insertions(+), 16 deletions(-)
> 

> diff --git a/drivers/gpu/drm/i915/i915_gem.c
> b/drivers/gpu/drm/i915/i915_gem.c
> index 9d0df4d85693..b266b31690e4 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3592,22 +3592,43 @@ i915_gem_object_bind_to_vm(struct
> drm_i915_gem_object *obj,
>  	if (IS_ERR(vma))
>  		goto err_unpin;
> 
> +	if (flags & PIN_OFFSET_FIXED) {
> +		uint64_t offset = flags & PIN_OFFSET_MASK;
> +		if (offset & (alignment - 1)) {
> +			vma = ERR_PTR(-EINVAL);
> +			goto err_free_vma;
> +		}
> +		vma->node.start = offset;
> +		vma->node.size = size;
> +		vma->node.color = obj->cache_level;
> +		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> +		if (ret) {
> +			ret = i915_gem_evict_range(dev, vm, start, end);
Did you mean i915_gem_evict_range(dev, vm, offset, offset+size) ?

> +			if (ret == 0)
> +				ret = drm_mm_reserve_node(&vm->mm,
> &vma->node);
> +		}
> +		if (ret) {
> +			vma = ERR_PTR(ret);
> +			goto err_free_vma;
> +		}
> +	} else {
>  search_free:
> -	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> -						  size, alignment,
> -						  obj->cache_level,
> -						  start, end,
> -
> DRM_MM_SEARCH_DEFAULT,
> -
> DRM_MM_CREATE_DEFAULT);
> -	if (ret) {
> -		ret = i915_gem_evict_something(dev, vm, size, alignment,
> -					       obj->cache_level,
> -					       start, end,
> -					       flags);
> -		if (ret == 0)
> -			goto search_free;
> +		ret = drm_mm_insert_node_in_range_generic(&vm->mm,
> &vma->node,
> +							  size, alignment,
> +							  obj->cache_level,
> +							  start, end,
> +
> DRM_MM_SEARCH_DEFAULT,
> +
> DRM_MM_CREATE_DEFAULT);
> +		if (ret) {
> +			ret = i915_gem_evict_something(dev, vm, size,
> alignment,
> +						       obj->cache_level,
> +						       start, end,
> +						       flags);
> +			if (ret == 0)
> +				goto search_free;
> 
> -		goto err_free_vma;
> +			goto err_free_vma;
> +		}
>  	}
>  	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
>  		ret = -EINVAL;

> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c
> b/drivers/gpu/drm/i915/i915_gem_evict.c
> index e3a49d94da3a..c4b2ead0d805 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -195,6 +195,58 @@ found:
>  	return ret;
>  }
> 
> +int
> +i915_gem_evict_range(struct drm_device *dev, struct i915_address_space
> *vm,
> +		     unsigned long start, unsigned long end)
> +{
> +	struct drm_mm_node *node;
> +	struct list_head eviction_list;
> +	int ret = 0;
> +
> +	INIT_LIST_HEAD(&eviction_list);
> +	drm_mm_for_each_node(node, &vm->mm) {
> +		struct i915_vma *vma;
> +
> +		if (node->start + node->size <= start)
> +			continue;
> +		if (node->start >= end)
> +			break;
> +
> +		vma = container_of(node, typeof(*vma), node);
> +		if (vma->pin_count) {
> +			ret = -EBUSY;
> +			break;
> +		}
> +
> +		if (WARN_ON(!list_empty(&vma->exec_list))) { 
So if an execbuffer uses both EXEC_OBJECT_PINNED and ordinary buffers in its exec_list then the ordinary buffers cannot be relocated if they are in the range of the pinned buffer.  Was this your intention?

> +			ret = -EINVAL;
> +			break;
> +		}
> +
> +		drm_gem_object_reference(&vma->obj->base);
> +		list_add(&vma->exec_list, &eviction_list);
I guess we need another list_head if we want to support both types of object in the same execbuffer call and allow relocation.

> +	}
> +
> +	while (!list_empty(&eviction_list)) {
> +		struct i915_vma *vma;
> +		struct drm_gem_object *obj;
> +
> +		vma = list_first_entry(&eviction_list,
> +				       struct i915_vma,
> +				       exec_list);
> +
> +		obj = &vma->obj->base;
> +
> +		list_del_init(&vma->exec_list);
> +		if (ret == 0)
> +			ret = i915_vma_unbind(vma);
> +
> +		drm_gem_object_unreference(obj);
> +	}
> +
> +	return ret;
> +}

Useful and worthwhile patch though.  Cheers.
Thomas.
 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] drm/i915: Add soft-pinning API for execbuffer
  2015-04-29 13:28 ` Daniel, Thomas
@ 2015-04-29 14:01   ` Chris Wilson
  0 siblings, 0 replies; 32+ messages in thread
From: Chris Wilson @ 2015-04-29 14:01 UTC (permalink / raw)
  To: Daniel, Thomas; +Cc: intel-gfx

On Wed, Apr 29, 2015 at 01:28:19PM +0000, Daniel, Thomas wrote:
> > -----Original Message-----
> > From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf Of
> > Chris Wilson
> > Sent: Friday, March 6, 2015 9:44 AM
> > To: intel-gfx@lists.freedesktop.org
> > Subject: [Intel-gfx] [PATCH] drm/i915: Add soft-pinning API for execbuffer
> > 
> > Userspace can pass in an offset that it presumes the object is located
> > at. The kernel will then do its utmost to fit the object into that
> > location. The assumption is that userspace is handling its own object
> > locations (for example along with full-ppgtt) and that the kernel will
> > rarely have to make space for the user's requests.	
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h            |  5 +++
> >  drivers/gpu/drm/i915/i915_gem.c            | 53 ++++++++++++++++++++++--------
> >  drivers/gpu/drm/i915/i915_gem_evict.c      | 52
> > +++++++++++++++++++++++++++++
> >  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  9 ++++-
> >  include/uapi/drm/i915_drm.h                |  3 +-
> >  5 files changed, 106 insertions(+), 16 deletions(-)
> > 
> 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c
> > b/drivers/gpu/drm/i915/i915_gem.c
> > index 9d0df4d85693..b266b31690e4 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3592,22 +3592,43 @@ i915_gem_object_bind_to_vm(struct
> > drm_i915_gem_object *obj,
> >  	if (IS_ERR(vma))
> >  		goto err_unpin;
> > 
> > +	if (flags & PIN_OFFSET_FIXED) {
> > +		uint64_t offset = flags & PIN_OFFSET_MASK;
> > +		if (offset & (alignment - 1)) {
> > +			vma = ERR_PTR(-EINVAL);
> > +			goto err_free_vma;
> > +		}
> > +		vma->node.start = offset;
> > +		vma->node.size = size;
> > +		vma->node.color = obj->cache_level;
> > +		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> > +		if (ret) {
> > +			ret = i915_gem_evict_range(dev, vm, start, end);
> Did you mean i915_gem_evict_range(dev, vm, offset, offset+size) ?

Whoops. I guess i915_gem_evict_vma() would be a better interface.

> > +int
> > +i915_gem_evict_range(struct drm_device *dev, struct i915_address_space
> > *vm,
> > +		     unsigned long start, unsigned long end)
> > +{
> > +	struct drm_mm_node *node;
> > +	struct list_head eviction_list;
> > +	int ret = 0;
> > +
> > +	INIT_LIST_HEAD(&eviction_list);
> > +	drm_mm_for_each_node(node, &vm->mm) {
> > +		struct i915_vma *vma;
> > +
> > +		if (node->start + node->size <= start)
> > +			continue;
> > +		if (node->start >= end)
> > +			break;
> > +
> > +		vma = container_of(node, typeof(*vma), node);
> > +		if (vma->pin_count) {
> > +			ret = -EBUSY;
> > +			break;
> > +		}
> > +
> > +		if (WARN_ON(!list_empty(&vma->exec_list))) { 
> So if an execbuffer uses both EXEC_OBJECT_PINNED and ordinary buffers in its exec_list then the ordinary buffers cannot be relocated if they are in the range of the pinned buffer.  Was this your intention?

The desired behaviour should be to restart the reservation process with
bind the fixed objects first, then bind the rest.

The reservation logic is design around not moving objects, with the
expectation that we can reuse the layout from the last batch and
optimised for that. We should return ENOSPC here which causes the
reservation logic to unpin everything, evict and try again. It will be
easy enough to place the fixed objects at the head of the second pass.

I choose EINVAL initially thinking it should catch the condition of two
overlapping fixed objects without consider evicting ordinary bo (I was
caught thinking of userspace doing an all or nothing approach).

> 
> > +			ret = -EINVAL;
> > +			break;
> > +		}
> > +
> > +		drm_gem_object_reference(&vma->obj->base);
> > +		list_add(&vma->exec_list, &eviction_list);
> I guess we need another list_head if we want to support both types of object in the same execbuffer call and allow relocation.

Actually, here since we are walking the drm_mm rather than using the
scanner, we can switch over to list_for_each_entry_safe() and do the
unbinding inline.

Reduces to
int
i915_gem_evict_for_vma(struct i915_vma *target)
{
        struct drm_mm_node *node, *next;
        int ret;

        list_for_each_entry_safe(node, next,
                                 &target->vm->mm.head_node.node_list,
                                 node_list) {
                struct i915_vma *vma;

                if (node->start + node->size <= target->node.start)
                        continue;
                if (node->start >= target->node.start + target->node.size)
                        break;

                vma = container_of(node, typeof(*vma), node);
                if (vma->exec_entry &&
                    vma->exec_entry->flags & EXEC_OBJECT_PINNED)
                        /* Overlapping fixed objects in the same batch */
                        return -EINVAL;

                if (vma->pin_count)
                        /* We may need to evict an buffer in the same batch */
                        return vma->exec_entry ? -ENOSPC : -EBUSY;

                ret = i915_vma_unbind(vma);
                if (ret)
                        return ret;
        }

        return 0;
}

-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
  2015-03-06  9:44 [PATCH] drm/i915: Add soft-pinning API for execbuffer Chris Wilson
  2015-03-11  6:41 ` [Intel-gfx] " Zhenyu Wang
  2015-04-29 13:28 ` Daniel, Thomas
@ 2015-06-30 14:13 ` Thomas Daniel
  2015-06-30 14:20   ` Daniel, Thomas
                     ` (3 more replies)
  2 siblings, 4 replies; 32+ messages in thread
From: Thomas Daniel @ 2015-06-30 14:13 UTC (permalink / raw)
  To: intel-gfx; +Cc: Akash Goel, Michal Winiarsky, Vinay Belgaumkar

From: Chris Wilson <chris@chris-wilson.co.uk>

Userspace can pass in an offset that it presumes the object is located
at. The kernel will then do its utmost to fit the object into that
location. The assumption is that userspace is handling its own object
locations (for example along with full-ppgtt) and that the kernel will
rarely have to make space for the user's requests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
(Not published externally)

v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
to allow eviction of soft-pinned objects when another soft-pinned object used
by a subsequent execbuffer overlaps reported by Michal Winiarski.
(Not published externally)

v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
pinned first after an address conflict happens to avoid repeated conflicts in
rare cases (Suggested by Chris Wilson).  Expanded comment on
drm_i915_gem_exec_object2.offset to cover this new API.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Akash Goel <akash.goel@intel.com>
Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Cc: Michal Winiarsky <michal.winiarsky@intel.com>
Cc: Zou Nanhai <nanhai.zou@intel.com>
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |    4 +++
 drivers/gpu/drm/i915/i915_gem.c            |   51 ++++++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_evict.c      |   38 +++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   16 +++++++--
 include/uapi/drm/i915_drm.h                |    9 +++--
 5 files changed, 99 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d90a782..e96c101 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2747,7 +2747,9 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
 #define PIN_USER	(1<<4)
 #define PIN_UPDATE	(1<<5)
 #define PIN_FULL_RANGE	(1<<6)
+#define PIN_OFFSET_FIXED	(1<<7)
 #define PIN_OFFSET_MASK (~4095)
+
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    struct i915_address_space *vm,
@@ -3085,6 +3087,8 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
 					  unsigned long start,
 					  unsigned long end,
 					  unsigned flags);
+int __must_check
+i915_gem_evict_for_vma(struct i915_vma *target);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 int i915_gem_evict_everything(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f170da6..a7e5ff2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3806,22 +3806,41 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		goto err_unpin;
 
+	if (flags & PIN_OFFSET_FIXED) {
+		uint64_t offset = flags & PIN_OFFSET_MASK;
+		if (offset & (alignment - 1)) {
+			ret = -EINVAL;
+			goto err_free_vma;
+		}
+		vma->node.start = offset;
+		vma->node.size = size;
+		vma->node.color = obj->cache_level;
+		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		if (ret) {
+			ret = i915_gem_evict_for_vma(vma);
+			if (ret == 0)
+				ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		}
+		if (ret)
+			goto err_free_vma;
+	} else {
 search_free:
-	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
-						  size, alignment,
-						  obj->cache_level,
-						  start, end,
-						  search_flag,
-						  alloc_flag);
-	if (ret) {
-		ret = i915_gem_evict_something(dev, vm, size, alignment,
-					       obj->cache_level,
-					       start, end,
-					       flags);
-		if (ret == 0)
-			goto search_free;
+		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+						  	  size, alignment,
+						  	  obj->cache_level,
+						  	  start, end,
+						  	  search_flag,
+						  	  alloc_flag);
+		if (ret) {
+			ret = i915_gem_evict_something(dev, vm, size, alignment,
+					       	       obj->cache_level,
+					       	       start, end,
+					       	       flags);
+			if (ret == 0)
+				goto search_free;
 
-		goto err_free_vma;
+			goto err_free_vma;
+		}
 	}
 	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
 		ret = -EINVAL;
@@ -4357,6 +4376,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
 	    vma->node.start < (flags & PIN_OFFSET_MASK))
 		return true;
 
+	if (flags & PIN_OFFSET_FIXED &&
+	    vma->node.start != (flags & PIN_OFFSET_MASK))
+		return true;
+
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index d09e35e..6098e2f 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -199,6 +199,44 @@ found:
 	return ret;
 }
 
+int
+i915_gem_evict_for_vma(struct i915_vma *target)
+{
+	struct drm_mm_node *node, *next;
+
+	list_for_each_entry_safe(node, next,
+			&target->vm->mm.head_node.node_list,
+			node_list) {
+		struct i915_vma *vma;
+		int ret;
+
+		if (node->start + node->size <= target->node.start)
+			continue;
+		if (node->start >= target->node.start + target->node.size)
+			break;
+
+		vma = container_of(node, typeof(*vma), node);
+
+		if (vma->pin_count) {
+			/* We may need to evict a buffer in the same batch */
+			if (!vma->exec_entry)
+				return -EBUSY;
+
+			if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
+				/* Overlapping fixed objects in the same batch */
+				return -EINVAL;
+
+			return -ENOSPC;
+		}
+
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 /**
  * i915_gem_evict_vm - Evict all idle vmas from a vm
  * @vm: Address space to cleanse
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3d94744..4e6955e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -596,6 +596,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 			flags |= PIN_GLOBAL | PIN_MAPPABLE;
 		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
 			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+		if (entry->flags & EXEC_OBJECT_PINNED)
+			flags |= entry->offset | PIN_OFFSET_FIXED;
 	}
 
 	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
@@ -665,6 +667,10 @@ eb_vma_misplaced(struct i915_vma *vma)
 	    vma->node.start & (entry->alignment - 1))
 		return true;
 
+	if (entry->flags & EXEC_OBJECT_PINNED &&
+	    vma->node.start != entry->offset)
+		return true;
+
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
 	    vma->node.start < BATCH_OFFSET_BIAS)
 		return true;
@@ -690,6 +696,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 	struct i915_vma *vma;
 	struct i915_address_space *vm;
 	struct list_head ordered_vmas;
+	struct list_head pinned_vmas;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	int retry;
 
@@ -698,6 +705,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
 
 	INIT_LIST_HEAD(&ordered_vmas);
+	INIT_LIST_HEAD(&pinned_vmas);
 	while (!list_empty(vmas)) {
 		struct drm_i915_gem_exec_object2 *entry;
 		bool need_fence, need_mappable;
@@ -716,7 +724,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 			obj->tiling_mode != I915_TILING_NONE;
 		need_mappable = need_fence || need_reloc_mappable(vma);
 
-		if (need_mappable) {
+		if (entry->flags & EXEC_OBJECT_PINNED)
+			list_move_tail(&vma->exec_list, &pinned_vmas);
+		else if (need_mappable) {
 			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
 			list_move(&vma->exec_list, &ordered_vmas);
 		} else
@@ -726,6 +736,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 		obj->base.pending_write_domain = 0;
 	}
 	list_splice(&ordered_vmas, vmas);
+	list_splice(&pinned_vmas, vmas);
 
 	/* Attempt to pin all of the buffers into the GTT.
 	 * This is done in 3 phases:
@@ -1404,7 +1415,8 @@ eb_get_batch(struct eb_vmas *eb)
 	 * Note that actual hangs have only been observed on gen7, but for
 	 * paranoia do it everywhere.
 	 */
-	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
 
 	return vma->obj;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 55ba527..18f8f99 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -677,8 +677,10 @@ struct drm_i915_gem_exec_object2 {
 	__u64 alignment;
 
 	/**
-	 * Returned value of the updated offset of the object, for future
-	 * presumed_offset writes.
+	 * When the EXEC_OBJECT_PINNED flag is specified this is populated by
+	 * the user with the GTT offset at which this object will be pinned.
+	 * Otherwise the kernel populates it with the value of the updated
+	 * offset of the object, for future presumed_offset writes.
 	 */
 	__u64 offset;
 
@@ -686,7 +688,8 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
 #define EXEC_OBJECT_WRITE	(1<<2)
 #define EXEC_OBJECT_SUPPORTS_48BBADDRESS (1<<3)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48BBADDRESS<<1)
+#define EXEC_OBJECT_PINNED	(1<<4)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
 	__u64 flags;
 
 	__u64 rsvd1;
-- 
1.7.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
  2015-06-30 14:13 ` [PATCH v4] " Thomas Daniel
@ 2015-06-30 14:20   ` Daniel, Thomas
  2015-07-15 14:55     ` Goel, Akash
  2015-07-04  5:29   ` Kristian Høgsberg
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 32+ messages in thread
From: Daniel, Thomas @ 2015-06-30 14:20 UTC (permalink / raw)
  To: intel-gfx; +Cc: Belgaumkar, Vinay, Goel, Akash

Many apologies to Michal for incorrectly spelling his name in the CC list.

Thomas.

> -----Original Message-----
> From: Daniel, Thomas
> Sent: Tuesday, June 30, 2015 3:13 PM
> To: intel-gfx@lists.freedesktop.org
> Cc: Chris Wilson; Goel, Akash; Belgaumkar, Vinay; Michal Winiarsky; Zou,
> Nanhai; Daniel, Thomas
> Subject: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
> 
> From: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Userspace can pass in an offset that it presumes the object is located
> at. The kernel will then do its utmost to fit the object into that
> location. The assumption is that userspace is handling its own object
> locations (for example along with full-ppgtt) and that the kernel will
> rarely have to make space for the user's requests.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
> Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
> (Not published externally)
> 
> v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
> to allow eviction of soft-pinned objects when another soft-pinned object used
> by a subsequent execbuffer overlaps reported by Michal Winiarski.
> (Not published externally)
> 
> v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
> pinned first after an address conflict happens to avoid repeated conflicts in
> rare cases (Suggested by Chris Wilson).  Expanded comment on
> drm_i915_gem_exec_object2.offset to cover this new API.
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Akash Goel <akash.goel@intel.com>
> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> Cc: Michal Winiarsky <michal.winiarsky@intel.com>
> Cc: Zou Nanhai <nanhai.zou@intel.com>
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |    4 +++
>  drivers/gpu/drm/i915/i915_gem.c            |   51 ++++++++++++++++++++--------
>  drivers/gpu/drm/i915/i915_gem_evict.c      |   38 +++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   16 +++++++--
>  include/uapi/drm/i915_drm.h                |    9 +++--
>  5 files changed, 99 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d90a782..e96c101 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2747,7 +2747,9 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
>  #define PIN_USER	(1<<4)
>  #define PIN_UPDATE	(1<<5)
>  #define PIN_FULL_RANGE	(1<<6)
> +#define PIN_OFFSET_FIXED	(1<<7)
>  #define PIN_OFFSET_MASK (~4095)
> +
>  int __must_check
>  i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  		    struct i915_address_space *vm,
> @@ -3085,6 +3087,8 @@ int __must_check i915_gem_evict_something(struct
> drm_device *dev,
>  					  unsigned long start,
>  					  unsigned long end,
>  					  unsigned flags);
> +int __must_check
> +i915_gem_evict_for_vma(struct i915_vma *target);
>  int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>  int i915_gem_evict_everything(struct drm_device *dev);
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c
> b/drivers/gpu/drm/i915/i915_gem.c
> index f170da6..a7e5ff2 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3806,22 +3806,41 @@ i915_gem_object_bind_to_vm(struct
> drm_i915_gem_object *obj,
>  	if (IS_ERR(vma))
>  		goto err_unpin;
> 
> +	if (flags & PIN_OFFSET_FIXED) {
> +		uint64_t offset = flags & PIN_OFFSET_MASK;
> +		if (offset & (alignment - 1)) {
> +			ret = -EINVAL;
> +			goto err_free_vma;
> +		}
> +		vma->node.start = offset;
> +		vma->node.size = size;
> +		vma->node.color = obj->cache_level;
> +		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> +		if (ret) {
> +			ret = i915_gem_evict_for_vma(vma);
> +			if (ret == 0)
> +				ret = drm_mm_reserve_node(&vm->mm,
> &vma->node);
> +		}
> +		if (ret)
> +			goto err_free_vma;
> +	} else {
>  search_free:
> -	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> -						  size, alignment,
> -						  obj->cache_level,
> -						  start, end,
> -						  search_flag,
> -						  alloc_flag);
> -	if (ret) {
> -		ret = i915_gem_evict_something(dev, vm, size, alignment,
> -					       obj->cache_level,
> -					       start, end,
> -					       flags);
> -		if (ret == 0)
> -			goto search_free;
> +		ret = drm_mm_insert_node_in_range_generic(&vm->mm,
> &vma->node,
> +						  	  size, alignment,
> +						  	  obj->cache_level,
> +						  	  start, end,
> +						  	  search_flag,
> +						  	  alloc_flag);
> +		if (ret) {
> +			ret = i915_gem_evict_something(dev, vm, size,
> alignment,
> +					       	       obj->cache_level,
> +					       	       start, end,
> +					       	       flags);
> +			if (ret == 0)
> +				goto search_free;
> 
> -		goto err_free_vma;
> +			goto err_free_vma;
> +		}
>  	}
>  	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
>  		ret = -EINVAL;
> @@ -4357,6 +4376,10 @@ i915_vma_misplaced(struct i915_vma *vma,
> uint32_t alignment, uint64_t flags)
>  	    vma->node.start < (flags & PIN_OFFSET_MASK))
>  		return true;
> 
> +	if (flags & PIN_OFFSET_FIXED &&
> +	    vma->node.start != (flags & PIN_OFFSET_MASK))
> +		return true;
> +
>  	return false;
>  }
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c
> b/drivers/gpu/drm/i915/i915_gem_evict.c
> index d09e35e..6098e2f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -199,6 +199,44 @@ found:
>  	return ret;
>  }
> 
> +int
> +i915_gem_evict_for_vma(struct i915_vma *target)
> +{
> +	struct drm_mm_node *node, *next;
> +
> +	list_for_each_entry_safe(node, next,
> +			&target->vm->mm.head_node.node_list,
> +			node_list) {
> +		struct i915_vma *vma;
> +		int ret;
> +
> +		if (node->start + node->size <= target->node.start)
> +			continue;
> +		if (node->start >= target->node.start + target->node.size)
> +			break;
> +
> +		vma = container_of(node, typeof(*vma), node);
> +
> +		if (vma->pin_count) {
> +			/* We may need to evict a buffer in the same batch */
> +			if (!vma->exec_entry)
> +				return -EBUSY;
> +
> +			if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
> +				/* Overlapping fixed objects in the same batch
> */
> +				return -EINVAL;
> +
> +			return -ENOSPC;
> +		}
> +
> +		ret = i915_vma_unbind(vma);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
>  /**
>   * i915_gem_evict_vm - Evict all idle vmas from a vm
>   * @vm: Address space to cleanse
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 3d94744..4e6955e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -596,6 +596,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma
> *vma,
>  			flags |= PIN_GLOBAL | PIN_MAPPABLE;
>  		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
>  			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
> +		if (entry->flags & EXEC_OBJECT_PINNED)
> +			flags |= entry->offset | PIN_OFFSET_FIXED;
>  	}
> 
>  	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
> @@ -665,6 +667,10 @@ eb_vma_misplaced(struct i915_vma *vma)
>  	    vma->node.start & (entry->alignment - 1))
>  		return true;
> 
> +	if (entry->flags & EXEC_OBJECT_PINNED &&
> +	    vma->node.start != entry->offset)
> +		return true;
> +
>  	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
>  	    vma->node.start < BATCH_OFFSET_BIAS)
>  		return true;
> @@ -690,6 +696,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
> *ring,
>  	struct i915_vma *vma;
>  	struct i915_address_space *vm;
>  	struct list_head ordered_vmas;
> +	struct list_head pinned_vmas;
>  	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
>  	int retry;
> 
> @@ -698,6 +705,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
> *ring,
>  	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
> 
>  	INIT_LIST_HEAD(&ordered_vmas);
> +	INIT_LIST_HEAD(&pinned_vmas);
>  	while (!list_empty(vmas)) {
>  		struct drm_i915_gem_exec_object2 *entry;
>  		bool need_fence, need_mappable;
> @@ -716,7 +724,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
> *ring,
>  			obj->tiling_mode != I915_TILING_NONE;
>  		need_mappable = need_fence || need_reloc_mappable(vma);
> 
> -		if (need_mappable) {
> +		if (entry->flags & EXEC_OBJECT_PINNED)
> +			list_move_tail(&vma->exec_list, &pinned_vmas);
> +		else if (need_mappable) {
>  			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>  			list_move(&vma->exec_list, &ordered_vmas);
>  		} else
> @@ -726,6 +736,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
> *ring,
>  		obj->base.pending_write_domain = 0;
>  	}
>  	list_splice(&ordered_vmas, vmas);
> +	list_splice(&pinned_vmas, vmas);
> 
>  	/* Attempt to pin all of the buffers into the GTT.
>  	 * This is done in 3 phases:
> @@ -1404,7 +1415,8 @@ eb_get_batch(struct eb_vmas *eb)
>  	 * Note that actual hangs have only been observed on gen7, but for
>  	 * paranoia do it everywhere.
>  	 */
> -	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
> +	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
> +		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
> 
>  	return vma->obj;
>  }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 55ba527..18f8f99 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -677,8 +677,10 @@ struct drm_i915_gem_exec_object2 {
>  	__u64 alignment;
> 
>  	/**
> -	 * Returned value of the updated offset of the object, for future
> -	 * presumed_offset writes.
> +	 * When the EXEC_OBJECT_PINNED flag is specified this is populated by
> +	 * the user with the GTT offset at which this object will be pinned.
> +	 * Otherwise the kernel populates it with the value of the updated
> +	 * offset of the object, for future presumed_offset writes.
>  	 */
>  	__u64 offset;
> 
> @@ -686,7 +688,8 @@ struct drm_i915_gem_exec_object2 {
>  #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
>  #define EXEC_OBJECT_WRITE	(1<<2)
>  #define EXEC_OBJECT_SUPPORTS_48BBADDRESS (1<<3)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -
> (EXEC_OBJECT_SUPPORTS_48BBADDRESS<<1)
> +#define EXEC_OBJECT_PINNED	(1<<4)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
>  	__u64 flags;
> 
>  	__u64 rsvd1;
> --
> 1.7.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
  2015-06-30 14:13 ` [PATCH v4] " Thomas Daniel
  2015-06-30 14:20   ` Daniel, Thomas
@ 2015-07-04  5:29   ` Kristian Høgsberg
  2015-07-04 12:23     ` Chris Wilson
  2015-07-04  7:53   ` Chris Wilson
  2015-07-20 16:41   ` [PATCH v5] " Thomas Daniel
  3 siblings, 1 reply; 32+ messages in thread
From: Kristian Høgsberg @ 2015-07-04  5:29 UTC (permalink / raw)
  To: Thomas Daniel; +Cc: Vinay Belgaumkar, intel-gfx, Michal Winiarsky, Akash Goel

On Tue, Jun 30, 2015 at 7:13 AM, Thomas Daniel <thomas.daniel@intel.com> wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
>
> Userspace can pass in an offset that it presumes the object is located
> at. The kernel will then do its utmost to fit the object into that
> location. The assumption is that userspace is handling its own object
> locations (for example along with full-ppgtt) and that the kernel will
> rarely have to make space for the user's requests.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>
> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
> Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
> (Not published externally)
>
> v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
> to allow eviction of soft-pinned objects when another soft-pinned object used
> by a subsequent execbuffer overlaps reported by Michal Winiarski.
> (Not published externally)
>
> v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
> pinned first after an address conflict happens to avoid repeated conflicts in
> rare cases (Suggested by Chris Wilson).  Expanded comment on
> drm_i915_gem_exec_object2.offset to cover this new API.

Can we add an I915_PARAM_HAS_EXEC_PINNED param for this feature?

Kristian

> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Akash Goel <akash.goel@intel.com>
> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> Cc: Michal Winiarsky <michal.winiarsky@intel.com>
> Cc: Zou Nanhai <nanhai.zou@intel.com>
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |    4 +++
>  drivers/gpu/drm/i915/i915_gem.c            |   51 ++++++++++++++++++++--------
>  drivers/gpu/drm/i915/i915_gem_evict.c      |   38 +++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   16 +++++++--
>  include/uapi/drm/i915_drm.h                |    9 +++--
>  5 files changed, 99 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d90a782..e96c101 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2747,7 +2747,9 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
>  #define PIN_USER       (1<<4)
>  #define PIN_UPDATE     (1<<5)
>  #define PIN_FULL_RANGE (1<<6)
> +#define PIN_OFFSET_FIXED       (1<<7)
>  #define PIN_OFFSET_MASK (~4095)
> +
>  int __must_check
>  i915_gem_object_pin(struct drm_i915_gem_object *obj,
>                     struct i915_address_space *vm,
> @@ -3085,6 +3087,8 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
>                                           unsigned long start,
>                                           unsigned long end,
>                                           unsigned flags);
> +int __must_check
> +i915_gem_evict_for_vma(struct i915_vma *target);
>  int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>  int i915_gem_evict_everything(struct drm_device *dev);
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f170da6..a7e5ff2 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3806,22 +3806,41 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>         if (IS_ERR(vma))
>                 goto err_unpin;
>
> +       if (flags & PIN_OFFSET_FIXED) {
> +               uint64_t offset = flags & PIN_OFFSET_MASK;
> +               if (offset & (alignment - 1)) {
> +                       ret = -EINVAL;
> +                       goto err_free_vma;
> +               }
> +               vma->node.start = offset;
> +               vma->node.size = size;
> +               vma->node.color = obj->cache_level;
> +               ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> +               if (ret) {
> +                       ret = i915_gem_evict_for_vma(vma);
> +                       if (ret == 0)
> +                               ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> +               }
> +               if (ret)
> +                       goto err_free_vma;
> +       } else {
>  search_free:
> -       ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> -                                                 size, alignment,
> -                                                 obj->cache_level,
> -                                                 start, end,
> -                                                 search_flag,
> -                                                 alloc_flag);
> -       if (ret) {
> -               ret = i915_gem_evict_something(dev, vm, size, alignment,
> -                                              obj->cache_level,
> -                                              start, end,
> -                                              flags);
> -               if (ret == 0)
> -                       goto search_free;
> +               ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> +                                                         size, alignment,
> +                                                         obj->cache_level,
> +                                                         start, end,
> +                                                         search_flag,
> +                                                         alloc_flag);
> +               if (ret) {
> +                       ret = i915_gem_evict_something(dev, vm, size, alignment,
> +                                                      obj->cache_level,
> +                                                      start, end,
> +                                                      flags);
> +                       if (ret == 0)
> +                               goto search_free;
>
> -               goto err_free_vma;
> +                       goto err_free_vma;
> +               }
>         }
>         if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
>                 ret = -EINVAL;
> @@ -4357,6 +4376,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
>             vma->node.start < (flags & PIN_OFFSET_MASK))
>                 return true;
>
> +       if (flags & PIN_OFFSET_FIXED &&
> +           vma->node.start != (flags & PIN_OFFSET_MASK))
> +               return true;
> +
>         return false;
>  }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index d09e35e..6098e2f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -199,6 +199,44 @@ found:
>         return ret;
>  }
>
> +int
> +i915_gem_evict_for_vma(struct i915_vma *target)
> +{
> +       struct drm_mm_node *node, *next;
> +
> +       list_for_each_entry_safe(node, next,
> +                       &target->vm->mm.head_node.node_list,
> +                       node_list) {
> +               struct i915_vma *vma;
> +               int ret;
> +
> +               if (node->start + node->size <= target->node.start)
> +                       continue;
> +               if (node->start >= target->node.start + target->node.size)
> +                       break;
> +
> +               vma = container_of(node, typeof(*vma), node);
> +
> +               if (vma->pin_count) {
> +                       /* We may need to evict a buffer in the same batch */
> +                       if (!vma->exec_entry)
> +                               return -EBUSY;
> +
> +                       if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
> +                               /* Overlapping fixed objects in the same batch */
> +                               return -EINVAL;
> +
> +                       return -ENOSPC;
> +               }
> +
> +               ret = i915_vma_unbind(vma);
> +               if (ret)
> +                       return ret;
> +       }
> +
> +       return 0;
> +}
> +
>  /**
>   * i915_gem_evict_vm - Evict all idle vmas from a vm
>   * @vm: Address space to cleanse
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 3d94744..4e6955e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -596,6 +596,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>                         flags |= PIN_GLOBAL | PIN_MAPPABLE;
>                 if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
>                         flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
> +               if (entry->flags & EXEC_OBJECT_PINNED)
> +                       flags |= entry->offset | PIN_OFFSET_FIXED;
>         }
>
>         ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
> @@ -665,6 +667,10 @@ eb_vma_misplaced(struct i915_vma *vma)
>             vma->node.start & (entry->alignment - 1))
>                 return true;
>
> +       if (entry->flags & EXEC_OBJECT_PINNED &&
> +           vma->node.start != entry->offset)
> +               return true;
> +
>         if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
>             vma->node.start < BATCH_OFFSET_BIAS)
>                 return true;
> @@ -690,6 +696,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>         struct i915_vma *vma;
>         struct i915_address_space *vm;
>         struct list_head ordered_vmas;
> +       struct list_head pinned_vmas;
>         bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
>         int retry;
>
> @@ -698,6 +705,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>         vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
>
>         INIT_LIST_HEAD(&ordered_vmas);
> +       INIT_LIST_HEAD(&pinned_vmas);
>         while (!list_empty(vmas)) {
>                 struct drm_i915_gem_exec_object2 *entry;
>                 bool need_fence, need_mappable;
> @@ -716,7 +724,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>                         obj->tiling_mode != I915_TILING_NONE;
>                 need_mappable = need_fence || need_reloc_mappable(vma);
>
> -               if (need_mappable) {
> +               if (entry->flags & EXEC_OBJECT_PINNED)
> +                       list_move_tail(&vma->exec_list, &pinned_vmas);
> +               else if (need_mappable) {
>                         entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>                         list_move(&vma->exec_list, &ordered_vmas);
>                 } else
> @@ -726,6 +736,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>                 obj->base.pending_write_domain = 0;
>         }
>         list_splice(&ordered_vmas, vmas);
> +       list_splice(&pinned_vmas, vmas);
>
>         /* Attempt to pin all of the buffers into the GTT.
>          * This is done in 3 phases:
> @@ -1404,7 +1415,8 @@ eb_get_batch(struct eb_vmas *eb)
>          * Note that actual hangs have only been observed on gen7, but for
>          * paranoia do it everywhere.
>          */
> -       vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
> +       if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
> +               vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
>
>         return vma->obj;
>  }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 55ba527..18f8f99 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -677,8 +677,10 @@ struct drm_i915_gem_exec_object2 {
>         __u64 alignment;
>
>         /**
> -        * Returned value of the updated offset of the object, for future
> -        * presumed_offset writes.
> +        * When the EXEC_OBJECT_PINNED flag is specified this is populated by
> +        * the user with the GTT offset at which this object will be pinned.
> +        * Otherwise the kernel populates it with the value of the updated
> +        * offset of the object, for future presumed_offset writes.
>          */
>         __u64 offset;
>
> @@ -686,7 +688,8 @@ struct drm_i915_gem_exec_object2 {
>  #define EXEC_OBJECT_NEEDS_GTT  (1<<1)
>  #define EXEC_OBJECT_WRITE      (1<<2)
>  #define EXEC_OBJECT_SUPPORTS_48BBADDRESS (1<<3)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48BBADDRESS<<1)
> +#define EXEC_OBJECT_PINNED     (1<<4)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
>         __u64 flags;
>
>         __u64 rsvd1;
> --
> 1.7.9.5
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
  2015-06-30 14:13 ` [PATCH v4] " Thomas Daniel
  2015-06-30 14:20   ` Daniel, Thomas
  2015-07-04  5:29   ` Kristian Høgsberg
@ 2015-07-04  7:53   ` Chris Wilson
  2015-07-20 16:41   ` [PATCH v5] " Thomas Daniel
  3 siblings, 0 replies; 32+ messages in thread
From: Chris Wilson @ 2015-07-04  7:53 UTC (permalink / raw)
  To: Thomas Daniel; +Cc: Vinay Belgaumkar, intel-gfx, Michal Winiarsky, Akash Goel

On Tue, Jun 30, 2015 at 03:13:13PM +0100, Thomas Daniel wrote:
>  	/**
> -	 * Returned value of the updated offset of the object, for future
> -	 * presumed_offset writes.
> +	 * When the EXEC_OBJECT_PINNED flag is specified this is populated by
> +	 * the user with the GTT offset at which this object will be pinned.
> +	 * Otherwise the kernel populates it with the value of the updated
> +	 * offset of the object, for future presumed_offset writes.
>  	 */

This isn't strictly true, even without PINNED but NO_RELOC this field must
be accurately populated with the presumed_offset. No matter what, the
kernel returns the value of the current address after execbuffer.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
  2015-07-04  5:29   ` Kristian Høgsberg
@ 2015-07-04 12:23     ` Chris Wilson
  2015-07-08 15:04       ` Daniel, Thomas
  0 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2015-07-04 12:23 UTC (permalink / raw)
  To: Kristian Høgsberg
  Cc: intel-gfx, Michal Winiarsky, Akash Goel, Vinay Belgaumkar

On Fri, Jul 03, 2015 at 10:29:44PM -0700, Kristian Høgsberg wrote:
> On Tue, Jun 30, 2015 at 7:13 AM, Thomas Daniel <thomas.daniel@intel.com> wrote:
> > From: Chris Wilson <chris@chris-wilson.co.uk>
> >
> > Userspace can pass in an offset that it presumes the object is located
> > at. The kernel will then do its utmost to fit the object into that
> > location. The assumption is that userspace is handling its own object
> > locations (for example along with full-ppgtt) and that the kernel will
> > rarely have to make space for the user's requests.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >
> > v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
> > Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
> > (Not published externally)
> >
> > v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
> > to allow eviction of soft-pinned objects when another soft-pinned object used
> > by a subsequent execbuffer overlaps reported by Michal Winiarski.
> > (Not published externally)
> >
> > v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
> > pinned first after an address conflict happens to avoid repeated conflicts in
> > rare cases (Suggested by Chris Wilson).  Expanded comment on
> > drm_i915_gem_exec_object2.offset to cover this new API.

Note this patch is outdated compared to the one I have in my tree. There
are also requirements to improve drm_mm_reserve_node().
 
> Can we add an I915_PARAM_HAS_EXEC_PINNED param for this feature?

Yeah, it's not that difficult to test,

static bool test_has_softpin(int fd)
{
   struct drm_i915_gem_create create;
   struct drm_i915_gem_exec_object2 object;
   struct drm_i915_gem_pwrite pwrite;
   struct drm_i915_gem_execbuffer2 execbuf;
   uint32_t batch[2] = { 0xa << 23 };
   bool ret = false;

   if (DBG_NO_SOFTPIN)
      return DBG_NO_SOFTPIN < 0;

   if (gem_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) < 2)
      return false;

   memset(&create, 0, sizeof(create));
   create.size = 4096;
   drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);

   memset(&pwrite, 0, sizeof(pwrite));
   pwrite.handle = create.handle;
   pwrite.offset = 0;
   pwrite.size = sizeof(batch);
   pwrite.data_ptr = (uintptr_t)batch;
   if (drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite))
      goto fail;

   object.handle = create.handle;

   memset(&execbuf, 0, sizeof(execbuf));
   execbuf.buffers_ptr = (uintptr_t)&object;
   execbuf.buffer_count = 1;
   if (drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf))
      goto fail;

   object.flags = EXEC_OBJECT_PINNED;
   ret  = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf) == 0;
fail:
   drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &create);

   return ret;
}

but compares to

static bool test_has_softpin(int fd)
{
   if (DBG_NO_SOFTPIN)
      return DBG_NO_SOFTPIN < 0;

   if (gem_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) < 2)
      return false;

   return gem_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN) > 0;
}

with a parameter. At some point, we probably want to add a GETPARAMV!
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
  2015-07-04 12:23     ` Chris Wilson
@ 2015-07-08 15:04       ` Daniel, Thomas
  2015-07-08 15:35         ` Chris Wilson
  0 siblings, 1 reply; 32+ messages in thread
From: Daniel, Thomas @ 2015-07-08 15:04 UTC (permalink / raw)
  To: Chris Wilson, Kristian Høgsberg
  Cc: Belgaumkar, Vinay, intel-gfx, Goel, Akash

> -----Original Message-----
> From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> Sent: Saturday, July 4, 2015 1:24 PM
> To: Kristian Høgsberg
> Cc: Daniel, Thomas; Belgaumkar, Vinay; intel-gfx@lists.freedesktop.org; Michal
> Winiarsky; Goel, Akash
> Subject: Re: [Intel-gfx] [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
> 
> On Fri, Jul 03, 2015 at 10:29:44PM -0700, Kristian Høgsberg wrote:
> > On Tue, Jun 30, 2015 at 7:13 AM, Thomas Daniel <thomas.daniel@intel.com>
> wrote:
> > > From: Chris Wilson <chris@chris-wilson.co.uk>
> > >
> > > Userspace can pass in an offset that it presumes the object is located
> > > at. The kernel will then do its utmost to fit the object into that
> > > location. The assumption is that userspace is handling its own object
> > > locations (for example along with full-ppgtt) and that the kernel will
> > > rarely have to make space for the user's requests.
> > >
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > >
> > > v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
> > > Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
> > > (Not published externally)
> > >
> > > v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
> > > to allow eviction of soft-pinned objects when another soft-pinned object
> used
> > > by a subsequent execbuffer overlaps reported by Michal Winiarski.
> > > (Not published externally)
> > >
> > > v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
> > > pinned first after an address conflict happens to avoid repeated conflicts in
> > > rare cases (Suggested by Chris Wilson).  Expanded comment on
> > > drm_i915_gem_exec_object2.offset to cover this new API.
> 
> Note this patch is outdated compared to the one I have in my tree. There
> are also requirements to improve drm_mm_reserve_node().
What requirements are these?

> > Can we add an I915_PARAM_HAS_EXEC_PINNED param for this feature?
> 
> Yeah, it's not that difficult to test,
> 
> static bool test_has_softpin(int fd)
> {
>    struct drm_i915_gem_create create;
>    struct drm_i915_gem_exec_object2 object;
>    struct drm_i915_gem_pwrite pwrite;
>    struct drm_i915_gem_execbuffer2 execbuf;
>    uint32_t batch[2] = { 0xa << 23 };
>    bool ret = false;
> 
>    if (DBG_NO_SOFTPIN)
>       return DBG_NO_SOFTPIN < 0;
> 
>    if (gem_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) < 2)
>       return false;
> 
>    memset(&create, 0, sizeof(create));
>    create.size = 4096;
>    drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
> 
>    memset(&pwrite, 0, sizeof(pwrite));
>    pwrite.handle = create.handle;
>    pwrite.offset = 0;
>    pwrite.size = sizeof(batch);
>    pwrite.data_ptr = (uintptr_t)batch;
>    if (drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite))
>       goto fail;
> 
>    object.handle = create.handle;
> 
>    memset(&execbuf, 0, sizeof(execbuf));
>    execbuf.buffers_ptr = (uintptr_t)&object;
>    execbuf.buffer_count = 1;
>    if (drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf))
>       goto fail;
> 
>    object.flags = EXEC_OBJECT_PINNED;
>    ret  = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf) == 0;
> fail:
>    drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &create);
> 
>    return ret;
> }
> 
> but compares to
> 
> static bool test_has_softpin(int fd)
> {
>    if (DBG_NO_SOFTPIN)
>       return DBG_NO_SOFTPIN < 0;
> 
>    if (gem_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) < 2)
>       return false;
> 
>    return gem_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN) > 0;
> }
> 
> with a parameter. At some point, we probably want to add a GETPARAMV!
Yes, a parameter would be cleaner.

Thomas.


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
  2015-07-08 15:04       ` Daniel, Thomas
@ 2015-07-08 15:35         ` Chris Wilson
  0 siblings, 0 replies; 32+ messages in thread
From: Chris Wilson @ 2015-07-08 15:35 UTC (permalink / raw)
  To: Daniel, Thomas; +Cc: Belgaumkar, Vinay, intel-gfx, Goel, Akash

On Wed, Jul 08, 2015 at 03:04:45PM +0000, Daniel, Thomas wrote:
> > -----Original Message-----
> > From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> > Sent: Saturday, July 4, 2015 1:24 PM
> > To: Kristian Høgsberg
> > Cc: Daniel, Thomas; Belgaumkar, Vinay; intel-gfx@lists.freedesktop.org; Michal
> > Winiarsky; Goel, Akash
> > Subject: Re: [Intel-gfx] [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
> > 
> > On Fri, Jul 03, 2015 at 10:29:44PM -0700, Kristian Høgsberg wrote:
> > > On Tue, Jun 30, 2015 at 7:13 AM, Thomas Daniel <thomas.daniel@intel.com>
> > wrote:
> > > > From: Chris Wilson <chris@chris-wilson.co.uk>
> > > >
> > > > Userspace can pass in an offset that it presumes the object is located
> > > > at. The kernel will then do its utmost to fit the object into that
> > > > location. The assumption is that userspace is handling its own object
> > > > locations (for example along with full-ppgtt) and that the kernel will
> > > > rarely have to make space for the user's requests.
> > > >
> > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > >
> > > > v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
> > > > Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
> > > > (Not published externally)
> > > >
> > > > v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
> > > > to allow eviction of soft-pinned objects when another soft-pinned object
> > used
> > > > by a subsequent execbuffer overlaps reported by Michal Winiarski.
> > > > (Not published externally)
> > > >
> > > > v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
> > > > pinned first after an address conflict happens to avoid repeated conflicts in
> > > > rare cases (Suggested by Chris Wilson).  Expanded comment on
> > > > drm_i915_gem_exec_object2.offset to cover this new API.
> > 
> > Note this patch is outdated compared to the one I have in my tree. There
> > are also requirements to improve drm_mm_reserve_node().
> What requirements are these?

I switched execbuffer to try reusing the last address by default (i.e.
softpin everything) and the linear walk in reserve_node and
evict_for_vma were major bottlenecks. Adding an interval-tree to drm_mm
fixes the issue but also requires rewriting evict_for_vma. Plus using it
by default highlighted a few errors in the failure handling.

Also don't forget that softpinning requires us to be able to reuse the
vm for all rings, i.e. we need the API fix to allow user contexts for
rings other than RCS.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
  2015-06-30 14:20   ` Daniel, Thomas
@ 2015-07-15 14:55     ` Goel, Akash
  2015-07-15 15:06       ` Chris Wilson
  0 siblings, 1 reply; 32+ messages in thread
From: Goel, Akash @ 2015-07-15 14:55 UTC (permalink / raw)
  To: Daniel, Thomas, intel-gfx; +Cc: Belgaumkar, Vinay



On 6/30/2015 7:50 PM, Daniel, Thomas wrote:
> Many apologies to Michal for incorrectly spelling his name in the CC list.
>
> Thomas.
>
>> -----Original Message-----
>> From: Daniel, Thomas
>> Sent: Tuesday, June 30, 2015 3:13 PM
>> To: intel-gfx@lists.freedesktop.org
>> Cc: Chris Wilson; Goel, Akash; Belgaumkar, Vinay; Michal Winiarsky; Zou,
>> Nanhai; Daniel, Thomas
>> Subject: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
>>
>> From: Chris Wilson <chris@chris-wilson.co.uk>
>>
>> Userspace can pass in an offset that it presumes the object is located
>> at. The kernel will then do its utmost to fit the object into that
>> location. The assumption is that userspace is handling its own object
>> locations (for example along with full-ppgtt) and that the kernel will
>> rarely have to make space for the user's requests.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>
>> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
>> Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
>> (Not published externally)
>>
>> v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
>> to allow eviction of soft-pinned objects when another soft-pinned object used
>> by a subsequent execbuffer overlaps reported by Michal Winiarski.
>> (Not published externally)
>>
>> v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
>> pinned first after an address conflict happens to avoid repeated conflicts in
>> rare cases (Suggested by Chris Wilson).  Expanded comment on
>> drm_i915_gem_exec_object2.offset to cover this new API.
>>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Akash Goel <akash.goel@intel.com>
>> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>> Cc: Michal Winiarsky <michal.winiarsky@intel.com>
>> Cc: Zou Nanhai <nanhai.zou@intel.com>
>> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h            |    4 +++
>>   drivers/gpu/drm/i915/i915_gem.c            |   51 ++++++++++++++++++++--------
>>   drivers/gpu/drm/i915/i915_gem_evict.c      |   38 +++++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_gem_execbuffer.c |   16 +++++++--
>>   include/uapi/drm/i915_drm.h                |    9 +++--
>>   5 files changed, 99 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index d90a782..e96c101 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2747,7 +2747,9 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
>>   #define PIN_USER	(1<<4)
>>   #define PIN_UPDATE	(1<<5)
>>   #define PIN_FULL_RANGE	(1<<6)
>> +#define PIN_OFFSET_FIXED	(1<<7)
>>   #define PIN_OFFSET_MASK (~4095)
>> +
>>   int __must_check
>>   i915_gem_object_pin(struct drm_i915_gem_object *obj,
>>   		    struct i915_address_space *vm,
>> @@ -3085,6 +3087,8 @@ int __must_check i915_gem_evict_something(struct
>> drm_device *dev,
>>   					  unsigned long start,
>>   					  unsigned long end,
>>   					  unsigned flags);
>> +int __must_check
>> +i915_gem_evict_for_vma(struct i915_vma *target);
>>   int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>>   int i915_gem_evict_everything(struct drm_device *dev);
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index f170da6..a7e5ff2 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -3806,22 +3806,41 @@ i915_gem_object_bind_to_vm(struct
>> drm_i915_gem_object *obj,
>>   	if (IS_ERR(vma))
>>   		goto err_unpin;
>>
>> +	if (flags & PIN_OFFSET_FIXED) {
>> +		uint64_t offset = flags & PIN_OFFSET_MASK;
>> +		if (offset & (alignment - 1)) {
>> +			ret = -EINVAL;
>> +			goto err_free_vma;
>> +		}
>> +		vma->node.start = offset;
>> +		vma->node.size = size;
>> +		vma->node.color = obj->cache_level;
>> +		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
>> +		if (ret) {
>> +			ret = i915_gem_evict_for_vma(vma);
>> +			if (ret == 0)
>> +				ret = drm_mm_reserve_node(&vm->mm,
>> &vma->node);
>> +		}
>> +		if (ret)
>> +			goto err_free_vma;
>> +	} else {
>>   search_free:
>> -	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
>> -						  size, alignment,
>> -						  obj->cache_level,
>> -						  start, end,
>> -						  search_flag,
>> -						  alloc_flag);
>> -	if (ret) {
>> -		ret = i915_gem_evict_something(dev, vm, size, alignment,
>> -					       obj->cache_level,
>> -					       start, end,
>> -					       flags);
>> -		if (ret == 0)
>> -			goto search_free;
>> +		ret = drm_mm_insert_node_in_range_generic(&vm->mm,
>> &vma->node,
>> +						  	  size, alignment,
>> +						  	  obj->cache_level,
>> +						  	  start, end,
>> +						  	  search_flag,
>> +						  	  alloc_flag);
>> +		if (ret) {
>> +			ret = i915_gem_evict_something(dev, vm, size,
>> alignment,
>> +					       	       obj->cache_level,
>> +					       	       start, end,
>> +					       	       flags);
>> +			if (ret == 0)
>> +				goto search_free;
>>
>> -		goto err_free_vma;
>> +			goto err_free_vma;
>> +		}
>>   	}
>>   	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
>>   		ret = -EINVAL;
>> @@ -4357,6 +4376,10 @@ i915_vma_misplaced(struct i915_vma *vma,
>> uint32_t alignment, uint64_t flags)
>>   	    vma->node.start < (flags & PIN_OFFSET_MASK))
>>   		return true;
>>
>> +	if (flags & PIN_OFFSET_FIXED &&
>> +	    vma->node.start != (flags & PIN_OFFSET_MASK))
>> +		return true;
>> +
>>   	return false;
>>   }
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c
>> b/drivers/gpu/drm/i915/i915_gem_evict.c
>> index d09e35e..6098e2f 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
>> @@ -199,6 +199,44 @@ found:
>>   	return ret;
>>   }
>>
>> +int
>> +i915_gem_evict_for_vma(struct i915_vma *target)
>> +{
>> +	struct drm_mm_node *node, *next;
>> +
>> +	list_for_each_entry_safe(node, next,
>> +			&target->vm->mm.head_node.node_list,
>> +			node_list) {
>> +		struct i915_vma *vma;
>> +		int ret;
>> +
>> +		if (node->start + node->size <= target->node.start)
>> +			continue;
>> +		if (node->start >= target->node.start + target->node.size)
>> +			break;
>> +
>> +		vma = container_of(node, typeof(*vma), node);
>> +
>> +		if (vma->pin_count) {
>> +			/* We may need to evict a buffer in the same batch */
>> +			if (!vma->exec_entry)
>> +				return -EBUSY;
>> +
>> +			if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
>> +				/* Overlapping fixed objects in the same batch
>> */
>> +				return -EINVAL;
>> +
>> +			return -ENOSPC;

Can we actually hit this condition, considering the soft pinned objects 
are now on the front side of 'eb->vmas' list ?
If we do encounter such a case, it probably means that the overlapping 
object is already pinned from some other path.

Is there a scope of an additional check here ?
i.e. if (vma->pin_count) is > 1, this indicates that the object is not 
only pinned due to execbuffer, hence cannot be evicted, so -EBUSY can be 
straight away returned to User.

Best regards
Akash

>> +		}
>> +
>> +		ret = i915_vma_unbind(vma);
>> +		if (ret)
>> +			return ret;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>   /**
>>    * i915_gem_evict_vm - Evict all idle vmas from a vm
>>    * @vm: Address space to cleanse
>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> index 3d94744..4e6955e 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> @@ -596,6 +596,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma
>> *vma,
>>   			flags |= PIN_GLOBAL | PIN_MAPPABLE;
>>   		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
>>   			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
>> +		if (entry->flags & EXEC_OBJECT_PINNED)
>> +			flags |= entry->offset | PIN_OFFSET_FIXED;
>>   	}
>>
>>   	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
>> @@ -665,6 +667,10 @@ eb_vma_misplaced(struct i915_vma *vma)
>>   	    vma->node.start & (entry->alignment - 1))
>>   		return true;
>>
>> +	if (entry->flags & EXEC_OBJECT_PINNED &&
>> +	    vma->node.start != entry->offset)
>> +		return true;
>> +
>>   	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
>>   	    vma->node.start < BATCH_OFFSET_BIAS)
>>   		return true;
>> @@ -690,6 +696,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
>> *ring,
>>   	struct i915_vma *vma;
>>   	struct i915_address_space *vm;
>>   	struct list_head ordered_vmas;
>> +	struct list_head pinned_vmas;
>>   	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
>>   	int retry;
>>
>> @@ -698,6 +705,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
>> *ring,
>>   	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
>>
>>   	INIT_LIST_HEAD(&ordered_vmas);
>> +	INIT_LIST_HEAD(&pinned_vmas);
>>   	while (!list_empty(vmas)) {
>>   		struct drm_i915_gem_exec_object2 *entry;
>>   		bool need_fence, need_mappable;
>> @@ -716,7 +724,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
>> *ring,
>>   			obj->tiling_mode != I915_TILING_NONE;
>>   		need_mappable = need_fence || need_reloc_mappable(vma);
>>
>> -		if (need_mappable) {
>> +		if (entry->flags & EXEC_OBJECT_PINNED)
>> +			list_move_tail(&vma->exec_list, &pinned_vmas);
>> +		else if (need_mappable) {
>>   			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>>   			list_move(&vma->exec_list, &ordered_vmas);
>>   		} else
>> @@ -726,6 +736,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
>> *ring,
>>   		obj->base.pending_write_domain = 0;
>>   	}
>>   	list_splice(&ordered_vmas, vmas);
>> +	list_splice(&pinned_vmas, vmas);
>>
>>   	/* Attempt to pin all of the buffers into the GTT.
>>   	 * This is done in 3 phases:
>> @@ -1404,7 +1415,8 @@ eb_get_batch(struct eb_vmas *eb)
>>   	 * Note that actual hangs have only been observed on gen7, but for
>>   	 * paranoia do it everywhere.
>>   	 */
>> -	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
>> +	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
>> +		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
>>
>>   	return vma->obj;
>>   }
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 55ba527..18f8f99 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -677,8 +677,10 @@ struct drm_i915_gem_exec_object2 {
>>   	__u64 alignment;
>>
>>   	/**
>> -	 * Returned value of the updated offset of the object, for future
>> -	 * presumed_offset writes.
>> +	 * When the EXEC_OBJECT_PINNED flag is specified this is populated by
>> +	 * the user with the GTT offset at which this object will be pinned.
>> +	 * Otherwise the kernel populates it with the value of the updated
>> +	 * offset of the object, for future presumed_offset writes.
>>   	 */
>>   	__u64 offset;
>>
>> @@ -686,7 +688,8 @@ struct drm_i915_gem_exec_object2 {
>>   #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
>>   #define EXEC_OBJECT_WRITE	(1<<2)
>>   #define EXEC_OBJECT_SUPPORTS_48BBADDRESS (1<<3)
>> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -
>> (EXEC_OBJECT_SUPPORTS_48BBADDRESS<<1)
>> +#define EXEC_OBJECT_PINNED	(1<<4)
>> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
>>   	__u64 flags;
>>
>>   	__u64 rsvd1;
>> --
>> 1.7.9.5
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
  2015-07-15 14:55     ` Goel, Akash
@ 2015-07-15 15:06       ` Chris Wilson
  2015-07-15 15:41         ` Daniel, Thomas
  0 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2015-07-15 15:06 UTC (permalink / raw)
  To: Goel, Akash; +Cc: intel-gfx, Belgaumkar, Vinay

On Wed, Jul 15, 2015 at 08:25:23PM +0530, Goel, Akash wrote:
> >>+int
> >>+i915_gem_evict_for_vma(struct i915_vma *target)
> >>+{
> >>+	struct drm_mm_node *node, *next;
> >>+
> >>+	list_for_each_entry_safe(node, next,
> >>+			&target->vm->mm.head_node.node_list,
> >>+			node_list) {
> >>+		struct i915_vma *vma;
> >>+		int ret;
> >>+
> >>+		if (node->start + node->size <= target->node.start)
> >>+			continue;
> >>+		if (node->start >= target->node.start + target->node.size)
> >>+			break;
> >>+
> >>+		vma = container_of(node, typeof(*vma), node);
> >>+
> >>+		if (vma->pin_count) {
> >>+			/* We may need to evict a buffer in the same batch */
> >>+			if (!vma->exec_entry)
> >>+				return -EBUSY;
> >>+
> >>+			if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
> >>+				/* Overlapping fixed objects in the same batch
> >>*/
> >>+				return -EINVAL;
> >>+
> >>+			return -ENOSPC;
> 
> Can we actually hit this condition, considering the soft pinned
> objects are now on the front side of 'eb->vmas' list ?
> If we do encounter such a case, it probably means that the
> overlapping object is already pinned from some other path.

Note that softpinned objects are only first on the second pass through
the reservation.
 
> Is there a scope of an additional check here ?
> i.e. if (vma->pin_count) is > 1, this indicates that the object is
> not only pinned due to execbuffer, hence cannot be evicted, so
> -EBUSY can be straight away returned to User.

Consider this instead:

int
i915_gem_evict_for_vma(struct i915_vma *target, unsigned flags)
{
        struct list_head eviction_list;
        struct interval_tree_node *it;
        u64 end = target->node.start + target->node.size;
        struct drm_mm_node *node;
        struct i915_vma *vma, *next;
        int ret;

        it = interval_tree_iter_first(&target->vm->mm.interval_tree,
                                      target->node.start, end -1);
        if (it == NULL)
                return 0;

        INIT_LIST_HEAD(&eviction_list);
        node = container_of(it, typeof(*node), it);
        list_for_each_entry_from(node,
                                 &target->vm->mm.head_node.node_list,
                                 node_list) {
                if (node->start >= end)
                        break;

                vma = container_of(node, typeof(*vma), node);
                if (flags & PIN_NONBLOCK &&
                    (vma->pin_count || vma->active.request)) {
                        ret = -ENOSPC;
                        break;
                }

                if (vma->exec_entry &&
                    vma->exec_entry->flags & EXEC_OBJECT_PINNED) {
                        /* Overlapping pinned objects in the same batch */
                        ret = -EINVAL;
                        break;
                }

                if (vma->pin_count) {
                        /* We may need to evict an buffer in the same batch */
                        ret = vma->exec_entry ? -ENOSPC : -EBUSY;
                        break;
                }

                list_add(&vma->exec_list, &eviction_list);
                drm_gem_object_reference(&vma->obj->base);
        }

        ret = 0;
        list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
                struct drm_i915_gem_object *obj = vma->obj;
                if (ret == 0)
                        ret = i915_vma_unbind(vma);
                drm_gem_object_unreference(&obj->base);
        }

        return ret;
}
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
  2015-07-15 15:06       ` Chris Wilson
@ 2015-07-15 15:41         ` Daniel, Thomas
  2015-07-15 15:46           ` Chris Wilson
  0 siblings, 1 reply; 32+ messages in thread
From: Daniel, Thomas @ 2015-07-15 15:41 UTC (permalink / raw)
  To: Chris Wilson, Goel, Akash; +Cc: Belgaumkar, Vinay, intel-gfx

> -----Original Message-----
> From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> Sent: Wednesday, July 15, 2015 4:06 PM
> To: Goel, Akash
> Cc: Daniel, Thomas; intel-gfx@lists.freedesktop.org; Belgaumkar, Vinay;
> Winiarski, Michal; Zou, Nanhai
> Subject: Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
> 
> On Wed, Jul 15, 2015 at 08:25:23PM +0530, Goel, Akash wrote:
> > >>+int
> > >>+i915_gem_evict_for_vma(struct i915_vma *target)
> > >>+{
> > >>+	struct drm_mm_node *node, *next;
> > >>+
> > >>+	list_for_each_entry_safe(node, next,
> > >>+			&target->vm->mm.head_node.node_list,
> > >>+			node_list) {
> > >>+		struct i915_vma *vma;
> > >>+		int ret;
> > >>+
> > >>+		if (node->start + node->size <= target->node.start)
> > >>+			continue;
> > >>+		if (node->start >= target->node.start + target->node.size)
> > >>+			break;
> > >>+
> > >>+		vma = container_of(node, typeof(*vma), node);
> > >>+
> > >>+		if (vma->pin_count) {
> > >>+			/* We may need to evict a buffer in the same batch */
> > >>+			if (!vma->exec_entry)
> > >>+				return -EBUSY;
> > >>+
> > >>+			if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
> > >>+				/* Overlapping fixed objects in the same batch
> > >>*/
> > >>+				return -EINVAL;
> > >>+
> > >>+			return -ENOSPC;
> >
> > Can we actually hit this condition, considering the soft pinned
> > objects are now on the front side of 'eb->vmas' list ?
> > If we do encounter such a case, it probably means that the
> > overlapping object is already pinned from some other path.
> 
> Note that softpinned objects are only first on the second pass through
> the reservation.
Eh?  I modified i915_gem_execbuffer_reserve() to always put the softpinned vmas first so they should never collide with objects in the same execbuff.

> 
> > Is there a scope of an additional check here ?
> > i.e. if (vma->pin_count) is > 1, this indicates that the object is
> > not only pinned due to execbuffer, hence cannot be evicted, so
> > -EBUSY can be straight away returned to User.
> 
> Consider this instead:
> 
> int
> i915_gem_evict_for_vma(struct i915_vma *target, unsigned flags)
> {
>         struct list_head eviction_list;
>         struct interval_tree_node *it;
>         u64 end = target->node.start + target->node.size;
>         struct drm_mm_node *node;
>         struct i915_vma *vma, *next;
>         int ret;
> 
>         it = interval_tree_iter_first(&target->vm->mm.interval_tree,
>                                       target->node.start, end -1);
>         if (it == NULL)
>                 return 0;
> 
>         INIT_LIST_HEAD(&eviction_list);
>         node = container_of(it, typeof(*node), it);
>         list_for_each_entry_from(node,
>                                  &target->vm->mm.head_node.node_list,
>                                  node_list) {
>                 if (node->start >= end)
>                         break;
> 
>                 vma = container_of(node, typeof(*vma), node);
>                 if (flags & PIN_NONBLOCK &&
>                     (vma->pin_count || vma->active.request)) {
>                         ret = -ENOSPC;
>                         break;
>                 }
> 
>                 if (vma->exec_entry &&
>                     vma->exec_entry->flags & EXEC_OBJECT_PINNED) {
>                         /* Overlapping pinned objects in the same batch */
>                         ret = -EINVAL;
>                         break;
>                 }
> 
>                 if (vma->pin_count) {
>                         /* We may need to evict an buffer in the same batch */
>                         ret = vma->exec_entry ? -ENOSPC : -EBUSY;
>                         break;
>                 }
> 
>                 list_add(&vma->exec_list, &eviction_list);
>                 drm_gem_object_reference(&vma->obj->base);
>         }
> 
>         ret = 0;
>         list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
>                 struct drm_i915_gem_object *obj = vma->obj;
>                 if (ret == 0)
>                         ret = i915_vma_unbind(vma);
>                 drm_gem_object_unreference(&obj->base);
>         }
> 
>         return ret;
> }
> -Chris
This contains special stuff which I don't have visibility of (mm.interval_tree, vma.active).

Thomas.

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
  2015-07-15 15:41         ` Daniel, Thomas
@ 2015-07-15 15:46           ` Chris Wilson
  2015-07-15 15:58             ` Daniel, Thomas
  0 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2015-07-15 15:46 UTC (permalink / raw)
  To: Daniel, Thomas; +Cc: Belgaumkar, Vinay, Goel, Akash, intel-gfx

On Wed, Jul 15, 2015 at 03:41:49PM +0000, Daniel, Thomas wrote:
> > -----Original Message-----
> > From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> > Sent: Wednesday, July 15, 2015 4:06 PM
> > To: Goel, Akash
> > Cc: Daniel, Thomas; intel-gfx@lists.freedesktop.org; Belgaumkar, Vinay;
> > Winiarski, Michal; Zou, Nanhai
> > Subject: Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
> > 
> > On Wed, Jul 15, 2015 at 08:25:23PM +0530, Goel, Akash wrote:
> > > >>+int
> > > >>+i915_gem_evict_for_vma(struct i915_vma *target)
> > > >>+{
> > > >>+	struct drm_mm_node *node, *next;
> > > >>+
> > > >>+	list_for_each_entry_safe(node, next,
> > > >>+			&target->vm->mm.head_node.node_list,
> > > >>+			node_list) {
> > > >>+		struct i915_vma *vma;
> > > >>+		int ret;
> > > >>+
> > > >>+		if (node->start + node->size <= target->node.start)
> > > >>+			continue;
> > > >>+		if (node->start >= target->node.start + target->node.size)
> > > >>+			break;
> > > >>+
> > > >>+		vma = container_of(node, typeof(*vma), node);
> > > >>+
> > > >>+		if (vma->pin_count) {
> > > >>+			/* We may need to evict a buffer in the same batch */
> > > >>+			if (!vma->exec_entry)
> > > >>+				return -EBUSY;
> > > >>+
> > > >>+			if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
> > > >>+				/* Overlapping fixed objects in the same batch
> > > >>*/
> > > >>+				return -EINVAL;
> > > >>+
> > > >>+			return -ENOSPC;
> > >
> > > Can we actually hit this condition, considering the soft pinned
> > > objects are now on the front side of 'eb->vmas' list ?
> > > If we do encounter such a case, it probably means that the
> > > overlapping object is already pinned from some other path.
> > 
> > Note that softpinned objects are only first on the second pass through
> > the reservation.
> Eh?  I modified i915_gem_execbuffer_reserve() to always put the softpinned vmas first so they should never collide with objects in the same execbuff.

That would be a severe performance penalty for the usual case where we
only have to run once through the list.

> This contains special stuff which I don't have visibility of (mm.interval_tree, vma.active).

Yes. I mentioned that using softpin introduced some perf regressions.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
  2015-07-15 15:46           ` Chris Wilson
@ 2015-07-15 15:58             ` Daniel, Thomas
  2015-07-15 16:04               ` Chris Wilson
  0 siblings, 1 reply; 32+ messages in thread
From: Daniel, Thomas @ 2015-07-15 15:58 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Belgaumkar, Vinay, Goel, Akash, intel-gfx

> -----Original Message-----
> From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> Sent: Wednesday, July 15, 2015 4:47 PM
> To: Daniel, Thomas
> Cc: Goel, Akash; intel-gfx@lists.freedesktop.org; Belgaumkar, Vinay; Winiarski,
> Michal; Zou, Nanhai
> Subject: Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
> 
> On Wed, Jul 15, 2015 at 03:41:49PM +0000, Daniel, Thomas wrote:
> > > -----Original Message-----
> > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> > > Sent: Wednesday, July 15, 2015 4:06 PM
> > > To: Goel, Akash
> > > Cc: Daniel, Thomas; intel-gfx@lists.freedesktop.org; Belgaumkar, Vinay;
> > > Winiarski, Michal; Zou, Nanhai
> > > Subject: Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
> > >
> > > On Wed, Jul 15, 2015 at 08:25:23PM +0530, Goel, Akash wrote:
> > > > >>+int
> > > > >>+i915_gem_evict_for_vma(struct i915_vma *target)
> > > > >>+{
> > > > >>+	struct drm_mm_node *node, *next;
> > > > >>+
> > > > >>+	list_for_each_entry_safe(node, next,
> > > > >>+			&target->vm->mm.head_node.node_list,
> > > > >>+			node_list) {
> > > > >>+		struct i915_vma *vma;
> > > > >>+		int ret;
> > > > >>+
> > > > >>+		if (node->start + node->size <= target->node.start)
> > > > >>+			continue;
> > > > >>+		if (node->start >= target->node.start + target-
> >node.size)
> > > > >>+			break;
> > > > >>+
> > > > >>+		vma = container_of(node, typeof(*vma), node);
> > > > >>+
> > > > >>+		if (vma->pin_count) {
> > > > >>+			/* We may need to evict a buffer in the same
> batch */
> > > > >>+			if (!vma->exec_entry)
> > > > >>+				return -EBUSY;
> > > > >>+
> > > > >>+			if (vma->exec_entry->flags &
> EXEC_OBJECT_PINNED)
> > > > >>+				/* Overlapping fixed objects in the
> same batch
> > > > >>*/
> > > > >>+				return -EINVAL;
> > > > >>+
> > > > >>+			return -ENOSPC;
> > > >
> > > > Can we actually hit this condition, considering the soft pinned
> > > > objects are now on the front side of 'eb->vmas' list ?
> > > > If we do encounter such a case, it probably means that the
> > > > overlapping object is already pinned from some other path.
> > >
> > > Note that softpinned objects are only first on the second pass through
> > > the reservation.
> > Eh?  I modified i915_gem_execbuffer_reserve() to always put the softpinned
> vmas first so they should never collide with objects in the same execbuff.
> 
> That would be a severe performance penalty for the usual case where we
> only have to run once through the list.
How so?  execbuffer_reserve() already traverses the entire vma list and does a list_move or list_move_tail onto ordered_vmas for each entry.  I've simply created a new list for the softpinned vmas and spliced the two together at the end.

> 
> > This contains special stuff which I don't have visibility of (mm.interval_tree,
> vma.active).
> 
> Yes. I mentioned that using softpin introduced some perf regressions.
> -Chris
Can we get your improvements as follow-up patches?

Cheers,
Thomas.


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
  2015-07-15 15:58             ` Daniel, Thomas
@ 2015-07-15 16:04               ` Chris Wilson
  0 siblings, 0 replies; 32+ messages in thread
From: Chris Wilson @ 2015-07-15 16:04 UTC (permalink / raw)
  To: Daniel, Thomas; +Cc: Belgaumkar, Vinay, Goel, Akash, intel-gfx

On Wed, Jul 15, 2015 at 03:58:33PM +0000, Daniel, Thomas wrote:
> > -----Original Message-----
> > From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> > Sent: Wednesday, July 15, 2015 4:47 PM
> > To: Daniel, Thomas
> > Cc: Goel, Akash; intel-gfx@lists.freedesktop.org; Belgaumkar, Vinay; Winiarski,
> > Michal; Zou, Nanhai
> > Subject: Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
> > 
> > On Wed, Jul 15, 2015 at 03:41:49PM +0000, Daniel, Thomas wrote:
> > > > -----Original Message-----
> > > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> > > > Sent: Wednesday, July 15, 2015 4:06 PM
> > > > To: Goel, Akash
> > > > Cc: Daniel, Thomas; intel-gfx@lists.freedesktop.org; Belgaumkar, Vinay;
> > > > Winiarski, Michal; Zou, Nanhai
> > > > Subject: Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
> > > >
> > > > On Wed, Jul 15, 2015 at 08:25:23PM +0530, Goel, Akash wrote:
> > > > > >>+int
> > > > > >>+i915_gem_evict_for_vma(struct i915_vma *target)
> > > > > >>+{
> > > > > >>+	struct drm_mm_node *node, *next;
> > > > > >>+
> > > > > >>+	list_for_each_entry_safe(node, next,
> > > > > >>+			&target->vm->mm.head_node.node_list,
> > > > > >>+			node_list) {
> > > > > >>+		struct i915_vma *vma;
> > > > > >>+		int ret;
> > > > > >>+
> > > > > >>+		if (node->start + node->size <= target->node.start)
> > > > > >>+			continue;
> > > > > >>+		if (node->start >= target->node.start + target-
> > >node.size)
> > > > > >>+			break;
> > > > > >>+
> > > > > >>+		vma = container_of(node, typeof(*vma), node);
> > > > > >>+
> > > > > >>+		if (vma->pin_count) {
> > > > > >>+			/* We may need to evict a buffer in the same
> > batch */
> > > > > >>+			if (!vma->exec_entry)
> > > > > >>+				return -EBUSY;
> > > > > >>+
> > > > > >>+			if (vma->exec_entry->flags &
> > EXEC_OBJECT_PINNED)
> > > > > >>+				/* Overlapping fixed objects in the
> > same batch
> > > > > >>*/
> > > > > >>+				return -EINVAL;
> > > > > >>+
> > > > > >>+			return -ENOSPC;
> > > > >
> > > > > Can we actually hit this condition, considering the soft pinned
> > > > > objects are now on the front side of 'eb->vmas' list ?
> > > > > If we do encounter such a case, it probably means that the
> > > > > overlapping object is already pinned from some other path.
> > > >
> > > > Note that softpinned objects are only first on the second pass through
> > > > the reservation.
> > > Eh?  I modified i915_gem_execbuffer_reserve() to always put the softpinned
> > vmas first so they should never collide with objects in the same execbuff.
> > 
> > That would be a severe performance penalty for the usual case where we
> > only have to run once through the list.
> How so?  execbuffer_reserve() already traverses the entire vma list and does a list_move or list_move_tail onto ordered_vmas for each entry.  I've simply created a new list for the softpinned vmas and spliced the two together at the end.

I posted the fix for that a few months ago.

> > > This contains special stuff which I don't have visibility of (mm.interval_tree,
> > vma.active).
> > 
> > Yes. I mentioned that using softpin introduced some perf regressions.
> > -Chris
> Can we get your improvements as follow-up patches?

No. Softpin has requirements that are not met by the current kernel.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v5] drm/i915: Add soft-pinning API for execbuffer
  2015-06-30 14:13 ` [PATCH v4] " Thomas Daniel
                     ` (2 preceding siblings ...)
  2015-07-04  7:53   ` Chris Wilson
@ 2015-07-20 16:41   ` Thomas Daniel
  2015-07-20 16:50     ` Chris Wilson
  2015-10-16 10:59     ` [PATCH v6] " Thomas Daniel
  3 siblings, 2 replies; 32+ messages in thread
From: Thomas Daniel @ 2015-07-20 16:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: Kristian Høgsberg, Akash Goel, Vinay Belgaumkar

From: Chris Wilson <chris@chris-wilson.co.uk>

Userspace can pass in an offset that it presumes the object is located
at. The kernel will then do its utmost to fit the object into that
location. The assumption is that userspace is handling its own object
locations (for example along with full-ppgtt) and that the kernel will
rarely have to make space for the user's requests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
(Not published externally)

v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
to allow eviction of soft-pinned objects when another soft-pinned object used
by a subsequent execbuffer overlaps reported by Michal Winiarski.
(Not published externally)

v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
pinned first after an address conflict happens to avoid repeated conflicts in
rare cases (Suggested by Chris Wilson).  Expanded comment on
drm_i915_gem_exec_object2.offset to cover this new API.

v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability
(Kristian). Added check for multiple pinnings on eviction (Akash). Made sure
buffers are not considered misplaced without the user specifying
EXEC_OBJECT_SUPPORTS_48BBADDRESS.  User must assume responsibility for any
addressing workarounds.  Updated object2.offset field comment again to clarify
NO_RELOC case (Chris).  checkpatch cleanup.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Akash Goel <akash.goel@intel.com>
Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Zou Nanhai <nanhai.zou@intel.com>
Cc: Kristian Høgsberg <hoegsberg@gmail.com>
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c            |    3 ++
 drivers/gpu/drm/i915/i915_drv.h            |    4 +++
 drivers/gpu/drm/i915/i915_gem.c            |   52 ++++++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_evict.c      |   39 +++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   19 ++++++++--
 include/uapi/drm/i915_drm.h                |   12 +++++--
 6 files changed, 109 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index c5349fa..9805546 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 		value = i915.enable_hangcheck &&
 			intel_has_gpu_reset(dev);
 		break;
+	case I915_PARAM_HAS_EXEC_SOFTPIN:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d90a782..e96c101 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2747,7 +2747,9 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
 #define PIN_USER	(1<<4)
 #define PIN_UPDATE	(1<<5)
 #define PIN_FULL_RANGE	(1<<6)
+#define PIN_OFFSET_FIXED	(1<<7)
 #define PIN_OFFSET_MASK (~4095)
+
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    struct i915_address_space *vm,
@@ -3085,6 +3087,8 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
 					  unsigned long start,
 					  unsigned long end,
 					  unsigned flags);
+int __must_check
+i915_gem_evict_for_vma(struct i915_vma *target);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 int i915_gem_evict_everything(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f170da6..fea4197 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3806,22 +3806,42 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		goto err_unpin;
 
+	if (flags & PIN_OFFSET_FIXED) {
+		uint64_t offset = flags & PIN_OFFSET_MASK;
+
+		if (offset & (alignment - 1)) {
+			ret = -EINVAL;
+			goto err_free_vma;
+		}
+		vma->node.start = offset;
+		vma->node.size = size;
+		vma->node.color = obj->cache_level;
+		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		if (ret) {
+			ret = i915_gem_evict_for_vma(vma);
+			if (ret == 0)
+				ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		}
+		if (ret)
+			goto err_free_vma;
+	} else {
 search_free:
-	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
-						  size, alignment,
-						  obj->cache_level,
-						  start, end,
-						  search_flag,
-						  alloc_flag);
-	if (ret) {
-		ret = i915_gem_evict_something(dev, vm, size, alignment,
-					       obj->cache_level,
-					       start, end,
-					       flags);
-		if (ret == 0)
-			goto search_free;
+		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+							  size, alignment,
+							  obj->cache_level,
+							  start, end,
+							  search_flag,
+							  alloc_flag);
+		if (ret) {
+			ret = i915_gem_evict_something(dev, vm, size, alignment,
+						       obj->cache_level,
+						       start, end,
+						       flags);
+			if (ret == 0)
+				goto search_free;
 
-		goto err_free_vma;
+			goto err_free_vma;
+		}
 	}
 	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
 		ret = -EINVAL;
@@ -4357,6 +4377,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
 	    vma->node.start < (flags & PIN_OFFSET_MASK))
 		return true;
 
+	if (flags & PIN_OFFSET_FIXED &&
+	    vma->node.start != (flags & PIN_OFFSET_MASK))
+		return true;
+
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index d09e35e..bc5b91c 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -199,6 +199,45 @@ found:
 	return ret;
 }
 
+int
+i915_gem_evict_for_vma(struct i915_vma *target)
+{
+	struct drm_mm_node *node, *next;
+
+	list_for_each_entry_safe(node, next,
+			&target->vm->mm.head_node.node_list,
+			node_list) {
+		struct i915_vma *vma;
+		int ret;
+
+		if (node->start + node->size <= target->node.start)
+			continue;
+		if (node->start >= target->node.start + target->node.size)
+			break;
+
+		vma = container_of(node, typeof(*vma), node);
+
+		if (vma->pin_count) {
+			if (!vma->exec_entry || (vma->pin_count > 1))
+				/* Object is pinned for some other use */
+				return -EBUSY;
+
+			/* We need to evict a buffer in the same batch */
+			if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
+				/* Overlapping fixed objects in the same batch */
+				return -EINVAL;
+
+			return -ENOSPC;
+		}
+
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 /**
  * i915_gem_evict_vm - Evict all idle vmas from a vm
  * @vm: Address space to cleanse
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3d94744..6cd3c66 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -596,6 +596,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 			flags |= PIN_GLOBAL | PIN_MAPPABLE;
 		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
 			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+		if (entry->flags & EXEC_OBJECT_PINNED)
+			flags |= entry->offset | PIN_OFFSET_FIXED;
 	}
 
 	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
@@ -665,6 +667,10 @@ eb_vma_misplaced(struct i915_vma *vma)
 	    vma->node.start & (entry->alignment - 1))
 		return true;
 
+	if (entry->flags & EXEC_OBJECT_PINNED &&
+	    vma->node.start != entry->offset)
+		return true;
+
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
 	    vma->node.start < BATCH_OFFSET_BIAS)
 		return true;
@@ -673,7 +679,8 @@ eb_vma_misplaced(struct i915_vma *vma)
 	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
 		return !only_mappable_for_reloc(entry->flags);
 
-	if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48BBADDRESS) &&
+	if (!(entry->flags &
+	    (EXEC_OBJECT_SUPPORTS_48BBADDRESS | EXEC_OBJECT_PINNED)) &&
 	    vma->node.start >= (1ULL << 32))
 		return true;
 
@@ -690,6 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 	struct i915_vma *vma;
 	struct i915_address_space *vm;
 	struct list_head ordered_vmas;
+	struct list_head pinned_vmas;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	int retry;
 
@@ -698,6 +706,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
 
 	INIT_LIST_HEAD(&ordered_vmas);
+	INIT_LIST_HEAD(&pinned_vmas);
 	while (!list_empty(vmas)) {
 		struct drm_i915_gem_exec_object2 *entry;
 		bool need_fence, need_mappable;
@@ -716,7 +725,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 			obj->tiling_mode != I915_TILING_NONE;
 		need_mappable = need_fence || need_reloc_mappable(vma);
 
-		if (need_mappable) {
+		if (entry->flags & EXEC_OBJECT_PINNED)
+			list_move_tail(&vma->exec_list, &pinned_vmas);
+		else if (need_mappable) {
 			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
 			list_move(&vma->exec_list, &ordered_vmas);
 		} else
@@ -726,6 +737,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 		obj->base.pending_write_domain = 0;
 	}
 	list_splice(&ordered_vmas, vmas);
+	list_splice(&pinned_vmas, vmas);
 
 	/* Attempt to pin all of the buffers into the GTT.
 	 * This is done in 3 phases:
@@ -1404,7 +1416,8 @@ eb_get_batch(struct eb_vmas *eb)
 	 * Note that actual hangs have only been observed on gen7, but for
 	 * paranoia do it everywhere.
 	 */
-	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
 
 	return vma->obj;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 55ba527..03ad6a5 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_SUBSLICE_TOTAL	 33
 #define I915_PARAM_EU_TOTAL		 34
 #define I915_PARAM_HAS_GPU_RESET	 35
+#define I915_PARAM_HAS_EXEC_SOFTPIN	 36
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -677,8 +678,12 @@ struct drm_i915_gem_exec_object2 {
 	__u64 alignment;
 
 	/**
-	 * Returned value of the updated offset of the object, for future
-	 * presumed_offset writes.
+	 * When the EXEC_OBJECT_PINNED flag is specified this is populated by
+	 * the user with the GTT offset at which this object will be pinned.
+	 * When the I915_EXEC_NO_RELOC flag is specified this must contain the
+	 * presumed_offset of the object.
+	 * During execbuffer2 the kernel populates it with the value of the
+	 * current GTT offset of the object, for future presumed_offset writes.
 	 */
 	__u64 offset;
 
@@ -686,7 +691,8 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
 #define EXEC_OBJECT_WRITE	(1<<2)
 #define EXEC_OBJECT_SUPPORTS_48BBADDRESS (1<<3)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48BBADDRESS<<1)
+#define EXEC_OBJECT_PINNED	(1<<4)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
 	__u64 flags;
 
 	__u64 rsvd1;
-- 
1.7.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH v5] drm/i915: Add soft-pinning API for execbuffer
  2015-07-20 16:41   ` [PATCH v5] " Thomas Daniel
@ 2015-07-20 16:50     ` Chris Wilson
  2015-10-16 10:59     ` [PATCH v6] " Thomas Daniel
  1 sibling, 0 replies; 32+ messages in thread
From: Chris Wilson @ 2015-07-20 16:50 UTC (permalink / raw)
  To: Thomas Daniel
  Cc: intel-gfx, Kristian Høgsberg, Akash Goel, Vinay Belgaumkar

On Mon, Jul 20, 2015 at 05:41:44PM +0100, Thomas Daniel wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Userspace can pass in an offset that it presumes the object is located
> at. The kernel will then do its utmost to fit the object into that
> location. The assumption is that userspace is handling its own object
> locations (for example along with full-ppgtt) and that the kernel will
> rarely have to make space for the user's requests.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
> Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
> (Not published externally)
> 
> v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
> to allow eviction of soft-pinned objects when another soft-pinned object used
> by a subsequent execbuffer overlaps reported by Michal Winiarski.
> (Not published externally)
> 
> v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
> pinned first after an address conflict happens to avoid repeated conflicts in
> rare cases (Suggested by Chris Wilson).  Expanded comment on
> drm_i915_gem_exec_object2.offset to cover this new API.
> 
> v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability
> (Kristian). Added check for multiple pinnings on eviction (Akash). Made sure
> buffers are not considered misplaced without the user specifying
> EXEC_OBJECT_SUPPORTS_48BBADDRESS.  User must assume responsibility for any
> addressing workarounds.  Updated object2.offset field comment again to clarify
> NO_RELOC case (Chris).  checkpatch cleanup.

There are a couple of requirements we need first. One we have to be able
to use our vm on any engine, and the second is that without fixing the
list iterator in drm_mm using softpining ends up as a performance
regression.

It would be nice if people reviewed patches so that I can get those
fixes in.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v6] drm/i915: Add soft-pinning API for execbuffer
  2015-07-20 16:41   ` [PATCH v5] " Thomas Daniel
  2015-07-20 16:50     ` Chris Wilson
@ 2015-10-16 10:59     ` Thomas Daniel
  2015-10-16 14:09       ` Goel, Akash
                         ` (2 more replies)
  1 sibling, 3 replies; 32+ messages in thread
From: Thomas Daniel @ 2015-10-16 10:59 UTC (permalink / raw)
  To: intel-gfx; +Cc: Kristian Høgsberg, Akash Goel, Vinay Belgaumkar

From: Chris Wilson <chris@chris-wilson.co.uk>

Userspace can pass in an offset that it presumes the object is located
at. The kernel will then do its utmost to fit the object into that
location. The assumption is that userspace is handling its own object
locations (for example along with full-ppgtt) and that the kernel will
rarely have to make space for the user's requests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
(Not published externally)

v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
to allow eviction of soft-pinned objects when another soft-pinned object used
by a subsequent execbuffer overlaps reported by Michal Winiarski.
(Not published externally)

v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
pinned first after an address conflict happens to avoid repeated conflicts in
rare cases (Suggested by Chris Wilson).  Expanded comment on
drm_i915_gem_exec_object2.offset to cover this new API.

v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability
(Kristian). Added check for multiple pinnings on eviction (Akash). Made sure
buffers are not considered misplaced without the user specifying
EXEC_OBJECT_SUPPORTS_48B_ADDRESS.  User must assume responsibility for any
addressing workarounds.  Updated object2.offset field comment again to clarify
NO_RELOC case (Chris).  checkpatch cleanup.

v6: Trivial rebase on latest drm-intel-nightly

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Akash Goel <akash.goel@intel.com>
Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Zou Nanhai <nanhai.zou@intel.com>
Cc: Kristian Høgsberg <hoegsberg@gmail.com>
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c            |  3 ++
 drivers/gpu/drm/i915/i915_drv.h            |  2 +
 drivers/gpu/drm/i915/i915_gem.c            | 64 ++++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_gem_evict.c      | 39 ++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 19 +++++++--
 include/uapi/drm/i915_drm.h                | 12 ++++--
 6 files changed, 113 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 2336af9..824c6c3 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_RESOURCE_STREAMER:
 		value = HAS_RESOURCE_STREAMER(dev);
 		break;
+	case I915_PARAM_HAS_EXEC_SOFTPIN:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1396af9..73c3acf 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2816,6 +2816,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
 #define PIN_UPDATE	(1<<5)
 #define PIN_ZONE_4G	(1<<6)
 #define PIN_HIGH	(1<<7)
+#define PIN_OFFSET_FIXED	(1<<8)
 #define PIN_OFFSET_MASK (~4095)
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
@@ -3163,6 +3164,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
 					  unsigned long start,
 					  unsigned long end,
 					  unsigned flags);
+int __must_check i915_gem_evict_for_vma(struct i915_vma *target);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 
 /* belongs in i915_gem_gtt.h */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1e67484..c3453bd 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3450,30 +3450,50 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		goto err_unpin;
 
-	if (flags & PIN_HIGH) {
-		search_flag = DRM_MM_SEARCH_BELOW;
-		alloc_flag = DRM_MM_CREATE_TOP;
+	if (flags & PIN_OFFSET_FIXED) {
+		uint64_t offset = flags & PIN_OFFSET_MASK;
+
+		if (offset & (alignment - 1)) {
+			ret = -EINVAL;
+			goto err_free_vma;
+		}
+		vma->node.start = offset;
+		vma->node.size = size;
+		vma->node.color = obj->cache_level;
+		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		if (ret) {
+			ret = i915_gem_evict_for_vma(vma);
+			if (ret == 0)
+				ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		}
+		if (ret)
+			goto err_free_vma;
 	} else {
-		search_flag = DRM_MM_SEARCH_DEFAULT;
-		alloc_flag = DRM_MM_CREATE_DEFAULT;
-	}
+		if (flags & PIN_HIGH) {
+			search_flag = DRM_MM_SEARCH_BELOW;
+			alloc_flag = DRM_MM_CREATE_TOP;
+		} else {
+			search_flag = DRM_MM_SEARCH_DEFAULT;
+			alloc_flag = DRM_MM_CREATE_DEFAULT;
+		}
 
 search_free:
-	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
-						  size, alignment,
-						  obj->cache_level,
-						  start, end,
-						  search_flag,
-						  alloc_flag);
-	if (ret) {
-		ret = i915_gem_evict_something(dev, vm, size, alignment,
-					       obj->cache_level,
-					       start, end,
-					       flags);
-		if (ret == 0)
-			goto search_free;
+		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+							  size, alignment,
+							  obj->cache_level,
+							  start, end,
+							  search_flag,
+							  alloc_flag);
+		if (ret) {
+			ret = i915_gem_evict_something(dev, vm, size, alignment,
+						       obj->cache_level,
+						       start, end,
+						       flags);
+			if (ret == 0)
+				goto search_free;
 
-		goto err_free_vma;
+			goto err_free_vma;
+		}
 	}
 	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
 		ret = -EINVAL;
@@ -4007,6 +4027,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
 	    vma->node.start < (flags & PIN_OFFSET_MASK))
 		return true;
 
+	if (flags & PIN_OFFSET_FIXED &&
+	    vma->node.start != (flags & PIN_OFFSET_MASK))
+		return true;
+
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index d71a133..07c6e4d 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -199,6 +199,45 @@ found:
 	return ret;
 }
 
+int
+i915_gem_evict_for_vma(struct i915_vma *target)
+{
+	struct drm_mm_node *node, *next;
+
+	list_for_each_entry_safe(node, next,
+			&target->vm->mm.head_node.node_list,
+			node_list) {
+		struct i915_vma *vma;
+		int ret;
+
+		if (node->start + node->size <= target->node.start)
+			continue;
+		if (node->start >= target->node.start + target->node.size)
+			break;
+
+		vma = container_of(node, typeof(*vma), node);
+
+		if (vma->pin_count) {
+			if (!vma->exec_entry || (vma->pin_count > 1))
+				/* Object is pinned for some other use */
+				return -EBUSY;
+
+			/* We need to evict a buffer in the same batch */
+			if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
+				/* Overlapping fixed objects in the same batch */
+				return -EINVAL;
+
+			return -ENOSPC;
+		}
+
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 /**
  * i915_gem_evict_vm - Evict all idle vmas from a vm
  * @vm: Address space to cleanse
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 6ed7d63a..b72ffe6 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -599,6 +599,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 			flags |= PIN_GLOBAL | PIN_MAPPABLE;
 		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
 			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+		if (entry->flags & EXEC_OBJECT_PINNED)
+			flags |= entry->offset | PIN_OFFSET_FIXED;
 		if ((flags & PIN_MAPPABLE) == 0)
 			flags |= PIN_HIGH;
 	}
@@ -670,6 +672,10 @@ eb_vma_misplaced(struct i915_vma *vma)
 	    vma->node.start & (entry->alignment - 1))
 		return true;
 
+	if (entry->flags & EXEC_OBJECT_PINNED &&
+	    vma->node.start != entry->offset)
+		return true;
+
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
 	    vma->node.start < BATCH_OFFSET_BIAS)
 		return true;
@@ -678,7 +684,8 @@ eb_vma_misplaced(struct i915_vma *vma)
 	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
 		return !only_mappable_for_reloc(entry->flags);
 
-	if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
+	if (!(entry->flags &
+	    (EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED)) &&
 	    (vma->node.start + vma->node.size - 1) >> 32)
 		return true;
 
@@ -695,6 +702,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 	struct i915_vma *vma;
 	struct i915_address_space *vm;
 	struct list_head ordered_vmas;
+	struct list_head pinned_vmas;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	int retry;
 
@@ -703,6 +711,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
 
 	INIT_LIST_HEAD(&ordered_vmas);
+	INIT_LIST_HEAD(&pinned_vmas);
 	while (!list_empty(vmas)) {
 		struct drm_i915_gem_exec_object2 *entry;
 		bool need_fence, need_mappable;
@@ -721,7 +730,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 			obj->tiling_mode != I915_TILING_NONE;
 		need_mappable = need_fence || need_reloc_mappable(vma);
 
-		if (need_mappable) {
+		if (entry->flags & EXEC_OBJECT_PINNED)
+			list_move_tail(&vma->exec_list, &pinned_vmas);
+		else if (need_mappable) {
 			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
 			list_move(&vma->exec_list, &ordered_vmas);
 		} else
@@ -731,6 +742,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 		obj->base.pending_write_domain = 0;
 	}
 	list_splice(&ordered_vmas, vmas);
+	list_splice(&pinned_vmas, vmas);
 
 	/* Attempt to pin all of the buffers into the GTT.
 	 * This is done in 3 phases:
@@ -1317,7 +1329,8 @@ eb_get_batch(struct eb_vmas *eb)
 	 * Note that actual hangs have only been observed on gen7, but for
 	 * paranoia do it everywhere.
 	 */
-	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
 
 	return vma->obj;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 484a9fb..fc754a3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_EU_TOTAL		 34
 #define I915_PARAM_HAS_GPU_RESET	 35
 #define I915_PARAM_HAS_RESOURCE_STREAMER 36
+#define I915_PARAM_HAS_EXEC_SOFTPIN	 37
 
 typedef struct drm_i915_getparam {
 	__s32 param;
@@ -682,8 +683,12 @@ struct drm_i915_gem_exec_object2 {
 	__u64 alignment;
 
 	/**
-	 * Returned value of the updated offset of the object, for future
-	 * presumed_offset writes.
+	 * When the EXEC_OBJECT_PINNED flag is specified this is populated by
+	 * the user with the GTT offset at which this object will be pinned.
+	 * When the I915_EXEC_NO_RELOC flag is specified this must contain the
+	 * presumed_offset of the object.
+	 * During execbuffer2 the kernel populates it with the value of the
+	 * current GTT offset of the object, for future presumed_offset writes.
 	 */
 	__u64 offset;
 
@@ -691,7 +696,8 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
 #define EXEC_OBJECT_WRITE	(1<<2)
 #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
+#define EXEC_OBJECT_PINNED	(1<<4)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
 	__u64 flags;
 
 	__u64 rsvd1;
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH v6] drm/i915: Add soft-pinning API for execbuffer
  2015-10-16 10:59     ` [PATCH v6] " Thomas Daniel
@ 2015-10-16 14:09       ` Goel, Akash
  2015-10-16 14:15         ` Chris Wilson
  2015-12-02 11:28       ` Tvrtko Ursulin
  2015-12-08 11:55       ` [PATCH v7] " Thomas Daniel
  2 siblings, 1 reply; 32+ messages in thread
From: Goel, Akash @ 2015-10-16 14:09 UTC (permalink / raw)
  To: Thomas Daniel, intel-gfx; +Cc: Vinay Belgaumkar, Kristian Høgsberg


Discussed sometime back about this patch with Chris. He mainly has 2 
concerns with it.
1. The linear walk used by the patch to detect the overlapping objects 
would be expensive.
2. Restriction to disallow !RCS submissions for non-default contexts, 
which could lead to lot of conflicts for the placements, once multiple 
engines like media/blit/vebox are used

Both of them can be addressed by the subsequent patches.
Chris already has the patch ready to reduce the validation overhead with 
the use of rbtree and there should be no implications of allowing the 
non-default contexts for !RCS submissions in execbuffer.

In the standalone form, the patch looks good, so
Reviewed-by: "Akash Goel <akash.goel@intel.com>"

On 10/16/2015 4:29 PM, Thomas Daniel wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
>
> Userspace can pass in an offset that it presumes the object is located
> at. The kernel will then do its utmost to fit the object into that
> location. The assumption is that userspace is handling its own object
> locations (for example along with full-ppgtt) and that the kernel will
> rarely have to make space for the user's requests.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>
> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
> Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
> (Not published externally)
>
> v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
> to allow eviction of soft-pinned objects when another soft-pinned object used
> by a subsequent execbuffer overlaps reported by Michal Winiarski.
> (Not published externally)
>
> v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
> pinned first after an address conflict happens to avoid repeated conflicts in
> rare cases (Suggested by Chris Wilson).  Expanded comment on
> drm_i915_gem_exec_object2.offset to cover this new API.
>
> v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability
> (Kristian). Added check for multiple pinnings on eviction (Akash). Made sure
> buffers are not considered misplaced without the user specifying
> EXEC_OBJECT_SUPPORTS_48B_ADDRESS.  User must assume responsibility for any
> addressing workarounds.  Updated object2.offset field comment again to clarify
> NO_RELOC case (Chris).  checkpatch cleanup.
>
> v6: Trivial rebase on latest drm-intel-nightly
>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Akash Goel <akash.goel@intel.com>
> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> Cc: Michal Winiarski <michal.winiarski@intel.com>
> Cc: Zou Nanhai <nanhai.zou@intel.com>
> Cc: Kristian Høgsberg <hoegsberg@gmail.com>
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_dma.c            |  3 ++
>   drivers/gpu/drm/i915/i915_drv.h            |  2 +
>   drivers/gpu/drm/i915/i915_gem.c            | 64 ++++++++++++++++++++----------
>   drivers/gpu/drm/i915/i915_gem_evict.c      | 39 ++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 19 +++++++--
>   include/uapi/drm/i915_drm.h                | 12 ++++--
>   6 files changed, 113 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 2336af9..824c6c3 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>   	case I915_PARAM_HAS_RESOURCE_STREAMER:
>   		value = HAS_RESOURCE_STREAMER(dev);
>   		break;
> +	case I915_PARAM_HAS_EXEC_SOFTPIN:
> +		value = 1;
> +		break;
>   	default:
>   		DRM_DEBUG("Unknown parameter %d\n", param->param);
>   		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1396af9..73c3acf 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2816,6 +2816,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
>   #define PIN_UPDATE	(1<<5)
>   #define PIN_ZONE_4G	(1<<6)
>   #define PIN_HIGH	(1<<7)
> +#define PIN_OFFSET_FIXED	(1<<8)
>   #define PIN_OFFSET_MASK (~4095)
>   int __must_check
>   i915_gem_object_pin(struct drm_i915_gem_object *obj,
> @@ -3163,6 +3164,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
>   					  unsigned long start,
>   					  unsigned long end,
>   					  unsigned flags);
> +int __must_check i915_gem_evict_for_vma(struct i915_vma *target);
>   int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>
>   /* belongs in i915_gem_gtt.h */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1e67484..c3453bd 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3450,30 +3450,50 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>   	if (IS_ERR(vma))
>   		goto err_unpin;
>
> -	if (flags & PIN_HIGH) {
> -		search_flag = DRM_MM_SEARCH_BELOW;
> -		alloc_flag = DRM_MM_CREATE_TOP;
> +	if (flags & PIN_OFFSET_FIXED) {
> +		uint64_t offset = flags & PIN_OFFSET_MASK;
> +
> +		if (offset & (alignment - 1)) {
> +			ret = -EINVAL;
> +			goto err_free_vma;
> +		}
> +		vma->node.start = offset;
> +		vma->node.size = size;
> +		vma->node.color = obj->cache_level;
> +		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> +		if (ret) {
> +			ret = i915_gem_evict_for_vma(vma);
> +			if (ret == 0)
> +				ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> +		}
> +		if (ret)
> +			goto err_free_vma;
>   	} else {
> -		search_flag = DRM_MM_SEARCH_DEFAULT;
> -		alloc_flag = DRM_MM_CREATE_DEFAULT;
> -	}
> +		if (flags & PIN_HIGH) {
> +			search_flag = DRM_MM_SEARCH_BELOW;
> +			alloc_flag = DRM_MM_CREATE_TOP;
> +		} else {
> +			search_flag = DRM_MM_SEARCH_DEFAULT;
> +			alloc_flag = DRM_MM_CREATE_DEFAULT;
> +		}
>
>   search_free:
> -	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> -						  size, alignment,
> -						  obj->cache_level,
> -						  start, end,
> -						  search_flag,
> -						  alloc_flag);
> -	if (ret) {
> -		ret = i915_gem_evict_something(dev, vm, size, alignment,
> -					       obj->cache_level,
> -					       start, end,
> -					       flags);
> -		if (ret == 0)
> -			goto search_free;
> +		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> +							  size, alignment,
> +							  obj->cache_level,
> +							  start, end,
> +							  search_flag,
> +							  alloc_flag);
> +		if (ret) {
> +			ret = i915_gem_evict_something(dev, vm, size, alignment,
> +						       obj->cache_level,
> +						       start, end,
> +						       flags);
> +			if (ret == 0)
> +				goto search_free;
>
> -		goto err_free_vma;
> +			goto err_free_vma;
> +		}
>   	}
>   	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
>   		ret = -EINVAL;
> @@ -4007,6 +4027,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
>   	    vma->node.start < (flags & PIN_OFFSET_MASK))
>   		return true;
>
> +	if (flags & PIN_OFFSET_FIXED &&
> +	    vma->node.start != (flags & PIN_OFFSET_MASK))
> +		return true;
> +
>   	return false;
>   }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index d71a133..07c6e4d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -199,6 +199,45 @@ found:
>   	return ret;
>   }
>
> +int
> +i915_gem_evict_for_vma(struct i915_vma *target)
> +{
> +	struct drm_mm_node *node, *next;
> +
> +	list_for_each_entry_safe(node, next,
> +			&target->vm->mm.head_node.node_list,
> +			node_list) {
> +		struct i915_vma *vma;
> +		int ret;
> +
> +		if (node->start + node->size <= target->node.start)
> +			continue;
> +		if (node->start >= target->node.start + target->node.size)
> +			break;
> +
> +		vma = container_of(node, typeof(*vma), node);
> +
> +		if (vma->pin_count) {
> +			if (!vma->exec_entry || (vma->pin_count > 1))
> +				/* Object is pinned for some other use */
> +				return -EBUSY;
> +
> +			/* We need to evict a buffer in the same batch */
> +			if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
> +				/* Overlapping fixed objects in the same batch */
> +				return -EINVAL;
> +
> +			return -ENOSPC;
> +		}
> +
> +		ret = i915_vma_unbind(vma);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
>   /**
>    * i915_gem_evict_vm - Evict all idle vmas from a vm
>    * @vm: Address space to cleanse
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 6ed7d63a..b72ffe6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -599,6 +599,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>   			flags |= PIN_GLOBAL | PIN_MAPPABLE;
>   		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
>   			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
> +		if (entry->flags & EXEC_OBJECT_PINNED)
> +			flags |= entry->offset | PIN_OFFSET_FIXED;
>   		if ((flags & PIN_MAPPABLE) == 0)
>   			flags |= PIN_HIGH;
>   	}
> @@ -670,6 +672,10 @@ eb_vma_misplaced(struct i915_vma *vma)
>   	    vma->node.start & (entry->alignment - 1))
>   		return true;
>
> +	if (entry->flags & EXEC_OBJECT_PINNED &&
> +	    vma->node.start != entry->offset)
> +		return true;
> +
>   	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
>   	    vma->node.start < BATCH_OFFSET_BIAS)
>   		return true;
> @@ -678,7 +684,8 @@ eb_vma_misplaced(struct i915_vma *vma)
>   	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
>   		return !only_mappable_for_reloc(entry->flags);
>
> -	if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
> +	if (!(entry->flags &
> +	    (EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED)) &&
>   	    (vma->node.start + vma->node.size - 1) >> 32)
>   		return true;
>
> @@ -695,6 +702,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>   	struct i915_vma *vma;
>   	struct i915_address_space *vm;
>   	struct list_head ordered_vmas;
> +	struct list_head pinned_vmas;
>   	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
>   	int retry;
>
> @@ -703,6 +711,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>   	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
>
>   	INIT_LIST_HEAD(&ordered_vmas);
> +	INIT_LIST_HEAD(&pinned_vmas);
>   	while (!list_empty(vmas)) {
>   		struct drm_i915_gem_exec_object2 *entry;
>   		bool need_fence, need_mappable;
> @@ -721,7 +730,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>   			obj->tiling_mode != I915_TILING_NONE;
>   		need_mappable = need_fence || need_reloc_mappable(vma);
>
> -		if (need_mappable) {
> +		if (entry->flags & EXEC_OBJECT_PINNED)
> +			list_move_tail(&vma->exec_list, &pinned_vmas);
> +		else if (need_mappable) {
>   			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>   			list_move(&vma->exec_list, &ordered_vmas);
>   		} else
> @@ -731,6 +742,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>   		obj->base.pending_write_domain = 0;
>   	}
>   	list_splice(&ordered_vmas, vmas);
> +	list_splice(&pinned_vmas, vmas);
>
>   	/* Attempt to pin all of the buffers into the GTT.
>   	 * This is done in 3 phases:
> @@ -1317,7 +1329,8 @@ eb_get_batch(struct eb_vmas *eb)
>   	 * Note that actual hangs have only been observed on gen7, but for
>   	 * paranoia do it everywhere.
>   	 */
> -	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
> +	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
> +		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
>
>   	return vma->obj;
>   }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 484a9fb..fc754a3 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait {
>   #define I915_PARAM_EU_TOTAL		 34
>   #define I915_PARAM_HAS_GPU_RESET	 35
>   #define I915_PARAM_HAS_RESOURCE_STREAMER 36
> +#define I915_PARAM_HAS_EXEC_SOFTPIN	 37
>
>   typedef struct drm_i915_getparam {
>   	__s32 param;
> @@ -682,8 +683,12 @@ struct drm_i915_gem_exec_object2 {
>   	__u64 alignment;
>
>   	/**
> -	 * Returned value of the updated offset of the object, for future
> -	 * presumed_offset writes.
> +	 * When the EXEC_OBJECT_PINNED flag is specified this is populated by
> +	 * the user with the GTT offset at which this object will be pinned.
> +	 * When the I915_EXEC_NO_RELOC flag is specified this must contain the
> +	 * presumed_offset of the object.
> +	 * During execbuffer2 the kernel populates it with the value of the
> +	 * current GTT offset of the object, for future presumed_offset writes.
>   	 */
>   	__u64 offset;
>
> @@ -691,7 +696,8 @@ struct drm_i915_gem_exec_object2 {
>   #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
>   #define EXEC_OBJECT_WRITE	(1<<2)
>   #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
> +#define EXEC_OBJECT_PINNED	(1<<4)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
>   	__u64 flags;
>
>   	__u64 rsvd1;
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v6] drm/i915: Add soft-pinning API for execbuffer
  2015-10-16 14:09       ` Goel, Akash
@ 2015-10-16 14:15         ` Chris Wilson
  0 siblings, 0 replies; 32+ messages in thread
From: Chris Wilson @ 2015-10-16 14:15 UTC (permalink / raw)
  To: Goel, Akash; +Cc: intel-gfx, Kristian Høgsberg, Vinay Belgaumkar

On Fri, Oct 16, 2015 at 07:39:20PM +0530, Goel, Akash wrote:
> 
> Discussed sometime back about this patch with Chris. He mainly has 2
> concerns with it.
> 1. The linear walk used by the patch to detect the overlapping
> objects would be expensive.
> 2. Restriction to disallow !RCS submissions for non-default
> contexts, which could lead to lot of conflicts for the placements,
> once multiple engines like media/blit/vebox are used
> 
> Both of them can be addressed by the subsequent patches.
> Chris already has the patch ready to reduce the validation overhead
> with the use of rbtree and there should be no implications of
> allowing the non-default contexts for !RCS submissions in
> execbuffer.
> 
> In the standalone form, the patch looks good, so
> Reviewed-by: "Akash Goel <akash.goel@intel.com>"

I am sorry, but this patch does not reflect the final version that I
wrote, in particular does not provide the support I actually use inside
the kernel.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v6] drm/i915: Add soft-pinning API for execbuffer
  2015-10-16 10:59     ` [PATCH v6] " Thomas Daniel
  2015-10-16 14:09       ` Goel, Akash
@ 2015-12-02 11:28       ` Tvrtko Ursulin
  2015-12-02 11:35         ` Chris Wilson
  2015-12-08 11:55       ` [PATCH v7] " Thomas Daniel
  2 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-12-02 11:28 UTC (permalink / raw)
  To: Thomas Daniel, intel-gfx
  Cc: Vinay Belgaumkar, Akash Goel, Kristian Høgsberg


On 16/10/15 11:59, Thomas Daniel wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
>
> Userspace can pass in an offset that it presumes the object is located
> at. The kernel will then do its utmost to fit the object into that
> location. The assumption is that userspace is handling its own object
> locations (for example along with full-ppgtt) and that the kernel will
> rarely have to make space for the user's requests.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>
> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
> Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
> (Not published externally)
>
> v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
> to allow eviction of soft-pinned objects when another soft-pinned object used
> by a subsequent execbuffer overlaps reported by Michal Winiarski.
> (Not published externally)
>
> v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
> pinned first after an address conflict happens to avoid repeated conflicts in
> rare cases (Suggested by Chris Wilson).  Expanded comment on
> drm_i915_gem_exec_object2.offset to cover this new API.
>
> v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability
> (Kristian). Added check for multiple pinnings on eviction (Akash). Made sure
> buffers are not considered misplaced without the user specifying
> EXEC_OBJECT_SUPPORTS_48B_ADDRESS.  User must assume responsibility for any
> addressing workarounds.  Updated object2.offset field comment again to clarify
> NO_RELOC case (Chris).  checkpatch cleanup.
>
> v6: Trivial rebase on latest drm-intel-nightly
>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Akash Goel <akash.goel@intel.com>
> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> Cc: Michal Winiarski <michal.winiarski@intel.com>
> Cc: Zou Nanhai <nanhai.zou@intel.com>
> Cc: Kristian Høgsberg <hoegsberg@gmail.com>
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_dma.c            |  3 ++
>   drivers/gpu/drm/i915/i915_drv.h            |  2 +
>   drivers/gpu/drm/i915/i915_gem.c            | 64 ++++++++++++++++++++----------
>   drivers/gpu/drm/i915/i915_gem_evict.c      | 39 ++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 19 +++++++--
>   include/uapi/drm/i915_drm.h                | 12 ++++--
>   6 files changed, 113 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 2336af9..824c6c3 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>   	case I915_PARAM_HAS_RESOURCE_STREAMER:
>   		value = HAS_RESOURCE_STREAMER(dev);
>   		break;
> +	case I915_PARAM_HAS_EXEC_SOFTPIN:
> +		value = 1;
> +		break;
>   	default:
>   		DRM_DEBUG("Unknown parameter %d\n", param->param);
>   		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1396af9..73c3acf 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2816,6 +2816,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
>   #define PIN_UPDATE	(1<<5)
>   #define PIN_ZONE_4G	(1<<6)
>   #define PIN_HIGH	(1<<7)
> +#define PIN_OFFSET_FIXED	(1<<8)
>   #define PIN_OFFSET_MASK (~4095)
>   int __must_check
>   i915_gem_object_pin(struct drm_i915_gem_object *obj,
> @@ -3163,6 +3164,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
>   					  unsigned long start,
>   					  unsigned long end,
>   					  unsigned flags);
> +int __must_check i915_gem_evict_for_vma(struct i915_vma *target);
>   int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>
>   /* belongs in i915_gem_gtt.h */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1e67484..c3453bd 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3450,30 +3450,50 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>   	if (IS_ERR(vma))
>   		goto err_unpin;
>
> -	if (flags & PIN_HIGH) {
> -		search_flag = DRM_MM_SEARCH_BELOW;
> -		alloc_flag = DRM_MM_CREATE_TOP;
> +	if (flags & PIN_OFFSET_FIXED) {
> +		uint64_t offset = flags & PIN_OFFSET_MASK;
> +
> +		if (offset & (alignment - 1)) {
> +			ret = -EINVAL;
> +			goto err_free_vma;
> +		}
> +		vma->node.start = offset;
> +		vma->node.size = size;
> +		vma->node.color = obj->cache_level;
> +		ret = drm_mm_reserve_node(&vm->mm, &vma->node);

This will return ENOSPC to userspace when they have passed in an address 
outside the (PP)GTT range which is misleading.

I would suggest explicit checking against the 'end' (available a bit 
further up in the same function) and returning something more 
appropriate. At least EINVAL, although execbuf overloads that one so 
much I would even rather steal some other semi-appropriate one like 
ENXIO (No such device or _address_) or something.

Regards,

Tvrtko

> +		if (ret) {
> +			ret = i915_gem_evict_for_vma(vma);
> +			if (ret == 0)
> +				ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> +		}
> +		if (ret)
> +			goto err_free_vma;
>   	} else {
> -		search_flag = DRM_MM_SEARCH_DEFAULT;
> -		alloc_flag = DRM_MM_CREATE_DEFAULT;
> -	}
> +		if (flags & PIN_HIGH) {
> +			search_flag = DRM_MM_SEARCH_BELOW;
> +			alloc_flag = DRM_MM_CREATE_TOP;
> +		} else {
> +			search_flag = DRM_MM_SEARCH_DEFAULT;
> +			alloc_flag = DRM_MM_CREATE_DEFAULT;
> +		}
>
>   search_free:
> -	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> -						  size, alignment,
> -						  obj->cache_level,
> -						  start, end,
> -						  search_flag,
> -						  alloc_flag);
> -	if (ret) {
> -		ret = i915_gem_evict_something(dev, vm, size, alignment,
> -					       obj->cache_level,
> -					       start, end,
> -					       flags);
> -		if (ret == 0)
> -			goto search_free;
> +		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> +							  size, alignment,
> +							  obj->cache_level,
> +							  start, end,
> +							  search_flag,
> +							  alloc_flag);
> +		if (ret) {
> +			ret = i915_gem_evict_something(dev, vm, size, alignment,
> +						       obj->cache_level,
> +						       start, end,
> +						       flags);
> +			if (ret == 0)
> +				goto search_free;
>
> -		goto err_free_vma;
> +			goto err_free_vma;
> +		}
>   	}
>   	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
>   		ret = -EINVAL;
> @@ -4007,6 +4027,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
>   	    vma->node.start < (flags & PIN_OFFSET_MASK))
>   		return true;
>
> +	if (flags & PIN_OFFSET_FIXED &&
> +	    vma->node.start != (flags & PIN_OFFSET_MASK))
> +		return true;
> +
>   	return false;
>   }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index d71a133..07c6e4d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -199,6 +199,45 @@ found:
>   	return ret;
>   }
>
> +int
> +i915_gem_evict_for_vma(struct i915_vma *target)
> +{
> +	struct drm_mm_node *node, *next;
> +
> +	list_for_each_entry_safe(node, next,
> +			&target->vm->mm.head_node.node_list,
> +			node_list) {
> +		struct i915_vma *vma;
> +		int ret;
> +
> +		if (node->start + node->size <= target->node.start)
> +			continue;
> +		if (node->start >= target->node.start + target->node.size)
> +			break;
> +
> +		vma = container_of(node, typeof(*vma), node);
> +
> +		if (vma->pin_count) {
> +			if (!vma->exec_entry || (vma->pin_count > 1))
> +				/* Object is pinned for some other use */
> +				return -EBUSY;
> +
> +			/* We need to evict a buffer in the same batch */
> +			if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
> +				/* Overlapping fixed objects in the same batch */
> +				return -EINVAL;
> +
> +			return -ENOSPC;
> +		}
> +
> +		ret = i915_vma_unbind(vma);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
>   /**
>    * i915_gem_evict_vm - Evict all idle vmas from a vm
>    * @vm: Address space to cleanse
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 6ed7d63a..b72ffe6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -599,6 +599,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>   			flags |= PIN_GLOBAL | PIN_MAPPABLE;
>   		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
>   			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
> +		if (entry->flags & EXEC_OBJECT_PINNED)
> +			flags |= entry->offset | PIN_OFFSET_FIXED;
>   		if ((flags & PIN_MAPPABLE) == 0)
>   			flags |= PIN_HIGH;
>   	}
> @@ -670,6 +672,10 @@ eb_vma_misplaced(struct i915_vma *vma)
>   	    vma->node.start & (entry->alignment - 1))
>   		return true;
>
> +	if (entry->flags & EXEC_OBJECT_PINNED &&
> +	    vma->node.start != entry->offset)
> +		return true;
> +
>   	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
>   	    vma->node.start < BATCH_OFFSET_BIAS)
>   		return true;
> @@ -678,7 +684,8 @@ eb_vma_misplaced(struct i915_vma *vma)
>   	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
>   		return !only_mappable_for_reloc(entry->flags);
>
> -	if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
> +	if (!(entry->flags &
> +	    (EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED)) &&
>   	    (vma->node.start + vma->node.size - 1) >> 32)
>   		return true;
>
> @@ -695,6 +702,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>   	struct i915_vma *vma;
>   	struct i915_address_space *vm;
>   	struct list_head ordered_vmas;
> +	struct list_head pinned_vmas;
>   	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
>   	int retry;
>
> @@ -703,6 +711,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>   	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
>
>   	INIT_LIST_HEAD(&ordered_vmas);
> +	INIT_LIST_HEAD(&pinned_vmas);
>   	while (!list_empty(vmas)) {
>   		struct drm_i915_gem_exec_object2 *entry;
>   		bool need_fence, need_mappable;
> @@ -721,7 +730,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>   			obj->tiling_mode != I915_TILING_NONE;
>   		need_mappable = need_fence || need_reloc_mappable(vma);
>
> -		if (need_mappable) {
> +		if (entry->flags & EXEC_OBJECT_PINNED)
> +			list_move_tail(&vma->exec_list, &pinned_vmas);
> +		else if (need_mappable) {
>   			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>   			list_move(&vma->exec_list, &ordered_vmas);
>   		} else
> @@ -731,6 +742,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>   		obj->base.pending_write_domain = 0;
>   	}
>   	list_splice(&ordered_vmas, vmas);
> +	list_splice(&pinned_vmas, vmas);
>
>   	/* Attempt to pin all of the buffers into the GTT.
>   	 * This is done in 3 phases:
> @@ -1317,7 +1329,8 @@ eb_get_batch(struct eb_vmas *eb)
>   	 * Note that actual hangs have only been observed on gen7, but for
>   	 * paranoia do it everywhere.
>   	 */
> -	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
> +	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
> +		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
>
>   	return vma->obj;
>   }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 484a9fb..fc754a3 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait {
>   #define I915_PARAM_EU_TOTAL		 34
>   #define I915_PARAM_HAS_GPU_RESET	 35
>   #define I915_PARAM_HAS_RESOURCE_STREAMER 36
> +#define I915_PARAM_HAS_EXEC_SOFTPIN	 37
>
>   typedef struct drm_i915_getparam {
>   	__s32 param;
> @@ -682,8 +683,12 @@ struct drm_i915_gem_exec_object2 {
>   	__u64 alignment;
>
>   	/**
> -	 * Returned value of the updated offset of the object, for future
> -	 * presumed_offset writes.
> +	 * When the EXEC_OBJECT_PINNED flag is specified this is populated by
> +	 * the user with the GTT offset at which this object will be pinned.
> +	 * When the I915_EXEC_NO_RELOC flag is specified this must contain the
> +	 * presumed_offset of the object.
> +	 * During execbuffer2 the kernel populates it with the value of the
> +	 * current GTT offset of the object, for future presumed_offset writes.
>   	 */
>   	__u64 offset;
>
> @@ -691,7 +696,8 @@ struct drm_i915_gem_exec_object2 {
>   #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
>   #define EXEC_OBJECT_WRITE	(1<<2)
>   #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
> +#define EXEC_OBJECT_PINNED	(1<<4)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
>   	__u64 flags;
>
>   	__u64 rsvd1;
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v6] drm/i915: Add soft-pinning API for execbuffer
  2015-12-02 11:28       ` Tvrtko Ursulin
@ 2015-12-02 11:35         ` Chris Wilson
  0 siblings, 0 replies; 32+ messages in thread
From: Chris Wilson @ 2015-12-02 11:35 UTC (permalink / raw)
  To: Tvrtko Ursulin
  Cc: intel-gfx, Kristian Høgsberg, Akash Goel, Vinay Belgaumkar

On Wed, Dec 02, 2015 at 11:28:52AM +0000, Tvrtko Ursulin wrote:
> This will return ENOSPC to userspace when they have passed in an
> address outside the (PP)GTT range which is misleading.

Reviewing the wrong patch.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v7] drm/i915: Add soft-pinning API for execbuffer
  2015-10-16 10:59     ` [PATCH v6] " Thomas Daniel
  2015-10-16 14:09       ` Goel, Akash
  2015-12-02 11:28       ` Tvrtko Ursulin
@ 2015-12-08 11:55       ` Thomas Daniel
  2015-12-08 18:49         ` Michel Thierry
  2 siblings, 1 reply; 32+ messages in thread
From: Thomas Daniel @ 2015-12-08 11:55 UTC (permalink / raw)
  To: intel-gfx; +Cc: Kristian Høgsberg, Akash Goel, Vinay Belgaumkar

From: Chris Wilson <chris@chris-wilson.co.uk>

Userspace can pass in an offset that it presumes the object is located
at. The kernel will then do its utmost to fit the object into that
location. The assumption is that userspace is handling its own object
locations (for example along with full-ppgtt) and that the kernel will
rarely have to make space for the user's requests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
(Not published externally)

v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
to allow eviction of soft-pinned objects when another soft-pinned object used
by a subsequent execbuffer overlaps reported by Michal Winiarski.
(Not published externally)

v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
pinned first after an address conflict happens to avoid repeated conflicts in
rare cases (Suggested by Chris Wilson).  Expanded comment on
drm_i915_gem_exec_object2.offset to cover this new API.

v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability
(Kristian). Added check for multiple pinnings on eviction (Akash). Made sure
buffers are not considered misplaced without the user specifying
EXEC_OBJECT_SUPPORTS_48B_ADDRESS.  User must assume responsibility for any
addressing workarounds.  Updated object2.offset field comment again to clarify
NO_RELOC case (Chris).  checkpatch cleanup.

v6: Trivial rebase on latest drm-intel-nightly

v7: Catch attempts to pin above the max virtual address size and return
EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and
EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt to pin
something at an offset above 4GB (Chris, Daniel Vetter).

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Akash Goel <akash.goel@intel.com>
Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Zou Nanhai <nanhai.zou@intel.com>
Cc: Kristian Høgsberg <hoegsberg@gmail.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c            |  3 ++
 drivers/gpu/drm/i915/i915_drv.h            |  2 +
 drivers/gpu/drm/i915/i915_gem.c            | 64 ++++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_gem_evict.c      | 39 ++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++-
 include/uapi/drm/i915_drm.h                | 12 ++++--
 6 files changed, 111 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a81c766..52b8289 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -169,6 +169,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_RESOURCE_STREAMER:
 		value = HAS_RESOURCE_STREAMER(dev);
 		break;
+	case I915_PARAM_HAS_EXEC_SOFTPIN:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f1a8a53..315d79d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2860,6 +2860,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
 #define PIN_UPDATE	(1<<5)
 #define PIN_ZONE_4G	(1<<6)
 #define PIN_HIGH	(1<<7)
+#define PIN_OFFSET_FIXED	(1<<8)
 #define PIN_OFFSET_MASK (~4095)
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
@@ -3205,6 +3206,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
 					  unsigned long start,
 					  unsigned long end,
 					  unsigned flags);
+int __must_check i915_gem_evict_for_vma(struct i915_vma *target);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 
 /* belongs in i915_gem_gtt.h */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dfaf25b..801f3d0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3476,30 +3476,50 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		goto err_unpin;
 
-	if (flags & PIN_HIGH) {
-		search_flag = DRM_MM_SEARCH_BELOW;
-		alloc_flag = DRM_MM_CREATE_TOP;
+	if (flags & PIN_OFFSET_FIXED) {
+		uint64_t offset = flags & PIN_OFFSET_MASK;
+
+		if (offset & (alignment - 1) || offset + size > end) {
+			ret = -EINVAL;
+			goto err_free_vma;
+		}
+		vma->node.start = offset;
+		vma->node.size = size;
+		vma->node.color = obj->cache_level;
+		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		if (ret) {
+			ret = i915_gem_evict_for_vma(vma);
+			if (ret == 0)
+				ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		}
+		if (ret)
+			goto err_free_vma;
 	} else {
-		search_flag = DRM_MM_SEARCH_DEFAULT;
-		alloc_flag = DRM_MM_CREATE_DEFAULT;
-	}
+		if (flags & PIN_HIGH) {
+			search_flag = DRM_MM_SEARCH_BELOW;
+			alloc_flag = DRM_MM_CREATE_TOP;
+		} else {
+			search_flag = DRM_MM_SEARCH_DEFAULT;
+			alloc_flag = DRM_MM_CREATE_DEFAULT;
+		}
 
 search_free:
-	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
-						  size, alignment,
-						  obj->cache_level,
-						  start, end,
-						  search_flag,
-						  alloc_flag);
-	if (ret) {
-		ret = i915_gem_evict_something(dev, vm, size, alignment,
-					       obj->cache_level,
-					       start, end,
-					       flags);
-		if (ret == 0)
-			goto search_free;
+		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+							  size, alignment,
+							  obj->cache_level,
+							  start, end,
+							  search_flag,
+							  alloc_flag);
+		if (ret) {
+			ret = i915_gem_evict_something(dev, vm, size, alignment,
+						       obj->cache_level,
+						       start, end,
+						       flags);
+			if (ret == 0)
+				goto search_free;
 
-		goto err_free_vma;
+			goto err_free_vma;
+		}
 	}
 	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
 		ret = -EINVAL;
@@ -4090,6 +4110,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
 	    vma->node.start < (flags & PIN_OFFSET_MASK))
 		return true;
 
+	if (flags & PIN_OFFSET_FIXED &&
+	    vma->node.start != (flags & PIN_OFFSET_MASK))
+		return true;
+
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index d71a133..07c6e4d 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -199,6 +199,45 @@ found:
 	return ret;
 }
 
+int
+i915_gem_evict_for_vma(struct i915_vma *target)
+{
+	struct drm_mm_node *node, *next;
+
+	list_for_each_entry_safe(node, next,
+			&target->vm->mm.head_node.node_list,
+			node_list) {
+		struct i915_vma *vma;
+		int ret;
+
+		if (node->start + node->size <= target->node.start)
+			continue;
+		if (node->start >= target->node.start + target->node.size)
+			break;
+
+		vma = container_of(node, typeof(*vma), node);
+
+		if (vma->pin_count) {
+			if (!vma->exec_entry || (vma->pin_count > 1))
+				/* Object is pinned for some other use */
+				return -EBUSY;
+
+			/* We need to evict a buffer in the same batch */
+			if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
+				/* Overlapping fixed objects in the same batch */
+				return -EINVAL;
+
+			return -ENOSPC;
+		}
+
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 /**
  * i915_gem_evict_vm - Evict all idle vmas from a vm
  * @vm: Address space to cleanse
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a4c243c..48ec484 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -599,6 +599,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 			flags |= PIN_GLOBAL | PIN_MAPPABLE;
 		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
 			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+		if (entry->flags & EXEC_OBJECT_PINNED)
+			flags |= entry->offset | PIN_OFFSET_FIXED;
 		if ((flags & PIN_MAPPABLE) == 0)
 			flags |= PIN_HIGH;
 	}
@@ -670,6 +672,10 @@ eb_vma_misplaced(struct i915_vma *vma)
 	    vma->node.start & (entry->alignment - 1))
 		return true;
 
+	if (entry->flags & EXEC_OBJECT_PINNED &&
+	    vma->node.start != entry->offset)
+		return true;
+
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
 	    vma->node.start < BATCH_OFFSET_BIAS)
 		return true;
@@ -695,6 +701,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 	struct i915_vma *vma;
 	struct i915_address_space *vm;
 	struct list_head ordered_vmas;
+	struct list_head pinned_vmas;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	int retry;
 
@@ -703,6 +710,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
 
 	INIT_LIST_HEAD(&ordered_vmas);
+	INIT_LIST_HEAD(&pinned_vmas);
 	while (!list_empty(vmas)) {
 		struct drm_i915_gem_exec_object2 *entry;
 		bool need_fence, need_mappable;
@@ -721,7 +729,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 			obj->tiling_mode != I915_TILING_NONE;
 		need_mappable = need_fence || need_reloc_mappable(vma);
 
-		if (need_mappable) {
+		if (entry->flags & EXEC_OBJECT_PINNED)
+			list_move_tail(&vma->exec_list, &pinned_vmas);
+		else if (need_mappable) {
 			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
 			list_move(&vma->exec_list, &ordered_vmas);
 		} else
@@ -731,6 +741,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 		obj->base.pending_write_domain = 0;
 	}
 	list_splice(&ordered_vmas, vmas);
+	list_splice(&pinned_vmas, vmas);
 
 	/* Attempt to pin all of the buffers into the GTT.
 	 * This is done in 3 phases:
@@ -1317,7 +1328,8 @@ eb_get_batch(struct eb_vmas *eb)
 	 * Note that actual hangs have only been observed on gen7, but for
 	 * paranoia do it everywhere.
 	 */
-	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
 
 	return vma->obj;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 67ef73a..d727b49 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_EU_TOTAL		 34
 #define I915_PARAM_HAS_GPU_RESET	 35
 #define I915_PARAM_HAS_RESOURCE_STREAMER 36
+#define I915_PARAM_HAS_EXEC_SOFTPIN	 37
 
 typedef struct drm_i915_getparam {
 	__s32 param;
@@ -682,8 +683,12 @@ struct drm_i915_gem_exec_object2 {
 	__u64 alignment;
 
 	/**
-	 * Returned value of the updated offset of the object, for future
-	 * presumed_offset writes.
+	 * When the EXEC_OBJECT_PINNED flag is specified this is populated by
+	 * the user with the GTT offset at which this object will be pinned.
+	 * When the I915_EXEC_NO_RELOC flag is specified this must contain the
+	 * presumed_offset of the object.
+	 * During execbuffer2 the kernel populates it with the value of the
+	 * current GTT offset of the object, for future presumed_offset writes.
 	 */
 	__u64 offset;
 
@@ -691,7 +696,8 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
 #define EXEC_OBJECT_WRITE	(1<<2)
 #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
+#define EXEC_OBJECT_PINNED	(1<<4)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
 	__u64 flags;
 
 	__u64 rsvd1;
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH v7] drm/i915: Add soft-pinning API for execbuffer
  2015-12-08 11:55       ` [PATCH v7] " Thomas Daniel
@ 2015-12-08 18:49         ` Michel Thierry
  2015-12-09 10:30           ` Tvrtko Ursulin
  0 siblings, 1 reply; 32+ messages in thread
From: Michel Thierry @ 2015-12-08 18:49 UTC (permalink / raw)
  To: Thomas Daniel, intel-gfx
  Cc: Belgaumkar, Vinay, Goel, Akash, Kristian Høgsberg

On 12/8/2015 11:55 AM, Thomas Daniel wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
>
> Userspace can pass in an offset that it presumes the object is located
> at. The kernel will then do its utmost to fit the object into that
> location. The assumption is that userspace is handling its own object
> locations (for example along with full-ppgtt) and that the kernel will
> rarely have to make space for the user's requests.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>
> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
> Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
> (Not published externally)
>
> v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
> to allow eviction of soft-pinned objects when another soft-pinned object used
> by a subsequent execbuffer overlaps reported by Michal Winiarski.
> (Not published externally)
>
> v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
> pinned first after an address conflict happens to avoid repeated conflicts in
> rare cases (Suggested by Chris Wilson).  Expanded comment on
> drm_i915_gem_exec_object2.offset to cover this new API.
>
> v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability
> (Kristian). Added check for multiple pinnings on eviction (Akash). Made sure
> buffers are not considered misplaced without the user specifying
> EXEC_OBJECT_SUPPORTS_48B_ADDRESS.  User must assume responsibility for any
> addressing workarounds.  Updated object2.offset field comment again to clarify
> NO_RELOC case (Chris).  checkpatch cleanup.
>
> v6: Trivial rebase on latest drm-intel-nightly
>
> v7: Catch attempts to pin above the max virtual address size and return
> EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and
> EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt to pin
> something at an offset above 4GB (Chris, Daniel Vetter).
>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Akash Goel <akash.goel@intel.com>
> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> Cc: Michal Winiarski <michal.winiarski@intel.com>
> Cc: Zou Nanhai <nanhai.zou@intel.com>
> Cc: Kristian Høgsberg <hoegsberg@gmail.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_dma.c            |  3 ++
>   drivers/gpu/drm/i915/i915_drv.h            |  2 +
>   drivers/gpu/drm/i915/i915_gem.c            | 64 ++++++++++++++++++++----------
>   drivers/gpu/drm/i915/i915_gem_evict.c      | 39 ++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++-
>   include/uapi/drm/i915_drm.h                | 12 ++++--
>   6 files changed, 111 insertions(+), 25 deletions(-)
>

Extra support from the other patch aside, v6 already had rb from Akash 
and this one,
Reviewed-by: Michel Thierry <michel.thierry@intel.com>

> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index a81c766..52b8289 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -169,6 +169,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>          case I915_PARAM_HAS_RESOURCE_STREAMER:
>                  value = HAS_RESOURCE_STREAMER(dev);
>                  break;
> +       case I915_PARAM_HAS_EXEC_SOFTPIN:
> +               value = 1;
> +               break;
>          default:
>                  DRM_DEBUG("Unknown parameter %d\n", param->param);
>                  return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f1a8a53..315d79d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2860,6 +2860,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
>   #define PIN_UPDATE     (1<<5)
>   #define PIN_ZONE_4G    (1<<6)
>   #define PIN_HIGH       (1<<7)
> +#define PIN_OFFSET_FIXED       (1<<8)
>   #define PIN_OFFSET_MASK (~4095)
>   int __must_check
>   i915_gem_object_pin(struct drm_i915_gem_object *obj,
> @@ -3205,6 +3206,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
>                                            unsigned long start,
>                                            unsigned long end,
>                                            unsigned flags);
> +int __must_check i915_gem_evict_for_vma(struct i915_vma *target);
>   int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>
>   /* belongs in i915_gem_gtt.h */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index dfaf25b..801f3d0 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3476,30 +3476,50 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>          if (IS_ERR(vma))
>                  goto err_unpin;
>
> -       if (flags & PIN_HIGH) {
> -               search_flag = DRM_MM_SEARCH_BELOW;
> -               alloc_flag = DRM_MM_CREATE_TOP;
> +       if (flags & PIN_OFFSET_FIXED) {
> +               uint64_t offset = flags & PIN_OFFSET_MASK;
> +
> +               if (offset & (alignment - 1) || offset + size > end) {
> +                       ret = -EINVAL;
> +                       goto err_free_vma;
> +               }
> +               vma->node.start = offset;
> +               vma->node.size = size;
> +               vma->node.color = obj->cache_level;
> +               ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> +               if (ret) {
> +                       ret = i915_gem_evict_for_vma(vma);
> +                       if (ret == 0)
> +                               ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> +               }
> +               if (ret)
> +                       goto err_free_vma;
>          } else {
> -               search_flag = DRM_MM_SEARCH_DEFAULT;
> -               alloc_flag = DRM_MM_CREATE_DEFAULT;
> -       }
> +               if (flags & PIN_HIGH) {
> +                       search_flag = DRM_MM_SEARCH_BELOW;
> +                       alloc_flag = DRM_MM_CREATE_TOP;
> +               } else {
> +                       search_flag = DRM_MM_SEARCH_DEFAULT;
> +                       alloc_flag = DRM_MM_CREATE_DEFAULT;
> +               }
>
>   search_free:
> -       ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> -                                                 size, alignment,
> -                                                 obj->cache_level,
> -                                                 start, end,
> -                                                 search_flag,
> -                                                 alloc_flag);
> -       if (ret) {
> -               ret = i915_gem_evict_something(dev, vm, size, alignment,
> -                                              obj->cache_level,
> -                                              start, end,
> -                                              flags);
> -               if (ret == 0)
> -                       goto search_free;
> +               ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> +                                                         size, alignment,
> +                                                         obj->cache_level,
> +                                                         start, end,
> +                                                         search_flag,
> +                                                         alloc_flag);
> +               if (ret) {
> +                       ret = i915_gem_evict_something(dev, vm, size, alignment,
> +                                                      obj->cache_level,
> +                                                      start, end,
> +                                                      flags);
> +                       if (ret == 0)
> +                               goto search_free;
>
> -               goto err_free_vma;
> +                       goto err_free_vma;
> +               }
>          }
>          if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
>                  ret = -EINVAL;
> @@ -4090,6 +4110,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
>              vma->node.start < (flags & PIN_OFFSET_MASK))
>                  return true;
>
> +       if (flags & PIN_OFFSET_FIXED &&
> +           vma->node.start != (flags & PIN_OFFSET_MASK))
> +               return true;
> +
>          return false;
>   }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index d71a133..07c6e4d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -199,6 +199,45 @@ found:
>          return ret;
>   }
>
> +int
> +i915_gem_evict_for_vma(struct i915_vma *target)
> +{
> +       struct drm_mm_node *node, *next;
> +
> +       list_for_each_entry_safe(node, next,
> +                       &target->vm->mm.head_node.node_list,
> +                       node_list) {
> +               struct i915_vma *vma;
> +               int ret;
> +
> +               if (node->start + node->size <= target->node.start)
> +                       continue;
> +               if (node->start >= target->node.start + target->node.size)
> +                       break;
> +
> +               vma = container_of(node, typeof(*vma), node);
> +
> +               if (vma->pin_count) {
> +                       if (!vma->exec_entry || (vma->pin_count > 1))
> +                               /* Object is pinned for some other use */
> +                               return -EBUSY;
> +
> +                       /* We need to evict a buffer in the same batch */
> +                       if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
> +                               /* Overlapping fixed objects in the same batch */
> +                               return -EINVAL;
> +
> +                       return -ENOSPC;
> +               }
> +
> +               ret = i915_vma_unbind(vma);
> +               if (ret)
> +                       return ret;
> +       }
> +
> +       return 0;
> +}
> +
>   /**
>    * i915_gem_evict_vm - Evict all idle vmas from a vm
>    * @vm: Address space to cleanse
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index a4c243c..48ec484 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -599,6 +599,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>                          flags |= PIN_GLOBAL | PIN_MAPPABLE;
>                  if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
>                          flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
> +               if (entry->flags & EXEC_OBJECT_PINNED)
> +                       flags |= entry->offset | PIN_OFFSET_FIXED;
>                  if ((flags & PIN_MAPPABLE) == 0)
>                          flags |= PIN_HIGH;
>          }
> @@ -670,6 +672,10 @@ eb_vma_misplaced(struct i915_vma *vma)
>              vma->node.start & (entry->alignment - 1))
>                  return true;
>
> +       if (entry->flags & EXEC_OBJECT_PINNED &&
> +           vma->node.start != entry->offset)
> +               return true;
> +
>          if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
>              vma->node.start < BATCH_OFFSET_BIAS)
>                  return true;
> @@ -695,6 +701,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>          struct i915_vma *vma;
>          struct i915_address_space *vm;
>          struct list_head ordered_vmas;
> +       struct list_head pinned_vmas;
>          bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
>          int retry;
>
> @@ -703,6 +710,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>          vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
>
>          INIT_LIST_HEAD(&ordered_vmas);
> +       INIT_LIST_HEAD(&pinned_vmas);
>          while (!list_empty(vmas)) {
>                  struct drm_i915_gem_exec_object2 *entry;
>                  bool need_fence, need_mappable;
> @@ -721,7 +729,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>                          obj->tiling_mode != I915_TILING_NONE;
>                  need_mappable = need_fence || need_reloc_mappable(vma);
>
> -               if (need_mappable) {
> +               if (entry->flags & EXEC_OBJECT_PINNED)
> +                       list_move_tail(&vma->exec_list, &pinned_vmas);
> +               else if (need_mappable) {
>                          entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>                          list_move(&vma->exec_list, &ordered_vmas);
>                  } else
> @@ -731,6 +741,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>                  obj->base.pending_write_domain = 0;
>          }
>          list_splice(&ordered_vmas, vmas);
> +       list_splice(&pinned_vmas, vmas);
>
>          /* Attempt to pin all of the buffers into the GTT.
>           * This is done in 3 phases:
> @@ -1317,7 +1328,8 @@ eb_get_batch(struct eb_vmas *eb)
>           * Note that actual hangs have only been observed on gen7, but for
>           * paranoia do it everywhere.
>           */
> -       vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
> +       if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
> +               vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
>
>          return vma->obj;
>   }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 67ef73a..d727b49 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait {
>   #define I915_PARAM_EU_TOTAL             34
>   #define I915_PARAM_HAS_GPU_RESET        35
>   #define I915_PARAM_HAS_RESOURCE_STREAMER 36
> +#define I915_PARAM_HAS_EXEC_SOFTPIN     37
>
>   typedef struct drm_i915_getparam {
>          __s32 param;
> @@ -682,8 +683,12 @@ struct drm_i915_gem_exec_object2 {
>          __u64 alignment;
>
>          /**
> -        * Returned value of the updated offset of the object, for future
> -        * presumed_offset writes.
> +        * When the EXEC_OBJECT_PINNED flag is specified this is populated by
> +        * the user with the GTT offset at which this object will be pinned.
> +        * When the I915_EXEC_NO_RELOC flag is specified this must contain the
> +        * presumed_offset of the object.
> +        * During execbuffer2 the kernel populates it with the value of the
> +        * current GTT offset of the object, for future presumed_offset writes.
>           */
>          __u64 offset;
>
> @@ -691,7 +696,8 @@ struct drm_i915_gem_exec_object2 {
>   #define EXEC_OBJECT_NEEDS_GTT  (1<<1)
>   #define EXEC_OBJECT_WRITE      (1<<2)
>   #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
> +#define EXEC_OBJECT_PINNED     (1<<4)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
>          __u64 flags;
>
>          __u64 rsvd1;
> --
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v7] drm/i915: Add soft-pinning API for execbuffer
  2015-12-08 18:49         ` Michel Thierry
@ 2015-12-09 10:30           ` Tvrtko Ursulin
  2015-12-09 10:51             ` Chris Wilson
  0 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-12-09 10:30 UTC (permalink / raw)
  To: Michel Thierry, Thomas Daniel, intel-gfx
  Cc: Belgaumkar, Vinay, Goel, Akash, Kristian Høgsberg


On 08/12/15 18:49, Michel Thierry wrote:
> On 12/8/2015 11:55 AM, Thomas Daniel wrote:
>> From: Chris Wilson <chris@chris-wilson.co.uk>
>>
>> Userspace can pass in an offset that it presumes the object is located
>> at. The kernel will then do its utmost to fit the object into that
>> location. The assumption is that userspace is handling its own object
>> locations (for example along with full-ppgtt) and that the kernel will
>> rarely have to make space for the user's requests.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>
>> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested
>> by Chris
>> Wilson.  Fixed incorrect error paths causing crash found by Michal
>> Winiarski.
>> (Not published externally)
>>
>> v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed
>> eviction
>> to allow eviction of soft-pinned objects when another soft-pinned
>> object used
>> by a subsequent execbuffer overlaps reported by Michal Winiarski.
>> (Not published externally)
>>
>> v4: Moved soft-pinned objects to the front of ordered_vmas so that
>> they are
>> pinned first after an address conflict happens to avoid repeated
>> conflicts in
>> rare cases (Suggested by Chris Wilson).  Expanded comment on
>> drm_i915_gem_exec_object2.offset to cover this new API.
>>
>> v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this
>> capability
>> (Kristian). Added check for multiple pinnings on eviction (Akash).
>> Made sure
>> buffers are not considered misplaced without the user specifying
>> EXEC_OBJECT_SUPPORTS_48B_ADDRESS.  User must assume responsibility for
>> any
>> addressing workarounds.  Updated object2.offset field comment again to
>> clarify
>> NO_RELOC case (Chris).  checkpatch cleanup.
>>
>> v6: Trivial rebase on latest drm-intel-nightly
>>
>> v7: Catch attempts to pin above the max virtual address size and return
>> EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and
>> EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt to pin
>> something at an offset above 4GB (Chris, Daniel Vetter).
>>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Akash Goel <akash.goel@intel.com>
>> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>> Cc: Michal Winiarski <michal.winiarski@intel.com>
>> Cc: Zou Nanhai <nanhai.zou@intel.com>
>> Cc: Kristian Høgsberg <hoegsberg@gmail.com>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_dma.c            |  3 ++
>>   drivers/gpu/drm/i915/i915_drv.h            |  2 +
>>   drivers/gpu/drm/i915/i915_gem.c            | 64
>> ++++++++++++++++++++----------
>>   drivers/gpu/drm/i915/i915_gem_evict.c      | 39 ++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++-
>>   include/uapi/drm/i915_drm.h                | 12 ++++--
>>   6 files changed, 111 insertions(+), 25 deletions(-)
>>
>
> Extra support from the other patch aside, v6 already had rb from Akash
> and this one,
> Reviewed-by: Michel Thierry <michel.thierry@intel.com>

This patch was acked by the PDT so I merged it to drm-intel-next-queued.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v7] drm/i915: Add soft-pinning API for execbuffer
  2015-12-09 10:30           ` Tvrtko Ursulin
@ 2015-12-09 10:51             ` Chris Wilson
  2015-12-09 12:34               ` Tvrtko Ursulin
  0 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2015-12-09 10:51 UTC (permalink / raw)
  To: Tvrtko Ursulin
  Cc: intel-gfx, Kristian Høgsberg, Goel, Akash, Belgaumkar, Vinay

On Wed, Dec 09, 2015 at 10:30:41AM +0000, Tvrtko Ursulin wrote:
> 
> On 08/12/15 18:49, Michel Thierry wrote:
> >On 12/8/2015 11:55 AM, Thomas Daniel wrote:
> >>From: Chris Wilson <chris@chris-wilson.co.uk>
> >>
> >>Userspace can pass in an offset that it presumes the object is located
> >>at. The kernel will then do its utmost to fit the object into that
> >>location. The assumption is that userspace is handling its own object
> >>locations (for example along with full-ppgtt) and that the kernel will
> >>rarely have to make space for the user's requests.
> >>
> >>Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>
> >>v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested
> >>by Chris
> >>Wilson.  Fixed incorrect error paths causing crash found by Michal
> >>Winiarski.
> >>(Not published externally)
> >>
> >>v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed
> >>eviction
> >>to allow eviction of soft-pinned objects when another soft-pinned
> >>object used
> >>by a subsequent execbuffer overlaps reported by Michal Winiarski.
> >>(Not published externally)
> >>
> >>v4: Moved soft-pinned objects to the front of ordered_vmas so that
> >>they are
> >>pinned first after an address conflict happens to avoid repeated
> >>conflicts in
> >>rare cases (Suggested by Chris Wilson).  Expanded comment on
> >>drm_i915_gem_exec_object2.offset to cover this new API.
> >>
> >>v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this
> >>capability
> >>(Kristian). Added check for multiple pinnings on eviction (Akash).
> >>Made sure
> >>buffers are not considered misplaced without the user specifying
> >>EXEC_OBJECT_SUPPORTS_48B_ADDRESS.  User must assume responsibility for
> >>any
> >>addressing workarounds.  Updated object2.offset field comment again to
> >>clarify
> >>NO_RELOC case (Chris).  checkpatch cleanup.
> >>
> >>v6: Trivial rebase on latest drm-intel-nightly
> >>
> >>v7: Catch attempts to pin above the max virtual address size and return
> >>EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and
> >>EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt to pin
> >>something at an offset above 4GB (Chris, Daniel Vetter).
> >>
> >>Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >>Cc: Akash Goel <akash.goel@intel.com>
> >>Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> >>Cc: Michal Winiarski <michal.winiarski@intel.com>
> >>Cc: Zou Nanhai <nanhai.zou@intel.com>
> >>Cc: Kristian Høgsberg <hoegsberg@gmail.com>
> >>Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> >>Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> >>---
> >>  drivers/gpu/drm/i915/i915_dma.c            |  3 ++
> >>  drivers/gpu/drm/i915/i915_drv.h            |  2 +
> >>  drivers/gpu/drm/i915/i915_gem.c            | 64
> >>++++++++++++++++++++----------
> >>  drivers/gpu/drm/i915/i915_gem_evict.c      | 39 ++++++++++++++++++
> >>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++-
> >>  include/uapi/drm/i915_drm.h                | 12 ++++--
> >>  6 files changed, 111 insertions(+), 25 deletions(-)
> >>
> >
> >Extra support from the other patch aside, v6 already had rb from Akash
> >and this one,
> >Reviewed-by: Michel Thierry <michel.thierry@intel.com>
> 
> This patch was acked by the PDT so I merged it to drm-intel-next-queued.

Please revert immediately. We need to fix the ABI for canonical
addressing before proceeding. Then please work on the better patch.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v7] drm/i915: Add soft-pinning API for execbuffer
  2015-12-09 10:51             ` Chris Wilson
@ 2015-12-09 12:34               ` Tvrtko Ursulin
  2015-12-09 13:33                 ` Michel Thierry
  0 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-12-09 12:34 UTC (permalink / raw)
  To: Chris Wilson, Michel Thierry, Thomas Daniel, intel-gfx,
	Belgaumkar, Vinay, Goel, Akash, Kristian Høgsberg


On 09/12/15 10:51, Chris Wilson wrote:
> On Wed, Dec 09, 2015 at 10:30:41AM +0000, Tvrtko Ursulin wrote:
>>
>> On 08/12/15 18:49, Michel Thierry wrote:
>>> On 12/8/2015 11:55 AM, Thomas Daniel wrote:
>>>> From: Chris Wilson <chris@chris-wilson.co.uk>
>>>>
>>>> Userspace can pass in an offset that it presumes the object is located
>>>> at. The kernel will then do its utmost to fit the object into that
>>>> location. The assumption is that userspace is handling its own object
>>>> locations (for example along with full-ppgtt) and that the kernel will
>>>> rarely have to make space for the user's requests.
>>>>
>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>
>>>> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested
>>>> by Chris
>>>> Wilson.  Fixed incorrect error paths causing crash found by Michal
>>>> Winiarski.
>>>> (Not published externally)
>>>>
>>>> v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed
>>>> eviction
>>>> to allow eviction of soft-pinned objects when another soft-pinned
>>>> object used
>>>> by a subsequent execbuffer overlaps reported by Michal Winiarski.
>>>> (Not published externally)
>>>>
>>>> v4: Moved soft-pinned objects to the front of ordered_vmas so that
>>>> they are
>>>> pinned first after an address conflict happens to avoid repeated
>>>> conflicts in
>>>> rare cases (Suggested by Chris Wilson).  Expanded comment on
>>>> drm_i915_gem_exec_object2.offset to cover this new API.
>>>>
>>>> v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this
>>>> capability
>>>> (Kristian). Added check for multiple pinnings on eviction (Akash).
>>>> Made sure
>>>> buffers are not considered misplaced without the user specifying
>>>> EXEC_OBJECT_SUPPORTS_48B_ADDRESS.  User must assume responsibility for
>>>> any
>>>> addressing workarounds.  Updated object2.offset field comment again to
>>>> clarify
>>>> NO_RELOC case (Chris).  checkpatch cleanup.
>>>>
>>>> v6: Trivial rebase on latest drm-intel-nightly
>>>>
>>>> v7: Catch attempts to pin above the max virtual address size and return
>>>> EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and
>>>> EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt to pin
>>>> something at an offset above 4GB (Chris, Daniel Vetter).
>>>>
>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>> Cc: Akash Goel <akash.goel@intel.com>
>>>> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>>>> Cc: Michal Winiarski <michal.winiarski@intel.com>
>>>> Cc: Zou Nanhai <nanhai.zou@intel.com>
>>>> Cc: Kristian Høgsberg <hoegsberg@gmail.com>
>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>>>> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
>>>> ---
>>>>   drivers/gpu/drm/i915/i915_dma.c            |  3 ++
>>>>   drivers/gpu/drm/i915/i915_drv.h            |  2 +
>>>>   drivers/gpu/drm/i915/i915_gem.c            | 64
>>>> ++++++++++++++++++++----------
>>>>   drivers/gpu/drm/i915/i915_gem_evict.c      | 39 ++++++++++++++++++
>>>>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++-
>>>>   include/uapi/drm/i915_drm.h                | 12 ++++--
>>>>   6 files changed, 111 insertions(+), 25 deletions(-)
>>>>
>>>
>>> Extra support from the other patch aside, v6 already had rb from Akash
>>> and this one,
>>> Reviewed-by: Michel Thierry <michel.thierry@intel.com>
>>
>> This patch was acked by the PDT so I merged it to drm-intel-next-queued.
>
> Please revert immediately. We need to fix the ABI for canonical
> addressing before proceeding. Then please work on the better patch.

Sounds like this is a valid comment, guys please check the thread with 
subject "[PATCH v2] drm/i915: Avoid writing relocs with addresses in 
non-canonical form".

amd64 ABI mandates rules on virtual addresess - 
https://en.wikipedia.org/wiki/X86-64#Canonical_form_addresses.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v7] drm/i915: Add soft-pinning API for execbuffer
  2015-12-09 12:34               ` Tvrtko Ursulin
@ 2015-12-09 13:33                 ` Michel Thierry
  2015-12-09 13:35                   ` Tvrtko Ursulin
  0 siblings, 1 reply; 32+ messages in thread
From: Michel Thierry @ 2015-12-09 13:33 UTC (permalink / raw)
  To: Tvrtko Ursulin, Chris Wilson, Thomas Daniel, intel-gfx,
	Belgaumkar, Vinay, Goel, Akash, Kristian Høgsberg

On 12/9/2015 12:34 PM, Tvrtko Ursulin wrote:
>
> On 09/12/15 10:51, Chris Wilson wrote:
>> On Wed, Dec 09, 2015 at 10:30:41AM +0000, Tvrtko Ursulin wrote:
>>>
>>> On 08/12/15 18:49, Michel Thierry wrote:
>>>> On 12/8/2015 11:55 AM, Thomas Daniel wrote:
>>>>> From: Chris Wilson <chris@chris-wilson.co.uk>
>>>>>
>>>>> Userspace can pass in an offset that it presumes the object is located
>>>>> at. The kernel will then do its utmost to fit the object into that
>>>>> location. The assumption is that userspace is handling its own object
>>>>> locations (for example along with full-ppgtt) and that the kernel will
>>>>> rarely have to make space for the user's requests.
>>>>>
>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>>
>>>>> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested
>>>>> by Chris
>>>>> Wilson.  Fixed incorrect error paths causing crash found by Michal
>>>>> Winiarski.
>>>>> (Not published externally)
>>>>>
>>>>> v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed
>>>>> eviction
>>>>> to allow eviction of soft-pinned objects when another soft-pinned
>>>>> object used
>>>>> by a subsequent execbuffer overlaps reported by Michal Winiarski.
>>>>> (Not published externally)
>>>>>
>>>>> v4: Moved soft-pinned objects to the front of ordered_vmas so that
>>>>> they are
>>>>> pinned first after an address conflict happens to avoid repeated
>>>>> conflicts in
>>>>> rare cases (Suggested by Chris Wilson).  Expanded comment on
>>>>> drm_i915_gem_exec_object2.offset to cover this new API.
>>>>>
>>>>> v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this
>>>>> capability
>>>>> (Kristian). Added check for multiple pinnings on eviction (Akash).
>>>>> Made sure
>>>>> buffers are not considered misplaced without the user specifying
>>>>> EXEC_OBJECT_SUPPORTS_48B_ADDRESS.  User must assume responsibility for
>>>>> any
>>>>> addressing workarounds.  Updated object2.offset field comment again to
>>>>> clarify
>>>>> NO_RELOC case (Chris).  checkpatch cleanup.
>>>>>
>>>>> v6: Trivial rebase on latest drm-intel-nightly
>>>>>
>>>>> v7: Catch attempts to pin above the max virtual address size and
>>>>> return
>>>>> EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and
>>>>> EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt
>>>>> to pin
>>>>> something at an offset above 4GB (Chris, Daniel Vetter).
>>>>>
>>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> Cc: Akash Goel <akash.goel@intel.com>
>>>>> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>>>>> Cc: Michal Winiarski <michal.winiarski@intel.com>
>>>>> Cc: Zou Nanhai <nanhai.zou@intel.com>
>>>>> Cc: Kristian Høgsberg <hoegsberg@gmail.com>
>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>>>>> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
>>>>> ---
>>>>>   drivers/gpu/drm/i915/i915_dma.c            |  3 ++
>>>>>   drivers/gpu/drm/i915/i915_drv.h            |  2 +
>>>>>   drivers/gpu/drm/i915/i915_gem.c            | 64
>>>>> ++++++++++++++++++++----------
>>>>>   drivers/gpu/drm/i915/i915_gem_evict.c      | 39 ++++++++++++++++++
>>>>>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++-
>>>>>   include/uapi/drm/i915_drm.h                | 12 ++++--
>>>>>   6 files changed, 111 insertions(+), 25 deletions(-)
>>>>>
>>>>
>>>> Extra support from the other patch aside, v6 already had rb from Akash
>>>> and this one,
>>>> Reviewed-by: Michel Thierry <michel.thierry@intel.com>
>>>
>>> This patch was acked by the PDT so I merged it to drm-intel-next-queued.
>>
>> Please revert immediately. We need to fix the ABI for canonical
>> addressing before proceeding. Then please work on the better patch.
>
> Sounds like this is a valid comment, guys please check the thread with
> subject "[PATCH v2] drm/i915: Avoid writing relocs with addresses in
> non-canonical form".
>
> amd64 ABI mandates rules on virtual addresess -
> https://en.wikipedia.org/wiki/X86-64#Canonical_form_addresses.
>
> Regards,
>
> Tvrtko

And if the someone tries to use softpin with a virtual address in 
non-canonical form, reject with EINVAL?

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v7] drm/i915: Add soft-pinning API for execbuffer
  2015-12-09 13:33                 ` Michel Thierry
@ 2015-12-09 13:35                   ` Tvrtko Ursulin
  2015-12-09 19:09                     ` Belgaumkar, Vinay
  0 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-12-09 13:35 UTC (permalink / raw)
  To: Michel Thierry, Chris Wilson, Thomas Daniel, intel-gfx,
	Belgaumkar, Vinay, Goel, Akash, Kristian Høgsberg


On 09/12/15 13:33, Michel Thierry wrote:
> On 12/9/2015 12:34 PM, Tvrtko Ursulin wrote:
>>
>> On 09/12/15 10:51, Chris Wilson wrote:
>>> On Wed, Dec 09, 2015 at 10:30:41AM +0000, Tvrtko Ursulin wrote:
>>>>
>>>> On 08/12/15 18:49, Michel Thierry wrote:
>>>>> On 12/8/2015 11:55 AM, Thomas Daniel wrote:
>>>>>> From: Chris Wilson <chris@chris-wilson.co.uk>
>>>>>>
>>>>>> Userspace can pass in an offset that it presumes the object is
>>>>>> located
>>>>>> at. The kernel will then do its utmost to fit the object into that
>>>>>> location. The assumption is that userspace is handling its own object
>>>>>> locations (for example along with full-ppgtt) and that the kernel
>>>>>> will
>>>>>> rarely have to make space for the user's requests.
>>>>>>
>>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>>>
>>>>>> v2: Fixed incorrect eviction found by Michal Winiarski - fix
>>>>>> suggested
>>>>>> by Chris
>>>>>> Wilson.  Fixed incorrect error paths causing crash found by Michal
>>>>>> Winiarski.
>>>>>> (Not published externally)
>>>>>>
>>>>>> v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed
>>>>>> eviction
>>>>>> to allow eviction of soft-pinned objects when another soft-pinned
>>>>>> object used
>>>>>> by a subsequent execbuffer overlaps reported by Michal Winiarski.
>>>>>> (Not published externally)
>>>>>>
>>>>>> v4: Moved soft-pinned objects to the front of ordered_vmas so that
>>>>>> they are
>>>>>> pinned first after an address conflict happens to avoid repeated
>>>>>> conflicts in
>>>>>> rare cases (Suggested by Chris Wilson).  Expanded comment on
>>>>>> drm_i915_gem_exec_object2.offset to cover this new API.
>>>>>>
>>>>>> v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this
>>>>>> capability
>>>>>> (Kristian). Added check for multiple pinnings on eviction (Akash).
>>>>>> Made sure
>>>>>> buffers are not considered misplaced without the user specifying
>>>>>> EXEC_OBJECT_SUPPORTS_48B_ADDRESS.  User must assume responsibility
>>>>>> for
>>>>>> any
>>>>>> addressing workarounds.  Updated object2.offset field comment
>>>>>> again to
>>>>>> clarify
>>>>>> NO_RELOC case (Chris).  checkpatch cleanup.
>>>>>>
>>>>>> v6: Trivial rebase on latest drm-intel-nightly
>>>>>>
>>>>>> v7: Catch attempts to pin above the max virtual address size and
>>>>>> return
>>>>>> EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and
>>>>>> EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt
>>>>>> to pin
>>>>>> something at an offset above 4GB (Chris, Daniel Vetter).
>>>>>>
>>>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>>>> Cc: Akash Goel <akash.goel@intel.com>
>>>>>> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>>>>>> Cc: Michal Winiarski <michal.winiarski@intel.com>
>>>>>> Cc: Zou Nanhai <nanhai.zou@intel.com>
>>>>>> Cc: Kristian Høgsberg <hoegsberg@gmail.com>
>>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>>>>>> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
>>>>>> ---
>>>>>>   drivers/gpu/drm/i915/i915_dma.c            |  3 ++
>>>>>>   drivers/gpu/drm/i915/i915_drv.h            |  2 +
>>>>>>   drivers/gpu/drm/i915/i915_gem.c            | 64
>>>>>> ++++++++++++++++++++----------
>>>>>>   drivers/gpu/drm/i915/i915_gem_evict.c      | 39 ++++++++++++++++++
>>>>>>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++-
>>>>>>   include/uapi/drm/i915_drm.h                | 12 ++++--
>>>>>>   6 files changed, 111 insertions(+), 25 deletions(-)
>>>>>>
>>>>>
>>>>> Extra support from the other patch aside, v6 already had rb from Akash
>>>>> and this one,
>>>>> Reviewed-by: Michel Thierry <michel.thierry@intel.com>
>>>>
>>>> This patch was acked by the PDT so I merged it to
>>>> drm-intel-next-queued.
>>>
>>> Please revert immediately. We need to fix the ABI for canonical
>>> addressing before proceeding. Then please work on the better patch.
>>
>> Sounds like this is a valid comment, guys please check the thread with
>> subject "[PATCH v2] drm/i915: Avoid writing relocs with addresses in
>> non-canonical form".
>>
>> amd64 ABI mandates rules on virtual addresess -
>> https://en.wikipedia.org/wiki/X86-64#Canonical_form_addresses.
>>
>> Regards,
>>
>> Tvrtko
>
> And if the someone tries to use softpin with a virtual address in
> non-canonical form, reject with EINVAL?

I think so, since they are not valid userspace virtual addresses.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v7] drm/i915: Add soft-pinning API for execbuffer
  2015-12-09 13:35                   ` Tvrtko Ursulin
@ 2015-12-09 19:09                     ` Belgaumkar, Vinay
  0 siblings, 0 replies; 32+ messages in thread
From: Belgaumkar, Vinay @ 2015-12-09 19:09 UTC (permalink / raw)
  To: Tvrtko Ursulin, Thierry, Michel, Chris Wilson, Daniel, Thomas,
	intel-gfx, Goel, Akash, Kristian Høgsberg

The stress test will need to be modified to ensure a canonical address(currently uses starting address of 0x8000000000000). The invalid_vma test takes care of the non-canonical scenario in an indirect way. Do we still need a separate test for this change then?

Thanks,
Vinay. 

-----Original Message-----
From: Tvrtko Ursulin [mailto:tvrtko.ursulin@linux.intel.com] 
Sent: Wednesday, December 9, 2015 5:36 AM
To: Thierry, Michel; Chris Wilson; Daniel, Thomas; intel-gfx@lists.freedesktop.org; Belgaumkar, Vinay; Goel, Akash; Kristian Høgsberg
Subject: Re: [Intel-gfx] [PATCH v7] drm/i915: Add soft-pinning API for execbuffer


On 09/12/15 13:33, Michel Thierry wrote:
> On 12/9/2015 12:34 PM, Tvrtko Ursulin wrote:
>>
>> On 09/12/15 10:51, Chris Wilson wrote:
>>> On Wed, Dec 09, 2015 at 10:30:41AM +0000, Tvrtko Ursulin wrote:
>>>>
>>>> On 08/12/15 18:49, Michel Thierry wrote:
>>>>> On 12/8/2015 11:55 AM, Thomas Daniel wrote:
>>>>>> From: Chris Wilson <chris@chris-wilson.co.uk>
>>>>>>
>>>>>> Userspace can pass in an offset that it presumes the object is 
>>>>>> located at. The kernel will then do its utmost to fit the object 
>>>>>> into that location. The assumption is that userspace is handling 
>>>>>> its own object locations (for example along with full-ppgtt) and 
>>>>>> that the kernel will rarely have to make space for the user's 
>>>>>> requests.
>>>>>>
>>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>>>
>>>>>> v2: Fixed incorrect eviction found by Michal Winiarski - fix 
>>>>>> suggested by Chris Wilson.  Fixed incorrect error paths causing 
>>>>>> crash found by Michal Winiarski.
>>>>>> (Not published externally)
>>>>>>
>>>>>> v3: Rebased because of trivial conflict in object_bind_to_vm.  
>>>>>> Fixed eviction to allow eviction of soft-pinned objects when 
>>>>>> another soft-pinned object used by a subsequent execbuffer 
>>>>>> overlaps reported by Michal Winiarski.
>>>>>> (Not published externally)
>>>>>>
>>>>>> v4: Moved soft-pinned objects to the front of ordered_vmas so 
>>>>>> that they are pinned first after an address conflict happens to 
>>>>>> avoid repeated conflicts in rare cases (Suggested by Chris 
>>>>>> Wilson).  Expanded comment on drm_i915_gem_exec_object2.offset to 
>>>>>> cover this new API.
>>>>>>
>>>>>> v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting 
>>>>>> this capability (Kristian). Added check for multiple pinnings on 
>>>>>> eviction (Akash).
>>>>>> Made sure
>>>>>> buffers are not considered misplaced without the user specifying 
>>>>>> EXEC_OBJECT_SUPPORTS_48B_ADDRESS.  User must assume 
>>>>>> responsibility for any addressing workarounds.  Updated 
>>>>>> object2.offset field comment again to clarify NO_RELOC case 
>>>>>> (Chris).  checkpatch cleanup.
>>>>>>
>>>>>> v6: Trivial rebase on latest drm-intel-nightly
>>>>>>
>>>>>> v7: Catch attempts to pin above the max virtual address size and 
>>>>>> return EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS 
>>>>>> and EXEC_OBJECT_PINNED flags, user must pass both flags in any 
>>>>>> attempt to pin something at an offset above 4GB (Chris, Daniel 
>>>>>> Vetter).
>>>>>>
>>>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>>>> Cc: Akash Goel <akash.goel@intel.com>
>>>>>> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>>>>>> Cc: Michal Winiarski <michal.winiarski@intel.com>
>>>>>> Cc: Zou Nanhai <nanhai.zou@intel.com>
>>>>>> Cc: Kristian Høgsberg <hoegsberg@gmail.com>
>>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>>>>>> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
>>>>>> ---
>>>>>>   drivers/gpu/drm/i915/i915_dma.c            |  3 ++
>>>>>>   drivers/gpu/drm/i915/i915_drv.h            |  2 +
>>>>>>   drivers/gpu/drm/i915/i915_gem.c            | 64
>>>>>> ++++++++++++++++++++----------
>>>>>>   drivers/gpu/drm/i915/i915_gem_evict.c      | 39 ++++++++++++++++++
>>>>>>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++-
>>>>>>   include/uapi/drm/i915_drm.h                | 12 ++++--
>>>>>>   6 files changed, 111 insertions(+), 25 deletions(-)
>>>>>>
>>>>>
>>>>> Extra support from the other patch aside, v6 already had rb from 
>>>>> Akash and this one,
>>>>> Reviewed-by: Michel Thierry <michel.thierry@intel.com>
>>>>
>>>> This patch was acked by the PDT so I merged it to 
>>>> drm-intel-next-queued.
>>>
>>> Please revert immediately. We need to fix the ABI for canonical 
>>> addressing before proceeding. Then please work on the better patch.
>>
>> Sounds like this is a valid comment, guys please check the thread 
>> with subject "[PATCH v2] drm/i915: Avoid writing relocs with 
>> addresses in non-canonical form".
>>
>> amd64 ABI mandates rules on virtual addresess - 
>> https://en.wikipedia.org/wiki/X86-64#Canonical_form_addresses.
>>
>> Regards,
>>
>> Tvrtko
>
> And if the someone tries to use softpin with a virtual address in 
> non-canonical form, reject with EINVAL?

I think so, since they are not valid userspace virtual addresses.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2015-12-09 19:09 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-06  9:44 [PATCH] drm/i915: Add soft-pinning API for execbuffer Chris Wilson
2015-03-11  6:41 ` [Intel-gfx] " Zhenyu Wang
2015-04-29 13:28 ` Daniel, Thomas
2015-04-29 14:01   ` Chris Wilson
2015-06-30 14:13 ` [PATCH v4] " Thomas Daniel
2015-06-30 14:20   ` Daniel, Thomas
2015-07-15 14:55     ` Goel, Akash
2015-07-15 15:06       ` Chris Wilson
2015-07-15 15:41         ` Daniel, Thomas
2015-07-15 15:46           ` Chris Wilson
2015-07-15 15:58             ` Daniel, Thomas
2015-07-15 16:04               ` Chris Wilson
2015-07-04  5:29   ` Kristian Høgsberg
2015-07-04 12:23     ` Chris Wilson
2015-07-08 15:04       ` Daniel, Thomas
2015-07-08 15:35         ` Chris Wilson
2015-07-04  7:53   ` Chris Wilson
2015-07-20 16:41   ` [PATCH v5] " Thomas Daniel
2015-07-20 16:50     ` Chris Wilson
2015-10-16 10:59     ` [PATCH v6] " Thomas Daniel
2015-10-16 14:09       ` Goel, Akash
2015-10-16 14:15         ` Chris Wilson
2015-12-02 11:28       ` Tvrtko Ursulin
2015-12-02 11:35         ` Chris Wilson
2015-12-08 11:55       ` [PATCH v7] " Thomas Daniel
2015-12-08 18:49         ` Michel Thierry
2015-12-09 10:30           ` Tvrtko Ursulin
2015-12-09 10:51             ` Chris Wilson
2015-12-09 12:34               ` Tvrtko Ursulin
2015-12-09 13:33                 ` Michel Thierry
2015-12-09 13:35                   ` Tvrtko Ursulin
2015-12-09 19:09                     ` Belgaumkar, Vinay

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.