* [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt
@ 2014-08-09 16:37 Chris Wilson
  2014-08-09 16:37 ` [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
                   ` (2 more replies)
From: Chris Wilson @ 2014-08-09 16:37 UTC (permalink / raw)
  To: intel-gfx

If an object is not bound into the global GTT, then it cannot be
accessed via the GTT. This restores the original code that was muddled
by ppGTT. In the process, we remove a WARN that had long outlived its
usefulness and was simply being coded around instead.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 80b807b724a6..99250d27668d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3594,11 +3594,12 @@ int
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
 	uint32_t old_write_domain, old_read_domains;
 	int ret;
 
 	/* Not valid to be called on unbound objects. */
-	if (!i915_gem_obj_bound_any(obj))
+	if (vma == NULL)
 		return -EINVAL;
 
 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
@@ -3640,13 +3641,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 					    old_write_domain);
 
 	/* And bump the LRU for this access */
-	if (i915_gem_object_is_inactive(obj)) {
-		struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
-		if (vma)
-			list_move_tail(&vma->mm_list,
-				       &dev_priv->gtt.base.inactive_list);
-
-	}
+	if (i915_gem_object_is_inactive(obj))
+		list_move_tail(&vma->mm_list,
+			       &dev_priv->gtt.base.inactive_list);
 
 	return 0;
 }
@@ -3810,9 +3807,6 @@ static bool is_pin_display(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
 
-	if (list_empty(&obj->vma_list))
-		return false;
-
 	vma = i915_gem_obj_to_ggtt(obj);
 	if (!vma)
 		return false;
@@ -5255,12 +5249,6 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
 
-	/* This WARN has probably outlived its usefulness (callers already
-	 * WARN if they don't find the GGTT vma they expect). When removing,
-	 * remember to remove the pre-check in is_pin_display() as well */
-	if (WARN_ON(list_empty(&obj->vma_list)))
-		return NULL;
-
 	vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
 	if (vma->vm != obj_to_ggtt(obj))
 		return NULL;
-- 
2.1.0.rc1

* [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt
  2014-08-09 16:37 [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt Chris Wilson
@ 2014-08-09 16:37 ` Chris Wilson
  2014-08-09 19:29   ` Ben Widawsky
  2014-08-09 16:37 ` [PATCH 3/3] drm/i915: Remove fenced_gpu_access and pending_fenced_gpu_access Chris Wilson
  2014-08-11  9:36 ` [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt Daniel Vetter
From: Chris Wilson @ 2014-08-09 16:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky

With ppgtt, it is no longer correct to mark an object as
map_and_fenceable if we simply unbind it from the global gtt. This has
consequences during execbuffer where we simply use
obj->map_and_fenceable in use_cpu_reloc() to decide which access method
to use for writing into the object. Now for a ppgtt object,
map_and_fenceable will be true when it is not bound into the ggtt but
only in a ppgtt, leading to an invalid access on a non-llc architecture.

v2: Revamp and resend to ease future patches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c            |  8 +--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 82 ++++++++++++++----------------
 2 files changed, 43 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 99250d27668d..6366230c4e32 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2930,9 +2930,8 @@ int i915_vma_unbind(struct i915_vma *vma)
 	vma->unbind_vma(vma);
 
 	list_del_init(&vma->mm_list);
-	/* Avoid an unnecessary call to unbind on rebind. */
 	if (i915_is_ggtt(vma->vm))
-		obj->map_and_fenceable = true;
+		obj->map_and_fenceable = false;
 
 	drm_mm_remove_node(&vma->node);
 	i915_gem_vma_destroy(vma);
@@ -3284,6 +3283,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
 			return 0;
 		}
 	} else if (enable) {
+		if (WARN_ON(!obj->map_and_fenceable))
+			return -EINVAL;
+
 		reg = i915_find_fence_reg(dev);
 		if (IS_ERR(reg))
 			return PTR_ERR(reg);
@@ -4333,8 +4335,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	obj->fence_reg = I915_FENCE_REG_NONE;
 	obj->madv = I915_MADV_WILLNEED;
-	/* Avoid an unnecessary call to unbind on the first bind. */
-	obj->map_and_fenceable = true;
 
 	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 60998fc4e5b2..87e9599f9e4b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
 
 #define BATCH_OFFSET_BIAS (256*1024)
@@ -433,7 +434,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 	if (obj->active && in_atomic())
 		return -EFAULT;
 
-	if (use_cpu_reloc(obj))
+	if (use_cpu_reloc(obj) || !i915_gem_obj_ggtt_bound(obj))
 		ret = relocate_entry_cpu(obj, reloc, target_offset);
 	else
 		ret = relocate_entry_gtt(obj, reloc, target_offset);
@@ -535,14 +536,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
 }
 
 static int
-need_reloc_mappable(struct i915_vma *vma)
-{
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
-		i915_is_ggtt(vma->vm);
-}
-
-static int
 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 				struct intel_engine_cs *ring,
 				bool *need_reloc)
@@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
-	bool need_fence;
 	uint64_t flags;
 	int ret;
 
 	flags = 0;
-
-	need_fence =
-		has_fenced_gpu_access &&
-		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-		obj->tiling_mode != I915_TILING_NONE;
-	if (need_fence || need_reloc_mappable(vma))
+	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
 		flags |= PIN_MAPPABLE;
-
 	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
 		flags |= PIN_GLOBAL;
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
@@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 }
 
 static bool
-eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
+need_reloc_mappable(struct i915_vma *vma)
 {
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	struct drm_i915_gem_object *obj = vma->obj;
-	bool need_fence, need_mappable;
 
-	need_fence =
-		has_fenced_gpu_access &&
-		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-		obj->tiling_mode != I915_TILING_NONE;
-	need_mappable = need_fence || need_reloc_mappable(vma);
+	if (entry->relocation_count == 0)
+		return false;
 
-	WARN_ON((need_mappable || need_fence) &&
+	if (!i915_is_ggtt(vma->vm))
+		return false;
+
+	/* See also use_cpu_reloc() */
+	if (HAS_LLC(vma->obj->base.dev))
+		return false;
+
+	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+		return false;
+
+	return true;
+}
+
+static bool
+eb_vma_misplaced(struct i915_vma *vma)
+{
+	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	struct drm_i915_gem_object *obj = vma->obj;
+
+	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
 	       !i915_is_ggtt(vma->vm));
 
 	if (entry->alignment &&
 	    vma->node.start & (entry->alignment - 1))
 		return true;
 
-	if (need_mappable && !obj->map_and_fenceable)
+	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
 		return true;
 
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
@@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 			obj->tiling_mode != I915_TILING_NONE;
 		need_mappable = need_fence || need_reloc_mappable(vma);
 
-		if (need_mappable)
+		if (need_mappable) {
+			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
 			list_move(&vma->exec_list, &ordered_vmas);
-		else
+		} else
 			list_move_tail(&vma->exec_list, &ordered_vmas);
 
 		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
@@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 			if (!drm_mm_node_allocated(&vma->node))
 				continue;
 
-			if (eb_vma_misplaced(vma, has_fenced_gpu_access))
+			if (eb_vma_misplaced(vma))
 				ret = i915_vma_unbind(vma);
 			else
 				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -1386,25 +1387,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but bdw mucks it up again. */
-	if (flags & I915_DISPATCH_SECURE &&
-	    !batch_obj->has_global_gtt_mapping) {
-		/* When we have multiple VMs, we'll need to make sure that we
-		 * allocate space first */
-		struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
-		BUG_ON(!vma);
-		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
-	}
+	if (flags & I915_DISPATCH_SECURE) {
+		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+		if (ret)
+			goto err;
 
-	if (flags & I915_DISPATCH_SECURE)
 		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
-	else
+	} else
 		exec_start += i915_gem_obj_offset(batch_obj, vm);
 
 	ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
-			args, &eb->vmas, batch_obj, exec_start, flags);
-	if (ret)
-		goto err;
+					   args, &eb->vmas, batch_obj, exec_start, flags);
 
+	if (flags & I915_DISPATCH_SECURE)
+		i915_gem_object_ggtt_unpin(batch_obj);
 err:
 	/* the request owns the ref now */
 	i915_gem_context_unreference(ctx);
-- 
2.1.0.rc1

* [PATCH 3/3] drm/i915: Remove fenced_gpu_access and pending_fenced_gpu_access
  2014-08-09 16:37 [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt Chris Wilson
  2014-08-09 16:37 ` [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
@ 2014-08-09 16:37 ` Chris Wilson
  2014-08-11 10:20   ` Daniel Vetter
  2014-08-11  9:36 ` [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt Daniel Vetter
From: Chris Wilson @ 2014-08-09 16:37 UTC (permalink / raw)
  To: intel-gfx

This migrates the fence tracking onto the existing seqno
infrastructure so that the later conversion to tracking via requests is
simplified.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |  7 ------
 drivers/gpu/drm/i915/i915_gem.c            | 17 ---------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 34 +++++++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_tiling.c     |  2 +-
 4 files changed, 20 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fab97bc3215f..5c3f033ff928 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1777,13 +1777,6 @@ struct drm_i915_gem_object {
 	 * Only honoured if hardware has relevant pte bit
 	 */
 	unsigned long gt_ro:1;
-
-	/*
-	 * Is the GPU currently using a fence to access this buffer,
-	 */
-	unsigned int pending_fenced_gpu_access:1;
-	unsigned int fenced_gpu_access:1;
-
 	unsigned int cache_level:3;
 
 	unsigned int has_aliasing_ppgtt_mapping:1;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6366230c4e32..e808b449ac75 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2163,8 +2163,6 @@ static void
 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 			       struct intel_engine_cs *ring)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 seqno = intel_ring_get_seqno(ring);
 
 	BUG_ON(ring == NULL);
@@ -2183,19 +2181,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 	list_move_tail(&obj->ring_list, &ring->active_list);
 
 	obj->last_read_seqno = seqno;
-
-	if (obj->fenced_gpu_access) {
-		obj->last_fenced_seqno = seqno;
-
-		/* Bump MRU to take account of the delayed flush */
-		if (obj->fence_reg != I915_FENCE_REG_NONE) {
-			struct drm_i915_fence_reg *reg;
-
-			reg = &dev_priv->fence_regs[obj->fence_reg];
-			list_move_tail(&reg->lru_list,
-				       &dev_priv->mm.fence_list);
-		}
-	}
 }
 
 void i915_vma_move_to_active(struct i915_vma *vma,
@@ -2231,7 +2216,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 	obj->base.write_domain = 0;
 
 	obj->last_fenced_seqno = 0;
-	obj->fenced_gpu_access = false;
 
 	obj->active = 0;
 	drm_gem_object_unreference(&obj->base);
@@ -3176,7 +3160,6 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
 		obj->last_fenced_seqno = 0;
 	}
 
-	obj->fenced_gpu_access = false;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 87e9599f9e4b..fbac7b51736c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -542,7 +542,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	uint64_t flags;
 	int ret;
 
@@ -560,17 +559,13 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 
 	entry->flags |= __EXEC_OBJECT_HAS_PIN;
 
-	if (has_fenced_gpu_access) {
-		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
-			ret = i915_gem_object_get_fence(obj);
-			if (ret)
-				return ret;
-
-			if (i915_gem_object_pin_fence(obj))
-				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+		ret = i915_gem_object_get_fence(obj);
+		if (ret)
+			return ret;
 
-			obj->pending_fenced_gpu_access = true;
-		}
+		if (i915_gem_object_pin_fence(obj))
+			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
 	if (entry->offset != vma->node.start) {
@@ -658,8 +653,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 		obj = vma->obj;
 		entry = vma->exec_entry;
 
+		if (!has_fenced_gpu_access)
+			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
 		need_fence =
-			has_fenced_gpu_access &&
 			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 			obj->tiling_mode != I915_TILING_NONE;
 		need_mappable = need_fence || need_reloc_mappable(vma);
@@ -672,7 +668,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 
 		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
 		obj->base.pending_write_domain = 0;
-		obj->pending_fenced_gpu_access = false;
 	}
 	list_splice(&ordered_vmas, vmas);
 
@@ -959,9 +954,11 @@ static void
 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 				   struct intel_engine_cs *ring)
 {
+	u32 seqno = intel_ring_get_seqno(ring);
 	struct i915_vma *vma;
 
 	list_for_each_entry(vma, vmas, exec_list) {
+		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 		struct drm_i915_gem_object *obj = vma->obj;
 		u32 old_read = obj->base.read_domains;
 		u32 old_write = obj->base.write_domain;
@@ -970,18 +967,25 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 		if (obj->base.write_domain == 0)
 			obj->base.pending_read_domains |= obj->base.read_domains;
 		obj->base.read_domains = obj->base.pending_read_domains;
-		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
 
 		i915_vma_move_to_active(vma, ring);
 		if (obj->base.write_domain) {
 			obj->dirty = 1;
-			obj->last_write_seqno = intel_ring_get_seqno(ring);
+			obj->last_write_seqno = seqno;
 
 			intel_fb_obj_invalidate(obj, ring);
 
 			/* update for the implicit flush after a batch */
 			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 		}
+		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+			obj->last_fenced_seqno = seqno;
+			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+				struct drm_i915_private *dev_priv = to_i915(ring->dev);
+				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
+					       &dev_priv->mm.fence_list);
+			}
+		}
 
 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index cb150e8b4336..7e623bf097a1 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -376,7 +376,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 
 		if (ret == 0) {
 			obj->fence_dirty =
-				obj->fenced_gpu_access ||
+				obj->last_fenced_seqno ||
 				obj->fence_reg != I915_FENCE_REG_NONE;
 
 			obj->tiling_mode = args->tiling_mode;
-- 
2.1.0.rc1

* Re: [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt
  2014-08-09 16:37 ` [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
@ 2014-08-09 19:29   ` Ben Widawsky
  2014-08-10  6:55     ` Chris Wilson
  2014-08-10  7:06     ` [PATCH] drm/i915: Force CPU relocations if not GTT mapped Chris Wilson
From: Ben Widawsky @ 2014-08-09 19:29 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx

On Sat, Aug 09, 2014 at 05:37:23PM +0100, Chris Wilson wrote:
> With ppgtt, it is no longer correct to mark an object as
> map_and_fenceable if we simply unbind it from the global gtt. This has
> consequences during execbuffer where we simply use
> obj->map_and_fenceable in use_cpu_reloc() to decide which access method
> to use for writing into the object. Now for a ppgtt object,
> map_and_fenceable will be true when it is not bound into the ggtt but
> only in a ppgtt, leading to an invalid access on a non-llc architecture.

The last sentence confused me for some reason. I think the first two
sentences were perfectly succinct in describing the problem.

> 
> v2: Revamp and resend to ease future patches.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>

I didn't read the patch closely, but the problem is clear. It seems like
the one hunk:
> -     if (use_cpu_reloc(obj))
> +     if (use_cpu_reloc(obj) || !i915_gem_obj_ggtt_bound(obj))

would be sufficient to fix the problem.

I had a local solution at one point (I'm too lazy to check what I've
sent out these days) where I turned map_and_fenceable into a getter(). I
thought that looked pretty nice as well.
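
A getter along those lines might look roughly like the sketch below.
To be clear, this is hypothetical code, not something from this
series: it reuses i915_gem_obj_to_ggtt() from patch 1 plus the
mappable_end bound checked in i915_gem_set_tiling(), and it ignores
the tiling/fence-register constraints for brevity:

static bool
i915_gem_object_get_map_and_fenceable(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);

	/* Only a GGTT binding inside the mappable aperture can be
	 * mapped and fenced through the aperture by the CPU. */
	if (vma == NULL || !drm_mm_node_allocated(&vma->node))
		return false;

	return vma->node.start + obj->base.size <=
	       dev_priv->gtt.mappable_end;
}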

lgtm

[snip]

-- 
Ben Widawsky, Intel Open Source Technology Center

* Re: [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt
  2014-08-09 19:29   ` Ben Widawsky
@ 2014-08-10  6:55     ` Chris Wilson
  2014-08-10  7:04       ` Chris Wilson
  2014-08-10  7:06     ` [PATCH] drm/i915: Force CPU relocations if not GTT mapped Chris Wilson
From: Chris Wilson @ 2014-08-10  6:55 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Daniel Vetter, intel-gfx

On Sat, Aug 09, 2014 at 12:29:17PM -0700, Ben Widawsky wrote:
> On Sat, Aug 09, 2014 at 05:37:23PM +0100, Chris Wilson wrote:
> > With ppgtt, it is no longer correct to mark an object as
> > map_and_fenceable if we simply unbind it from the global gtt. This has
> > consequences during execbuffer where we simply use
> > obj->map_and_fenceable in use_cpu_reloc() to decide which access method
> > to use for writing into the object. Now for a ppgtt object,
> > map_and_fenceable will be true when it is not bound into the ggtt but
> > only in a ppgtt, leading to an invalid access on a non-llc architecture.
> 
> The last sentence confused me for some reason. I think the first two
> sentences were perfectly succinct in describing the problem.

Actually I think the last sentence is obsolete. So long as we only tweak
map_and_fenceable when manipulating the ggtt, the current optimisation
still holds and the subject is wrong.
 
> > v2: Revamp and resend to ease future patches.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> I didn't read the patch closely, but the problem is clear. It seems like
> the one hunk:
> > -     if (use_cpu_reloc(obj))
> > +     if (use_cpu_reloc(obj) || !i915_gem_obj_ggtt_bound(obj))
> 
> would be sufficient to fix the problem.

Right, I wanted this patch as later patches build upon the flags and
using the flags makes the code cleaner. This tweak was something that
occurred to me when reviewing the patch and looking at recent bug
reports.

Let me go back and look at whether we can keep the obj->map_and_fenceable
micro-optimisation.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt
  2014-08-10  6:55     ` Chris Wilson
@ 2014-08-10  7:04       ` Chris Wilson
From: Chris Wilson @ 2014-08-10  7:04 UTC (permalink / raw)
  To: Ben Widawsky, intel-gfx, Daniel Vetter

On Sun, Aug 10, 2014 at 07:55:46AM +0100, Chris Wilson wrote:
> On Sat, Aug 09, 2014 at 12:29:17PM -0700, Ben Widawsky wrote:
> > I didn't read the patch closely, but the problem is clear. It seems like
> > the one hunk:
> > > -     if (use_cpu_reloc(obj))
> > > +     if (use_cpu_reloc(obj) || !i915_gem_obj_ggtt_bound(obj))
> > 
> > would be sufficient to fix the problem.
> 
> Right, I wanted this patch as later patches build upon the flags and
> using the flags makes the code cleaner. This tweak was something that
> occurred to me when reviewing the patch and looking at recent bug
> reports.

As it happens, that is the key to the patch. Setting
obj->map_and_fenceable correctly makes that tweak redundant.
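
For reference, use_cpu_reloc() at this point in the series reads
roughly as below -- a reconstruction from the commit message and the
need_reloc_mappable() rewrite, not a verbatim quote from the tree:

static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return (HAS_LLC(obj->base.dev) ||
		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
		/* with an accurate flag, this already forces the CPU
		 * path for objects without a GGTT binding */
		!obj->map_and_fenceable ||
		obj->cache_level != I915_CACHE_NONE);
}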
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* [PATCH] drm/i915: Force CPU relocations if not GTT mapped
  2014-08-09 19:29   ` Ben Widawsky
  2014-08-10  6:55     ` Chris Wilson
@ 2014-08-10  7:06     ` Chris Wilson
  2014-08-11 10:02       ` Daniel Vetter
From: Chris Wilson @ 2014-08-10  7:06 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky

Move the decision on whether we need to have a mappable object during
execbuffer to the fore and then reuse that decision by propagating the
flag through to reservation. As a corollary, before doing the actual
relocation through the GTT, we can make sure that we do have a GTT
mapping through which to operate.

v2: Revamp and resend to ease future patches.
v3: Refresh patch rationale

References: https://bugs.freedesktop.org/show_bug.cgi?id=81094
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c            |  8 +--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 80 ++++++++++++++----------------
 2 files changed, 42 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 99250d27668d..6366230c4e32 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2930,9 +2930,8 @@ int i915_vma_unbind(struct i915_vma *vma)
 	vma->unbind_vma(vma);
 
 	list_del_init(&vma->mm_list);
-	/* Avoid an unnecessary call to unbind on rebind. */
 	if (i915_is_ggtt(vma->vm))
-		obj->map_and_fenceable = true;
+		obj->map_and_fenceable = false;
 
 	drm_mm_remove_node(&vma->node);
 	i915_gem_vma_destroy(vma);
@@ -3284,6 +3283,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
 			return 0;
 		}
 	} else if (enable) {
+		if (WARN_ON(!obj->map_and_fenceable))
+			return -EINVAL;
+
 		reg = i915_find_fence_reg(dev);
 		if (IS_ERR(reg))
 			return PTR_ERR(reg);
@@ -4333,8 +4335,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	obj->fence_reg = I915_FENCE_REG_NONE;
 	obj->madv = I915_MADV_WILLNEED;
-	/* Avoid an unnecessary call to unbind on the first bind. */
-	obj->map_and_fenceable = true;
 
 	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 60998fc4e5b2..8b734d5d16b4 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
 
 #define BATCH_OFFSET_BIAS (256*1024)
@@ -535,14 +536,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
 }
 
 static int
-need_reloc_mappable(struct i915_vma *vma)
-{
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
-		i915_is_ggtt(vma->vm);
-}
-
-static int
 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 				struct intel_engine_cs *ring,
 				bool *need_reloc)
@@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
-	bool need_fence;
 	uint64_t flags;
 	int ret;
 
 	flags = 0;
-
-	need_fence =
-		has_fenced_gpu_access &&
-		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-		obj->tiling_mode != I915_TILING_NONE;
-	if (need_fence || need_reloc_mappable(vma))
+	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
 		flags |= PIN_MAPPABLE;
-
 	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
 		flags |= PIN_GLOBAL;
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
@@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 }
 
 static bool
-eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
+need_reloc_mappable(struct i915_vma *vma)
 {
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	struct drm_i915_gem_object *obj = vma->obj;
-	bool need_fence, need_mappable;
 
-	need_fence =
-		has_fenced_gpu_access &&
-		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-		obj->tiling_mode != I915_TILING_NONE;
-	need_mappable = need_fence || need_reloc_mappable(vma);
+	if (entry->relocation_count == 0)
+		return false;
 
-	WARN_ON((need_mappable || need_fence) &&
+	if (!i915_is_ggtt(vma->vm))
+		return false;
+
+	/* See also use_cpu_reloc() */
+	if (HAS_LLC(vma->obj->base.dev))
+		return false;
+
+	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+		return false;
+
+	return true;
+}
+
+static bool
+eb_vma_misplaced(struct i915_vma *vma)
+{
+	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	struct drm_i915_gem_object *obj = vma->obj;
+
+	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
 	       !i915_is_ggtt(vma->vm));
 
 	if (entry->alignment &&
 	    vma->node.start & (entry->alignment - 1))
 		return true;
 
-	if (need_mappable && !obj->map_and_fenceable)
+	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
 		return true;
 
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
@@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 			obj->tiling_mode != I915_TILING_NONE;
 		need_mappable = need_fence || need_reloc_mappable(vma);
 
-		if (need_mappable)
+		if (need_mappable) {
+			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
 			list_move(&vma->exec_list, &ordered_vmas);
-		else
+		} else
 			list_move_tail(&vma->exec_list, &ordered_vmas);
 
 		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
@@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 			if (!drm_mm_node_allocated(&vma->node))
 				continue;
 
-			if (eb_vma_misplaced(vma, has_fenced_gpu_access))
+			if (eb_vma_misplaced(vma))
 				ret = i915_vma_unbind(vma);
 			else
 				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -1386,25 +1387,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but bdw mucks it up again. */
-	if (flags & I915_DISPATCH_SECURE &&
-	    !batch_obj->has_global_gtt_mapping) {
-		/* When we have multiple VMs, we'll need to make sure that we
-		 * allocate space first */
-		struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
-		BUG_ON(!vma);
-		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
-	}
+	if (flags & I915_DISPATCH_SECURE) {
+		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+		if (ret)
+			goto err;
 
-	if (flags & I915_DISPATCH_SECURE)
 		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
-	else
+	} else
 		exec_start += i915_gem_obj_offset(batch_obj, vm);
 
 	ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
-			args, &eb->vmas, batch_obj, exec_start, flags);
-	if (ret)
-		goto err;
+					   args, &eb->vmas, batch_obj, exec_start, flags);
 
+	if (flags & I915_DISPATCH_SECURE)
+		i915_gem_object_ggtt_unpin(batch_obj);
 err:
 	/* the request owns the ref now */
 	i915_gem_context_unreference(ctx);
-- 
2.1.0.rc1

* Re: [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt
  2014-08-09 16:37 [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt Chris Wilson
  2014-08-09 16:37 ` [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
  2014-08-09 16:37 ` [PATCH 3/3] drm/i915: Remove fenced_gpu_access and pending_fenced_gpu_access Chris Wilson
@ 2014-08-11  9:36 ` Daniel Vetter
From: Daniel Vetter @ 2014-08-11  9:36 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Sat, Aug 09, 2014 at 05:37:22PM +0100, Chris Wilson wrote:
> If an object is not bound into the global GTT, then it cannot be
> accessed via the GTT. This restores the original code that was muddled
> by ppGTT. In the process, we remove a WARN that had long outlived its
> usefulness and was simply being coded around instead.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 22 +++++-----------------
>  1 file changed, 5 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 80b807b724a6..99250d27668d 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3594,11 +3594,12 @@ int
>  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  {
>  	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> +	struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
>  	uint32_t old_write_domain, old_read_domains;
>  	int ret;
>  
>  	/* Not valid to be called on unbound objects. */
> -	if (!i915_gem_obj_bound_any(obj))
> +	if (vma == NULL)
>  		return -EINVAL;
>  
>  	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
> @@ -3640,13 +3641,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  					    old_write_domain);
>  
>  	/* And bump the LRU for this access */
> -	if (i915_gem_object_is_inactive(obj)) {
> -		struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
> -		if (vma)
> -			list_move_tail(&vma->mm_list,
> -				       &dev_priv->gtt.base.inactive_list);
> -
> -	}
> +	if (i915_gem_object_is_inactive(obj))

Ben will put a few more needles into his danvet voodoo doll, but I guess
this here is a fairly clear indicator that tracking the active bit on the
object instead of the vma wasn't an awesome idea ...

Anyway, patch looks good, merged.
-Daniel

> +		list_move_tail(&vma->mm_list,
> +			       &dev_priv->gtt.base.inactive_list);
>  
>  	return 0;
>  }
> @@ -3810,9 +3807,6 @@ static bool is_pin_display(struct drm_i915_gem_object *obj)
>  {
>  	struct i915_vma *vma;
>  
> -	if (list_empty(&obj->vma_list))
> -		return false;
> -
>  	vma = i915_gem_obj_to_ggtt(obj);
>  	if (!vma)
>  		return false;
> @@ -5255,12 +5249,6 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
>  {
>  	struct i915_vma *vma;
>  
> -	/* This WARN has probably outlived its usefulness (callers already
> -	 * WARN if they don't find the GGTT vma they expect). When removing,
> -	 * remember to remove the pre-check in is_pin_display() as well */
> -	if (WARN_ON(list_empty(&obj->vma_list)))
> -		return NULL;
> -
>  	vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
>  	if (vma->vm != obj_to_ggtt(obj))
>  		return NULL;
> -- 
> 2.1.0.rc1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [PATCH] drm/i915: Force CPU relocations if not GTT mapped
  2014-08-10  7:06     ` [PATCH] drm/i915: Force CPU relocations if not GTT mapped Chris Wilson
@ 2014-08-11 10:02       ` Daniel Vetter
From: Daniel Vetter @ 2014-08-11 10:02 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx, Ben Widawsky

On Sun, Aug 10, 2014 at 08:06:57AM +0100, Chris Wilson wrote:
> Move the decision on whether we need to have a mappable object during
> execbuffer to the fore and then reuse that decision by propagating the
> flag through to reservation. As a corollary, before doing the actual
> relocation through the GTT, we can make sure that we do have a GTT
> mapping through which to operate.
> 
> v2: Revamp and resend to ease future patches.
> v3: Refresh patch rationale
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=81094
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>

Ok, the secure batch hunk in here looks rather unrelated and imo also a
bit incomplete. I've dropped it. And I've added a bit of text to the
commit message to explain why this patch touches map_and_fenceable logic.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem.c            |  8 +--
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 80 ++++++++++++++----------------
>  2 files changed, 42 insertions(+), 46 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 99250d27668d..6366230c4e32 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2930,9 +2930,8 @@ int i915_vma_unbind(struct i915_vma *vma)
>  	vma->unbind_vma(vma);
>  
>  	list_del_init(&vma->mm_list);
> -	/* Avoid an unnecessary call to unbind on rebind. */
>  	if (i915_is_ggtt(vma->vm))
> -		obj->map_and_fenceable = true;
> +		obj->map_and_fenceable = false;
>  
>  	drm_mm_remove_node(&vma->node);
>  	i915_gem_vma_destroy(vma);
> @@ -3284,6 +3283,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
>  			return 0;
>  		}
>  	} else if (enable) {
> +		if (WARN_ON(!obj->map_and_fenceable))
> +			return -EINVAL;
> +
>  		reg = i915_find_fence_reg(dev);
>  		if (IS_ERR(reg))
>  			return PTR_ERR(reg);
> @@ -4333,8 +4335,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  
>  	obj->fence_reg = I915_FENCE_REG_NONE;
>  	obj->madv = I915_MADV_WILLNEED;
> -	/* Avoid an unnecessary call to unbind on the first bind. */
> -	obj->map_and_fenceable = true;
>  
>  	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 60998fc4e5b2..8b734d5d16b4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -35,6 +35,7 @@
>  
>  #define  __EXEC_OBJECT_HAS_PIN (1<<31)
>  #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
> +#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
>  #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
>  
>  #define BATCH_OFFSET_BIAS (256*1024)
> @@ -535,14 +536,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
>  }
>  
>  static int
> -need_reloc_mappable(struct i915_vma *vma)
> -{
> -	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
> -		i915_is_ggtt(vma->vm);
> -}
> -
> -static int
>  i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  				struct intel_engine_cs *ring,
>  				bool *need_reloc)
> @@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  	struct drm_i915_gem_object *obj = vma->obj;
>  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
>  	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
> -	bool need_fence;
>  	uint64_t flags;
>  	int ret;
>  
>  	flags = 0;
> -
> -	need_fence =
> -		has_fenced_gpu_access &&
> -		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> -		obj->tiling_mode != I915_TILING_NONE;
> -	if (need_fence || need_reloc_mappable(vma))
> +	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
>  		flags |= PIN_MAPPABLE;
> -
>  	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
>  		flags |= PIN_GLOBAL;
>  	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
> @@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  }
>  
>  static bool
> -eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
> +need_reloc_mappable(struct i915_vma *vma)
>  {
>  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -	struct drm_i915_gem_object *obj = vma->obj;
> -	bool need_fence, need_mappable;
>  
> -	need_fence =
> -		has_fenced_gpu_access &&
> -		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> -		obj->tiling_mode != I915_TILING_NONE;
> -	need_mappable = need_fence || need_reloc_mappable(vma);
> +	if (entry->relocation_count == 0)
> +		return false;
>  
> -	WARN_ON((need_mappable || need_fence) &&
> +	if (!i915_is_ggtt(vma->vm))
> +		return false;
> +
> +	/* See also use_cpu_reloc() */
> +	if (HAS_LLC(vma->obj->base.dev))
> +		return false;
> +
> +	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
> +		return false;
> +
> +	return true;
> +}
> +
> +static bool
> +eb_vma_misplaced(struct i915_vma *vma)
> +{
> +	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> +	struct drm_i915_gem_object *obj = vma->obj;
> +
> +	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
>  	       !i915_is_ggtt(vma->vm));
>  
>  	if (entry->alignment &&
>  	    vma->node.start & (entry->alignment - 1))
>  		return true;
>  
> -	if (need_mappable && !obj->map_and_fenceable)
> +	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
>  		return true;
>  
>  	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
> @@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>  			obj->tiling_mode != I915_TILING_NONE;
>  		need_mappable = need_fence || need_reloc_mappable(vma);
>  
> -		if (need_mappable)
> +		if (need_mappable) {
> +			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>  			list_move(&vma->exec_list, &ordered_vmas);
> -		else
> +		} else
>  			list_move_tail(&vma->exec_list, &ordered_vmas);
>  
>  		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
> @@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>  			if (!drm_mm_node_allocated(&vma->node))
>  				continue;
>  
> -			if (eb_vma_misplaced(vma, has_fenced_gpu_access))
> +			if (eb_vma_misplaced(vma))
>  				ret = i915_vma_unbind(vma);
>  			else
>  				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
> @@ -1386,25 +1387,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
>  	 * batch" bit. Hence we need to pin secure batches into the global gtt.
>  	 * hsw should have this fixed, but bdw mucks it up again. */
> -	if (flags & I915_DISPATCH_SECURE &&
> -	    !batch_obj->has_global_gtt_mapping) {
> -		/* When we have multiple VMs, we'll need to make sure that we
> -		 * allocate space first */
> -		struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
> -		BUG_ON(!vma);
> -		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
> -	}
> +	if (flags & I915_DISPATCH_SECURE) {
> +		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
> +		if (ret)
> +			goto err;
>  
> -	if (flags & I915_DISPATCH_SECURE)
>  		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
> -	else
> +	} else
>  		exec_start += i915_gem_obj_offset(batch_obj, vm);
>  
>  	ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
> -			args, &eb->vmas, batch_obj, exec_start, flags);
> -	if (ret)
> -		goto err;
> +					   args, &eb->vmas, batch_obj, exec_start, flags);
>  
> +	if (flags & I915_DISPATCH_SECURE)
> +		i915_gem_object_ggtt_unpin(batch_obj);
>  err:
>  	/* the request owns the ref now */
>  	i915_gem_context_unreference(ctx);
> -- 
> 2.1.0.rc1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [PATCH 3/3] drm/i915: Remove fenced_gpu_access and pending_fenced_gpu_access
  2014-08-09 16:37 ` [PATCH 3/3] drm/i915: Remove fenced_gpu_access and pending_fenced_gpu_access Chris Wilson
@ 2014-08-11 10:20   ` Daniel Vetter
From: Daniel Vetter @ 2014-08-11 10:20 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Sat, Aug 09, 2014 at 05:37:24PM +0100, Chris Wilson wrote:
> This migrates the fence tracking onto the existing seqno
> infrastructure so that the later conversion to tracking via requests is
> simplified.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Pulled in all 3 with the changes to patch 2 as discussed on irc.

Thanks, Daniel
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  7 ------
>  drivers/gpu/drm/i915/i915_gem.c            | 17 ---------------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 34 +++++++++++++++++-------------
>  drivers/gpu/drm/i915/i915_gem_tiling.c     |  2 +-
>  4 files changed, 20 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index fab97bc3215f..5c3f033ff928 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1777,13 +1777,6 @@ struct drm_i915_gem_object {
>  	 * Only honoured if hardware has relevant pte bit
>  	 */
>  	unsigned long gt_ro:1;
> -
> -	/*
> -	 * Is the GPU currently using a fence to access this buffer,
> -	 */
> -	unsigned int pending_fenced_gpu_access:1;
> -	unsigned int fenced_gpu_access:1;
> -
>  	unsigned int cache_level:3;
>  
>  	unsigned int has_aliasing_ppgtt_mapping:1;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 6366230c4e32..e808b449ac75 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2163,8 +2163,6 @@ static void
>  i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
>  			       struct intel_engine_cs *ring)
>  {
> -	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
>  	u32 seqno = intel_ring_get_seqno(ring);
>  
>  	BUG_ON(ring == NULL);
> @@ -2183,19 +2181,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
>  	list_move_tail(&obj->ring_list, &ring->active_list);
>  
>  	obj->last_read_seqno = seqno;
> -
> -	if (obj->fenced_gpu_access) {
> -		obj->last_fenced_seqno = seqno;
> -
> -		/* Bump MRU to take account of the delayed flush */
> -		if (obj->fence_reg != I915_FENCE_REG_NONE) {
> -			struct drm_i915_fence_reg *reg;
> -
> -			reg = &dev_priv->fence_regs[obj->fence_reg];
> -			list_move_tail(&reg->lru_list,
> -				       &dev_priv->mm.fence_list);
> -		}
> -	}
>  }
>  
>  void i915_vma_move_to_active(struct i915_vma *vma,
> @@ -2231,7 +2216,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
>  	obj->base.write_domain = 0;
>  
>  	obj->last_fenced_seqno = 0;
> -	obj->fenced_gpu_access = false;
>  
>  	obj->active = 0;
>  	drm_gem_object_unreference(&obj->base);
> @@ -3176,7 +3160,6 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
>  		obj->last_fenced_seqno = 0;
>  	}
>  
> -	obj->fenced_gpu_access = false;
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 87e9599f9e4b..fbac7b51736c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -542,7 +542,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  {
>  	struct drm_i915_gem_object *obj = vma->obj;
>  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
>  	uint64_t flags;
>  	int ret;
>  
> @@ -560,17 +559,13 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  
>  	entry->flags |= __EXEC_OBJECT_HAS_PIN;
>  
> -	if (has_fenced_gpu_access) {
> -		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
> -			ret = i915_gem_object_get_fence(obj);
> -			if (ret)
> -				return ret;
> -
> -			if (i915_gem_object_pin_fence(obj))
> -				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
> +	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
> +		ret = i915_gem_object_get_fence(obj);
> +		if (ret)
> +			return ret;
>  
> -			obj->pending_fenced_gpu_access = true;
> -		}
> +		if (i915_gem_object_pin_fence(obj))
> +			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
>  	}
>  
>  	if (entry->offset != vma->node.start) {
> @@ -658,8 +653,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>  		obj = vma->obj;
>  		entry = vma->exec_entry;
>  
> +		if (!has_fenced_gpu_access)
> +			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
>  		need_fence =
> -			has_fenced_gpu_access &&
>  			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
>  			obj->tiling_mode != I915_TILING_NONE;
>  		need_mappable = need_fence || need_reloc_mappable(vma);
> @@ -672,7 +668,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>  
>  		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
>  		obj->base.pending_write_domain = 0;
> -		obj->pending_fenced_gpu_access = false;
>  	}
>  	list_splice(&ordered_vmas, vmas);
>  
> @@ -959,9 +954,11 @@ static void
>  i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>  				   struct intel_engine_cs *ring)
>  {
> +	u32 seqno = intel_ring_get_seqno(ring);
>  	struct i915_vma *vma;
>  
>  	list_for_each_entry(vma, vmas, exec_list) {
> +		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
>  		struct drm_i915_gem_object *obj = vma->obj;
>  		u32 old_read = obj->base.read_domains;
>  		u32 old_write = obj->base.write_domain;
> @@ -970,18 +967,25 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>  		if (obj->base.write_domain == 0)
>  			obj->base.pending_read_domains |= obj->base.read_domains;
>  		obj->base.read_domains = obj->base.pending_read_domains;
> -		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
>  
>  		i915_vma_move_to_active(vma, ring);
>  		if (obj->base.write_domain) {
>  			obj->dirty = 1;
> -			obj->last_write_seqno = intel_ring_get_seqno(ring);
> +			obj->last_write_seqno = seqno;
>  
>  			intel_fb_obj_invalidate(obj, ring);
>  
>  			/* update for the implicit flush after a batch */
>  			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
>  		}
> +		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
> +			obj->last_fenced_seqno = seqno;
> +			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
> +				struct drm_i915_private *dev_priv = to_i915(ring->dev);
> +				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
> +					       &dev_priv->mm.fence_list);
> +			}
> +		}
>  
>  		trace_i915_gem_object_change_domain(obj, old_read, old_write);
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index cb150e8b4336..7e623bf097a1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -376,7 +376,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
>  
>  		if (ret == 0) {
>  			obj->fence_dirty =
> -				obj->fenced_gpu_access ||
> +				obj->last_fenced_seqno ||
>  				obj->fence_reg != I915_FENCE_REG_NONE;
>  
>  			obj->tiling_mode = args->tiling_mode;
> -- 
> 2.1.0.rc1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt
  2014-01-01 14:00 [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer Chris Wilson
@ 2014-01-01 14:00 ` Chris Wilson
From: Chris Wilson @ 2014-01-01 14:00 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky

With ppgtt, it is no longer correct to mark an object as
map_and_fenceable if we simply unbind it from the global gtt. This has
consequences during execbuffer where we simply use
obj->map_and_fenceable in use_cpu_reloc() to decide which access method
to use for writing into the object. Now for a ppgtt object,
map_and_fenceable will be true when it is not bound into the ggtt but
only in a ppgtt, leading to an invalid access on a non-llc architecture.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c            |  5 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 92 ++++++++++++++----------------
 drivers/gpu/drm/i915/i915_gem_tiling.c     |  7 +--
 3 files changed, 47 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d9acfa78d1a5..344bb47adaeb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2791,9 +2791,8 @@ int i915_vma_unbind(struct i915_vma *vma)
 	i915_gem_gtt_finish_object(obj);
 
 	list_del(&vma->mm_list);
-	/* Avoid an unnecessary call to unbind on rebind. */
 	if (i915_is_ggtt(vma->vm))
-		obj->map_and_fenceable = true;
+		obj->map_and_fenceable = false;
 
 	drm_mm_remove_node(&vma->node);
 	i915_gem_vma_destroy(vma);
@@ -4114,8 +4113,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	obj->fence_reg = I915_FENCE_REG_NONE;
 	obj->madv = I915_MADV_WILLNEED;
-	/* Avoid an unnecessary call to unbind on the first bind. */
-	obj->map_and_fenceable = true;
 
 	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index cd09d42f507b..26b57f1135c9 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
 
 struct eb_vmas {
 	struct list_head vmas;
@@ -529,14 +530,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
 }
 
 static int
-need_reloc_mappable(struct i915_vma *vma)
-{
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
-		i915_is_ggtt(vma->vm);
-}
-
-static int
 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 				struct intel_ring_buffer *ring,
 				bool *need_reloc)
@@ -544,19 +537,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
-	bool need_fence;
 	unsigned flags;
 	int ret;
 
 	flags = 0;
-
-	need_fence =
-		has_fenced_gpu_access &&
-		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-		obj->tiling_mode != I915_TILING_NONE;
-	if (need_fence || need_reloc_mappable(vma))
+	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
 		flags |= PIN_MAPPABLE;
-
 	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
 		flags |= PIN_GLOBAL;
 
@@ -592,6 +578,27 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 	return 0;
 }
 
+static bool
+need_reloc_mappable(struct i915_vma *vma)
+{
+	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+
+	if (entry->relocation_count == 0)
+		return false;
+
+	if (!i915_is_ggtt(vma->vm))
+		return false;
+
+	/* See also use_cpu_reloc() */
+	if (HAS_LLC(vma->obj->base.dev))
+		return false;
+
+	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+		return false;
+
+	return true;
+}
+
 static int
 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    struct list_head *vmas,
@@ -624,9 +631,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			obj->tiling_mode != I915_TILING_NONE;
 		need_mappable = need_fence || need_reloc_mappable(vma);
 
-		if (need_mappable)
+		if (need_mappable) {
+			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
 			list_move(&vma->exec_list, &ordered_vmas);
-		else
+		} else
 			list_move_tail(&vma->exec_list, &ordered_vmas);
 
 		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
@@ -654,25 +662,14 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		/* Unbind any ill-fitting objects or pin. */
 		list_for_each_entry(vma, vmas, exec_list) {
 			struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-			bool need_fence, need_mappable;
 
 			obj = vma->obj;
 
 			if (!drm_mm_node_allocated(&vma->node))
 				continue;
 
-			need_fence =
-				has_fenced_gpu_access &&
-				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-				obj->tiling_mode != I915_TILING_NONE;
-			need_mappable = need_fence || need_reloc_mappable(vma);
-
-			WARN_ON((need_mappable || need_fence) &&
-			       !i915_is_ggtt(vma->vm));
-
-			if ((entry->alignment &&
-			     vma->node.start & (entry->alignment - 1)) ||
-			    (need_mappable && !obj->map_and_fenceable))
+			if ((entry->alignment && vma->node.start & (entry->alignment - 1)) ||
+			    (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable))
 				ret = i915_vma_unbind(vma);
 			else
 				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -1185,33 +1182,28 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but bdw mucks it up again. */
-	if (flags & I915_DISPATCH_SECURE &&
-	    !batch_obj->has_global_gtt_mapping) {
-		/* When we have multiple VMs, we'll need to make sure that we
-		 * allocate space first */
-		struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
-		BUG_ON(!vma);
-		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
-	}
+	if (flags & I915_DISPATCH_SECURE) {
+		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+		if (ret)
+			goto err;
 
-	if (flags & I915_DISPATCH_SECURE)
 		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
-	else
+	} else
 		exec_start += i915_gem_obj_offset(batch_obj, vm);
 
 	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
 	if (ret)
-		goto err;
+		goto err_batch;
 
 	ret = i915_switch_context(ring, file, ctx);
 	if (ret)
-		goto err;
+		goto err_batch;
 
 	if (ring == &dev_priv->ring[RCS] &&
 	    mode != dev_priv->relative_constants_mode) {
 		ret = intel_ring_begin(ring, 4);
 		if (ret)
-				goto err;
+			goto err_batch;
 
 		intel_ring_emit(ring, MI_NOOP);
 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
@@ -1225,30 +1217,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
 		ret = i915_reset_gen7_sol_offsets(dev, ring);
 		if (ret)
-			goto err;
+			goto err_batch;
 	}
 
-
 	exec_len = args->batch_len;
 	if (cliprects) {
 		for (i = 0; i < args->num_cliprects; i++) {
 			ret = i915_emit_box(dev, &cliprects[i],
 					    args->DR1, args->DR4);
 			if (ret)
-				goto err;
+				goto err_batch;
 
 			ret = ring->dispatch_execbuffer(ring,
 							exec_start, exec_len,
 							flags);
 			if (ret)
-				goto err;
+				goto err_batch;
 		}
 	} else {
 		ret = ring->dispatch_execbuffer(ring,
 						exec_start, exec_len,
 						flags);
 		if (ret)
-			goto err;
+			goto err_batch;
 	}
 
 	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
@@ -1256,6 +1247,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
 	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
 
+err_batch:
+	if (flags & I915_DISPATCH_SECURE)
+		i915_gem_object_ggtt_unpin(batch_obj);
 err:
 	/* the request owns the ref now */
 	i915_gem_context_unreference(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index eb993584aa6b..e49c662ca9c6 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -359,10 +359,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 		 */
 
 		obj->map_and_fenceable =
-			!i915_gem_obj_ggtt_bound(obj) ||
-			(i915_gem_obj_ggtt_offset(obj) +
-			 obj->base.size <= dev_priv->gtt.mappable_end &&
-			 i915_gem_object_fence_ok(obj, args->tiling_mode));
+			i915_gem_obj_ggtt_bound(obj) &&
+			i915_gem_obj_ggtt_offset(obj) + obj->base.size <= dev_priv->gtt.mappable_end &&
+			i915_gem_object_fence_ok(obj, args->tiling_mode);
 
 		/* Rebind if we need a change of alignment */
 		if (!obj->map_and_fenceable) {
-- 
1.8.5.2
