* Partial VMA fixes
@ 2016-08-12 10:28 Chris Wilson
  2016-08-12 10:28 ` [PATCH 01/10] drm/i915: Move map-and-fenceable tracking to the VMA Chris Wilson
                   ` (10 more replies)
  0 siblings, 11 replies; 34+ messages in thread
From: Chris Wilson @ 2016-08-12 10:28 UTC (permalink / raw)
  To: intel-gfx

Almost there! After this will be the severe execbuf performance
regressions...
-Chris


* [PATCH 01/10] drm/i915: Move map-and-fenceable tracking to the VMA
  2016-08-12 10:28 Partial VMA fixes Chris Wilson
@ 2016-08-12 10:28 ` Chris Wilson
  2016-08-15  8:03   ` Joonas Lahtinen
  2016-08-12 10:28 ` [PATCH 02/10] drm/i915/userptr: Make gup errors stickier Chris Wilson
                   ` (9 subsequent siblings)
  10 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2016-08-12 10:28 UTC (permalink / raw)
  To: intel-gfx

By moving map-and-fenceable tracking from the object to the VMA, we gain
fine-grained tracking and the ability to track individual fences on the VMA
(subsequent patch).
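
In short, the object-wide bit becomes a per-VMA flag. A minimal sketch
of the resulting pattern, distilled from the diff below (not a drop-in;
locking and error paths omitted):

/* Query helper introduced by this patch: */
static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
{
	return vma->flags & I915_VMA_CAN_FENCE;
}

/* Recomputed whenever the vma is (re)bound into the GGTT: */
if (mappable && fenceable)
	vma->flags |= I915_VMA_CAN_FENCE;
else
	vma->flags &= ~I915_VMA_CAN_FENCE;

Callers that previously tested obj->map_and_fenceable now test the vma
they actually hold, which is what makes per-view (e.g. partial)
mappings possible later in the series.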

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |  6 -----
 drivers/gpu/drm/i915/i915_gem.c            | 36 +++++++++++++-----------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  4 ++--
 drivers/gpu/drm/i915/i915_gem_fence.c      |  7 +++---
 drivers/gpu/drm/i915/i915_gem_gtt.c        |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h        | 10 +++++++--
 drivers/gpu/drm/i915/i915_gem_tiling.c     |  4 ++--
 drivers/gpu/drm/i915/intel_display.c       |  6 ++---
 8 files changed, 35 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 74fd13330de9..1276ad1c791b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2196,12 +2196,6 @@ struct drm_i915_gem_object {
 	unsigned int fence_dirty:1;
 
 	/**
-	 * Is the object at the current location in the gtt mappable and
-	 * fenceable? Used to avoid costly recalculations.
-	 */
-	unsigned int map_and_fenceable:1;
-
-	/**
 	 * Whether the current gtt mapping needs to be mappable (and isn't just
 	 * mappable by accident). Track pin and fault separate for a more
 	 * accurate mappable working set.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5ad08071b931..c6ae9333b770 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2843,8 +2843,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 	GEM_BUG_ON(obj->bind_count == 0);
 	GEM_BUG_ON(!obj->pages);
 
-	if (i915_vma_is_ggtt(vma) &&
-	    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
+	if (i915_vma_is_map_and_fenceable(vma)) {
 		i915_gem_object_finish_gtt(obj);
 
 		/* release the fence reg _after_ flushing */
@@ -2853,6 +2852,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 			return ret;
 
 		__i915_vma_iounmap(vma);
+		vma->flags &= ~I915_VMA_CAN_FENCE;
 	}
 
 	if (likely(!vma->vm->closed)) {
@@ -2864,13 +2864,9 @@ int i915_vma_unbind(struct i915_vma *vma)
 	drm_mm_remove_node(&vma->node);
 	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
 
-	if (i915_vma_is_ggtt(vma)) {
-		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
-			obj->map_and_fenceable = false;
-		} else if (vma->pages) {
-			sg_free_table(vma->pages);
-			kfree(vma->pages);
-		}
+	if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) {
+		sg_free_table(vma->pages);
+		kfree(vma->pages);
 	}
 	vma->pages = NULL;
 
@@ -3647,8 +3643,6 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 static bool
 i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	struct drm_i915_gem_object *obj = vma->obj;
-
 	if (!drm_mm_node_allocated(&vma->node))
 		return false;
 
@@ -3658,7 +3652,7 @@ i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	if (alignment && vma->node.start & (alignment - 1))
 		return true;
 
-	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
+	if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
 		return true;
 
 	if (flags & PIN_OFFSET_BIAS &&
@@ -3680,10 +3674,10 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 	u32 fence_size, fence_alignment;
 
 	fence_size = i915_gem_get_ggtt_size(dev_priv,
-					    obj->base.size,
+					    vma->size,
 					    i915_gem_object_get_tiling(obj));
 	fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
-						      obj->base.size,
+						      vma->size,
 						      i915_gem_object_get_tiling(obj),
 						      true);
 
@@ -3693,7 +3687,10 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 	mappable = (vma->node.start + fence_size <=
 		    dev_priv->ggtt.mappable_end);
 
-	obj->map_and_fenceable = mappable && fenceable;
+	if (mappable && fenceable)
+		vma->flags |= I915_VMA_CAN_FENCE;
+	else
+		vma->flags &= ~I915_VMA_CAN_FENCE;
 }
 
 int __i915_vma_do_pin(struct i915_vma *vma,
@@ -3753,12 +3750,11 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 
 		WARN(i915_vma_is_pinned(vma),
 		     "bo is already pinned in ggtt with incorrect alignment:"
-		     " offset=%08x, req.alignment=%llx, req.map_and_fenceable=%d,"
-		     " obj->map_and_fenceable=%d\n",
-		     i915_ggtt_offset(vma),
-		     alignment,
+		     " offset=%08x, req.alignment=%llx,"
+		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
+		     i915_ggtt_offset(vma), alignment,
 		     !!(flags & PIN_MAPPABLE),
-		     obj->map_and_fenceable);
+		     i915_vma_is_map_and_fenceable(vma));
 		ret = i915_vma_unbind(vma);
 		if (ret)
 			return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 98f9945c5b1d..572e6a4f9d7e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -853,7 +853,6 @@ static bool
 eb_vma_misplaced(struct i915_vma *vma)
 {
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	struct drm_i915_gem_object *obj = vma->obj;
 
 	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
 		!i915_vma_is_ggtt(vma));
@@ -874,7 +873,8 @@ eb_vma_misplaced(struct i915_vma *vma)
 		return true;
 
 	/* avoid costly ping-pong once a batch bo ended up non-mappable */
-	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
+	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
+	    !i915_vma_is_map_and_fenceable(vma))
 		return !only_mappable_for_reloc(entry->flags);
 
 	if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index 911cea719e79..015f9228bc87 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -130,7 +130,9 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
 		     !is_power_of_2(vma->node.size) ||
 		     (vma->node.start & (vma->node.size - 1)),
 		     "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08llx) aligned\n",
-		     vma->node.start, obj->map_and_fenceable, vma->node.size);
+		     vma->node.start,
+		     i915_vma_is_map_and_fenceable(vma),
+		     vma->node.size);
 
 		if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
 			tile_width = 128;
@@ -389,9 +391,6 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
 			return 0;
 		}
 	} else if (enable) {
-		if (WARN_ON(!obj->map_and_fenceable))
-			return -EINVAL;
-
 		reg = i915_find_fence_reg(dev);
 		if (IS_ERR(reg))
 			return PTR_ERR(reg);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e173ccdebdc6..54063057eabf 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3689,7 +3689,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	assert_rpm_wakelock_held(to_i915(vma->vm->dev));
 
 	lockdep_assert_held(&vma->vm->dev->struct_mutex);
-	if (WARN_ON(!vma->obj->map_and_fenceable))
+	if (WARN_ON(!i915_vma_is_map_and_fenceable(vma)))
 		return IO_ERR_PTR(-ENODEV);
 
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 79a08a050487..ed0a6c7a1883 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -197,8 +197,9 @@ struct i915_vma {
 #define I915_VMA_LOCAL_BIND	BIT(7)
 #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
 
-#define I915_VMA_GGTT	BIT(8)
-#define I915_VMA_CLOSED BIT(9)
+#define I915_VMA_GGTT		BIT(8)
+#define I915_VMA_CAN_FENCE	BIT(9)
+#define I915_VMA_CLOSED		BIT(10)
 
 	unsigned int active;
 	struct i915_gem_active last_read[I915_NUM_ENGINES];
@@ -239,6 +240,11 @@ static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
 	return vma->flags & I915_VMA_GGTT;
 }
 
+static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
+{
+	return vma->flags & I915_VMA_CAN_FENCE;
+}
+
 static inline bool i915_vma_is_closed(const struct i915_vma *vma)
 {
 	return vma->flags & I915_VMA_CLOSED;
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index bfefb63a55ef..af70d4460a9e 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -134,7 +134,7 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode)
 	if (!vma)
 		return 0;
 
-	if (!obj->map_and_fenceable)
+	if (!i915_vma_is_map_and_fenceable(vma))
 		return 0;
 
 	if (IS_GEN3(dev_priv)) {
@@ -145,7 +145,7 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode)
 			goto bad;
 	}
 
-	size = i915_gem_get_ggtt_size(dev_priv, obj->base.size, tiling_mode);
+	size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode);
 	if (vma->node.size < size)
 		goto bad;
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 31eaeedfad30..04a8900f68c1 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2224,7 +2224,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 	 * framebuffer compression.  For simplicity, we always install
 	 * a fence as the cost is not that onerous.
 	 */
-	if (view.type == I915_GGTT_VIEW_NORMAL) {
+	if (i915_vma_is_map_and_fenceable(vma)) {
 		ret = i915_gem_object_get_fence(obj);
 		if (ret == -EDEADLK) {
 			/*
@@ -2262,11 +2262,11 @@ void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 	WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
 
 	intel_fill_fb_ggtt_view(&view, fb, rotation);
+	vma = i915_gem_object_to_ggtt(obj, &view);
 
-	if (view.type == I915_GGTT_VIEW_NORMAL)
+	if (i915_vma_is_map_and_fenceable(vma))
 		i915_gem_object_unpin_fence(obj);
 
-	vma = i915_gem_object_to_ggtt(obj, &view);
 	i915_gem_object_unpin_from_display_plane(vma);
 }
 
-- 
2.8.1


* [PATCH 02/10] drm/i915/userptr: Make gup errors stickier
  2016-08-12 10:28 Partial VMA fixes Chris Wilson
  2016-08-12 10:28 ` [PATCH 01/10] drm/i915: Move map-and-fenceable tracking to the VMA Chris Wilson
@ 2016-08-12 10:28 ` Chris Wilson
  2016-08-15 11:16   ` Joonas Lahtinen
                     ` (2 more replies)
  2016-08-12 10:28 ` [PATCH 03/10] drm/i915: Move fence tracking from object to vma Chris Wilson
                   ` (8 subsequent siblings)
  10 siblings, 3 replies; 34+ messages in thread
From: Chris Wilson @ 2016-08-12 10:28 UTC (permalink / raw)
  To: intel-gfx

Keep any error reported by the gup_worker until we are notified that the
arena has changed (via the mmu-notifier). This ensures that two
consecutive calls to i915_gem_object_get_pages() report the same error,
and curtails the loop of detecting a fault and requeueing a gup_worker.
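
The resulting control flow in i915_gem_userptr_get_pages() is a
three-way test on a single pointer; a sketch of the pattern, condensed
from the hunk below:

/* obj->userptr.work is tri-state: NULL (idle), a valid pointer (gup
 * worker pending), or an ERR_PTR() left behind by a failed worker.
 * The error now persists until the mmu-notifier fires. */
if (obj->userptr.work) {
	if (IS_ERR(obj->userptr.work))
		return PTR_ERR(obj->userptr.work); /* sticky error */
	return -EAGAIN; /* worker still in flight */
}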

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 57218cca7e05..be54825ef3e8 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -542,8 +542,6 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 			}
 		}
 		obj->userptr.work = ERR_PTR(ret);
-		if (ret)
-			__i915_gem_userptr_set_active(obj, false);
 	}
 
 	obj->userptr.workers--;
@@ -628,15 +626,14 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 	 * to the vma (discard or cloning) which should prevent the more
 	 * egregious cases from causing harm.
 	 */
-	if (IS_ERR(obj->userptr.work)) {
-		/* active flag will have been dropped already by the worker */
-		ret = PTR_ERR(obj->userptr.work);
-		obj->userptr.work = NULL;
-		return ret;
-	}
-	if (obj->userptr.work)
+
+	if (obj->userptr.work) {
 		/* active flag should still be held for the pending work */
-		return -EAGAIN;
+		if (IS_ERR(obj->userptr.work))
+			return PTR_ERR(obj->userptr.work);
+		else
+			return -EAGAIN;
+	}
 
 	/* Let the mmu-notifier know that we have begun and need cancellation */
 	ret = __i915_gem_userptr_set_active(obj, true);
-- 
2.8.1


* [PATCH 03/10] drm/i915: Move fence tracking from object to vma
  2016-08-12 10:28 Partial VMA fixes Chris Wilson
  2016-08-12 10:28 ` [PATCH 01/10] drm/i915: Move map-and-fenceable tracking to the VMA Chris Wilson
  2016-08-12 10:28 ` [PATCH 02/10] drm/i915/userptr: Make gup errors stickier Chris Wilson
@ 2016-08-12 10:28 ` Chris Wilson
  2016-08-15  9:18   ` Joonas Lahtinen
  2016-08-12 10:28 ` [PATCH 04/10] drm/i915: Choose partial chunksize based on tile row size Chris Wilson
                   ` (7 subsequent siblings)
  10 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2016-08-12 10:28 UTC (permalink / raw)
  To: intel-gfx

In order to handle tiled partial GTT mmappings, we need to associate the
fence with an individual vma.
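
The heart of the change is a bidirectional link between fence register
and vma, maintained in a single place (fence_update() in the diff
below). A simplified sketch of the invariant and of fence stealing
(retire/wait steps omitted):

struct drm_i915_fence_reg {
	struct i915_vma *vma;	/* currently fenced vma, or NULL */
	/* ... */
};

struct i915_vma {
	struct drm_i915_fence_reg *fence;	/* back-pointer, or NULL */
	/* ... */
};

/* Stealing a fence severs the old link before writing the register: */
if (fence->vma && fence->vma != vma) {
	i915_gem_release_mmap(fence->vma->obj); /* flush userspace access */
	fence->vma->fence = NULL;
	fence->vma = NULL;
}
fence_write(fence, vma);
if (vma) {
	vma->fence = fence;
	fence->vma = vma;
}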

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  16 +-
 drivers/gpu/drm/i915/i915_drv.h            |  82 ++++--
 drivers/gpu/drm/i915/i915_gem.c            |  34 +--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  20 +-
 drivers/gpu/drm/i915/i915_gem_fence.c      | 404 +++++++++++------------------
 drivers/gpu/drm/i915/i915_gem_gtt.c        |   2 +
 drivers/gpu/drm/i915/i915_gem_gtt.h        |   8 +
 drivers/gpu/drm/i915/i915_gem_tiling.c     |  67 +++--
 drivers/gpu/drm/i915/i915_gpu_error.c      |   2 +-
 drivers/gpu/drm/i915/intel_display.c       |  57 ++--
 drivers/gpu/drm/i915/intel_fbc.c           |  11 +-
 drivers/gpu/drm/i915/intel_overlay.c       |   2 +-
 12 files changed, 319 insertions(+), 386 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 61e12a0f08d4..02d517317f3f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -152,11 +152,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		seq_printf(m, "%x ",
 			   i915_gem_active_get_seqno(&obj->last_read[id],
 						     &obj->base.dev->struct_mutex));
-	seq_printf(m, "] %x %x%s%s%s",
+	seq_printf(m, "] %x %s%s%s",
 		   i915_gem_active_get_seqno(&obj->last_write,
 					     &obj->base.dev->struct_mutex),
-		   i915_gem_active_get_seqno(&obj->last_fence,
-					     &obj->base.dev->struct_mutex),
 		   i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level),
 		   obj->dirty ? " dirty" : "",
 		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -169,8 +167,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	seq_printf(m, " (pinned x %d)", pin_count);
 	if (obj->pin_display)
 		seq_printf(m, " (display)");
-	if (obj->fence_reg != I915_FENCE_REG_NONE)
-		seq_printf(m, " (fence: %d)", obj->fence_reg);
 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
@@ -180,6 +176,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 			   vma->node.start, vma->node.size);
 		if (i915_vma_is_ggtt(vma))
 			seq_printf(m, ", type: %u", vma->ggtt_view.type);
+		if (vma->fence)
+			seq_printf(m, " , fence: %d%s",
+				   vma->fence->id,
+				   i915_gem_active_isset(&vma->last_fence) ? "*" : "");
 		seq_puts(m, ")");
 	}
 	if (obj->stolen)
@@ -936,14 +936,14 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 
 	seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs);
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_gem_object *obj = dev_priv->fence_regs[i].obj;
+		struct i915_vma *vma = dev_priv->fence_regs[i].vma;
 
 		seq_printf(m, "Fence %d, pin count = %d, object = ",
 			   i, dev_priv->fence_regs[i].pin_count);
-		if (obj == NULL)
+		if (!vma)
 			seq_puts(m, "unused");
 		else
-			describe_obj(m, obj);
+			describe_obj(m, vma->obj);
 		seq_putc(m, '\n');
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1276ad1c791b..7c188883bdc1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -455,15 +455,21 @@ struct intel_opregion {
 struct intel_overlay;
 struct intel_overlay_error_state;
 
-#define I915_FENCE_REG_NONE -1
-#define I915_MAX_NUM_FENCES 32
-/* 32 fences + sign bit for FENCE_REG_NONE */
-#define I915_MAX_NUM_FENCE_BITS 6
-
 struct drm_i915_fence_reg {
 	struct list_head lru_list;
-	struct drm_i915_gem_object *obj;
+	struct drm_i915_private *i915;
+	struct i915_vma *vma;
 	int pin_count;
+	int id;
+	/**
+	 * Whether the tiling parameters for the currently
+	 * associated fence register have changed. Note that
+	 * for the purposes of tracking tiling changes we also
+	 * treat the unfenced register, the register slot that
+	 * the object occupies whilst it executes a fenced
+	 * command (such as BLT on gen2/3), as a "fence".
+	 */
+	bool dirty;
 };
 
 struct sdvo_device_mapping {
@@ -2175,27 +2181,11 @@ struct drm_i915_gem_object {
 	unsigned int dirty:1;
 
 	/**
-	 * Fence register bits (if any) for this object.  Will be set
-	 * as needed when mapped into the GTT.
-	 * Protected by dev->struct_mutex.
-	 */
-	signed int fence_reg:I915_MAX_NUM_FENCE_BITS;
-
-	/**
 	 * Advice: are the backing pages purgeable?
 	 */
 	unsigned int madv:2;
 
 	/**
-	 * Whether the tiling parameters for the currently associated fence
-	 * register have changed. Note that for the purposes of tracking
-	 * tiling changes we also treat the unfenced register, the register
-	 * slot that the object occupies whilst it executes a fenced
-	 * command (such as BLT on gen2/3), as a "fence".
-	 */
-	unsigned int fence_dirty:1;
-
-	/**
 	 * Whether the current gtt mapping needs to be mappable (and isn't just
 	 * mappable by accident). Track pin and fault separate for a more
 	 * accurate mappable working set.
@@ -2243,7 +2233,6 @@ struct drm_i915_gem_object {
 	 */
 	struct i915_gem_active last_read[I915_NUM_ENGINES];
 	struct i915_gem_active last_write;
-	struct i915_gem_active last_fence;
 
 	/** References from framebuffers, locks out tiling changes. */
 	unsigned long framebuffer_references;
@@ -3331,11 +3320,50 @@ i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o,
 }
 
 /* i915_gem_fence.c */
-int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj);
-int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
+int __must_check i915_vma_get_fence(struct i915_vma *vma);
+int __must_check i915_vma_put_fence(struct i915_vma *vma);
+
+/**
+ * i915_vma_pin_fence - pin fencing state
+ * @vma: vma to pin fencing for
+ *
+ * This pins the fencing state (whether tiled or untiled) to make sure the
+ * vma (and its object) is ready to be used as a scanout target. Fencing
+ * status must be synchronized first by calling i915_vma_get_fence():
+ *
+ * The resulting fence pin reference must be released again with
+ * i915_vma_unpin_fence().
+ *
+ * Returns:
+ *
+ * True if the vma has a fence, false otherwise.
+ */
+static inline bool
+i915_vma_pin_fence(struct i915_vma *vma)
+{
+	if (vma->fence) {
+		vma->fence->pin_count++;
+		return true;
+	} else
+		return false;
+}
 
-bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
-void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
+/**
+ * i915_vma_unpin_fence - unpin fencing state
+ * @vma: vma to unpin fencing for
+ *
+ * This releases the fence pin reference acquired through
+ * i915_vma_pin_fence. It will handle both objects with and without an
+ * attached fence correctly, callers do not need to distinguish this.
+ */
+static inline void
+i915_vma_unpin_fence(struct i915_vma *vma)
+{
+	if (vma->fence) {
+		GEM_BUG_ON(vma->fence->pin_count <= 0);
+		vma->fence->pin_count--;
+	}
+}
 
 void i915_gem_restore_fences(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c6ae9333b770..b386efaa5aa2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -832,7 +832,7 @@ i915_gem_gtt_pread(struct drm_device *dev,
 	} else {
 		node.start = i915_ggtt_offset(vma);
 		node.allocated = false;
-		ret = i915_gem_object_put_fence(obj);
+		ret = i915_vma_put_fence(vma);
 		if (ret)
 			goto out_unpin;
 	}
@@ -1131,15 +1131,11 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 	} else {
 		node.start = i915_ggtt_offset(vma);
 		node.allocated = false;
-		ret = i915_gem_object_put_fence(obj);
+		ret = i915_vma_put_fence(vma);
 		if (ret)
 			goto out_unpin;
 	}
 
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
-	if (ret)
-		goto out_unpin;
-
 	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
 	obj->dirty = true;
 
@@ -1736,7 +1732,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	if (ret)
 		goto err_unpin;
 
-	ret = i915_gem_object_get_fence(obj);
+	ret = i915_vma_get_fence(vma);
 	if (ret)
 		goto err_unpin;
 
@@ -2847,7 +2843,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 		i915_gem_object_finish_gtt(obj);
 
 		/* release the fence reg _after_ flushing */
-		ret = i915_gem_object_put_fence(obj);
+		ret = i915_vma_put_fence(vma);
 		if (ret)
 			return ret;
 
@@ -3328,9 +3324,11 @@ restart:
 			 * dropped the fence as all snoopable access is
 			 * supposed to be linear.
 			 */
-			ret = i915_gem_object_put_fence(obj);
-			if (ret)
-				return ret;
+			list_for_each_entry(vma, &obj->vma_list, obj_link) {
+				ret = i915_vma_put_fence(vma);
+				if (ret)
+					return ret;
+			}
 		} else {
 			/* We either have incoherent backing store and
 			 * so no GTT access or the architecture is fully
@@ -3988,14 +3986,12 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 				    i915_gem_object_retire__read);
 	init_request_active(&obj->last_write,
 			    i915_gem_object_retire__write);
-	init_request_active(&obj->last_fence, NULL);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
 
 	obj->ops = ops;
 
-	obj->fence_reg = I915_FENCE_REG_NONE;
 	obj->madv = I915_MADV_WILLNEED;
 
 	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
@@ -4425,6 +4421,7 @@ void
 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
+	int i;
 
 	if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
 	    !IS_CHERRYVIEW(dev_priv))
@@ -4440,6 +4437,13 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
 				I915_READ(vgtif_reg(avail_rs.fence_num));
 
 	/* Initialize fence registers to zero */
+	for (i = 0; i < dev_priv->num_fence_regs; i++) {
+		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
+
+		fence->i915 = dev_priv;
+		fence->id = i;
+		list_add_tail(&fence->lru_list, &dev_priv->mm.fence_list);
+	}
 	i915_gem_restore_fences(dev);
 
 	i915_gem_detect_bit_6_swizzle(dev);
@@ -4475,8 +4479,6 @@ i915_gem_load_init(struct drm_device *dev)
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
 	for (i = 0; i < I915_NUM_ENGINES; i++)
 		init_engine_lists(&dev_priv->engine[i]);
-	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
-		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
 	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
 			  i915_gem_retire_work_handler);
 	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
@@ -4486,8 +4488,6 @@ i915_gem_load_init(struct drm_device *dev)
 
 	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
 
-	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
-
 	init_waitqueue_head(&dev_priv->pending_flip_queue);
 
 	dev_priv->mm.interruptible = true;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 572e6a4f9d7e..c012a0d94878 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -250,7 +250,6 @@ static void
 i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
 {
 	struct drm_i915_gem_exec_object2 *entry;
-	struct drm_i915_gem_object *obj = vma->obj;
 
 	if (!drm_mm_node_allocated(&vma->node))
 		return;
@@ -258,7 +257,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
 	entry = vma->exec_entry;
 
 	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
-		i915_gem_object_unpin_fence(obj);
+		i915_vma_unpin_fence(vma);
 
 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
 		__i915_vma_unpin(vma);
@@ -451,7 +450,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 			if (ret)
 				return ERR_PTR(ret);
 		} else {
-			ret = i915_gem_object_put_fence(obj);
+			ret = i915_vma_put_fence(vma);
 			if (ret) {
 				i915_vma_unpin(vma);
 				return ERR_PTR(ret);
@@ -807,11 +806,11 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 	entry->flags |= __EXEC_OBJECT_HAS_PIN;
 
 	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
-		ret = i915_gem_object_get_fence(obj);
+		ret = i915_vma_get_fence(vma);
 		if (ret)
 			return ret;
 
-		if (i915_gem_object_pin_fence(obj))
+		if (i915_vma_pin_fence(vma))
 			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
@@ -1308,15 +1307,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 	}
 
-	if (flags & EXEC_OBJECT_NEEDS_FENCE) {
-		i915_gem_active_set(&obj->last_fence, req);
-		if (flags & __EXEC_OBJECT_HAS_FENCE) {
-			struct drm_i915_private *dev_priv = req->i915;
-
-			list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
-				       &dev_priv->mm.fence_list);
-		}
-	}
+	if (flags & EXEC_OBJECT_NEEDS_FENCE)
+		i915_gem_active_set(&vma->last_fence, req);
 
 	i915_vma_set_active(vma, idx);
 	i915_gem_active_set(&vma->last_read[idx], req);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index 015f9228bc87..bdc946dbb36d 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -55,74 +55,70 @@
  * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
  */
 
-static void i965_write_fence_reg(struct drm_device *dev, int reg,
-				 struct drm_i915_gem_object *obj)
+static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
+				 struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	i915_reg_t fence_reg_lo, fence_reg_hi;
 	int fence_pitch_shift;
+	u64 val;
 
-	if (INTEL_INFO(dev)->gen >= 6) {
-		fence_reg_lo = FENCE_REG_GEN6_LO(reg);
-		fence_reg_hi = FENCE_REG_GEN6_HI(reg);
+	if (INTEL_INFO(fence->i915)->gen >= 6) {
+		fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
+		fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
 		fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
+
 	} else {
-		fence_reg_lo = FENCE_REG_965_LO(reg);
-		fence_reg_hi = FENCE_REG_965_HI(reg);
+		fence_reg_lo = FENCE_REG_965_LO(fence->id);
+		fence_reg_hi = FENCE_REG_965_HI(fence->id);
 		fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
 	}
 
-	/* To w/a incoherency with non-atomic 64-bit register updates,
-	 * we split the 64-bit update into two 32-bit writes. In order
-	 * for a partial fence not to be evaluated between writes, we
-	 * precede the update with write to turn off the fence register,
-	 * and only enable the fence as the last step.
-	 *
-	 * For extra levels of paranoia, we make sure each step lands
-	 * before applying the next step.
-	 */
-	I915_WRITE(fence_reg_lo, 0);
-	POSTING_READ(fence_reg_lo);
-
-	if (obj) {
-		struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
-		unsigned int tiling = i915_gem_object_get_tiling(obj);
-		unsigned int stride = i915_gem_object_get_stride(obj);
-		u64 size = vma->node.size;
-		u32 row_size = stride * (tiling == I915_TILING_Y ? 32 : 8);
-		u64 val;
-
-		/* Adjust fence size to match tiled area */
-		size = rounddown(size, row_size);
+	if (vma) {
+		unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
+		unsigned int tiling_y = tiling == I915_TILING_Y;
+		unsigned int stride = i915_gem_object_get_stride(vma->obj);
+		u32 row_size = stride * (tiling_y ? 32 : 8);
+		u32 size = rounddown(vma->node.size, row_size);
 
 		val = ((vma->node.start + size - 4096) & 0xfffff000) << 32;
 		val |= vma->node.start & 0xfffff000;
 		val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
-		if (tiling == I915_TILING_Y)
+		if (tiling_y)
 			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
 		val |= I965_FENCE_REG_VALID;
+	} else
+		val = 0;
+
+	if (1) {
+		struct drm_i915_private *dev_priv = fence->i915;
 
-		I915_WRITE(fence_reg_hi, val >> 32);
-		POSTING_READ(fence_reg_hi);
+		/* To w/a incoherency with non-atomic 64-bit register updates,
+		 * we split the 64-bit update into two 32-bit writes. In order
+		 * for a partial fence not to be evaluated between writes, we
+		 * precede the update with write to turn off the fence register,
+		 * and only enable the fence as the last step.
+		 *
+		 * For extra levels of paranoia, we make sure each step lands
+		 * before applying the next step.
+		 */
+		I915_WRITE(fence_reg_lo, 0);
+		POSTING_READ(fence_reg_lo);
 
-		I915_WRITE(fence_reg_lo, val);
+		I915_WRITE(fence_reg_hi, upper_32_bits(val));
+		I915_WRITE(fence_reg_lo, lower_32_bits(val));
 		POSTING_READ(fence_reg_lo);
-	} else {
-		I915_WRITE(fence_reg_hi, 0);
-		POSTING_READ(fence_reg_hi);
 	}
 }
 
-static void i915_write_fence_reg(struct drm_device *dev, int reg,
-				 struct drm_i915_gem_object *obj)
+static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
+				 struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	u32 val;
 
-	if (obj) {
-		struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
-		unsigned int tiling = i915_gem_object_get_tiling(obj);
-		unsigned int stride = i915_gem_object_get_stride(obj);
+	if (vma) {
+		unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
+		unsigned int tiling_y = tiling == I915_TILING_Y;
+		unsigned int stride = i915_gem_object_get_stride(vma->obj);
 		int pitch_val;
 		int tile_width;
 
@@ -134,7 +130,7 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
 		     i915_vma_is_map_and_fenceable(vma),
 		     vma->node.size);
 
-		if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
+		if (tiling_y && HAS_128_BYTE_Y_TILING(fence->i915))
 			tile_width = 128;
 		else
 			tile_width = 512;
@@ -144,7 +140,7 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
 		pitch_val = ffs(pitch_val) - 1;
 
 		val = vma->node.start;
-		if (tiling == I915_TILING_Y)
+		if (tiling_y)
 			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
 		val |= I915_FENCE_SIZE_BITS(vma->node.size);
 		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
@@ -152,20 +148,23 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
 	} else
 		val = 0;
 
-	I915_WRITE(FENCE_REG(reg), val);
-	POSTING_READ(FENCE_REG(reg));
+	if (1) {
+		struct drm_i915_private *dev_priv = fence->i915;
+		i915_reg_t reg = FENCE_REG(fence->id);
+
+		I915_WRITE(reg, val);
+		POSTING_READ(reg);
+	}
 }
 
-static void i830_write_fence_reg(struct drm_device *dev, int reg,
-				struct drm_i915_gem_object *obj)
+static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
+				 struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	u32 val;
 
-	if (obj) {
-		struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
-		unsigned int tiling = i915_gem_object_get_tiling(obj);
-		unsigned int stride = i915_gem_object_get_stride(obj);
+	if (vma) {
+		unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
+		unsigned int stride = i915_gem_object_get_stride(vma->obj);
 		u32 pitch_val;
 
 		WARN((vma->node.start & ~I830_FENCE_START_MASK) ||
@@ -186,96 +185,95 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
 	} else
 		val = 0;
 
-	I915_WRITE(FENCE_REG(reg), val);
-	POSTING_READ(FENCE_REG(reg));
-}
+	if (1) {
+		struct drm_i915_private *dev_priv = fence->i915;
+		i915_reg_t reg = FENCE_REG(fence->id);
 
-inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
-{
-	return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
+		I915_WRITE(reg, val);
+		POSTING_READ(reg);
+	}
 }
 
-static void i915_gem_write_fence(struct drm_device *dev, int reg,
-				 struct drm_i915_gem_object *obj)
+static void fence_write(struct drm_i915_fence_reg *fence,
+			struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
-	/* Ensure that all CPU reads are completed before installing a fence
-	 * and all writes before removing the fence.
+	/* Previous access through the fence register is marshalled by
+	 * the mb() inside the fault handlers (i915_gem_release_mmaps)
+	 * and explicitly managed for internal users.
 	 */
-	if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
-		mb();
-
-	WARN(obj &&
-	     (!i915_gem_object_get_stride(obj) ||
-	      !i915_gem_object_get_tiling(obj)),
-	     "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
-	     i915_gem_object_get_stride(obj),
-	     i915_gem_object_get_tiling(obj));
-
-	if (IS_GEN2(dev))
-		i830_write_fence_reg(dev, reg, obj);
-	else if (IS_GEN3(dev))
-		i915_write_fence_reg(dev, reg, obj);
-	else if (INTEL_INFO(dev)->gen >= 4)
-		i965_write_fence_reg(dev, reg, obj);
-
-	/* And similarly be paranoid that no direct access to this region
-	 * is reordered to before the fence is installed.
+
+	if (IS_GEN2(fence->i915))
+		i830_write_fence_reg(fence, vma);
+	else if (IS_GEN3(fence->i915))
+		i915_write_fence_reg(fence, vma);
+	else
+		i965_write_fence_reg(fence, vma);
+
+	/* Access through the fenced region afterwards is
+	 * ordered by the posting reads whilst writing the registers.
 	 */
-	if (i915_gem_object_needs_mb(obj))
-		mb();
-}
 
-static inline int fence_number(struct drm_i915_private *dev_priv,
-			       struct drm_i915_fence_reg *fence)
-{
-	return fence - dev_priv->fence_regs;
+	fence->dirty = false;
 }
 
-static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
-					 struct drm_i915_fence_reg *fence,
-					 bool enable)
+static int fence_update(struct drm_i915_fence_reg *fence,
+			struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	int reg = fence_number(dev_priv, fence);
+	int ret;
 
-	i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
+	if (vma) {
+		if (!i915_vma_is_map_and_fenceable(vma))
+			return -EINVAL;
 
-	if (enable) {
-		obj->fence_reg = reg;
-		fence->obj = obj;
-		list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
-	} else {
-		obj->fence_reg = I915_FENCE_REG_NONE;
-		fence->obj = NULL;
-		list_del_init(&fence->lru_list);
+		if (WARN(!i915_gem_object_get_stride(vma->obj) ||
+			 !i915_gem_object_get_tiling(vma->obj),
+			 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
+			 i915_gem_object_get_stride(vma->obj),
+			 i915_gem_object_get_tiling(vma->obj)))
+			return -EINVAL;
+
+		ret = i915_gem_active_retire(&vma->last_fence,
+					     &vma->obj->base.dev->struct_mutex);
+		if (ret)
+			return ret;
 	}
-	obj->fence_dirty = false;
-}
 
-static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
-{
-	if (i915_gem_object_is_tiled(obj))
-		i915_gem_release_mmap(obj);
+	if (fence->vma) {
+		ret = i915_gem_active_retire(&fence->vma->last_fence,
+				      &fence->vma->obj->base.dev->struct_mutex);
+		if (ret)
+			return ret;
+	}
 
-	/* As we do not have an associated fence register, we will force
-	 * a tiling change if we ever need to acquire one.
-	 */
-	obj->fence_dirty = false;
-	obj->fence_reg = I915_FENCE_REG_NONE;
-}
+	if (fence->vma && fence->vma != vma) {
+		/* Ensure that all userspace CPU access is completed before
+		 * stealing the fence.
+		 */
+		i915_gem_release_mmap(fence->vma->obj);
 
-static int
-i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
-{
-	return i915_gem_active_retire(&obj->last_fence,
-				      &obj->base.dev->struct_mutex);
+		fence->vma->fence = NULL;
+		fence->vma = NULL;
+
+		list_move(&fence->lru_list, &fence->i915->mm.fence_list);
+	}
+
+	fence_write(fence, vma);
+
+	if (vma) {
+		if (fence->vma != vma) {
+			vma->fence = fence;
+			fence->vma = vma;
+		}
+
+		list_move_tail(&fence->lru_list, &fence->i915->mm.fence_list);
+	}
+
+	return 0;
 }
 
 /**
- * i915_gem_object_put_fence - force-remove fence for an object
- * @obj: object to map through a fence reg
+ * i915_vma_put_fence - force-remove fence for a VMA
+ * @vma: vma to map linearly (not through a fence reg)
  *
  * This function force-removes any fence from the given object, which is useful
  * if the kernel wants to do untiled GTT access.
@@ -285,70 +283,40 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
  * 0 on success, negative error code on failure.
  */
 int
-i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
+i915_vma_put_fence(struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	struct drm_i915_fence_reg *fence;
-	int ret;
+	struct drm_i915_fence_reg *fence = vma->fence;
 
-	ret = i915_gem_object_wait_fence(obj);
-	if (ret)
-		return ret;
-
-	if (obj->fence_reg == I915_FENCE_REG_NONE)
+	if (!fence)
 		return 0;
 
-	fence = &dev_priv->fence_regs[obj->fence_reg];
-
 	if (WARN_ON(fence->pin_count))
 		return -EBUSY;
 
-	i915_gem_object_fence_lost(obj);
-	i915_gem_object_update_fence(obj, fence, false);
-
-	return 0;
+	return fence_update(fence, NULL);
 }
 
-static struct drm_i915_fence_reg *
-i915_find_fence_reg(struct drm_device *dev)
+static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_i915_fence_reg *reg, *avail;
-	int i;
-
-	/* First try to find a free reg */
-	avail = NULL;
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		reg = &dev_priv->fence_regs[i];
-		if (!reg->obj)
-			return reg;
-
-		if (!reg->pin_count)
-			avail = reg;
-	}
-
-	if (avail == NULL)
-		goto deadlock;
+	struct drm_i915_fence_reg *fence;
 
-	/* None available, try to steal one or wait for a user to finish */
-	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
-		if (reg->pin_count)
+	list_for_each_entry(fence, &dev_priv->mm.fence_list, lru_list) {
+		if (fence->pin_count)
 			continue;
 
-		return reg;
+		return fence;
 	}
 
-deadlock:
 	/* Wait for completion of pending flips which consume fences */
-	if (intel_has_pending_fb_unpin(dev))
+	if (intel_has_pending_fb_unpin(&dev_priv->drm))
 		return ERR_PTR(-EAGAIN);
 
 	return ERR_PTR(-EDEADLK);
 }
 
 /**
- * i915_gem_object_get_fence - set up fencing for an object
- * @obj: object to map through a fence reg
+ * i915_vma_get_fence - set up fencing for a vma
+ * @vma: vma to map through a fence reg
  *
  * When mapping objects through the GTT, userspace wants to be able to write
  * to them without having to worry about swizzling if the object is tiled.
@@ -365,94 +333,27 @@ deadlock:
  * 0 on success, negative error code on failure.
  */
 int
-i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
+i915_vma_get_fence(struct i915_vma *vma)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	bool enable = i915_gem_object_is_tiled(obj);
-	struct drm_i915_fence_reg *reg;
-	int ret;
-
-	/* Have we updated the tiling parameters upon the object and so
-	 * will need to serialise the write to the associated fence register?
-	 */
-	if (obj->fence_dirty) {
-		ret = i915_gem_object_wait_fence(obj);
-		if (ret)
-			return ret;
-	}
+	struct drm_i915_fence_reg *fence;
+	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
 
 	/* Just update our place in the LRU if our fence is getting reused. */
-	if (obj->fence_reg != I915_FENCE_REG_NONE) {
-		reg = &dev_priv->fence_regs[obj->fence_reg];
-		if (!obj->fence_dirty) {
-			list_move_tail(&reg->lru_list,
-				       &dev_priv->mm.fence_list);
+	if (vma->fence) {
+		fence = vma->fence;
+		if (!fence->dirty) {
+			list_move_tail(&fence->lru_list,
+				       &fence->i915->mm.fence_list);
 			return 0;
 		}
-	} else if (enable) {
-		reg = i915_find_fence_reg(dev);
-		if (IS_ERR(reg))
-			return PTR_ERR(reg);
-
-		if (reg->obj) {
-			struct drm_i915_gem_object *old = reg->obj;
-
-			ret = i915_gem_object_wait_fence(old);
-			if (ret)
-				return ret;
-
-			i915_gem_object_fence_lost(old);
-		}
+	} else if (set) {
+		fence = fence_find(to_i915(vma->vm->dev));
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
 	} else
 		return 0;
 
-	i915_gem_object_update_fence(obj, reg, enable);
-
-	return 0;
-}
-
-/**
- * i915_gem_object_pin_fence - pin fencing state
- * @obj: object to pin fencing for
- *
- * This pins the fencing state (whether tiled or untiled) to make sure the
- * object is ready to be used as a scanout target. Fencing status must be
- * synchronize first by calling i915_gem_object_get_fence():
- *
- * The resulting fence pin reference must be released again with
- * i915_gem_object_unpin_fence().
- *
- * Returns:
- *
- * True if the object has a fence, false otherwise.
- */
-bool
-i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
-{
-	if (obj->fence_reg != I915_FENCE_REG_NONE) {
-		to_i915(obj->base.dev)->fence_regs[obj->fence_reg].pin_count++;
-		return true;
-	} else
-		return false;
-}
-
-/**
- * i915_gem_object_unpin_fence - unpin fencing state
- * @obj: object to unpin fencing for
- *
- * This releases the fence pin reference acquired through
- * i915_gem_object_pin_fence. It will handle both objects with and without an
- * attached fence correctly, callers do not need to distinguish this.
- */
-void
-i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
-{
-	if (obj->fence_reg != I915_FENCE_REG_NONE) {
-		struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-		WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
-		dev_priv->fence_regs[obj->fence_reg].pin_count--;
-	}
+	return fence_update(fence, set);
 }
 
 /**
@@ -474,12 +375,7 @@ void i915_gem_restore_fences(struct drm_device *dev)
 		 * Commit delayed tiling changes if we have an object still
 		 * attached to the fence, otherwise just clear the fence.
 		 */
-		if (reg->obj) {
-			i915_gem_object_update_fence(reg->obj, reg,
-						     i915_gem_object_get_tiling(reg->obj));
-		} else {
-			i915_gem_write_fence(dev, i, NULL);
-		}
+		fence_write(reg, reg->vma);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 54063057eabf..ef59fae59b17 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3339,6 +3339,7 @@ void i915_vma_destroy(struct i915_vma *vma)
 	GEM_BUG_ON(vma->node.allocated);
 	GEM_BUG_ON(i915_vma_is_active(vma));
 	GEM_BUG_ON(!i915_vma_is_closed(vma));
+	GEM_BUG_ON(vma->fence);
 
 	list_del(&vma->vm_link);
 	if (!i915_vma_is_ggtt(vma))
@@ -3375,6 +3376,7 @@ __i915_vma_create(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&vma->exec_list);
 	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
 		init_request_active(&vma->last_read[i], i915_vma_retire);
+	init_request_active(&vma->last_fence, NULL);
 	list_add(&vma->vm_link, &vm->unbound_list);
 	vma->vm = vm;
 	vma->obj = obj;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index ed0a6c7a1883..4b67347834b5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -38,7 +38,13 @@
 
 #include "i915_gem_request.h"
 
+#define I915_FENCE_REG_NONE -1
+#define I915_MAX_NUM_FENCES 32
+/* 32 fences + sign bit for FENCE_REG_NONE */
+#define I915_MAX_NUM_FENCE_BITS 6
+
 struct drm_i915_file_private;
+struct drm_i915_fence_reg;
 
 typedef uint32_t gen6_pte_t;
 typedef uint64_t gen8_pte_t;
@@ -174,6 +180,7 @@ struct i915_vma {
 	struct drm_mm_node node;
 	struct drm_i915_gem_object *obj;
 	struct i915_address_space *vm;
+	struct drm_i915_fence_reg *fence;
 	struct sg_table *pages;
 	void __iomem *iomap;
 	u64 size;
@@ -203,6 +210,7 @@ struct i915_vma {
 
 	unsigned int active;
 	struct i915_gem_active last_read[I915_NUM_ENGINES];
+	struct i915_gem_active last_fence;
 
 	/**
 	 * Support different GGTT views into the same object.
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index af70d4460a9e..a14b1e3d4c78 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -116,13 +116,39 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
 	return true;
 }
 
+static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode)
+{
+	struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
+	u32 size;
+
+	if (!i915_vma_is_map_and_fenceable(vma))
+		return true;
+
+	if (INTEL_GEN(dev_priv) == 3) {
+		if (vma->node.start & ~I915_FENCE_START_MASK)
+			return false;
+	} else {
+		if (vma->node.start & ~I830_FENCE_START_MASK)
+			return false;
+	}
+
+	size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode);
+	if (vma->node.size < size)
+		return false;
+
+	if (vma->node.start & (size - 1))
+		return false;
+
+	return true;
+}
+
 /* Make the current GTT allocation valid for the change in tiling. */
 static int
 i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct i915_vma *vma;
-	u32 size;
+	int ret;
 
 	if (tiling_mode == I915_TILING_NONE)
 		return 0;
@@ -130,32 +156,16 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode)
 	if (INTEL_GEN(dev_priv) >= 4)
 		return 0;
 
-	vma = i915_gem_object_to_ggtt(obj, NULL);
-	if (!vma)
-		return 0;
-
-	if (!i915_vma_is_map_and_fenceable(vma))
-		return 0;
+	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+		if (i915_vma_fence_prepare(vma, tiling_mode))
+			continue;
 
-	if (IS_GEN3(dev_priv)) {
-		if (vma->node.start & ~I915_FENCE_START_MASK)
-			goto bad;
-	} else {
-		if (vma->node.start & ~I830_FENCE_START_MASK)
-			goto bad;
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
 	}
 
-	size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode);
-	if (vma->node.size < size)
-		goto bad;
-
-	if (vma->node.start & (size - 1))
-		goto bad;
-
 	return 0;
-
-bad:
-	return i915_vma_unbind(vma);
 }
 
 /**
@@ -248,6 +258,8 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 
 		err = i915_gem_object_fence_prepare(obj, args->tiling_mode);
 		if (!err) {
+			struct i915_vma *vma;
+
 			if (obj->pages &&
 			    obj->madv == I915_MADV_WILLNEED &&
 			    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
@@ -257,11 +269,12 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 					i915_gem_object_pin_pages(obj);
 			}
 
-			obj->fence_dirty =
-				!i915_gem_active_is_idle(&obj->last_fence,
-							 &dev->struct_mutex) ||
-				obj->fence_reg != I915_FENCE_REG_NONE;
+			list_for_each_entry(vma, &obj->vma_list, obj_link) {
+				if (!vma->fence)
+					continue;
 
+				vma->fence->dirty = true;
+			}
 			obj->tiling_and_stride =
 				args->stride | args->tiling_mode;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index e14526107ccb..26b41d8ef561 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -840,7 +840,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->gtt_offset = vma->node.start;
 	err->read_domains = obj->base.read_domains;
 	err->write_domain = obj->base.write_domain;
-	err->fence_reg = obj->fence_reg;
+	err->fence_reg = vma->fence ? vma->fence->id : -1;
 	err->tiling = i915_gem_object_get_tiling(obj);
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 04a8900f68c1..c81c89adaff3 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2188,7 +2188,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 	struct i915_ggtt_view view;
 	struct i915_vma *vma;
 	u32 alignment;
-	int ret;
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
@@ -2214,43 +2213,33 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 	intel_runtime_pm_get(dev_priv);
 
 	vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
-		goto err_pm;
-	}
+	if (IS_ERR(vma))
+		goto err;
 
-	/* Install a fence for tiled scan-out. Pre-i965 always needs a
-	 * fence, whereas 965+ only requires a fence if using
-	 * framebuffer compression.  For simplicity, we always install
-	 * a fence as the cost is not that onerous.
-	 */
 	if (i915_vma_is_map_and_fenceable(vma)) {
-		ret = i915_gem_object_get_fence(obj);
-		if (ret == -EDEADLK) {
-			/*
-			 * -EDEADLK means there are no free fences
-			 * no pending flips.
-			 *
-			 * This is propagated to atomic, but it uses
-			 * -EDEADLK to force a locking recovery, so
-			 * change the returned error to -EBUSY.
-			 */
-			ret = -EBUSY;
-			goto err_unpin;
-		} else if (ret)
-			goto err_unpin;
-
-		i915_gem_object_pin_fence(obj);
+		/* Install a fence for tiled scan-out. Pre-i965 always needs a
+		 * fence, whereas 965+ only requires a fence if using
+		 * framebuffer compression.  For simplicity, we always, when
+		 * possible, install a fence as the cost is not that onerous.
+		 *
+		 * If we fail to fence the tiled scanout, then either the
+		 * modeset will reject the change (which is highly unlikely as
+		 * the affected systems, all but one, do not have unmappable
+		 * space) or we will not be able to enable full powersaving
+		 * techniques (also likely not to apply due to various limits
+		 * FBC and the like impose on the size of the buffer, which
+		 * presumably we violated anyway with this unmappable buffer).
+		 * Anyway, it is presumably better to stumble onwards with
+		 * something and try to run the system in a "less than optimal"
+		 * mode that matches the user configuration.
+		 */
+		if (i915_vma_get_fence(vma) == 0)
+			i915_vma_pin_fence(vma);
 	}
 
+err:
 	intel_runtime_pm_put(dev_priv);
 	return vma;
-
-err_unpin:
-	i915_gem_object_unpin_from_display_plane(vma);
-err_pm:
-	intel_runtime_pm_put(dev_priv);
-	return ERR_PTR(ret);
 }
 
 void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
@@ -2264,9 +2253,7 @@ void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 	intel_fill_fb_ggtt_view(&view, fb, rotation);
 	vma = i915_gem_object_to_ggtt(obj, &view);
 
-	if (i915_vma_is_map_and_fenceable(vma))
-		i915_gem_object_unpin_fence(obj);
-
+	i915_vma_unpin_fence(vma);
 	i915_gem_object_unpin_from_display_plane(vma);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index e122052c4081..565a0a8120fb 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -244,7 +244,6 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv)
 		dpfc_ctl |= DPFC_CTL_LIMIT_1X;
 		break;
 	}
-	dpfc_ctl |= DPFC_CTL_FENCE_EN;
 	if (IS_GEN5(dev_priv))
 		dpfc_ctl |= params->fb.fence_reg;
 
@@ -709,6 +708,14 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc)
 	return effective_w <= max_w && effective_h <= max_h;
 }
 
+/* XXX replace me when we have VMA tracking for intel_plane_state */
+static int get_fence_id(struct drm_framebuffer *fb)
+{
+	struct i915_vma *vma = i915_gem_object_to_ggtt(intel_fb_obj(fb), NULL);
+
+	return vma->fence ? vma->fence->id : I915_FENCE_REG_NONE;
+}
+
 static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
 					 struct intel_crtc_state *crtc_state,
 					 struct intel_plane_state *plane_state)
@@ -740,7 +747,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
 		cache->fb.ilk_ggtt_offset = i915_gem_object_ggtt_offset(obj, NULL);
 	cache->fb.pixel_format = fb->pixel_format;
 	cache->fb.stride = fb->pitches[0];
-	cache->fb.fence_reg = obj->fence_reg;
+	cache->fb.fence_reg = get_fence_id(fb);
 	cache->fb.tiling_mode = i915_gem_object_get_tiling(obj);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 72f8990a13d2..3cf8d02064a8 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -760,7 +760,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	ret = i915_gem_object_put_fence(new_bo);
+	ret = i915_vma_put_fence(vma);
 	if (ret)
 		goto out_unpin;
 
-- 
2.8.1


* [PATCH 04/10] drm/i915: Choose partial chunksize based on tile row size
  2016-08-12 10:28 Partial VMA fixes Chris Wilson
                   ` (2 preceding siblings ...)
  2016-08-12 10:28 ` [PATCH 03/10] drm/i915: Move fence tracking from object to vma Chris Wilson
@ 2016-08-12 10:28 ` Chris Wilson
  2016-08-12 10:38   ` Joonas Lahtinen
  2016-08-12 10:28 ` [PATCH 05/10] drm/i915: Fix partial GGTT faulting Chris Wilson
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2016-08-12 10:28 UTC (permalink / raw)
  To: intel-gfx

In order to support setting up fences for partial mappings of an object,
we have to align those mappings with the fence. The chunksize we choose
is therefore at least the size of a single tile row.
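
For scale (hypothetical numbers, not taken from the patch): with a
4096-byte stride, a Y-tiled row spans 4096 * 32 = 128 KiB = 32 pages,
well inside the 256-page (1 MiB) default chunk; an X-tiled object with
a 32768-byte stride needs 32768 * 8 = 256 KiB = 64 pages per row, still
within the default. The chunk size only grows beyond 1 MiB when a
single tile row does.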

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b386efaa5aa2..b0e73c5a29c6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1650,6 +1650,16 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
+{
+	u64 size;
+
+	size = i915_gem_object_get_stride(obj);
+	size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8;
+
+	return size >> PAGE_SHIFT;
+}
+
 /**
  * i915_gem_fault - fault a page into the GTT
  * @area: CPU VMA in question
@@ -1709,7 +1719,11 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	/* Use a partial view if the object is bigger than the aperture. */
 	if (obj->base.size >= ggtt->mappable_end &&
 	    !i915_gem_object_is_tiled(obj)) {
-		static const unsigned int chunk_size = 256; // 1 MiB
+		unsigned int chunk_size;
+
+		chunk_size = 256; /* 1 MiB */
+		if (i915_gem_object_is_tiled(obj))
+			chunk_size = max(chunk_size, tile_row_pages(obj));
 
 		memset(&view, 0, sizeof(view));
 		view.type = I915_GGTT_VIEW_PARTIAL;
-- 
2.8.1


* [PATCH 05/10] drm/i915: Fix partial GGTT faulting
  2016-08-12 10:28 Partial VMA fixes Chris Wilson
                   ` (3 preceding siblings ...)
  2016-08-12 10:28 ` [PATCH 04/10] drm/i915: Choose partial chunksize based on tile row size Chris Wilson
@ 2016-08-12 10:28 ` Chris Wilson
  2016-08-15  9:29   ` Joonas Lahtinen
  2016-08-12 10:28 ` [PATCH 06/10] drm/i915: Choose not to evict faultable objects from the GGTT Chris Wilson
                   ` (5 subsequent siblings)
  10 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2016-08-12 10:28 UTC (permalink / raw)
  To: intel-gfx

We want to always use the partial VMA as a fallback for a failure to
bind the object into the GGTT. This extends the support for partial
objects in the GGTT to cover everything, not just objects that are too
large.
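
The resulting shape of the fault handler is then (condensed from the diff
below, error handling elided):

	/* First try to pin the whole object into the mappable aperture */
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE | PIN_NONBLOCK);
	if (IS_ERR(vma)) {
		/* Fall back to a fence-aligned partial view around the fault */
		vma = i915_gem_object_ggtt_pin(obj, &partial, 0, 0,
					       PIN_MAPPABLE);
	}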

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 68 +++++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b0e73c5a29c6..a64e8a133cdb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1682,7 +1682,6 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	struct i915_ggtt_view view = i915_ggtt_view_normal;
 	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
 	struct i915_vma *vma;
 	pgoff_t page_offset;
@@ -1717,26 +1716,30 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	}
 
 	/* Use a partial view if the object is bigger than the aperture. */
-	if (obj->base.size >= ggtt->mappable_end &&
-	    !i915_gem_object_is_tiled(obj)) {
+	/* Now pin it into the GTT if needed */
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+				       PIN_MAPPABLE | PIN_NONBLOCK);
+	if (IS_ERR(vma)) {
+		struct i915_ggtt_view partial;
 		unsigned int chunk_size;
 
 		chunk_size = 256; /* 1 MiB */
 		if (i915_gem_object_is_tiled(obj))
 			chunk_size = max(chunk_size, tile_row_pages(obj));
 
-		memset(&view, 0, sizeof(view));
-		view.type = I915_GGTT_VIEW_PARTIAL;
-		view.params.partial.offset = rounddown(page_offset, chunk_size);
-		view.params.partial.size =
+		memset(&partial, 0, sizeof(partial));
+		partial.type = I915_GGTT_VIEW_PARTIAL;
+		partial.params.partial.offset =
+			rounddown(page_offset, chunk_size);
+		partial.params.partial.size =
 			min_t(unsigned int,
 			      chunk_size,
 			      (area->vm_end - area->vm_start) / PAGE_SIZE -
-			      view.params.partial.offset);
-	}
+			      partial.params.partial.offset);
 
-	/* Now pin it into the GTT if needed */
-	vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
+		vma = i915_gem_object_ggtt_pin(obj, &partial, 0, 0,
+					       PIN_MAPPABLE);
+	}
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto err_unlock;
@@ -1754,26 +1757,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	pfn = ggtt->mappable_base + i915_ggtt_offset(vma);
 	pfn >>= PAGE_SHIFT;
 
-	if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
-		/* Overriding existing pages in partial view does not cause
-		 * us any trouble as TLBs are still valid because the fault
-		 * is due to userspace losing part of the mapping or never
-		 * having accessed it before (at this partials' range).
-		 */
-		unsigned long base = area->vm_start +
-				     (view.params.partial.offset << PAGE_SHIFT);
-		unsigned int i;
-
-		for (i = 0; i < view.params.partial.size; i++) {
-			ret = vm_insert_pfn(area,
-					    base + i * PAGE_SIZE,
-					    pfn + i);
-			if (ret)
-				break;
-		}
-
-		obj->fault_mappable = true;
-	} else {
+	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
 		if (!obj->fault_mappable) {
 			unsigned long size =
 				min_t(unsigned long,
@@ -1789,13 +1773,31 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 				if (ret)
 					break;
 			}
-
-			obj->fault_mappable = true;
 		} else
 			ret = vm_insert_pfn(area,
 					    (unsigned long)vmf->virtual_address,
 					    pfn + page_offset);
+	} else {
+		/* Overriding existing pages in partial view does not cause
+		 * us any trouble as TLBs are still valid because the fault
+		 * is due to userspace losing part of the mapping or never
+		 * having accessed it before (at this partials' range).
+		 */
+		const struct i915_ggtt_view *view = &vma->ggtt_view;
+		unsigned long base = area->vm_start +
+			(view->params.partial.offset << PAGE_SHIFT);
+		unsigned int i;
+
+		for (i = 0; i < view->params.partial.size; i++) {
+			ret = vm_insert_pfn(area,
+					    base + i * PAGE_SIZE,
+					    pfn + i);
+			if (ret)
+				break;
+		}
 	}
+
+	obj->fault_mappable = true;
 err_unpin:
 	__i915_vma_unpin(vma);
 err_unlock:
-- 
2.8.1


* [PATCH 06/10] drm/i915: Choose not to evict faultable objects from the GGTT
  2016-08-12 10:28 Partial VMA fixes Chris Wilson
                   ` (4 preceding siblings ...)
  2016-08-12 10:28 ` [PATCH 05/10] drm/i915: Fix partial GGTT faulting Chris Wilson
@ 2016-08-12 10:28 ` Chris Wilson
  2016-08-12 10:50   ` Joonas Lahtinen
  2016-08-12 10:28 ` [PATCH 07/10] drm/i915: Fallback to using unmappable memory for scanout Chris Wilson
                   ` (4 subsequent siblings)
  10 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2016-08-12 10:28 UTC (permalink / raw)
  To: intel-gfx

Oftentimes we do not want to evict mapped objects from the GGTT as
these are quite expensive to tear down and are frequently reused (incurring
an equally, if not more so, expensive setup). In particular, when faulting
in a new object we want to avoid evicting an active object, or else we
may trigger a page-fault-of-doom as we ping-pong between evicting two
objects.
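
The fault handler then chooses its pin flags along these lines (a sketch of
the hunk below; the 2 MiB cutoff is a rule of thumb):

	flags = PIN_MAPPABLE;
	if (obj->base.size > 2 << 20)
		/* avoid evicting active or mmapped objects to make room */
		flags |= PIN_NONBLOCK | PIN_NOFAULT;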

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c       | 9 +++++----
 drivers/gpu/drm/i915/i915_gem_evict.c | 7 +++++--
 drivers/gpu/drm/i915/i915_gem_gtt.h   | 1 +
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a64e8a133cdb..90308b2c4b0f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1686,6 +1686,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	struct i915_vma *vma;
 	pgoff_t page_offset;
 	unsigned long pfn;
+	unsigned int flags;
 	int ret;
 
 	/* We don't use vmf->pgoff since that has the fake offset */
@@ -1715,10 +1716,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 		goto err_unlock;
 	}
 
-	/* Use a partial view if the object is bigger than the aperture. */
-	/* Now pin it into the GTT if needed */
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
-				       PIN_MAPPABLE | PIN_NONBLOCK);
+	flags = PIN_MAPPABLE;
+	if (obj->base.size > 2 << 20)
+		flags |= PIN_NONBLOCK | PIN_NOFAULT;
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
 	if (IS_ERR(vma)) {
 		struct i915_ggtt_view partial;
 		unsigned int chunk_size;
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index f76c06e92677..fde61d7adf4d 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -47,7 +47,7 @@ gpu_is_idle(struct drm_i915_private *dev_priv)
 }
 
 static bool
-mark_free(struct i915_vma *vma, struct list_head *unwind)
+mark_free(struct i915_vma *vma, unsigned int flags, struct list_head *unwind)
 {
 	if (i915_vma_is_pinned(vma))
 		return false;
@@ -55,6 +55,9 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
 	if (WARN_ON(!list_empty(&vma->exec_list)))
 		return false;
 
+	if (flags & PIN_NOFAULT && vma->obj->fault_mappable)
+		return false;
+
 	list_add(&vma->exec_list, unwind);
 	return drm_mm_scan_add_block(&vma->node);
 }
@@ -129,7 +132,7 @@ search_again:
 	phase = phases;
 	do {
 		list_for_each_entry(vma, *phase, vm_link)
-			if (mark_free(vma, &eviction_list))
+			if (mark_free(vma, flags, &eviction_list))
 				goto found;
 	} while (*++phase);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 4b67347834b5..3d85a1959bd1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -639,6 +639,7 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
 #define PIN_NONBLOCK		BIT(0)
 #define PIN_MAPPABLE		BIT(1)
 #define PIN_ZONE_4G		BIT(2)
+#define PIN_NOFAULT		BIT(3)
 
 #define PIN_MBZ			BIT(5) /* I915_VMA_PIN_OVERFLOW */
 #define PIN_GLOBAL		BIT(6) /* I915_VMA_GLOBAL_BIND */
-- 
2.8.1


* [PATCH 07/10] drm/i915: Fallback to using unmappable memory for scanout
  2016-08-12 10:28 Partial VMA fixes Chris Wilson
                   ` (5 preceding siblings ...)
  2016-08-12 10:28 ` [PATCH 06/10] drm/i915: Choose not to evict faultable objects from the GGTT Chris Wilson
@ 2016-08-12 10:28 ` Chris Wilson
  2016-08-15  9:33   ` Joonas Lahtinen
  2016-08-12 10:28 ` [PATCH 08/10] drm/i915: Track display alignment on VMA Chris Wilson
                   ` (3 subsequent siblings)
  10 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2016-08-12 10:28 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

The existing ABI says that scanouts are pinned into the mappable region
so that legacy clients (e.g. old Xorg or plymouthd) can write directly
into the scanout through a GTT mapping. However if the surface does not
fit into the mappable region, we are better off just trying to fit it
anywhere and hoping for the best. (Any userspace that is cappable of
using ginormous scanouts is also likely not to rely on pure GTT
updates.) With the partial vma fault support, we are no longer
restricted to only using scanouts that we can pin (though it is still
preferred for performance reasons and for powersaving features like
FBC).

v2: Skip fence pinning when not mappable.
v3: Add a comment to explain the possible ramifications of not being
    able to use fences for unmappable scanouts.
v4: Rebase to skip over some local patches
v5: Rebase to defer until after we have unmappable GTT fault support

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Deepak S <deepak.s@linux.intel.com>
Cc: Damien Lespiau <damien.lespiau@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c  | 14 ++++++++++----
 drivers/gpu/drm/i915/intel_fbc.c |  4 ++++
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 90308b2c4b0f..6459e2766145 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3504,11 +3504,17 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 
 	/* As the user may map the buffer once pinned in the display plane
 	 * (e.g. libkms for the bootup splash), we have to ensure that we
-	 * always use map_and_fenceable for all scanout buffers.
+	 * always use map_and_fenceable for all scanout buffers. However,
+	 * it may simply be too big to fit into mappable, in which case
+	 * put it anyway and hope that userspace can cope (but always first
+	 * try to preserve the existing ABI).
 	 */
-	vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
-				       view->type == I915_GGTT_VIEW_NORMAL ?
-				       PIN_MAPPABLE : 0);
+	vma = ERR_PTR(-ENOSPC);
+	if (view->type == I915_GGTT_VIEW_NORMAL)
+		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
+					       PIN_MAPPABLE | PIN_NONBLOCK);
+	if (IS_ERR(vma))
+		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
 	if (IS_ERR(vma))
 		goto err_unpin_display;
 
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index 565a0a8120fb..aa3b80946021 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -775,6 +775,10 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
 
 	/* The use of a CPU fence is mandatory in order to detect writes
 	 * by the CPU to the scanout and trigger updates to the FBC.
+	 *
+	 * Note that it is possible for a tiled surface to be unmappable (and
+	 * so have no fence associated with it) due to aperture constraints
+	 * at the time of pinning.
 	 */
 	if (cache->fb.tiling_mode != I915_TILING_X ||
 	    cache->fb.fence_reg == I915_FENCE_REG_NONE) {
-- 
2.8.1


* [PATCH 08/10] drm/i915: Track display alignment on VMA
  2016-08-12 10:28 Partial VMA fixes Chris Wilson
                   ` (6 preceding siblings ...)
  2016-08-12 10:28 ` [PATCH 07/10] drm/i915: Fallback to using unmappable memory for scanout Chris Wilson
@ 2016-08-12 10:28 ` Chris Wilson
  2016-08-15  9:38   ` Joonas Lahtinen
  2016-08-12 10:28 ` [PATCH 09/10] drm/i915: Bump the inactive MRU tracking for all VMA accessed Chris Wilson
                   ` (2 subsequent siblings)
  10 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2016-08-12 10:28 UTC (permalink / raw)
  To: intel-gfx

When using the aliasing ppgtt and pagefliping with the shrinker/eviction
active, we note that we often have to rebind the backbuffer before
flipping onto the scanout because it has an invalid alignment. If we
store the worst-case alignment required for a VMA, we can avoid having
to rebind at critical junctures.
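
In short, the tracking amounts to this (a digest of the hunks below, not a
literal excerpt):

	/* On pin: remember the strictest alignment ever used for scanout */
	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);

	/* On the last unpin: forget it, the vma may be placed freely again */
	if (--vma->obj->pin_display == 0)
		vma->display_alignment = 0;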

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c     | 21 ++++++++-------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  2 +-
 2 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6459e2766145..d8f2fc282041 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2973,7 +2973,6 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
 	struct drm_i915_gem_object *obj = vma->obj;
 	u64 start, end;
-	u64 min_alignment;
 	int ret;
 
 	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
@@ -2984,17 +2983,10 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 		size = i915_gem_get_ggtt_size(dev_priv, size,
 					      i915_gem_object_get_tiling(obj));
 
-	min_alignment =
-		i915_gem_get_ggtt_alignment(dev_priv, size,
-					    i915_gem_object_get_tiling(obj),
-					    flags & PIN_MAPPABLE);
-	if (alignment == 0)
-		alignment = min_alignment;
-	if (alignment & (min_alignment - 1)) {
-		DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
-			  alignment, min_alignment);
-		return -EINVAL;
-	}
+	alignment = max(max(alignment, vma->display_alignment),
+			i915_gem_get_ggtt_alignment(dev_priv, size,
+						    i915_gem_object_get_tiling(obj),
+						    flags & PIN_MAPPABLE));
 
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
 
@@ -3518,6 +3510,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		goto err_unpin_display;
 
+	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
+
 	WARN_ON(obj->pin_display > i915_vma_pin_count(vma));
 
 	i915_gem_object_flush_cpu_write_domain(obj);
@@ -3548,7 +3542,8 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 	if (WARN_ON(vma->obj->pin_display == 0))
 		return;
 
-	vma->obj->pin_display--;
+	if (--vma->obj->pin_display == 0)
+		vma->display_alignment = 0;
 
 	i915_vma_unpin(vma);
 	WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma));
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 3d85a1959bd1..d2f79a1fb75f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -183,7 +183,7 @@ struct i915_vma {
 	struct drm_i915_fence_reg *fence;
 	struct sg_table *pages;
 	void __iomem *iomap;
-	u64 size;
+	u64 size, display_alignment;
 
 	unsigned int flags;
 	/**
-- 
2.8.1


* [PATCH 09/10] drm/i915: Bump the inactive MRU tracking for all VMA accessed
  2016-08-12 10:28 Partial VMA fixes Chris Wilson
                   ` (7 preceding siblings ...)
  2016-08-12 10:28 ` [PATCH 08/10] drm/i915: Track display alignment on VMA Chris Wilson
@ 2016-08-12 10:28 ` Chris Wilson
  2016-08-15  9:59   ` Joonas Lahtinen
  2016-08-12 10:28 ` [PATCH 10/10] drm/i915: Stop discarding GTT cache-domain on unbind vma Chris Wilson
  2016-08-12 10:33 ` ✗ Ro.CI.BAT: failure for series starting with [01/10] drm/i915: Move map-and-fenceable tracking to the VMA Patchwork
  10 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2016-08-12 10:28 UTC (permalink / raw)
  To: intel-gfx

When we bump the MRU access tracking on set-to-gtt, we need to not only
bump the primary GGTT VMA but all partials as well. Similarly we want to
bump the MRU access for when unpinning an object from the scanout.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d8f2fc282041..71a5a40442c9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3176,6 +3176,24 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
 					    I915_GEM_DOMAIN_CPU);
 }
 
+static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+
+	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+		if (!i915_vma_is_ggtt(vma))
+			continue;
+
+		if (i915_vma_is_active(vma))
+			continue;
+
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+	}
+}
+
 /**
  * Moves a single object to the GTT read, and possibly write domain.
  * @obj: object to act on
@@ -3188,7 +3206,6 @@ int
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
 	uint32_t old_write_domain, old_read_domains;
-	struct i915_vma *vma;
 	int ret;
 
 	ret = i915_gem_object_wait_rendering(obj, !write);
@@ -3238,11 +3255,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 					    old_write_domain);
 
 	/* And bump the LRU for this access */
-	vma = i915_gem_object_to_ggtt(obj, NULL);
-	if (vma &&
-	    drm_mm_node_allocated(&vma->node) &&
-	    !i915_vma_is_active(vma))
-		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+	i915_gem_object_bump_inactive_ggtt(obj);
 
 	return 0;
 }
@@ -3545,6 +3558,10 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 	if (--vma->obj->pin_display == 0)
 		vma->display_alignment = 0;
 
+	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
+	if (!i915_vma_is_active(vma))
+		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+
 	i915_vma_unpin(vma);
 	WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma));
 }
-- 
2.8.1


* [PATCH 10/10] drm/i915: Stop discarding GTT cache-domain on unbind vma
  2016-08-12 10:28 Partial VMA fixes Chris Wilson
                   ` (8 preceding siblings ...)
  2016-08-12 10:28 ` [PATCH 09/10] drm/i915: Bump the inactive MRU tracking for all VMA accessed Chris Wilson
@ 2016-08-12 10:28 ` Chris Wilson
  2016-08-15 10:03   ` Joonas Lahtinen
  2016-08-12 10:33 ` ✗ Ro.CI.BAT: failure for series starting with [01/10] drm/i915: Move map-and-fenceable tracking to the VMA Patchwork
  10 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2016-08-12 10:28 UTC (permalink / raw)
  To: intel-gfx; +Cc: Akash Goel

Since commit 43566dedde54 ("drm/i915: Broaden application of
set-domain(GTT)") we allowed objects to be in the GTT domain, but unbound.
Therefore removing the GTT cache domain when removing the GGTT vma is no
longer semantically correct.

An unfortunate side-effect is we lose the wondrously named
i915_gem_object_finish_gtt(), not to be confused with
i915_gem_gtt_finish_object()!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Akash Goel <akash.goel@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 26 +++-----------------------
 1 file changed, 3 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 71a5a40442c9..7ab6630b505b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2781,27 +2781,6 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
-static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
-{
-	u32 old_write_domain, old_read_domains;
-
-	/* Force a pagefault for domain tracking on next user access */
-	i915_gem_release_mmap(obj);
-
-	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
-		return;
-
-	old_read_domains = obj->base.read_domains;
-	old_write_domain = obj->base.write_domain;
-
-	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
-	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
-
-	trace_i915_gem_object_change_domain(obj,
-					    old_read_domains,
-					    old_write_domain);
-}
-
 static void __i915_vma_iounmap(struct i915_vma *vma)
 {
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
@@ -2857,13 +2836,14 @@ int i915_vma_unbind(struct i915_vma *vma)
 	GEM_BUG_ON(!obj->pages);
 
 	if (i915_vma_is_map_and_fenceable(vma)) {
-		i915_gem_object_finish_gtt(obj);
-
 		/* release the fence reg _after_ flushing */
 		ret = i915_vma_put_fence(vma);
 		if (ret)
 			return ret;
 
+		/* Force a pagefault for domain tracking on next user access */
+		i915_gem_release_mmap(obj);
+
 		__i915_vma_iounmap(vma);
 		vma->flags &= ~I915_VMA_CAN_FENCE;
 	}
-- 
2.8.1


* ✗ Ro.CI.BAT: failure for series starting with [01/10] drm/i915: Move map-and-fenceable tracking to the VMA
  2016-08-12 10:28 Partial VMA fixes Chris Wilson
                   ` (9 preceding siblings ...)
  2016-08-12 10:28 ` [PATCH 10/10] drm/i915: Stop discarding GTT cache-domain on unbind vma Chris Wilson
@ 2016-08-12 10:33 ` Patchwork
  10 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2016-08-12 10:33 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/10] drm/i915: Move map-and-fenceable tracking to the VMA
URL   : https://patchwork.freedesktop.org/series/11015/
State : failure

== Summary ==

Applying: drm/i915: Move map-and-fenceable tracking to the VMA
fatal: sha1 information is lacking or useless (drivers/gpu/drm/i915/i915_drv.h).
error: could not build fake ancestor
Patch failed at 0001 drm/i915: Move map-and-fenceable tracking to the VMA
The copy of the patch that failed is found in: .git/rebase-apply/patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


* Re: [PATCH 04/10] drm/i915: Choose partial chunksize based on tile row size
  2016-08-12 10:28 ` [PATCH 04/10] drm/i915: Choose partial chunksize based on tile row size Chris Wilson
@ 2016-08-12 10:38   ` Joonas Lahtinen
  0 siblings, 0 replies; 34+ messages in thread
From: Joonas Lahtinen @ 2016-08-12 10:38 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> In order to support setting up fences for partial mappings of an object,
> we have to align those mappings with the fence. The minimum chunksize we
> choose is at least the size of a single tile row.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 06/10] drm/i915: Choose not to evict faultable objects from the GGTT
  2016-08-12 10:28 ` [PATCH 06/10] drm/i915: Choose not to evict faultable objects from the GGTT Chris Wilson
@ 2016-08-12 10:50   ` Joonas Lahtinen
  2016-08-12 11:13     ` Chris Wilson
  0 siblings, 1 reply; 34+ messages in thread
From: Joonas Lahtinen @ 2016-08-12 10:50 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> @@ -1715,10 +1716,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
>  		goto err_unlock;
>  	}
>  
> -	/* Use a partial view if the object is bigger than the aperture. */
> -	/* Now pin it into the GTT if needed */
> -	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
> -				       PIN_MAPPABLE | PIN_NONBLOCK);
> +	flags = PIN_MAPPABLE;
> +	if (obj->base.size > 2 << 20)

Magic number.

> @@ -55,6 +55,9 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
>  	if (WARN_ON(!list_empty(&vma->exec_list)))
>  		return false;
>  
> +	if (flags & PIN_NOFAULT && vma->obj->fault_mappable)
> +		return false;

The flag name is rather counter-intuitive for it describes other VMAs
rather than our new VMA...

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 06/10] drm/i915: Choose not to evict faultable objects from the GGTT
  2016-08-12 10:50   ` Joonas Lahtinen
@ 2016-08-12 11:13     ` Chris Wilson
  2016-08-15 10:20       ` Joonas Lahtinen
  0 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2016-08-12 11:13 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Fri, Aug 12, 2016 at 01:50:56PM +0300, Joonas Lahtinen wrote:
> On pe, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> > @@ -1715,10 +1716,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
> >  		goto err_unlock;
> >  	}
> >  
> > -	/* Use a partial view if the object is bigger than the aperture. */
> > -	/* Now pin it into the GTT if needed */
> > -	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
> > -				       PIN_MAPPABLE | PIN_NONBLOCK);
> > +	flags = PIN_MAPPABLE;
> > +	if (obj->base.size > 2 << 20)
> 
> Magic number.

One day there may be a MiB() macro. It is a magic number, just a rule of
thumb based on minimum chunksize for a partial.
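
Something like this, purely hypothetical, there is no such macro in-tree:

	#define MiB(x) ((x) << 20)

	if (obj->base.size > MiB(2))
		flags |= PIN_NONBLOCK | PIN_NOFAULT;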
 
> > @@ -55,6 +55,9 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
> >  	if (WARN_ON(!list_empty(&vma->exec_list)))
> >  		return false;
> >  
> > +	if (flags & PIN_NOFAULT && vma->obj->fault_mappable)
> > +		return false;
> 
> The flag name is rather counter-intuitive for it describes other VMAs
> rather than our new VMA...

As does NONBLOCKING. We could lose this flag in favour of NOEVICT, but
I haven't run anything to confirm if that's a good tradeoff.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 01/10] drm/i915: Move map-and-fenceable tracking to the VMA
  2016-08-12 10:28 ` [PATCH 01/10] drm/i915: Move map-and-fenceable tracking to the VMA Chris Wilson
@ 2016-08-15  8:03   ` Joonas Lahtinen
  2016-08-15  8:14     ` Chris Wilson
  0 siblings, 1 reply; 34+ messages in thread
From: Joonas Lahtinen @ 2016-08-15  8:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> @@ -2843,8 +2843,7 @@ int i915_vma_unbind(struct i915_vma *vma)
>  	GEM_BUG_ON(obj->bind_count == 0);
>  	GEM_BUG_ON(!obj->pages);
>  
> -	if (i915_vma_is_ggtt(vma) &&
> -	    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {

Maybe add a comment here, as the test feels out-of-place at a quick
glance, especially wrt. what it replaces. Although you mentioned on IRC
this will soon be eliminated?

> +	if (i915_vma_is_map_and_fenceable(vma)) {
>  		i915_gem_object_finish_gtt(obj);
>  
>  		/* release the fence reg _after_ flushing */

<SNIP>

> @@ -2864,13 +2864,9 @@ int i915_vma_unbind(struct i915_vma *vma)
>  	drm_mm_remove_node(&vma->node);
>  	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
>  
> -	if (i915_vma_is_ggtt(vma)) {
> -		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
> -			obj->map_and_fenceable = false;
> -		} else if (vma->pages) {
> -			sg_free_table(vma->pages);
> -			kfree(vma->pages);
> -		}

Not sure if there should be a comment that for 1:1 mappings vma->pages
is just obj->pages so it should not be freed. Or maybe you could even
make the test if vma->pages != vma->obj->pages? More self-documenting.
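
I.e. something along these lines (untested sketch):

	if (vma->pages != vma->obj->pages) {
		sg_free_table(vma->pages);
		kfree(vma->pages);
	}
	vma->pages = NULL;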

> +	if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) {
> +		sg_free_table(vma->pages);
> +		kfree(vma->pages);
>  	}
>  	vma->pages = NULL;

<SNIP>

> @@ -3693,7 +3687,10 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)

This might also clear, so function name should be
update_map_and_fenceable, really.

> @@ -2262,11 +2262,11 @@ void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
>  	WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
>  
>  	intel_fill_fb_ggtt_view(&view, fb, rotation);
> +	vma = i915_gem_object_to_ggtt(obj, &view);
>  
> -	if (view.type == I915_GGTT_VIEW_NORMAL)
> +	if (i915_vma_is_map_and_fenceable(vma))
>  		i915_gem_object_unpin_fence(obj);
>  
> -	vma = i915_gem_object_to_ggtt(obj, &view);
>  	i915_gem_object_unpin_from_display_plane(vma);

This did not have NULL protection previously either, so should be OK.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 01/10] drm/i915: Move map-and-fenceable tracking to the VMA
  2016-08-15  8:03   ` Joonas Lahtinen
@ 2016-08-15  8:14     ` Chris Wilson
  0 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2016-08-15  8:14 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Mon, Aug 15, 2016 at 11:03:32AM +0300, Joonas Lahtinen wrote:
> On pe, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> Not sure if there should be a comment that for 1:1 mappings vma->pages
> is just obj->pages so it should not be freed. Or maybe you could even
> make the test if vma->pages != vma->obj->pages? More self-documenting.

I contemplated making this vma->pages != vma->obj->pages as well in
light of the recent changes, will do.
> 
> > +	if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) {
> > +		sg_free_table(vma->pages);
> > +		kfree(vma->pages);
> >  	}
> >  	vma->pages = NULL;
> 
> <SNIP>
> 
> > @@ -3693,7 +3687,10 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
> 
> This might also clear, so function name should be
> update_map_and_fenceable, really.

update/compute either is a fine TODO ;)
 
> > @@ -2262,11 +2262,11 @@ void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
> >  	WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
> >  
> >  	intel_fill_fb_ggtt_view(&view, fb, rotation);
> > +	vma = i915_gem_object_to_ggtt(obj, &view);
> >  
> > -	if (view.type == I915_GGTT_VIEW_NORMAL)
> > +	if (i915_vma_is_map_and_fenceable(vma))
> >  		i915_gem_object_unpin_fence(obj);
> >  
> > -	vma = i915_gem_object_to_ggtt(obj, &view);
> >  	i915_gem_object_unpin_from_display_plane(vma);
> 
> This did not have NULL protection previously either, so should be OK.

Yup, the long-term goal here is to pass in the vma.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 03/10] drm/i915: Move fence tracking from object to vma
  2016-08-12 10:28 ` [PATCH 03/10] drm/i915: Move fence tracking from object to vma Chris Wilson
@ 2016-08-15  9:18   ` Joonas Lahtinen
  2016-08-15  9:25     ` Chris Wilson
  2016-08-15  9:52     ` Chris Wilson
  0 siblings, 2 replies; 34+ messages in thread
From: Joonas Lahtinen @ 2016-08-15  9:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> @@ -455,15 +455,21 @@ struct intel_opregion {
>  struct intel_overlay;
>  struct intel_overlay_error_state;
>  
> -#define I915_FENCE_REG_NONE -1
> -#define I915_MAX_NUM_FENCES 32
> -/* 32 fences + sign bit for FENCE_REG_NONE */
> -#define I915_MAX_NUM_FENCE_BITS 6
> -
>  struct drm_i915_fence_reg {
>  	struct list_head lru_list;

Could be converted to lru_link while at it.

> @@ -1131,15 +1131,11 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
>  	} else {
>  		node.start = i915_ggtt_offset(vma);
>  		node.allocated = false;
> -		ret = i915_gem_object_put_fence(obj);
> +		ret = i915_vma_put_fence(vma);
>  		if (ret)
>  			goto out_unpin;
>  	}
>  
> -	ret = i915_gem_object_set_to_gtt_domain(obj, true);
> -	if (ret)
> -		goto out_unpin;
> -

This is a somewhat unexpected change in here. Care to explain?

> +static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
> +				 struct i915_vma *vma)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>  	i915_reg_t fence_reg_lo, fence_reg_hi;
>  	int fence_pitch_shift;
> +	u64 val;
>  
> -	if (INTEL_INFO(dev)->gen >= 6) {
> -		fence_reg_lo = FENCE_REG_GEN6_LO(reg);
> -		fence_reg_hi = FENCE_REG_GEN6_HI(reg);
> +	if (INTEL_INFO(fence->i915)->gen >= 6) {
> +		fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
> +		fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
>  		fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
> +
>  	} else {
> -		fence_reg_lo = FENCE_REG_965_LO(reg);
> -		fence_reg_hi = FENCE_REG_965_HI(reg);
> +		fence_reg_lo = FENCE_REG_965_LO(fence->id);
> +		fence_reg_hi = FENCE_REG_965_HI(fence->id);
>  		fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
>  	}
>  
> -	/* To w/a incoherency with non-atomic 64-bit register updates,
> -	 * we split the 64-bit update into two 32-bit writes. In order
> -	 * for a partial fence not to be evaluated between writes, we
> -	 * precede the update with write to turn off the fence register,
> -	 * and only enable the fence as the last step.
> -	 *
> -	 * For extra levels of paranoia, we make sure each step lands
> -	 * before applying the next step.
> -	 */
> -	I915_WRITE(fence_reg_lo, 0);
> -	POSTING_READ(fence_reg_lo);
> -
> -	if (obj) {
> -		struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
> -		unsigned int tiling = i915_gem_object_get_tiling(obj);
> -		unsigned int stride = i915_gem_object_get_stride(obj);
> -		u64 size = vma->node.size;
> -		u32 row_size = stride * (tiling == I915_TILING_Y ? 32 : 8);
> -		u64 val;
> -
> -		/* Adjust fence size to match tiled area */
> -		size = rounddown(size, row_size);
> +	if (vma) {
> +		unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
> +		unsigned int tiling_y = tiling == I915_TILING_Y;

bool and maybe 'y_tiled'?

> +		unsigned int stride = i915_gem_object_get_stride(vma->obj);
> +		u32 row_size = stride * (tiling_y ? 32 : 8);
> +		u32 size = rounddown(vma->node.size, row_size);
>  
>  		val = ((vma->node.start + size - 4096) & 0xfffff000) << 32;
>  		val |= vma->node.start & 0xfffff000;
>  		val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
> -		if (tiling == I915_TILING_Y)
> +		if (tiling_y)
>  			val |= 1 << I965_FENCE_TILING_Y_SHIFT;

While around, BIT()

>  		val |= I965_FENCE_REG_VALID;
> +	} else
> +		val = 0;
> +
> +	if (1) {

Umm? At least ought to have TODO: / FIXME: or some explanation. And

if (!1)
	return;

Would make the code more readable too, as you do not have any else
branch.

> @@ -152,20 +148,23 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
>  	} else
>  		val = 0;
>  
> -	I915_WRITE(FENCE_REG(reg), val);
> -	POSTING_READ(FENCE_REG(reg));
> +	if (1) {

Ditto.

> @@ -186,96 +185,95 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
>  	} else
>  		val = 0;
>  
> -	I915_WRITE(FENCE_REG(reg), val);
> -	POSTING_READ(FENCE_REG(reg));
> -}
> +	if (1) {

Ditto.

> -static struct drm_i915_fence_reg *
> -i915_find_fence_reg(struct drm_device *dev)
> +static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -	struct drm_i915_fence_reg *reg, *avail;
> -	int i;
> -
> -	/* First try to find a free reg */
> -	avail = NULL;
> -	for (i = 0; i < dev_priv->num_fence_regs; i++) {
> -		reg = &dev_priv->fence_regs[i];
> -		if (!reg->obj)
> -			return reg;
> -
> -		if (!reg->pin_count)
> -			avail = reg;
> -	}
> -
> -	if (avail == NULL)
> -		goto deadlock;
> +	struct drm_i915_fence_reg *fence;
>  
> -	/* None available, try to steal one or wait for a user to finish */
> -	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
> -		if (reg->pin_count)
> +	list_for_each_entry(fence, &dev_priv->mm.fence_list, lru_list) {
> +		if (fence->pin_count)
>  			continue;
>  
> -		return reg;
> +		return fence;

Umm, one could check for !fence->pin_count and then return fence and
drop the braces.
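
I.e. (sketch):

	list_for_each_entry(fence, &dev_priv->mm.fence_list, lru_list)
		if (!fence->pin_count)
			return fence;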

>  	}
>  
> -deadlock:
>  	/* Wait for completion of pending flips which consume fences */
> -	if (intel_has_pending_fb_unpin(dev))
> +	if (intel_has_pending_fb_unpin(&dev_priv->drm))
>  		return ERR_PTR(-EAGAIN);
>  
>  	return ERR_PTR(-EDEADLK);
>  }
>  
>  /**
> - * i915_gem_object_get_fence - set up fencing for an object
> - * @obj: object to map through a fence reg
> + * i915_vma_get_fence - set up fencing for a vma
> + * @vma: vma to map through a fence reg

I think we should use uppercase VMA in the documentation sections?

> +static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
> +	u32 size;
> +
> +	if (!i915_vma_is_map_and_fenceable(vma))
> +		return true;
> +
> +	if (INTEL_GEN(dev_priv) == 3) {

Up there IS_GEN2 and IS_GEN3 are still used, just noting.

> +++ b/drivers/gpu/drm/i915/intel_fbc.c
> @@ -244,7 +244,6 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv)
>  		dpfc_ctl |= DPFC_CTL_LIMIT_1X;
>  		break;
>  	}
> -	dpfc_ctl |= DPFC_CTL_FENCE_EN;

This bit is no longer set anywhere; forgot to reinsert it elsewhere?

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 03/10] drm/i915: Move fence tracking from object to vma
  2016-08-15  9:18   ` Joonas Lahtinen
@ 2016-08-15  9:25     ` Chris Wilson
  2016-08-15 10:16       ` Joonas Lahtinen
  2016-08-15  9:52     ` Chris Wilson
  1 sibling, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2016-08-15  9:25 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Mon, Aug 15, 2016 at 12:18:20PM +0300, Joonas Lahtinen wrote:
> On pe, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> > +	if (1) {
> 
> Umm? At least ought to have TODO: / FIXME: or some explanation. And

You're not aware of the pipelined fencing?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 05/10] drm/i915: Fix partial GGTT faulting
  2016-08-12 10:28 ` [PATCH 05/10] drm/i915: Fix partial GGTT faulting Chris Wilson
@ 2016-08-15  9:29   ` Joonas Lahtinen
  0 siblings, 0 replies; 34+ messages in thread
From: Joonas Lahtinen @ 2016-08-15  9:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> @@ -1717,26 +1716,30 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
>  	}
>  
>  	/* Use a partial view if the object is bigger than the aperture. */

Move this comment down to where the partial view is actually created.

> -	if (obj->base.size >= ggtt->mappable_end &&
> -	    !i915_gem_object_is_tiled(obj)) {
> +	/* Now pin it into the GTT if needed */
> +	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
> +				       PIN_MAPPABLE | PIN_NONBLOCK);
> +	if (IS_ERR(vma)) {
> +		struct i915_ggtt_view partial;

'view' still makes more sense; less repetition of the word partial further down.

> @@ -1754,26 +1757,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
>  	pfn = ggtt->mappable_base + i915_ggtt_offset(vma);
>  	pfn >>= PAGE_SHIFT;
>  
> -	if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
> -		/* Overriding existing pages in partial view does not cause
> -		 * us any trouble as TLBs are still valid because the fault
> -		 * is due to userspace losing part of the mapping or never
> -		 * having accessed it before (at this partials' range).
> -		 */
> -		unsigned long base = area->vm_start +
> -				     (view.params.partial.offset << PAGE_SHIFT);
> -		unsigned int i;
> -
> -		for (i = 0; i < view.params.partial.size; i++) {
> -			ret = vm_insert_pfn(area,
> -					    base + i * PAGE_SIZE,
> -					    pfn + i);
> -			if (ret)
> -				break;
> -		}
> -
> -		obj->fault_mappable = true;
> -	} else {
> +	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {

likely() ?

>  		if (!obj->fault_mappable) {
>  			unsigned long size =
>  				min_t(unsigned long,
> @@ -1789,13 +1773,31 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
>  				if (ret)
>  					break;
>  			}
> -
> -			obj->fault_mappable = true;
>  		} else
>  			ret = vm_insert_pfn(area,
>  					    (unsigned long)vmf->virtual_address,
>  					    pfn + page_offset);
> +	} else {
> +		/* Overriding existing pages in partial view does not cause
> +		 * us any trouble as TLBs are still valid because the fault
> +		 * is due to userspace losing part of the mapping or never
> +		 * having accessed it before (at this partials' range).
> +		 */
> +		const struct i915_ggtt_view *view = &vma->ggtt_view;

I now see why you did the rename. Do not have a better idea really, so;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 07/10] drm/i915: Fallback to using unmappable memory for scanout
  2016-08-12 10:28 ` [PATCH 07/10] drm/i915: Fallback to using unmappable memory for scanout Chris Wilson
@ 2016-08-15  9:33   ` Joonas Lahtinen
  0 siblings, 0 replies; 34+ messages in thread
From: Joonas Lahtinen @ 2016-08-15  9:33 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Daniel Vetter

On pe, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> The existing ABI says that scanouts are pinned into the mappable region
> so that legacy clients (e.g. old Xorg or plymouthd) can write directly
> into the scanout through a GTT mapping. However if the surface does not
> fit into the mappable region, we are better off just trying to fit it
> anywhere and hoping for the best. (Any userspace that is cappable of

s/cappable/capable/

> using ginormous scanouts is also likely not to rely on pure GTT
> updates.) With the partial vma fault support, we are no longer
> restricted to only using scanouts that we can pin (though it is still
> preferred for performance reasons and for powersaving features like
> FBC).
> 
> v2: Skip fence pinning when not mappable.
> v3: Add a comment to explain the possible ramifications of not being
>     able to use fences for unmappable scanouts.
> v4: Rebase to skip over some local patches
> v5: Rebase to defer until after we have unmappable GTT fault support
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Could use some Acked-by tags.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 08/10] drm/i915: Track display alignment on VMA
  2016-08-12 10:28 ` [PATCH 08/10] drm/i915: Track display alignment on VMA Chris Wilson
@ 2016-08-15  9:38   ` Joonas Lahtinen
  2016-08-16  8:40     ` Chris Wilson
  0 siblings, 1 reply; 34+ messages in thread
From: Joonas Lahtinen @ 2016-08-15  9:38 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> When using the aliasing ppgtt and pagefliping with the shrinker/eviction

s/fliping/flipping/

> active, we note that we often have to rebind the backbuffer before
> flipping onto the scanout because it has an invalid alignment. If we
> store the worst-case alignment required for a VMA, we can avoid having
> to rebind at critical junctures.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

<SNIP>

> @@ -2984,17 +2983,10 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>  		size = i915_gem_get_ggtt_size(dev_priv, size,
>  					      i915_gem_object_get_tiling(obj));
>  
> -	min_alignment =
> -		i915_gem_get_ggtt_alignment(dev_priv, size,
> -					    i915_gem_object_get_tiling(obj),
> -					    flags & PIN_MAPPABLE);
> -	if (alignment == 0)
> -		alignment = min_alignment;
> -	if (alignment & (min_alignment - 1)) {
> -		DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
> -			  alignment, min_alignment);
> -		return -EINVAL;
> -	}
> +	alignment = max(max(alignment, vma->display_alignment),
> +			i915_gem_get_ggtt_alignment(dev_priv, size,
> +						    i915_gem_object_get_tiling(obj),
> +						    flags & PIN_MAPPABLE));

No DRM_DEBUG no more?

> @@ -183,7 +183,7 @@ struct i915_vma {
>  	struct drm_i915_fence_reg *fence;
>  	struct sg_table *pages;
>  	void __iomem *iomap;
> -	u64 size;
> +	u64 size, display_alignment;

Unrelated variables, better off on their own lines.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 03/10] drm/i915: Move fence tracking from object to vma
  2016-08-15  9:18   ` Joonas Lahtinen
  2016-08-15  9:25     ` Chris Wilson
@ 2016-08-15  9:52     ` Chris Wilson
  1 sibling, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2016-08-15  9:52 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Mon, Aug 15, 2016 at 12:18:20PM +0300, Joonas Lahtinen wrote:
> On pe, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> > @@ -1131,15 +1131,11 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
> >  	} else {
> >  		node.start = i915_ggtt_offset(vma);
> >  		node.allocated = false;
> > -		ret = i915_gem_object_put_fence(obj);
> > +		ret = i915_vma_put_fence(vma);
> >  		if (ret)
> >  			goto out_unpin;
> >  	}
> >  
> > -	ret = i915_gem_object_set_to_gtt_domain(obj, true);
> > -	if (ret)
> > -		goto out_unpin;
> > -
> 
> This is a somewhat unexpected change in here. Care to explain?

Spontaneous disappearance due to rebasing. Pops back into existence
again later!
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 09/10] drm/i915: Bump the inactive MRU tracking for all VMA accessed
  2016-08-12 10:28 ` [PATCH 09/10] drm/i915: Bump the inactive MRU tracking for all VMA accessed Chris Wilson
@ 2016-08-15  9:59   ` Joonas Lahtinen
  2016-08-15 10:12     ` Chris Wilson
  0 siblings, 1 reply; 34+ messages in thread
From: Joonas Lahtinen @ 2016-08-15  9:59 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

> When we bump the MRU access tracking on set-to-gtt, we need to not only
> bump the primary GGTT VMA but all partials as well. Similarly we want to
> bump the MRU access for when unpinning an object from the scanout.

Refer to the list as LRU in the commit title and message to avoid confusion.

On pe, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> +static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_vma *vma;
> +
> +	list_for_each_entry(vma, &obj->vma_list, obj_link) {
> +		if (!i915_vma_is_ggtt(vma))
> +			continue;
> +
> +		if (i915_vma_is_active(vma))
> +			continue;

Could combine these two into one if.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 10/10] drm/i915: Stop discarding GTT cache-domain on unbind vma
  2016-08-12 10:28 ` [PATCH 10/10] drm/i915: Stop discarding GTT cache-domain on unbind vma Chris Wilson
@ 2016-08-15 10:03   ` Joonas Lahtinen
  0 siblings, 0 replies; 34+ messages in thread
From: Joonas Lahtinen @ 2016-08-15 10:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Akash Goel

On pe, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> Since commit 43566dedde54 ("drm/i915: Broaden application of
> set-domain(GTT)") we allowed objects to be in the GTT domain, but unbound.
> Therefore removing the GTT cache domain when removing the GGTT vma is no
> longer semantically correct.
> 
> An unfortunate side-effect is we lose the wondrously named
> i915_gem_object_finish_gtt(), not to be confused with
> i915_gem_gtt_finish_object()!
> 

Does what it promises.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 09/10] drm/i915: Bump the inactive MRU tracking for all VMA accessed
  2016-08-15  9:59   ` Joonas Lahtinen
@ 2016-08-15 10:12     ` Chris Wilson
  2016-08-15 11:10       ` Joonas Lahtinen
  0 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2016-08-15 10:12 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Mon, Aug 15, 2016 at 12:59:09PM +0300, Joonas Lahtinen wrote:
> > When we bump the MRU access tracking on set-to-gtt, we need to not only
> > bump the primary GGTT VMA but all partials as well. Similarly we want to
> > bump the MRU access for when unpinning an object from the scanout.
> 
> Refer to the list as LRU in the commit title and message to avoid confusion.

Still disagree. We are adjusting the MRU entity, as the code always has,
and then evicting from the LRU.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 03/10] drm/i915: Move fence tracking from object to vma
  2016-08-15  9:25     ` Chris Wilson
@ 2016-08-15 10:16       ` Joonas Lahtinen
  0 siblings, 0 replies; 34+ messages in thread
From: Joonas Lahtinen @ 2016-08-15 10:16 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On ma, 2016-08-15 at 10:25 +0100, Chris Wilson wrote:
> On Mon, Aug 15, 2016 at 12:18:20PM +0300, Joonas Lahtinen wrote:
> > 
> > On pe, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> > > 
> > > +	if (1) {
> > Umm? At least ought to have TODO: / FIXME: or some explanation. And
> You're not aware of the pipelined fencing?

I was most definitely not; now I am, somewhat. Still need to add dem
TODOs.

Regards, Joonas

> -Chris
> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 06/10] drm/i915: Choose not to evict faultable objects from the GGTT
  2016-08-12 11:13     ` Chris Wilson
@ 2016-08-15 10:20       ` Joonas Lahtinen
  0 siblings, 0 replies; 34+ messages in thread
From: Joonas Lahtinen @ 2016-08-15 10:20 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On pe, 2016-08-12 at 12:13 +0100, Chris Wilson wrote:
> On Fri, Aug 12, 2016 at 01:50:56PM +0300, Joonas Lahtinen wrote:
> > 
> > On pe, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> > > 
> > > @@ -1715,10 +1716,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
> > >  		goto err_unlock;
> > >  	}
> > >  
> > > -	/* Use a partial view if the object is bigger than the aperture. */
> > > -	/* Now pin it into the GTT if needed */
> > > -	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
> > > -				       PIN_MAPPABLE | PIN_NONBLOCK);
> > > +	flags = PIN_MAPPABLE;
> > > +	if (obj->base.size > 2 << 20)
> > Magic number.
> One day there may be a MiB() macro. It is a magic number, just a rule of
> thumb based on minimum chunksize for a partial.

#define the minimum chunk size and use it here too? With a warning of
the number being derived from the wildest approximations.
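
Sketch, with an invented name:

	/* Minimum partial chunk in pages (1 MiB); a wild approximation */
	#define I915_FAULT_MIN_CHUNK_PAGES 256

	/* Rule of thumb: use partials for anything over twice a chunk */
	if (obj->base.size > 2 * I915_FAULT_MIN_CHUNK_PAGES * PAGE_SIZE)
		flags |= PIN_NONBLOCK | PIN_NOFAULT;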

>  
> > 
> > > 
> > > @@ -55,6 +55,9 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
> > >  	if (WARN_ON(!list_empty(&vma->exec_list)))
> > >  		return false;
> > >  
> > > +	if (flags & PIN_NOFAULT && vma->obj->fault_mappable)
> > > +		return false;
> > The flag name is rather counter-intuitive for it describes other VMAs
> > rather than our new VMA...
> As does NONBLOCKING. We could lose this flag in favour of NOEVICT, but
> I haven't run anything to confirm if that's a good tradeoff.

Maybe the flag should be something like __PIN_NOFAULTING to make the
distinction, in addition to __PIN_NONBLOCKING? And then make sure they're
never set on the vma itself.

Regards, Joonas

> -Chris
> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 09/10] drm/i915: Bump the inactive MRU tracking for all VMA accessed
  2016-08-15 10:12     ` Chris Wilson
@ 2016-08-15 11:10       ` Joonas Lahtinen
  0 siblings, 0 replies; 34+ messages in thread
From: Joonas Lahtinen @ 2016-08-15 11:10 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Mon, 2016-08-15 at 11:12 +0100, Chris Wilson wrote:
> On Mon, Aug 15, 2016 at 12:59:09PM +0300, Joonas Lahtinen wrote:
> > 
> > > 
> > > When we bump the MRU access tracking on set-to-gtt, we need to not only
> > > bump the primary GGTT VMA but all partials as well. Similarly we want to
> > > bump the MRU access for when unpinning an object from the scanout.
> > Refer to the list as LRU in the commit title and message to avoid confusion.
> Still disagree. We are adjusting the MRU end of the list, as the code
> always has, and then evicting from the LRU end.

I would not use the abbreviation MRU when discussing an LRU scheme, but
it's only the commit message, so I can live with it.
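
For reference, the operation in question is the usual tail-bump on an
LRU-ordered list -- a sketch, assuming inactive_list as the list being
bumped:

	/* The list is kept in LRU order: eviction scans from the head
	 * (least recently used), so bumping a vma means moving it to
	 * the tail (most recently used).
	 */
	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);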

Regards, Joonas

> -Chris
> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 02/10] drm/i915/userptr: Make gup errors stickier
  2016-08-12 10:28 ` [PATCH 02/10] drm/i915/userptr: Make gup errors stickier Chris Wilson
@ 2016-08-15 11:16   ` Joonas Lahtinen
  2016-08-15 15:08   ` Mika Kuoppala
  2016-08-16  7:40   ` Mika Kuoppala
  2 siblings, 0 replies; 34+ messages in thread
From: Joonas Lahtinen @ 2016-08-15 11:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Fri, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> Keep any error reported by the gup_worker until we are notified that the
> arena has changed (via the mmu-notifier). This ensures that two
> consecutive calls to i915_gem_object_get_pages() report the same error,
> curtailing a loop of detecting a fault and requeueing a gup_worker.
> 

I think this is for Mika to review.

Regards, Joonas

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_userptr.c | 17 +++++++----------
>  1 file changed, 7 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 57218cca7e05..be54825ef3e8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -542,8 +542,6 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>  			}
>  		}
>  		obj->userptr.work = ERR_PTR(ret);
> -		if (ret)
> -			__i915_gem_userptr_set_active(obj, false);
>  	}
>  
>  	obj->userptr.workers--;
> @@ -628,15 +626,14 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
>  	 * to the vma (discard or cloning) which should prevent the more
>  	 * egregious cases from causing harm.
>  	 */
> -	if (IS_ERR(obj->userptr.work)) {
> -		/* active flag will have been dropped already by the worker */
> -		ret = PTR_ERR(obj->userptr.work);
> -		obj->userptr.work = NULL;
> -		return ret;
> -	}
> -	if (obj->userptr.work)
> +
> +	if (obj->userptr.work) {
>  		/* active flag should still be held for the pending work */
> -		return -EAGAIN;
> +		if (IS_ERR(obj->userptr.work))
> +			return PTR_ERR(obj->userptr.work);
> +		else
> +			return -EAGAIN;
> +	}
>  
>  	/* Let the mmu-notifier know that we have begun and need cancellation */
>  	ret = __i915_gem_userptr_set_active(obj, true);
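
As an aside, obj->userptr.work now acts as a three-state flag -- a
summary of the diff above, not code from the tree:

	/* obj->userptr.work:
	 *   NULL       - no worker queued; a new gup worker may be started
	 *   valid ptr  - worker still pending; callers see -EAGAIN
	 *   ERR_PTR(e) - worker failed; the error stays sticky until the
	 *                mmu-notifier reports that the arena has changed
	 */
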
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 02/10] drm/i915/userptr: Make gup errors stickier
  2016-08-12 10:28 ` [PATCH 02/10] drm/i915/userptr: Make gup errors stickier Chris Wilson
  2016-08-15 11:16   ` Joonas Lahtinen
@ 2016-08-15 15:08   ` Mika Kuoppala
  2016-08-15 15:28     ` Chris Wilson
  2016-08-16  7:40   ` Mika Kuoppala
  2 siblings, 1 reply; 34+ messages in thread
From: Mika Kuoppala @ 2016-08-15 15:08 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Keep any error reported by the gup_worker until we are notified that the
> arena has changed (via the mmu-notifier). This ensures that two
> consecutive calls to i915_gem_object_get_pages() report the same error,
> curtailing a loop of detecting a fault and requeueing a gup_worker.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_userptr.c | 17 +++++++----------
>  1 file changed, 7 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 57218cca7e05..be54825ef3e8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -542,8 +542,6 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>  			}
>  		}
>  		obj->userptr.work = ERR_PTR(ret);
> -		if (ret)
> -			__i915_gem_userptr_set_active(obj, false);
>  	}
>  
>  	obj->userptr.workers--;
> @@ -628,15 +626,14 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
>  	 * to the vma (discard or cloning) which should prevent the more
>  	 * egregious cases from causing harm.
>  	 */
> -	if (IS_ERR(obj->userptr.work)) {
> -		/* active flag will have been dropped already by the worker */
> -		ret = PTR_ERR(obj->userptr.work);
> -		obj->userptr.work = NULL;
> -		return ret;
> -	}
> -	if (obj->userptr.work)
> +
> +	if (obj->userptr.work) {
>  		/* active flag should still be held for the pending work */
> -		return -EAGAIN;
> +		if (IS_ERR(obj->userptr.work))
> +			return PTR_ERR(obj->userptr.work);

Previously you set the work pointer to NULL before returning the error;
now you don't.

Is it now the responsibility of cancel_userptr, through the mmu
notifier, to clear the pointer?

-Mika


> +		else
> +			return -EAGAIN;
> +	}
>  
>  	/* Let the mmu-notifier know that we have begun and need cancellation */
>  	ret = __i915_gem_userptr_set_active(obj, true);
> -- 
> 2.8.1
>

* Re: [PATCH 02/10] drm/i915/userptr: Make gup errors stickier
  2016-08-15 15:08   ` Mika Kuoppala
@ 2016-08-15 15:28     ` Chris Wilson
  0 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2016-08-15 15:28 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Mon, Aug 15, 2016 at 06:08:21PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > +	if (obj->userptr.work) {
> >  		/* active flag should still be held for the pending work */
> > -		return -EAGAIN;
> > +		if (IS_ERR(obj->userptr.work))
> > +			return PTR_ERR(obj->userptr.work);
> 
> Previously you did set the work to null before returning error,
> now you dont.
> 
> Is it the responsibility of cancel_userptr now, through mm notifier,
> that clears the pointer?

Yes. 
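
In other words -- a sketch, with the surrounding cancel_userptr code
elided and the active-flag handling assumed:

	/* Inside the mmu-notifier cancellation: invalidation is the one
	 * event that says the arena has changed, so it is also the
	 * point at which the sticky error is forgotten.
	 */
	obj->userptr.work = NULL;
	__i915_gem_userptr_set_active(obj, false);	/* assumed on this path */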

> > Keep any error reported by the gup_worker until we are notified that the
> > arena has changed (via the mmu-notifier). This ensures that two
> > consecutive calls to i915_gem_object_get_pages() report the same error,
> > curtailing a loop of detecting a fault and requeueing a gup_worker.

-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 02/10] drm/i915/userptr: Make gup errors stickier
  2016-08-12 10:28 ` [PATCH 02/10] drm/i915/userptr: Make gup errors stickier Chris Wilson
  2016-08-15 11:16   ` Joonas Lahtinen
  2016-08-15 15:08   ` Mika Kuoppala
@ 2016-08-16  7:40   ` Mika Kuoppala
  2 siblings, 0 replies; 34+ messages in thread
From: Mika Kuoppala @ 2016-08-16  7:40 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Keep any error reported by the gup_worker until we are notified that the
> arena has changed (via the mmu-notifier). This ensures that two
> consecutive calls to i915_gem_object_get_pages() report the same error,
> curtailing a loop of detecting a fault and requeueing a gup_worker.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gem_userptr.c | 17 +++++++----------
>  1 file changed, 7 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 57218cca7e05..be54825ef3e8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -542,8 +542,6 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>  			}
>  		}
>  		obj->userptr.work = ERR_PTR(ret);
> -		if (ret)
> -			__i915_gem_userptr_set_active(obj, false);
>  	}
>  
>  	obj->userptr.workers--;
> @@ -628,15 +626,14 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
>  	 * to the vma (discard or cloning) which should prevent the more
>  	 * egregious cases from causing harm.
>  	 */
> -	if (IS_ERR(obj->userptr.work)) {
> -		/* active flag will have been dropped already by the worker */
> -		ret = PTR_ERR(obj->userptr.work);
> -		obj->userptr.work = NULL;
> -		return ret;
> -	}
> -	if (obj->userptr.work)
> +
> +	if (obj->userptr.work) {
>  		/* active flag should still be held for the pending work */
> -		return -EAGAIN;
> +		if (IS_ERR(obj->userptr.work))
> +			return PTR_ERR(obj->userptr.work);
> +		else
> +			return -EAGAIN;
> +	}
>  
>  	/* Let the mmu-notifier know that we have begun and need cancellation */
>  	ret = __i915_gem_userptr_set_active(obj, true);
> -- 
> 2.8.1
>

* Re: [PATCH 08/10] drm/i915: Track display alignment on VMA
  2016-08-15  9:38   ` Joonas Lahtinen
@ 2016-08-16  8:40     ` Chris Wilson
  0 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2016-08-16  8:40 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Mon, Aug 15, 2016 at 12:38:42PM +0300, Joonas Lahtinen wrote:
> On Fri, 2016-08-12 at 11:28 +0100, Chris Wilson wrote:
> > @@ -2984,17 +2983,10 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> >  		size = i915_gem_get_ggtt_size(dev_priv, size,
> >  					      i915_gem_object_get_tiling(obj));
> >  
> > -	min_alignment =
> > -		i915_gem_get_ggtt_alignment(dev_priv, size,
> > -					    i915_gem_object_get_tiling(obj),
> > -					    flags & PIN_MAPPABLE);
> > -	if (alignment == 0)
> > -		alignment = min_alignment;
> > -	if (alignment & (min_alignment - 1)) {
> > -		DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
> > -			  alignment, min_alignment);
> > -		return -EINVAL;
> > -	}
> > +	alignment = max(max(alignment, vma->display_alignment),
> > +			i915_gem_get_ggtt_alignment(dev_priv, size,
> > +						    i915_gem_object_get_tiling(obj),
> > +						    flags & PIN_MAPPABLE));
> 
> No DRM_DEBUG no more?

The invalid uabi alignment is caught by execbuf; here, instead of
treating it as an error, we just overrule the caller with our minimum
requirements.
-Chris
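
For illustration, the kind of uabi check that remains at the execbuf
boundary looks like this (placement and exact form assumed, not quoted
from the tree):

	/* Userspace-requested alignment must be a power of two; reject
	 * it up front so that i915_vma_insert() only ever has to raise
	 * the value to the hardware minimum, never report an error.
	 */
	if (entry->alignment && !is_power_of_2(entry->alignment))
		return -EINVAL;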

-- 
Chris Wilson, Intel Open Source Technology Centre

end of thread, other threads:[~2016-08-16  8:40 UTC | newest]

Thread overview: 34+ messages
2016-08-12 10:28 Partial VMA fixes Chris Wilson
2016-08-12 10:28 ` [PATCH 01/10] drm/i915: Move map-and-fenceable tracking to the VMA Chris Wilson
2016-08-15  8:03   ` Joonas Lahtinen
2016-08-15  8:14     ` Chris Wilson
2016-08-12 10:28 ` [PATCH 02/10] drm/i915/userptr: Make gup errors stickier Chris Wilson
2016-08-15 11:16   ` Joonas Lahtinen
2016-08-15 15:08   ` Mika Kuoppala
2016-08-15 15:28     ` Chris Wilson
2016-08-16  7:40   ` Mika Kuoppala
2016-08-12 10:28 ` [PATCH 03/10] drm/i915: Move fence tracking from object to vma Chris Wilson
2016-08-15  9:18   ` Joonas Lahtinen
2016-08-15  9:25     ` Chris Wilson
2016-08-15 10:16       ` Joonas Lahtinen
2016-08-15  9:52     ` Chris Wilson
2016-08-12 10:28 ` [PATCH 04/10] drm/i915: Choose partial chunksize based on tile row size Chris Wilson
2016-08-12 10:38   ` Joonas Lahtinen
2016-08-12 10:28 ` [PATCH 05/10] drm/i915: Fix partial GGTT faulting Chris Wilson
2016-08-15  9:29   ` Joonas Lahtinen
2016-08-12 10:28 ` [PATCH 06/10] drm/i915: Choose not to evict faultable objects from the GGTT Chris Wilson
2016-08-12 10:50   ` Joonas Lahtinen
2016-08-12 11:13     ` Chris Wilson
2016-08-15 10:20       ` Joonas Lahtinen
2016-08-12 10:28 ` [PATCH 07/10] drm/i915: Fallback to using unmappable memory for scanout Chris Wilson
2016-08-15  9:33   ` Joonas Lahtinen
2016-08-12 10:28 ` [PATCH 08/10] drm/i915: Track display alignment on VMA Chris Wilson
2016-08-15  9:38   ` Joonas Lahtinen
2016-08-16  8:40     ` Chris Wilson
2016-08-12 10:28 ` [PATCH 09/10] drm/i915: Bump the inactive MRU tracking for all VMA accessed Chris Wilson
2016-08-15  9:59   ` Joonas Lahtinen
2016-08-15 10:12     ` Chris Wilson
2016-08-15 11:10       ` Joonas Lahtinen
2016-08-12 10:28 ` [PATCH 10/10] drm/i915: Stop discarding GTT cache-domain on unbind vma Chris Wilson
2016-08-15 10:03   ` Joonas Lahtinen
2016-08-12 10:33 ` ✗ Ro.CI.BAT: failure for series starting with [01/10] drm/i915: Move map-and-fenceable tracking to the VMA Patchwork
