* Tracking VMA
From: Chris Wilson @ 2016-06-03 16:55 UTC
  To: intel-gfx

One issue with the current VMA API is that callers do not take ownership
of the VMA they pin for their use, and correspondingly never explicitly
unpin it. Being able to track the VMA they are using, imo, allows for
simpler code that is more easily verified (and is faster and more
accurate - less guessing over state).
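
As a minimal sketch of the intended ownership pattern (assuming the
i915_vma_pin()/i915_vma_unpin() interfaces introduced later in this
series; the surrounding function is hypothetical):

	struct i915_vma *vma;
	int ret;

	vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/* The caller now owns a pin on this vma... */
	ret = i915_vma_pin(vma, size, alignment, flags);
	if (ret)
		return ret;

	/* ... uses vma->node.start as the GPU address ... */

	/* ... and is responsible for explicitly releasing it. */
	i915_vma_unpin(vma);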

However, the series opens with patches that take advantage of lockless
request lookups.
-Chris

* [PATCH 01/38] drm/i915: Combine loops within i915_gem_evict_something
From: Chris Wilson @ 2016-06-03 16:55 UTC
  To: intel-gfx

A slight micro-optimisation: combine the loops so that gcc is able to
optimise the inner loops concisely. Since we are reviewing the loops, we
can also update the comments to describe the current state of affairs, in
particular the distinction between evicting from the global GTT (which
may contain untracked items and transient global pins) and the
per-process GTT.
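
In outline, the combined control flow replaces the two copies of the
scan with a NULL-terminated array of phases (extracted from the diff
below, structure details elided):

	struct list_head *phases[] = {
		&vm->inactive_list,
		&vm->active_list,
		NULL,
	}, **phase;

	if (flags & PIN_NONBLOCK)
		phases[1] = NULL; /* never stall on the active list */

	phase = phases;
	do {
		list_for_each_entry(vma, *phase, vm_link)
			if (mark_free(vma, &eviction_list))
				goto found;
	} while (*++phase);

so opting out of the second (stalling) phase is a single assignment
rather than a separate code path.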

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_evict.c | 139 +++++++++++++++++-----------------
 1 file changed, 69 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 2a9adc802e85..677297bf970e 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -63,6 +63,18 @@ static int switch_to_pinned_context(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
+static bool
+gpu_is_idle(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+
+	for_each_engine(engine, dev_priv) {
+		if (!list_empty(&engine->request_list))
+			return false;
+	}
+
+	return true;
+}
 
 static bool
 mark_free(struct i915_vma *vma, struct list_head *unwind)
@@ -107,37 +119,32 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
 			 unsigned long start, unsigned long end,
 			 unsigned flags)
 {
-	struct list_head eviction_list, unwind_list;
-	struct i915_vma *vma;
-	int ret = 0;
-	int pass = 0;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct list_head eviction_list;
+	struct list_head *phases[] = {
+		&vm->inactive_list,
+		&vm->active_list,
+		NULL,
+	}, **phase;
+	struct i915_vma *vma, *next;
+	int ret;
 
 	trace_i915_gem_evict(dev, min_size, alignment, flags);
 
 	/*
 	 * The goal is to evict objects and amalgamate space in LRU order.
 	 * The oldest idle objects reside on the inactive list, which is in
-	 * retirement order. The next objects to retire are those on the (per
-	 * ring) active list that do not have an outstanding flush. Once the
-	 * hardware reports completion (the seqno is updated after the
-	 * batchbuffer has been finished) the clean buffer objects would
-	 * be retired to the inactive list. Any dirty objects would be added
-	 * to the tail of the flushing list. So after processing the clean
-	 * active objects we need to emit a MI_FLUSH to retire the flushing
-	 * list, hence the retirement order of the flushing list is in
-	 * advance of the dirty objects on the active lists.
+	 * retirement order. The next objects to retire are those in flight,
+	 * on the active list, again in retirement order.
 	 *
 	 * The retirement sequence is thus:
 	 *   1. Inactive objects (already retired)
-	 *   2. Clean active objects
-	 *   3. Flushing list
-	 *   4. Dirty active objects.
+	 *   2. Active objects (will stall on unbinding)
 	 *
 	 * On each list, the oldest objects lie at the HEAD with the freshest
 	 * object on the TAIL.
 	 */
-
-	INIT_LIST_HEAD(&unwind_list);
+	INIT_LIST_HEAD(&eviction_list);
 	if (start != 0 || end != vm->total) {
 		drm_mm_init_scan_with_range(&vm->mm, min_size,
 					    alignment, cache_level,
@@ -145,26 +152,20 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
 	} else
 		drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level);
 
-search_again:
-	/* First see if there is a large enough contiguous idle region... */
-	list_for_each_entry(vma, &vm->inactive_list, vm_link) {
-		if (mark_free(vma, &unwind_list))
-			goto found;
-	}
-
 	if (flags & PIN_NONBLOCK)
-		goto none;
+		phases[1] = NULL;
 
-	/* Now merge in the soon-to-be-expired objects... */
-	list_for_each_entry(vma, &vm->active_list, vm_link) {
-		if (mark_free(vma, &unwind_list))
-			goto found;
-	}
+search_again:
+	phase = phases;
+	do {
+		list_for_each_entry(vma, *phase, vm_link)
+			if (mark_free(vma, &eviction_list))
+				goto found;
+	} while (*++phase);
 
-none:
 	/* Nothing found, clean up and bail out! */
-	while (!list_empty(&unwind_list)) {
-		vma = list_first_entry(&unwind_list,
+	while (!list_empty(&eviction_list)) {
+		vma = list_first_entry(&eviction_list,
 				       struct i915_vma,
 				       exec_list);
 		ret = drm_mm_scan_remove_block(&vma->node);
@@ -174,50 +175,50 @@ none:
 	}
 
 	/* Can we unpin some objects such as idle hw contents,
-	 * or pending flips?
+	 * or pending flips? But since only the GGTT has global entries
+	 * such as scanouts, ringbuffers and contexts, we can skip the
+	 * purge when inspecting per-process local address spaces.
 	 */
-	if (flags & PIN_NONBLOCK)
+	if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK)
 		return -ENOSPC;
 
-	/* Only idle the GPU and repeat the search once */
-	if (pass++ == 0) {
-		struct drm_i915_private *dev_priv = to_i915(dev);
-
-		if (i915_is_ggtt(vm)) {
-			ret = switch_to_pinned_context(dev_priv);
-			if (ret)
-				return ret;
-		}
-
-		ret = i915_gem_wait_for_idle(dev_priv);
-		if (ret)
-			return ret;
-
-		i915_gem_retire_requests(dev_priv);
-		goto search_again;
+	if (gpu_is_idle(dev_priv)) {
+		/* If we still have pending pageflip completions, drop
+		 * back to userspace to give our workqueues time to
+		 * acquire our locks and unpin the old scanouts.
+		 */
+		return intel_has_pending_fb_unpin(dev) ? -EAGAIN : -ENOSPC;
 	}
 
-	/* If we still have pending pageflip completions, drop
-	 * back to userspace to give our workqueues time to
-	 * acquire our locks and unpin the old scanouts.
+	/* Not everything in the GGTT is tracked via vma (otherwise we
+	 * could evict as required with minimal stalling) so we are forced
+	 * to idle the GPU and explicitly retire outstanding requests in
+	 * the hopes that we can then remove contexts and the like only
+	 * bound by their active reference.
 	 */
-	return intel_has_pending_fb_unpin(dev) ? -EAGAIN : -ENOSPC;
+	ret = switch_to_pinned_context(dev_priv);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_wait_for_idle(dev_priv);
+	if (ret)
+		return ret;
+
+	i915_gem_retire_requests(dev_priv);
+	goto search_again;
 
 found:
 	/* drm_mm doesn't allow any other operations while
-	 * scanning, therefore store to be evicted objects on a
-	 * temporary list. */
-	INIT_LIST_HEAD(&eviction_list);
-	while (!list_empty(&unwind_list)) {
-		vma = list_first_entry(&unwind_list,
-				       struct i915_vma,
-				       exec_list);
-		if (drm_mm_scan_remove_block(&vma->node)) {
+	 * scanning, therefore store to-be-evicted objects on a
+	 * temporary list and take a reference for all before
+	 * calling unbind (which may remove the active reference
+	 * of any of our objects, thus corrupting the list).
+	 */
+	list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
+		if (drm_mm_scan_remove_block(&vma->node))
 			vma->pin_count++;
-			list_move(&vma->exec_list, &eviction_list);
-			continue;
-		}
-		list_del_init(&vma->exec_list);
+		else
+			list_del_init(&vma->exec_list);
 	}
 
 	/* Unbinding will emit any required flushes */
@@ -231,7 +232,6 @@ found:
 		if (ret == 0)
 			ret = i915_vma_unbind(vma);
 	}
-
 	return ret;
 }
 
@@ -310,7 +310,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
 			return ret;
 
 		i915_gem_retire_requests(dev_priv);
-
 		WARN_ON(!list_empty(&vm->active_list));
 	}
 
-- 
2.8.1

* [PATCH 02/38] drm/i915: Remove surplus drm_device parameter to i915_gem_evict_something()
From: Chris Wilson @ 2016-06-03 16:55 UTC
  To: intel-gfx

Eviction is VM-local, so we can drop the drm_device parameter from the
caller and leave it to i915_gem_evict_something() to derive the device
itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h       |  3 +--
 drivers/gpu/drm/i915/i915_gem.c       |  2 +-
 drivers/gpu/drm/i915/i915_gem_evict.c |  9 ++++-----
 drivers/gpu/drm/i915/i915_gem_gtt.c   |  2 +-
 drivers/gpu/drm/i915/i915_trace.h     | 12 +++++++-----
 5 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 09999ebf1a70..09f6f0eecd96 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3291,8 +3291,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
 				       struct drm_file *file);
 
 /* i915_gem_evict.c */
-int __must_check i915_gem_evict_something(struct drm_device *dev,
-					  struct i915_address_space *vm,
+int __must_check i915_gem_evict_something(struct i915_address_space *vm,
 					  int min_size,
 					  unsigned alignment,
 					  unsigned cache_level,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e6c46f2d08e7..a7aa465cb76d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2802,7 +2802,7 @@ search_free:
 							  search_flag,
 							  alloc_flag);
 		if (ret) {
-			ret = i915_gem_evict_something(dev, vm, size, alignment,
+			ret = i915_gem_evict_something(vm, size, alignment,
 						       obj->cache_level,
 						       start, end,
 						       flags);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 677297bf970e..09e9078f5856 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -91,7 +91,6 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
 
 /**
  * i915_gem_evict_something - Evict vmas to make room for binding a new one
- * @dev: drm_device
  * @vm: address space to evict from
  * @min_size: size of the desired free space
  * @alignment: alignment constraint of the desired free space
@@ -114,12 +113,12 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
  * memory in e.g. the shrinker.
  */
 int
-i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
+i915_gem_evict_something(struct i915_address_space *vm,
 			 int min_size, unsigned alignment, unsigned cache_level,
 			 unsigned long start, unsigned long end,
 			 unsigned flags)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = to_i915(vm->dev);
 	struct list_head eviction_list;
 	struct list_head *phases[] = {
 		&vm->inactive_list,
@@ -129,7 +128,7 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
 	struct i915_vma *vma, *next;
 	int ret;
 
-	trace_i915_gem_evict(dev, min_size, alignment, flags);
+	trace_i915_gem_evict(vm, min_size, alignment, flags);
 
 	/*
 	 * The goal is to evict objects and amalgamate space in LRU order.
@@ -187,7 +186,7 @@ search_again:
 		 * back to userspace to give our workqueues time to
 		 * acquire our locks and unpin the old scanouts.
 		 */
-		return intel_has_pending_fb_unpin(dev) ? -EAGAIN : -ENOSPC;
+		return intel_has_pending_fb_unpin(vm->dev) ? -EAGAIN : -ENOSPC;
 	}
 
 	/* Not everything in the GGTT is tracked via vma (otherwise we
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9db542f761f7..fb2dd65b16e6 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2029,7 +2029,7 @@ alloc:
 						  0, ggtt->base.total,
 						  DRM_MM_TOPDOWN);
 	if (ret == -ENOSPC && !retried) {
-		ret = i915_gem_evict_something(dev, &ggtt->base,
+		ret = i915_gem_evict_something(&ggtt->base,
 					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
 					       I915_CACHE_NONE,
 					       0, ggtt->base.total,
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index e7b3e6e4f4a4..e20355d447db 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -394,25 +394,27 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy,
 );
 
 TRACE_EVENT(i915_gem_evict,
-	    TP_PROTO(struct drm_device *dev, u32 size, u32 align, unsigned flags),
-	    TP_ARGS(dev, size, align, flags),
+	    TP_PROTO(struct i915_address_space *vm, u32 size, u32 align, unsigned flags),
+	    TP_ARGS(vm, size, align, flags),
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
+			     __field(struct i915_address_space *, vm)
 			     __field(u32, size)
 			     __field(u32, align)
 			     __field(unsigned, flags)
 			    ),
 
 	    TP_fast_assign(
-			   __entry->dev = dev->primary->index;
+			   __entry->dev = vm->dev->primary->index;
+			   __entry->vm = vm;
 			   __entry->size = size;
 			   __entry->align = align;
 			   __entry->flags = flags;
 			  ),
 
-	    TP_printk("dev=%d, size=%d, align=%d %s",
-		      __entry->dev, __entry->size, __entry->align,
+	    TP_printk("dev=%d, vm=%p, size=%d, align=%d %s",
+		      __entry->dev, __entry->vm, __entry->size, __entry->align,
 		      __entry->flags & PIN_MAPPABLE ? ", mappable" : "")
 );
 
-- 
2.8.1

* [PATCH 03/38] drm/i915: Double check the active status on the batch pool
From: Chris Wilson @ 2016-06-03 16:55 UTC
  To: intel-gfx

We should not rely on obj->active being up to date unless we manually
flush it. Instead, we can verify that the next available batch object is
idle by looking at its last active request (and checking it for
completion).
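
The key check, extracted from the diff below (i915_gem_active_is_idle()
comes from the lockless request-tracking work this series builds upon):

	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
		/* The batches are strictly LRU ordered, so the first
		 * still-busy object means everything after it is busy
		 * as well.
		 */
		if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
					     &tmp->base.dev->struct_mutex))
			break;
		...
	}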

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 15 ++++++++-------
 drivers/gpu/drm/i915/i915_gem_batch_pool.h |  7 +++++--
 drivers/gpu/drm/i915/intel_lrc.c           |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  2 +-
 4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index 3507b2753fd3..bd646e259012 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -41,15 +41,15 @@
 
 /**
  * i915_gem_batch_pool_init() - initialize a batch buffer pool
- * @dev: the drm device
+ * @engine: the associated request submission engine
  * @pool: the batch buffer pool
  */
-void i915_gem_batch_pool_init(struct drm_device *dev,
+void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
 			      struct i915_gem_batch_pool *pool)
 {
 	int n;
 
-	pool->dev = dev;
+	pool->engine = engine;
 
 	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
 		INIT_LIST_HEAD(&pool->cache_list[n]);
@@ -65,7 +65,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
 {
 	int n;
 
-	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
+	lockdep_assert_held(&pool->engine->i915->dev->struct_mutex);
 
 	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
 		while (!list_empty(&pool->cache_list[n])) {
@@ -102,7 +102,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 	struct list_head *list;
 	int n;
 
-	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
+	lockdep_assert_held(&pool->engine->i915->dev->struct_mutex);
 
 	/* Compute a power-of-two bucket, but throw everything greater than
 	 * 16KiB into the same bucket: i.e. the buckets hold objects of
@@ -115,7 +115,8 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 
 	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
 		/* The batches are strictly LRU ordered */
-		if (tmp->active)
+		if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
+					  &tmp->base.dev->struct_mutex))
 			break;
 
 		/* While we're looping, do some clean up */
@@ -134,7 +135,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 	if (obj == NULL) {
 		int ret;
 
-		obj = i915_gem_object_create(pool->dev, size);
+		obj = i915_gem_object_create(pool->engine->i915->dev, size);
 		if (IS_ERR(obj))
 			return obj;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
index 848e90703eed..7fd4df0a29fe 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -27,13 +27,16 @@
 
 #include "i915_drv.h"
 
+struct drm_device;
+struct intel_engine_cs;
+
 struct i915_gem_batch_pool {
-	struct drm_device *dev;
+	struct intel_engine_cs *engine;
 	struct list_head cache_list[4];
 };
 
 /* i915_gem_batch_pool.c */
-void i915_gem_batch_pool_init(struct drm_device *dev,
+void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
 			      struct i915_gem_batch_pool *pool);
 void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
 struct drm_i915_gem_object*
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 69fca2f27f8b..964108cbb9c0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1798,7 +1798,7 @@ logical_ring_setup(struct drm_device *dev, enum intel_engine_id id)
 	logical_ring_default_irqs(engine, info->irq_shift);
 
 	intel_engine_init_hangcheck(engine);
-	i915_gem_batch_pool_init(dev, &engine->batch_pool);
+	i915_gem_batch_pool_init(engine, &engine->batch_pool);
 
 	return engine;
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 33d2c019576e..d63e4fdc60de 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2060,7 +2060,7 @@ static int intel_init_engine(struct drm_device *dev,
 	INIT_LIST_HEAD(&engine->request_list);
 	INIT_LIST_HEAD(&engine->execlist_queue);
 	INIT_LIST_HEAD(&engine->buffers);
-	i915_gem_batch_pool_init(dev, &engine->batch_pool);
+	i915_gem_batch_pool_init(engine, &engine->batch_pool);
 	memset(engine->semaphore.sync_seqno, 0,
 	       sizeof(engine->semaphore.sync_seqno));
 
-- 
2.8.1

* [PATCH 04/38] drm/i915: Remove request retirement before each batch
From: Chris Wilson @ 2016-06-03 16:55 UTC
  To: intel-gfx

This reimplements the denial-of-service protection against igt from

commit 227f782e4667fc622810bce8be8ccdeee45f89c2
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu May 15 10:41:42 2014 +0100

    drm/i915: Retire requests before creating a new one

and transfers the stall from before each batch into get_pages().
The issue is that the stall increases latency between batches, which
in some cases (especially coupled with execlists) is detrimental to
keeping the GPU well fed. We have also observed that retiring requests
can itself free objects (and requests), and therefore makes a good
first step when shrinking.

v2: Recycle objects prior to i915_gem_object_get_pages()
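
The new hook point is thus, in outline (full context in the diff
below):

	int
	i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
	{
		...
		/* Recycle as many active objects as possible first */
		i915_gem_retire_requests(dev_priv);

		ret = obj->ops->get_pages(obj);
		...
	}

so the retirement work is paid only when we need fresh backing pages,
rather than on every batch submission.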

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            | 1 -
 drivers/gpu/drm/i915/i915_gem.c            | 9 ++++++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 --
 drivers/gpu/drm/i915/i915_gem_request.c    | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 09f6f0eecd96..a065325580d8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3065,7 +3065,6 @@ struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine);
 
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
-void i915_gem_retire_requests_ring(struct intel_engine_cs *engine);
 
 static inline u32 i915_reset_counter(struct i915_gpu_error *error)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a7aa465cb76d..19b8d2ea7698 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1989,8 +1989,7 @@ err_pages:
 int
 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 {
-	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	const struct drm_i915_gem_object_ops *ops = obj->ops;
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	int ret;
 
 	if (obj->pages)
@@ -2003,7 +2002,10 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 
 	BUG_ON(obj->pages_pin_count);
 
-	ret = ops->get_pages(obj);
+	/* Recycle as many active objects as possible first */
+	i915_gem_retire_requests(dev_priv);
+
+	ret = obj->ops->get_pages(obj);
 	if (ret)
 		return ret;
 
@@ -4161,6 +4163,7 @@ i915_gem_cleanup_engines(struct drm_device *dev)
 static void
 init_engine_lists(struct intel_engine_cs *engine)
 {
+	/* Early initialisation so that core GEM works during engine setup */
 	INIT_LIST_HEAD(&engine->request_list);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7b381358512e..1b19a36adedc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -751,8 +751,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
 	bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
 	int retry;
 
-	i915_gem_retire_requests_ring(engine);
-
 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
 
 	INIT_LIST_HEAD(&ordered_vmas);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 38e5daecd8f5..59afc8e547c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -734,7 +734,7 @@ int i915_wait_request(struct drm_i915_gem_request *req)
 	return 0;
 }
 
-void i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
+static void i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request, *next;
 
-- 
2.8.1

* [PATCH 05/38] drm/i915: Remove i915_gem_execbuffer_retire_commands()
From: Chris Wilson @ 2016-06-03 16:55 UTC
  To: intel-gfx

Move the single line to the callsite as the name is now misleading, and
the purpose is solely to add the request to the execution queue.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1b19a36adedc..40937a09855d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1177,13 +1177,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 	}
 }
 
-static void
-i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
-{
-	/* Add a breadcrumb for the completion of the batch buffer */
-	__i915_add_request(params->request, params->batch_obj, true);
-}
-
 static int
 i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
 {
@@ -1677,7 +1670,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	ret = execbuf_submit(params, args, &eb->vmas);
 err_request:
-	i915_gem_execbuffer_retire_commands(params);
+	__i915_add_request(params->request, params->batch_obj, ret == 0);
 
 err_batch_unpin:
 	/*
-- 
2.8.1

* [PATCH 06/38] drm/i915: Pad GTT views of exec objects up to user specified size
From: Chris Wilson @ 2016-06-03 16:55 UTC
  To: intel-gfx

Our GPUs impose certain requirements upon buffers that depend upon how
exactly they are used. Typically this is expressed as a requirement for
a larger surface than would be naively computed by pitch * height.
Normally such requirements are hidden away in the userspace driver, but
when we accept pointers from strangers and later impose extra conditions
on them, the original client allocator has no idea about the
monstrosities in the GPU and we require the userspace driver to inform
the kernel how many padding pages are required beyond the client
allocation.

v2: Long time, no see
v3: Try an anonymous union for uapi struct compatibility
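
For illustration, a hypothetical userspace snippet exercising the new
uapi (only EXEC_OBJECT_PAD_TO_SIZE and pad_to_size come from this
patch; the handle and padded_size variables are assumed):

	struct drm_i915_gem_exec_object2 exec_obj;

	memset(&exec_obj, 0, sizeof(exec_obj));
	exec_obj.handle = handle;
	exec_obj.flags = EXEC_OBJECT_PAD_TO_SIZE;
	/* Must be page-aligned, as enforced by validate_exec_list() */
	exec_obj.pad_to_size = padded_size;

As the flag was previously within __EXEC_OBJECT_UNKNOWN_FLAGS, older
kernels reject it with -EINVAL, which doubles as a detection mechanism
for userspace.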

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  6 ++-
 drivers/gpu/drm/i915/i915_gem.c            | 82 +++++++++++++++---------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++-
 include/uapi/drm/i915_drm.h                |  8 ++-
 4 files changed, 65 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a065325580d8..9520adba33f6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2945,11 +2945,13 @@ void i915_gem_free_object(struct drm_gem_object *obj);
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    struct i915_address_space *vm,
+		    uint64_t size,
 		    uint32_t alignment,
 		    uint64_t flags);
 int __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
+			 uint64_t size,
 			 uint32_t alignment,
 			 uint64_t flags);
 
@@ -3209,8 +3211,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
-	return i915_gem_object_pin(obj, &ggtt->base,
-				   alignment, flags | PIN_GLOBAL);
+	return i915_gem_object_pin(obj, &ggtt->base, 0, alignment,
+				   flags | PIN_GLOBAL);
 }
 
 void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 19b8d2ea7698..0f0101300b2b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1438,7 +1438,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	/* Now pin it into the GTT if needed */
-	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
+	ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
 	if (ret)
 		goto unlock;
 
@@ -2678,21 +2678,20 @@ static struct i915_vma *
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 			   struct i915_address_space *vm,
 			   const struct i915_ggtt_view *ggtt_view,
+			   uint64_t size,
 			   unsigned alignment,
 			   uint64_t flags)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	u32 fence_alignment, unfenced_alignment;
-	u32 search_flag, alloc_flag;
 	u64 start, end;
-	u64 size, fence_size;
+	u32 search_flag, alloc_flag;
 	struct i915_vma *vma;
 	int ret;
 
 	if (i915_is_ggtt(vm)) {
-		u32 view_size;
+		u32 fence_size, fence_alignment, unfenced_alignment;
+		u64 view_size;
 
 		if (WARN_ON(!ggtt_view))
 			return ERR_PTR(-EINVAL);
@@ -2710,48 +2709,39 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 								view_size,
 								obj->tiling_mode,
 								false);
-		size = flags & PIN_MAPPABLE ? fence_size : view_size;
+		size = max(size, view_size);
+		if (flags & PIN_MAPPABLE)
+			size = max_t(u64, size, fence_size);
+
+		if (alignment == 0)
+			alignment = flags & PIN_MAPPABLE ? fence_alignment :
+				unfenced_alignment;
+		if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
+			DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
+				  ggtt_view ? ggtt_view->type : 0,
+				  alignment);
+			return ERR_PTR(-EINVAL);
+		}
 	} else {
-		fence_size = i915_gem_get_gtt_size(dev,
-						   obj->base.size,
-						   obj->tiling_mode);
-		fence_alignment = i915_gem_get_gtt_alignment(dev,
-							     obj->base.size,
-							     obj->tiling_mode,
-							     true);
-		unfenced_alignment =
-			i915_gem_get_gtt_alignment(dev,
-						   obj->base.size,
-						   obj->tiling_mode,
-						   false);
-		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
+		size = max_t(u64, size, obj->base.size);
+		alignment = 4096;
 	}
 
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
 	end = vm->total;
 	if (flags & PIN_MAPPABLE)
-		end = min_t(u64, end, ggtt->mappable_end);
+		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
 	if (flags & PIN_ZONE_4G)
 		end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
 
-	if (alignment == 0)
-		alignment = flags & PIN_MAPPABLE ? fence_alignment :
-						unfenced_alignment;
-	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
-		DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
-			  ggtt_view ? ggtt_view->type : 0,
-			  alignment);
-		return ERR_PTR(-EINVAL);
-	}
-
 	/* If binding the object/GGTT view requires more space than the entire
 	 * aperture has, reject it early before evicting everything in a vain
 	 * attempt to find space.
 	 */
 	if (size > end) {
-		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
+		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
 			  ggtt_view ? ggtt_view->type : 0,
-			  size,
+			  size, obj->base.size,
 			  flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
 		return ERR_PTR(-E2BIG);
@@ -3243,7 +3233,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	 * (e.g. libkms for the bootup splash), we have to ensure that we
 	 * always use map_and_fenceable for all scanout buffers.
 	 */
-	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
+	ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
 				       view->type == I915_GGTT_VIEW_NORMAL ?
 				       PIN_MAPPABLE : 0);
 	if (ret)
@@ -3393,12 +3383,17 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 }
 
 static bool
-i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
+i915_vma_misplaced(struct i915_vma *vma,
+		   uint64_t size,
+		   uint32_t alignment,
+		   uint64_t flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	if (alignment &&
-	    vma->node.start & (alignment - 1))
+	if (vma->node.size < size)
+		return true;
+
+	if (alignment && vma->node.start & (alignment - 1))
 		return true;
 
 	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
@@ -3442,6 +3437,7 @@ static int
 i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		       struct i915_address_space *vm,
 		       const struct i915_ggtt_view *ggtt_view,
+		       uint64_t size,
 		       uint32_t alignment,
 		       uint64_t flags)
 {
@@ -3469,7 +3465,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
 			return -EBUSY;
 
-		if (i915_vma_misplaced(vma, alignment, flags)) {
+		if (i915_vma_misplaced(vma, size, alignment, flags)) {
 			WARN(vma->pin_count,
 			     "bo is already pinned in %s with incorrect alignment:"
 			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
@@ -3490,8 +3486,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 
 	bound = vma ? vma->bound : 0;
 	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
-		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
-						 flags);
+		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
+						 size, alignment, flags);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
 	} else {
@@ -3513,17 +3509,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 int
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    struct i915_address_space *vm,
+		    uint64_t size,
 		    uint32_t alignment,
 		    uint64_t flags)
 {
 	return i915_gem_object_do_pin(obj, vm,
 				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
-				      alignment, flags);
+				      size, alignment, flags);
 }
 
 int
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
+			 uint64_t size,
 			 uint32_t alignment,
 			 uint64_t flags)
 {
@@ -3534,7 +3532,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	BUG_ON(!view);
 
 	return i915_gem_object_do_pin(obj, &ggtt->base, view,
-				      alignment, flags | PIN_GLOBAL);
+				      size, alignment, flags | PIN_GLOBAL);
 }
 
 void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 40937a09855d..c1e7ee212e7e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -652,10 +652,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 			flags |= PIN_HIGH;
 	}
 
-	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
+	ret = i915_gem_object_pin(obj, vma->vm,
+				  entry->pad_to_size,
+				  entry->alignment,
+				  flags);
 	if ((ret == -ENOSPC  || ret == -E2BIG) &&
 	    only_mappable_for_reloc(entry->flags))
 		ret = i915_gem_object_pin(obj, vma->vm,
+					  entry->pad_to_size,
 					  entry->alignment,
 					  flags & ~PIN_MAPPABLE);
 	if (ret)
@@ -718,6 +722,9 @@ eb_vma_misplaced(struct i915_vma *vma)
 	    vma->node.start & (entry->alignment - 1))
 		return true;
 
+	if (vma->node.size < entry->pad_to_size)
+		return true;
+
 	if (entry->flags & EXEC_OBJECT_PINNED &&
 	    vma->node.start != entry->offset)
 		return true;
@@ -1058,6 +1065,13 @@ validate_exec_list(struct drm_device *dev,
 		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
 			return -EINVAL;
 
+		/* pad_to_size was once a reserved field, so sanitize it */
+		if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
+			if (offset_in_page(exec[i].pad_to_size))
+				return -EINVAL;
+		} else
+			exec[i].pad_to_size = 0;
+
 		/* First check for malicious input causing overflow in
 		 * the worst case where we need to allocate the entire
 		 * relocation tree as a single array.
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index d6c668e58426..3b861746ba7a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -701,10 +701,14 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_WRITE	(1<<2)
 #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
 #define EXEC_OBJECT_PINNED	(1<<4)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
+#define EXEC_OBJECT_PAD_TO_SIZE	(1<<5)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
 	__u64 flags;
 
-	__u64 rsvd1;
+	union {
+		__u64 rsvd1;
+		__u64 pad_to_size;
+	};
 	__u64 rsvd2;
 };
 
-- 
2.8.1

* [PATCH 07/38] drm/i915: Split insertion/binding of an object into the VM
From: Chris Wilson @ 2016-06-03 16:55 UTC
  To: intel-gfx

Split the insertion of the object into the address space's range
manager from the binding of that object into the GTT, to simplify the
code flow when pinning a VMA.
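
After the split, the pinning path reduces to, in outline (taken from
the diff below):

	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
		vma = i915_gem_object_insert_into_vm(obj, vm, ggtt_view,
						     size, alignment, flags);
		if (IS_ERR(vma))
			return PTR_ERR(vma);
	}

	ret = i915_vma_bind(vma, obj->cache_level, flags);
	if (ret)
		return ret;

i.e. insertion only allocates the address range, and the binding of
those pages into the GTT now happens in exactly one place afterwards.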

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0f0101300b2b..cd54c290680d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2675,12 +2675,12 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
  * there.
  */
 static struct i915_vma *
-i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
-			   struct i915_address_space *vm,
-			   const struct i915_ggtt_view *ggtt_view,
-			   uint64_t size,
-			   unsigned alignment,
-			   uint64_t flags)
+i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
+			       struct i915_address_space *vm,
+			       const struct i915_ggtt_view *ggtt_view,
+			       uint64_t size,
+			       unsigned alignment,
+			       uint64_t flags)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -2809,11 +2809,6 @@ search_free:
 		goto err_remove_node;
 	}
 
-	trace_i915_vma_bind(vma, flags);
-	ret = i915_vma_bind(vma, obj->cache_level, flags);
-	if (ret)
-		goto err_remove_node;
-
 	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
 	list_move_tail(&vma->vm_link, &vm->inactive_list);
 	obj->bind_count++;
@@ -3484,24 +3479,26 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		}
 	}
 
-	bound = vma ? vma->bound : 0;
 	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
-		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
-						 size, alignment, flags);
+		vma = i915_gem_object_insert_into_vm(obj, vm, ggtt_view,
+						     size, alignment, flags);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
-	} else {
-		ret = i915_vma_bind(vma, obj->cache_level, flags);
-		if (ret)
-			return ret;
 	}
 
+	bound = vma->bound;
+	ret = i915_vma_bind(vma, obj->cache_level, flags);
+	if (ret)
+		return ret;
+
 	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
 	    (bound ^ vma->bound) & GLOBAL_BIND) {
 		__i915_vma_set_map_and_fenceable(vma);
 		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
 	}
 
+	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
+
 	vma->pin_count++;
 	return 0;
 }
-- 
2.8.1

* [PATCH 08/38] drm/i915: Record allocated vma size
From: Chris Wilson @ 2016-06-03 16:55 UTC
  To: intel-gfx

Tracking the size of the VMA as allocated allows us to dramatically
reduce the complexity of later functions (like inserting the VMA into
the drm_mm range manager).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h     |  10 +--
 drivers/gpu/drm/i915/i915_gem.c     | 117 ++++++++++++++++--------------------
 drivers/gpu/drm/i915/i915_gem_gtt.c |  56 +++++------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   5 +-
 4 files changed, 71 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9520adba33f6..fe7e87e8cf9a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3137,11 +3137,11 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 int i915_gem_open(struct drm_device *dev, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
-uint32_t
-i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode);
-uint32_t
-i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
-			    int tiling_mode, bool fenced);
+uint64_t
+i915_gem_get_gtt_size(struct drm_device *dev, uint64_t size, int tiling_mode);
+uint64_t
+i915_gem_get_gtt_alignment(struct drm_device *dev, uint64_t size,
+			   int tiling_mode, bool fenced);
 
 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 				    enum i915_cache_level cache_level);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cd54c290680d..7340fc830d9a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1593,11 +1593,13 @@ i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
 		i915_gem_release_mmap(obj);
 }
 
-uint32_t
-i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
+uint64_t
+i915_gem_get_gtt_size(struct drm_device *dev, uint64_t size, int tiling_mode)
 {
 	uint32_t gtt_size;
 
+	GEM_BUG_ON(size == 0);
+
 	if (INTEL_INFO(dev)->gen >= 4 ||
 	    tiling_mode == I915_TILING_NONE)
 		return size;
@@ -1621,10 +1623,12 @@ i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
  * Return the required GTT alignment for an object, taking into account
  * potential fence register mapping.
  */
-uint32_t
-i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
+uint64_t
+i915_gem_get_gtt_alignment(struct drm_device *dev, uint64_t size,
 			   int tiling_mode, bool fenced)
 {
+	GEM_BUG_ON(size == 0);
+
 	/*
 	 * Minimum alignment is 4k (GTT page size), but might be greater
 	 * if a fence register is needed for the object.
@@ -2679,56 +2683,40 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 			       struct i915_address_space *vm,
 			       const struct i915_ggtt_view *ggtt_view,
 			       uint64_t size,
-			       unsigned alignment,
+			       uint64_t alignment,
 			       uint64_t flags)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	u64 start, end;
-	u32 search_flag, alloc_flag;
 	struct i915_vma *vma;
+	u64 start, end;
+	u64 min_alignment;
 	int ret;
 
-	if (i915_is_ggtt(vm)) {
-		u32 fence_size, fence_alignment, unfenced_alignment;
-		u64 view_size;
-
-		if (WARN_ON(!ggtt_view))
-			return ERR_PTR(-EINVAL);
-
-		view_size = i915_ggtt_view_size(obj, ggtt_view);
-
-		fence_size = i915_gem_get_gtt_size(dev,
-						   view_size,
-						   obj->tiling_mode);
-		fence_alignment = i915_gem_get_gtt_alignment(dev,
-							     view_size,
-							     obj->tiling_mode,
-							     true);
-		unfenced_alignment = i915_gem_get_gtt_alignment(dev,
-								view_size,
-								obj->tiling_mode,
-								false);
-		size = max(size, view_size);
-		if (flags & PIN_MAPPABLE)
-			size = max_t(u64, size, fence_size);
-
-		if (alignment == 0)
-			alignment = flags & PIN_MAPPABLE ? fence_alignment :
-				unfenced_alignment;
-		if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
-			DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
-				  ggtt_view ? ggtt_view->type : 0,
-				  alignment);
-			return ERR_PTR(-EINVAL);
-		}
-	} else {
-		size = max_t(u64, size, obj->base.size);
-		alignment = 4096;
+	vma = ggtt_view ?
+		i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
+		i915_gem_obj_lookup_or_create_vma(obj, vm);
+	if (IS_ERR(vma))
+		return vma;
+
+	size = max(size, vma->size);
+	if (flags & PIN_MAPPABLE)
+		size = i915_gem_get_gtt_size(dev, size, obj->tiling_mode);
+
+	min_alignment =
+		i915_gem_get_gtt_alignment(dev, size, obj->tiling_mode,
+					   flags & PIN_MAPPABLE);
+	if (alignment == 0)
+		alignment = min_alignment;
+	if (alignment & (min_alignment - 1)) {
+		DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
+			  alignment, min_alignment);
+		return ERR_PTR(-EINVAL);
 	}
 
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
-	end = vm->total;
+
+	end = vma->vm->total;
 	if (flags & PIN_MAPPABLE)
 		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
 	if (flags & PIN_ZONE_4G)
@@ -2739,8 +2727,7 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 	 * attempt to find space.
 	 */
 	if (size > end) {
-		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
-			  ggtt_view ? ggtt_view->type : 0,
+		DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
 			  size, obj->base.size,
 			  flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
@@ -2753,31 +2740,27 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 
 	i915_gem_object_pin_pages(obj);
 
-	vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
-			  i915_gem_obj_lookup_or_create_vma(obj, vm);
-
-	if (IS_ERR(vma))
-		goto err_unpin;
-
 	if (flags & PIN_OFFSET_FIXED) {
 		uint64_t offset = flags & PIN_OFFSET_MASK;
-
-		if (offset & (alignment - 1) || offset + size > end) {
+		if (offset & (alignment - 1) || offset > end - size) {
 			ret = -EINVAL;
-			goto err_vma;
+			goto err_unpin;
 		}
+
 		vma->node.start = offset;
 		vma->node.size = size;
 		vma->node.color = obj->cache_level;
-		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
 		if (ret) {
 			ret = i915_gem_evict_for_vma(vma);
 			if (ret == 0)
-				ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+				ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
+			if (ret)
+				goto err_unpin;
 		}
-		if (ret)
-			goto err_vma;
 	} else {
+		u32 search_flag, alloc_flag;
+
 		if (flags & PIN_HIGH) {
 			search_flag = DRM_MM_SEARCH_BELOW;
 			alloc_flag = DRM_MM_CREATE_TOP;
@@ -2786,22 +2769,26 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 			alloc_flag = DRM_MM_CREATE_DEFAULT;
 		}
 
+		if (alignment <= 4096)
+			alignment = 0; /* for efficient drm_mm searching */
+
 search_free:
-		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+		ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
+							  &vma->node,
 							  size, alignment,
 							  obj->cache_level,
 							  start, end,
 							  search_flag,
 							  alloc_flag);
 		if (ret) {
-			ret = i915_gem_evict_something(vm, size, alignment,
+			ret = i915_gem_evict_something(vma->vm, size, alignment,
 						       obj->cache_level,
 						       start, end,
 						       flags);
 			if (ret == 0)
 				goto search_free;
 
-			goto err_vma;
+			goto err_unpin;
 		}
 	}
 	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
@@ -2810,18 +2797,16 @@ search_free:
 	}
 
 	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
-	list_move_tail(&vma->vm_link, &vm->inactive_list);
+	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 	obj->bind_count++;
 
 	return vma;
 
 err_remove_node:
 	drm_mm_remove_node(&vma->node);
-err_vma:
-	vma = ERR_PTR(ret);
 err_unpin:
 	i915_gem_object_unpin_pages(obj);
-	return vma;
+	return ERR_PTR(ret);
 }
 
 bool
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index fb2dd65b16e6..b76811d60e8c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -184,7 +184,7 @@ static void ppgtt_unbind_vma(struct i915_vma *vma)
 {
 	vma->vm->clear_range(vma->vm,
 			     vma->node.start,
-			     vma->obj->base.size,
+			     vma->size,
 			     true);
 }
 
@@ -2655,28 +2655,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 
 static void ggtt_unbind_vma(struct i915_vma *vma)
 {
-	struct drm_device *dev = vma->vm->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *obj = vma->obj;
-	const uint64_t size = min_t(uint64_t,
-				    obj->base.size,
-				    vma->node.size);
+	struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
+	const uint64_t size = min(vma->size, vma->node.size);
 
-	if (vma->bound & GLOBAL_BIND) {
+	if (vma->bound & GLOBAL_BIND)
 		vma->vm->clear_range(vma->vm,
-				     vma->node.start,
-				     size,
+				     vma->node.start, size,
 				     true);
-	}
-
-	if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
-		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
 
+	if (vma->bound & LOCAL_BIND && appgtt)
 		appgtt->base.clear_range(&appgtt->base,
-					 vma->node.start,
-					 size,
+					 vma->node.start, size,
 					 true);
-	}
 }
 
 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
@@ -3349,11 +3339,16 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
 	list_add(&vma->vm_link, &vm->unbound_list);
 	vma->vm = vm;
 	vma->obj = obj;
+	vma->size = obj->base.size;
 	vma->is_ggtt = i915_is_ggtt(vm);
 
-	if (i915_is_ggtt(vm))
+	if (i915_is_ggtt(vm)) {
 		vma->ggtt_view = *ggtt_view;
-	else
+		if (ggtt_view->type == I915_GGTT_VIEW_PARTIAL)
+			vma->size = ggtt_view->params.partial.size << PAGE_SHIFT;
+		else if (ggtt_view->type == I915_GGTT_VIEW_ROTATED)
+			vma->size = intel_rotation_info_size(&ggtt_view->params.rotated) << PAGE_SHIFT;
+	} else
 		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
 
 	list_add_tail(&vma->obj_link, &obj->vma_list);
@@ -3639,29 +3634,6 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 	return 0;
 }
 
-/**
- * i915_ggtt_view_size - Get the size of a GGTT view.
- * @obj: Object the view is of.
- * @view: The view in question.
- *
- * @return The size of the GGTT view in bytes.
- */
-size_t
-i915_ggtt_view_size(struct drm_i915_gem_object *obj,
-		    const struct i915_ggtt_view *view)
-{
-	if (view->type == I915_GGTT_VIEW_NORMAL) {
-		return obj->base.size;
-	} else if (view->type == I915_GGTT_VIEW_ROTATED) {
-		return intel_rotation_info_size(&view->params.rotated) << PAGE_SHIFT;
-	} else if (view->type == I915_GGTT_VIEW_PARTIAL) {
-		return view->params.partial.size << PAGE_SHIFT;
-	} else {
-		WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
-		return obj->base.size;
-	}
-}
-
 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 {
 	void __iomem *ptr;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index e4657bfaea95..2bd8ec7e1948 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -180,6 +180,7 @@ struct i915_vma {
 	struct drm_i915_gem_object *obj;
 	struct i915_address_space *vm;
 	void __iomem *iomap;
+	u64 size;
 
 	struct i915_gem_active last_read[I915_NUM_ENGINES];
 
@@ -598,10 +599,6 @@ i915_ggtt_view_equal(const struct i915_ggtt_view *a,
 	return true;
 }
 
-size_t
-i915_ggtt_view_size(struct drm_i915_gem_object *obj,
-		    const struct i915_ggtt_view *view);
-
 /**
  * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture
  * @vma: VMA to iomap
-- 
2.8.1

* [PATCH 09/38] drm/i915: Start passing around i915_vma from execbuffer
From: Chris Wilson @ 2016-06-03 16:55 UTC
  To: intel-gfx; +Cc: Mika Kuoppala

During execbuffer we look up the i915_vmas in order to reserve them in
the VM. However, we then do a second lookup of each vma in order to pin
it, all because we lack the necessary interfaces to operate on an
i915_vma directly.

v2: Tidy parameter lists to remove one level of redirection in the hot
path.
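
With the new interface, the reservation path can pin the vma it already
holds, along the lines of (a sketch; only the i915_vma_pin() signature
below is taken from this patch):

	/* One lookup, then operate on the vma directly */
	ret = i915_vma_pin(vma, entry->pad_to_size, entry->alignment, flags);
	if (ret)
		return ret;

	entry->offset = vma->node.start;

instead of translating the vma back to its object and repeating the
address-space lookup inside i915_gem_object_pin().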

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |   2 +-
 drivers/gpu/drm/i915/i915_drv.h            |  47 +++++---
 drivers/gpu/drm/i915/i915_gem.c            | 178 ++++++++++++-----------------
 drivers/gpu/drm/i915/i915_gem_evict.c      |  12 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 131 ++++++++++-----------
 drivers/gpu/drm/i915/i915_gem_gtt.c        |   7 +-
 drivers/gpu/drm/i915/i915_gpu_error.c      |   4 +-
 7 files changed, 174 insertions(+), 207 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 99857ee0bb8b..f4745e0c8d5c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -168,7 +168,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	if (obj->base.name)
 		seq_printf(m, " (name: %d)", obj->base.name);
 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
-		if (vma->pin_count > 0)
+		if (i915_vma_is_pinned(vma))
 			pin_count++;
 	}
 	seq_printf(m, " (pinned x %d)", pin_count);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fe7e87e8cf9a..f537d8fc5e0f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2931,6 +2931,8 @@ struct drm_i915_gem_object *i915_gem_object_create_from_data(
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
 void i915_gem_free_object(struct drm_gem_object *obj);
 
+int __must_check
+i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
 /* Flags used by pin/bind&friends. */
 #define PIN_MAPPABLE	(1<<0)
 #define PIN_NONBLOCK	(1<<1)
@@ -2942,12 +2944,30 @@ void i915_gem_free_object(struct drm_gem_object *obj);
 #define PIN_HIGH	(1<<7)
 #define PIN_OFFSET_FIXED	(1<<8)
 #define PIN_OFFSET_MASK (~4095)
-int __must_check
-i915_gem_object_pin(struct drm_i915_gem_object *obj,
-		    struct i915_address_space *vm,
-		    uint64_t size,
-		    uint32_t alignment,
-		    uint64_t flags);
+
+static inline void __i915_vma_pin(struct i915_vma *vma)
+{
+	GEM_BUG_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
+	vma->pin_count++;
+}
+
+static inline bool i915_vma_is_pinned(struct i915_vma *vma)
+{
+	return vma->pin_count;
+}
+
+static inline void __i915_vma_unpin(struct i915_vma *vma)
+{
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
+	vma->pin_count--;
+}
+
+static inline void i915_vma_unpin(struct i915_vma *vma)
+{
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+	__i915_vma_unpin(vma);
+}
+
 int __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
@@ -3208,11 +3228,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
 		      uint32_t alignment,
 		      unsigned flags)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-
-	return i915_gem_object_pin(obj, &ggtt->base, 0, alignment,
-				   flags | PIN_GLOBAL);
+	return i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal,
+					0, alignment, flags);
 }
 
 void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
@@ -3293,11 +3310,11 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
-					  int min_size,
-					  unsigned alignment,
+					  u64 min_size,
+					  u64 alignment,
 					  unsigned cache_level,
-					  unsigned long start,
-					  unsigned long end,
+					  u64 start,
+					  u64 end,
 					  unsigned flags);
 int __must_check i915_gem_evict_for_vma(struct i915_vma *target);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7340fc830d9a..71a32a9f9858 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -130,10 +130,10 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	pinned = 0;
 	mutex_lock(&dev->struct_mutex);
 	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
-		if (vma->pin_count)
+		if (i915_vma_is_pinned(vma))
 			pinned += vma->node.size;
 	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
-		if (vma->pin_count)
+		if (i915_vma_is_pinned(vma))
 			pinned += vma->node.size;
 	mutex_unlock(&dev->struct_mutex);
 
@@ -2548,7 +2548,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 * take a pin on the vma so that the second unbind is
 		 * aborted.
 		 */
-		vma->pin_count++;
+		__i915_vma_pin(vma);
 
 		for_each_active(active, idx) {
 			ret = i915_gem_active_retire(&vma->last_read[idx],
@@ -2557,14 +2557,14 @@ int i915_vma_unbind(struct i915_vma *vma)
 				break;
 		}
 
-		vma->pin_count--;
+		__i915_vma_unpin(vma);
 		if (ret)
 			return ret;
 
 		GEM_BUG_ON(i915_vma_is_active(vma));
 	}
 
-	if (vma->pin_count)
+	if (i915_vma_is_pinned(vma))
 		return -EBUSY;
 
 	if (!drm_mm_node_allocated(&vma->node))
@@ -2678,26 +2678,18 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
  * Finds free space in the GTT aperture and binds the object or a view of it
  * there.
  */
-static struct i915_vma *
-i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
-			       struct i915_address_space *vm,
-			       const struct i915_ggtt_view *ggtt_view,
-			       uint64_t size,
-			       uint64_t alignment,
-			       uint64_t flags)
+static int
+i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
+	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_vma *vma;
 	u64 start, end;
 	u64 min_alignment;
 	int ret;
 
-	vma = ggtt_view ?
-		i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
-		i915_gem_obj_lookup_or_create_vma(obj, vm);
-	if (IS_ERR(vma))
-		return vma;
+	GEM_BUG_ON(vma->bound);
+	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 
 	size = max(size, vma->size);
 	if (flags & PIN_MAPPABLE)
@@ -2711,7 +2703,7 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 	if (alignment & (min_alignment - 1)) {
 		DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
 			  alignment, min_alignment);
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
@@ -2731,17 +2723,17 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 			  size, obj->base.size,
 			  flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
-		return ERR_PTR(-E2BIG);
+		return -E2BIG;
 	}
 
 	ret = i915_gem_object_get_pages(obj);
 	if (ret)
-		return ERR_PTR(ret);
+		return ret;
 
 	i915_gem_object_pin_pages(obj);
 
 	if (flags & PIN_OFFSET_FIXED) {
-		uint64_t offset = flags & PIN_OFFSET_MASK;
+		u64 offset = flags & PIN_OFFSET_MASK;
 		if (offset & (alignment - 1) || offset > end - size) {
 			ret = -EINVAL;
 			goto err_unpin;
@@ -2800,13 +2792,13 @@ search_free:
 	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 	obj->bind_count++;
 
-	return vma;
+	return 0;
 
 err_remove_node:
 	drm_mm_remove_node(&vma->node);
 err_unpin:
 	i915_gem_object_unpin_pages(obj);
-	return ERR_PTR(ret);
+	return ret;
 }
 
 bool
@@ -2999,7 +2991,7 @@ restart:
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
 
-		if (vma->pin_count) {
+		if (i915_vma_is_pinned(vma)) {
 			DRM_DEBUG("can not change the cache level of pinned objects\n");
 			return -EBUSY;
 		}
@@ -3363,13 +3355,13 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 }
 
 static bool
-i915_vma_misplaced(struct i915_vma *vma,
-		   uint64_t size,
-		   uint32_t alignment,
-		   uint64_t flags)
+i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 
+	if (!drm_mm_node_allocated(&vma->node))
+		return false;
+
 	if (vma->node.size < size)
 		return true;
 
@@ -3413,91 +3405,42 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 	obj->map_and_fenceable = mappable && fenceable;
 }
 
-static int
-i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
-		       struct i915_address_space *vm,
-		       const struct i915_ggtt_view *ggtt_view,
-		       uint64_t size,
-		       uint32_t alignment,
-		       uint64_t flags)
+int
+i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	struct i915_vma *vma;
-	unsigned bound;
+	unsigned bound = vma->bound;
 	int ret;
 
-	if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
-		return -ENODEV;
-
-	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
-		return -EINVAL;
-
-	if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
-		return -EINVAL;
-
-	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
-		return -EINVAL;
-
-	vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
-			  i915_gem_obj_to_vma(obj, vm);
-
-	if (vma) {
-		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
-			return -EBUSY;
+	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
+	GEM_BUG_ON((flags & PIN_GLOBAL) && !vma->is_ggtt);
 
-		if (i915_vma_misplaced(vma, size, alignment, flags)) {
-			WARN(vma->pin_count,
-			     "bo is already pinned in %s with incorrect alignment:"
-			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
-			     " obj->map_and_fenceable=%d\n",
-			     ggtt_view ? "ggtt" : "ppgtt",
-			     upper_32_bits(vma->node.start),
-			     lower_32_bits(vma->node.start),
-			     alignment,
-			     !!(flags & PIN_MAPPABLE),
-			     obj->map_and_fenceable);
-			ret = i915_vma_unbind(vma);
-			if (ret)
-				return ret;
+	if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
+		return -EBUSY;
 
-			vma = NULL;
-		}
-	}
+	/* Pin early to prevent the shrinker/eviction logic from destroying
+	 * our vma as we insert and bind.
+	 */
+	__i915_vma_pin(vma);
 
-	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
-		vma = i915_gem_object_insert_into_vm(obj, vm, ggtt_view,
-						     size, alignment, flags);
-		if (IS_ERR(vma))
-			return PTR_ERR(vma);
+	if (!bound) {
+		ret = i915_vma_insert(vma, size, alignment, flags);
+		if (ret)
+			goto err;
 	}
 
-	bound = vma->bound;
-	ret = i915_vma_bind(vma, obj->cache_level, flags);
+	ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
 	if (ret)
-		return ret;
+		goto err;
 
-	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
-	    (bound ^ vma->bound) & GLOBAL_BIND) {
+	if ((bound ^ vma->bound) & GLOBAL_BIND)
 		__i915_vma_set_map_and_fenceable(vma);
-		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
-	}
 
 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
-
-	vma->pin_count++;
 	return 0;
-}
 
-int
-i915_gem_object_pin(struct drm_i915_gem_object *obj,
-		    struct i915_address_space *vm,
-		    uint64_t size,
-		    uint32_t alignment,
-		    uint64_t flags)
-{
-	return i915_gem_object_do_pin(obj, vm,
-				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
-				      size, alignment, flags);
+err:
+	__i915_vma_unpin(vma);
+	return ret;
 }
 
 int
@@ -3507,14 +3450,34 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 uint32_t alignment,
 			 uint64_t flags)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct i915_vma *vma;
+	int ret;
 
 	BUG_ON(!view);
 
-	return i915_gem_object_do_pin(obj, &ggtt->base, view,
-				      size, alignment, flags | PIN_GLOBAL);
+	vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	if (i915_vma_misplaced(vma, size, alignment, flags)) {
+		if (flags & PIN_NONBLOCK && (vma->pin_count | vma->active))
+			return -ENOSPC;
+
+		WARN(vma->pin_count,
+		     "bo is already pinned in ggtt with incorrect alignment:"
+		     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
+		     " obj->map_and_fenceable=%d\n",
+		     upper_32_bits(vma->node.start),
+		     lower_32_bits(vma->node.start),
+		     alignment,
+		     !!(flags & PIN_MAPPABLE),
+		     obj->map_and_fenceable);
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
+	}
+
+	return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
 }
 
 void
@@ -3523,10 +3486,11 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
 {
 	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
 
-	WARN_ON(vma->pin_count == 0);
+	GEM_BUG_ON(!vma);
+	WARN_ON(!i915_vma_is_pinned(vma));
 	WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
 
-	--vma->pin_count;
+	__i915_vma_unpin(vma);
 }
 
 int
@@ -4421,7 +4385,7 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
 	list_for_each_entry(vma, &obj->vma_list, obj_link)
-		if (vma->pin_count > 0)
+		if (i915_vma_is_pinned(vma))
 			return true;
 
 	return false;
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 09e9078f5856..680365f4c4cd 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -79,7 +79,7 @@ gpu_is_idle(struct drm_i915_private *dev_priv)
 static bool
 mark_free(struct i915_vma *vma, struct list_head *unwind)
 {
-	if (vma->pin_count)
+	if (i915_vma_is_pinned(vma))
 		return false;
 
 	if (WARN_ON(!list_empty(&vma->exec_list)))
@@ -114,8 +114,8 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
  */
 int
 i915_gem_evict_something(struct i915_address_space *vm,
-			 int min_size, unsigned alignment, unsigned cache_level,
-			 unsigned long start, unsigned long end,
+			 u64 min_size, u64 alignment, unsigned cache_level,
+			 u64 start, u64 end,
 			 unsigned flags)
 {
 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
@@ -215,7 +215,7 @@ found:
 	 */
 	list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
 		if (drm_mm_scan_remove_block(&vma->node))
-			vma->pin_count++;
+			__i915_vma_pin(vma);
 		else
 			list_del_init(&vma->exec_list);
 	}
@@ -227,7 +227,7 @@ found:
 				       exec_list);
 
 		list_del_init(&vma->exec_list);
-		vma->pin_count--;
+		__i915_vma_unpin(vma);
 		if (ret == 0)
 			ret = i915_vma_unbind(vma);
 	}
@@ -313,7 +313,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
 	}
 
 	list_for_each_entry_safe(vma, next, &vm->inactive_list, vm_link)
-		if (vma->pin_count == 0)
+		if (!i915_vma_is_pinned(vma))
 			WARN_ON(i915_vma_unbind(vma));
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c1e7ee212e7e..cc9c0e4073ff 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -44,11 +44,10 @@
 struct i915_execbuffer_params {
 	struct drm_device               *dev;
 	struct drm_file                 *file;
+	struct i915_vma			*batch_vma;
 	uint32_t                        dispatch_flags;
 	uint32_t                        args_batch_start_offset;
-	uint64_t                        batch_obj_vm_offset;
 	struct intel_engine_cs          *engine;
-	struct drm_i915_gem_object      *batch_obj;
 	struct i915_gem_context         *ctx;
 	struct drm_i915_gem_request     *request;
 };
@@ -101,6 +100,26 @@ eb_reset(struct eb_vmas *eb)
 		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
 }
 
+static struct i915_vma *
+eb_get_batch(struct eb_vmas *eb)
+{
+	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
+
+	/*
+	 * SNA is doing fancy tricks with compressing batch buffers, which leads
+	 * to negative relocation deltas. Usually that works out ok since the
+	 * relocate address is still positive, except when the batch is placed
+	 * very low in the GTT. Ensure this doesn't happen.
+	 *
+	 * Note that actual hangs have only been observed on gen7, but for
+	 * paranoia do it everywhere.
+	 */
+	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+
+	return vma;
+}
+
 static int
 eb_lookup_vmas(struct eb_vmas *eb,
 	       struct drm_i915_gem_exec_object2 *exec,
@@ -231,7 +250,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
 		i915_gem_object_unpin_fence(obj);
 
 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
-		vma->pin_count--;
+		__i915_vma_unpin(vma);
 
 	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 }
@@ -652,16 +671,16 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 			flags |= PIN_HIGH;
 	}
 
-	ret = i915_gem_object_pin(obj, vma->vm,
-				  entry->pad_to_size,
-				  entry->alignment,
-				  flags);
-	if ((ret == -ENOSPC  || ret == -E2BIG) &&
+	ret = i915_vma_pin(vma,
+			   entry->pad_to_size,
+			   entry->alignment,
+			   flags);
+	if ((ret == -ENOSPC || ret == -E2BIG) &&
 	    only_mappable_for_reloc(entry->flags))
-		ret = i915_gem_object_pin(obj, vma->vm,
-					  entry->pad_to_size,
-					  entry->alignment,
-					  flags & ~PIN_MAPPABLE);
+		ret = i915_vma_pin(vma,
+				   entry->pad_to_size,
+				   entry->alignment,
+				   flags & ~PIN_MAPPABLE);
 	if (ret)
 		return ret;
 
@@ -1217,11 +1236,11 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
 	return 0;
 }
 
-static struct drm_i915_gem_object*
+static struct i915_vma*
 i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
 			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
-			  struct eb_vmas *eb,
 			  struct drm_i915_gem_object *batch_obj,
+			  struct eb_vmas *eb,
 			  u32 batch_start_offset,
 			  u32 batch_len,
 			  bool is_master)
@@ -1233,7 +1252,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
 	shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool,
 						   PAGE_ALIGN(batch_len));
 	if (IS_ERR(shadow_batch_obj))
-		return shadow_batch_obj;
+		return ERR_CAST(shadow_batch_obj);
 
 	ret = i915_parse_cmds(engine,
 			      batch_obj,
@@ -1258,14 +1277,12 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
 	i915_gem_object_get(shadow_batch_obj);
 	list_add_tail(&vma->exec_list, &eb->vmas);
 
-	shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
-
-	return shadow_batch_obj;
+	return vma;
 
 err:
 	i915_gem_object_unpin_pages(shadow_batch_obj);
 	if (ret == -EACCES) /* unhandled chained batch */
-		return batch_obj;
+		return NULL;
 	else
 		return ERR_PTR(ret);
 }
@@ -1346,11 +1363,11 @@ execbuf_submit(struct i915_execbuffer_params *params,
 	}
 
 	exec_len   = args->batch_len;
-	exec_start = params->batch_obj_vm_offset +
+	exec_start = params->batch_vma->node.start +
 		     params->args_batch_start_offset;
 
 	if (exec_len == 0)
-		exec_len = params->batch_obj->base.size;
+		exec_len = params->batch_vma->size;
 
 	ret = params->engine->emit_bb_start(params->request,
 					    exec_start, exec_len,
@@ -1386,26 +1403,6 @@ gen8_dispatch_bsd_ring(struct drm_i915_private *dev_priv, struct drm_file *file)
 	return file_priv->bsd_ring;
 }
 
-static struct drm_i915_gem_object *
-eb_get_batch(struct eb_vmas *eb)
-{
-	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
-
-	/*
-	 * SNA is doing fancy tricks with compressing batch buffers, which leads
-	 * to negative relocation deltas. Usually that works out ok since the
-	 * relocate address is still positive, except when the batch is placed
-	 * very low in the GTT. Ensure this doesn't happen.
-	 *
-	 * Note that actual hangs have only been observed on gen7, but for
-	 * paranoia do it everywhere.
-	 */
-	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
-		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
-
-	return vma->obj;
-}
-
 #define I915_USER_RINGS (4)
 
 static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
@@ -1473,7 +1470,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	struct eb_vmas *eb;
-	struct drm_i915_gem_object *batch_obj;
 	struct drm_i915_gem_exec_object2 shadow_exec_entry;
 	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx;
@@ -1567,7 +1563,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		goto err;
 
 	/* take note of the batch buffer before we might reorder the lists */
-	batch_obj = eb_get_batch(eb);
+	params->batch_vma = eb_get_batch(eb);
 
 	/* Move the objects en-masse into the GTT, evicting if necessary. */
 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
@@ -1591,7 +1587,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	}
 
 	/* Set the pending read domains for the batch buffer to COMMAND */
-	if (batch_obj->base.pending_write_domain) {
+	if (params->batch_vma->obj->base.pending_write_domain) {
 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
 		ret = -EINVAL;
 		goto err;
@@ -1599,26 +1595,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	params->args_batch_start_offset = args->batch_start_offset;
 	if (i915_needs_cmd_parser(engine) && args->batch_len) {
-		struct drm_i915_gem_object *parsed_batch_obj;
-
-		parsed_batch_obj = i915_gem_execbuffer_parse(engine,
-							     &shadow_exec_entry,
-							     eb,
-							     batch_obj,
-							     args->batch_start_offset,
-							     args->batch_len,
-							     file->is_master);
-		if (IS_ERR(parsed_batch_obj)) {
-			ret = PTR_ERR(parsed_batch_obj);
+		struct i915_vma *vma;
+
+		vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry,
+						params->batch_vma->obj,
+						eb,
+						args->batch_start_offset,
+						args->batch_len,
+						file->is_master);
+		if (IS_ERR(vma)) {
+			ret = PTR_ERR(vma);
 			goto err;
 		}
 
-		/*
-		 * parsed_batch_obj == batch_obj means batch not fully parsed:
-		 * Accept, but don't promote to secure.
-		 */
-
-		if (parsed_batch_obj != batch_obj) {
+		if (vma) {
 			/*
 			 * Batch parsed and accepted:
 			 *
@@ -1630,16 +1620,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 			 */
 			dispatch_flags |= I915_DISPATCH_SECURE;
 			params->args_batch_start_offset = 0;
-			batch_obj = parsed_batch_obj;
+			params->batch_vma = vma;
 		}
 	}
 
-	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
+	params->batch_vma->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
 
 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but bdw mucks it up again. */
 	if (dispatch_flags & I915_DISPATCH_SECURE) {
+		struct drm_i915_gem_object *obj = params->batch_vma->obj;
+
 		/*
 		 * So on first glance it looks freaky that we pin the batch here
 		 * outside of the reservation loop. But:
@@ -1650,13 +1642,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		 *   fitting due to fragmentation.
 		 * So this is actually safe.
 		 */
-		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
 		if (ret)
 			goto err;
 
-		params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj);
-	} else
-		params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
+		params->batch_vma = i915_gem_obj_to_ggtt(obj);
+	}
 
 	/* Allocate a request for this batch buffer nice and early. */
 	params->request = i915_gem_request_alloc(engine, ctx);
@@ -1679,12 +1670,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	params->file                    = file;
 	params->engine                    = engine;
 	params->dispatch_flags          = dispatch_flags;
-	params->batch_obj               = batch_obj;
 	params->ctx                     = ctx;
 
 	ret = execbuf_submit(params, args, &eb->vmas);
 err_request:
-	__i915_add_request(params->request, params->batch_obj, ret == 0);
+	__i915_add_request(params->request, params->batch_vma->obj, ret == 0);
 
 err_batch_unpin:
 	/*
@@ -1694,8 +1684,7 @@ err_batch_unpin:
 	 * active.
 	 */
 	if (dispatch_flags & I915_DISPATCH_SECURE)
-		i915_gem_object_ggtt_unpin(batch_obj);
-
+		i915_vma_unpin(params->batch_vma);
 err:
 	/* the request owns the ref now */
 	i915_gem_context_put(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index b76811d60e8c..c7a77e0f18c2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3288,7 +3288,7 @@ i915_vma_retire(struct i915_gem_active *active,
 		return;
 
 	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
-	if (unlikely(vma->closed && !vma->pin_count))
+	if (unlikely(vma->closed && !i915_vma_is_pinned(vma)))
 		WARN_ON(i915_vma_unbind(vma));
 }
 
@@ -3311,7 +3311,7 @@ void i915_vma_close(struct i915_vma *vma)
 	vma->closed = true;
 
 	list_del_init(&vma->obj_link);
-	if (!i915_vma_is_active(vma) && !vma->pin_count)
+	if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma))
 		WARN_ON(i915_vma_unbind(vma));
 }
 
@@ -3614,13 +3614,10 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 		return 0;
 
 	if (vma->bound == 0 && vma->vm->allocate_va_range) {
-		/* XXX: i915_vma_pin() will fix this +- hack */
-		vma->pin_count++;
 		trace_i915_va_alloc(vma);
 		ret = vma->vm->allocate_va_range(vma->vm,
 						 vma->node.start,
 						 vma->node.size);
-		vma->pin_count--;
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 70f2911cd78f..cfae2fe1e14f 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -816,7 +816,7 @@ static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
 			break;
 
 		list_for_each_entry(vma, &obj->vma_list, obj_link)
-			if (vma->vm == vm && vma->pin_count > 0)
+			if (vma->vm == vm && i915_vma_is_pinned(vma))
 				capture_bo(err++, vma);
 	}
 
@@ -1225,7 +1225,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
 		list_for_each_entry(vma, &obj->vma_list, obj_link)
-			if (vma->vm == vm && vma->pin_count > 0)
+			if (vma->vm == vm && i915_vma_is_pinned(vma))
 				i++;
 	}
 	error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
-- 
2.8.1


* [PATCH 10/38] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin()
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (8 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 09/38] drm/i915: Start passing around i915_vma from execbuffer Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-08  9:43   ` Daniel Vetter
  2016-06-03 16:55 ` [PATCH 11/38] drm/i915: Make fb_tracking.lock a spinlock Chris Wilson
                   ` (28 subsequent siblings)
  38 siblings, 1 reply; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

Since i915_gem_obj_ggtt_pin() is an idiom-breaking curried wrapper
around i915_gem_object_ggtt_pin(), spare us the confusion and remove
it. Removing it now simplifies later patches that change the
i915_vma_pin() (and friends) interface.
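
For reference, the conversion throughout this patch is mechanical; a
representative before/after taken from the hunks below:

	/* before: curried helper with implicit normal view and zero size */
	ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_HIGH);

	/* after: the view (NULL meaning the normal view) and the size
	 * are spelled out at every call site
	 */
	ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH);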

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h              | 35 ++++++++-------------
 drivers/gpu/drm/i915/i915_gem.c              | 46 +++++++++++++--------------
 drivers/gpu/drm/i915/i915_gem_context.c      |  5 ++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   | 10 +++---
 drivers/gpu/drm/i915/i915_gem_gtt.h          | 47 +++++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_render_state.c |  2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c   |  4 +--
 drivers/gpu/drm/i915/intel_guc_loader.c      |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c             |  8 +++--
 drivers/gpu/drm/i915/intel_overlay.c         |  3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c      | 16 +++++-----
 11 files changed, 89 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f537d8fc5e0f..861d132b2fe4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2934,32 +2934,32 @@ void i915_gem_free_object(struct drm_gem_object *obj);
 int __must_check
 i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
 /* Flags used by pin/bind&friends. */
-#define PIN_MAPPABLE	(1<<0)
-#define PIN_NONBLOCK	(1<<1)
-#define PIN_GLOBAL	(1<<2)
-#define PIN_OFFSET_BIAS	(1<<3)
-#define PIN_USER	(1<<4)
-#define PIN_UPDATE	(1<<5)
-#define PIN_ZONE_4G	(1<<6)
-#define PIN_HIGH	(1<<7)
-#define PIN_OFFSET_FIXED	(1<<8)
+#define PIN_GLOBAL	(1<<0)
+#define PIN_USER	(1<<1)
+#define PIN_UPDATE	(1<<2)
+#define PIN_MAPPABLE	(1<<3)
+#define PIN_ZONE_4G	(1<<4)
+#define PIN_NONBLOCK	(1<<5)
+#define PIN_HIGH	(1<<6)
+#define PIN_OFFSET_BIAS	(1<<7)
+#define PIN_OFFSET_FIXED (1<<8)
 #define PIN_OFFSET_MASK (~4095)
 
 static inline void __i915_vma_pin(struct i915_vma *vma)
 {
 	GEM_BUG_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
-	vma->pin_count++;
+	vma->flags++;
 }
 
 static inline bool i915_vma_is_pinned(struct i915_vma *vma)
 {
-	return vma->pin_count;
+	return vma->flags & DRM_I915_GEM_OBJECT_MAX_PIN_COUNT;
 }
 
 static inline void __i915_vma_unpin(struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_pinned(vma));
-	vma->pin_count--;
+	vma->flags--;
 }
 
 static inline void i915_vma_unpin(struct i915_vma *vma)
@@ -2972,7 +2972,7 @@ int __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
 			 uint64_t size,
-			 uint32_t alignment,
+			 uint64_t alignment,
 			 uint64_t flags);
 
 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
@@ -3223,15 +3223,6 @@ static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
 unsigned long
 i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj);
 
-static inline int __must_check
-i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
-		      uint32_t alignment,
-		      unsigned flags)
-{
-	return i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal,
-					0, alignment, flags);
-}
-
 void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
 				     const struct i915_ggtt_view *view);
 static inline void
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 71a32a9f9858..53776a071ce7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -772,7 +772,9 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 	char __user *user_data;
 	int page_offset, page_length, ret;
 
-	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
+	ret = i915_gem_object_ggtt_pin(obj, NULL,
+				       0, 0,
+				       PIN_MAPPABLE | PIN_NONBLOCK);
 	if (ret)
 		goto out;
 
@@ -3408,32 +3410,35 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 int
 i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	unsigned bound = vma->bound;
+	unsigned bound;
 	int ret;
 
 	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
 	GEM_BUG_ON((flags & PIN_GLOBAL) && !vma->is_ggtt);
 
-	if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
-		return -EBUSY;
-
 	/* Pin early to prevent the shrinker/eviction logic from destroying
 	 * our vma as we insert and bind.
 	 */
-	__i915_vma_pin(vma);
+	bound = vma->flags++;
+	if (WARN_ON((bound & 0xf) == (DRM_I915_GEM_OBJECT_MAX_PIN_COUNT-1))) {
+		ret = -EBUSY;
+		goto err;
+	}
 
-	if (!bound) {
+	if ((bound & 0xff) == 0) {
 		ret = i915_vma_insert(vma, size, alignment, flags);
 		if (ret)
 			goto err;
 	}
 
-	ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
-	if (ret)
-		goto err;
+	if (~(bound >> 4) & (flags & (GLOBAL_BIND | LOCAL_BIND))) {
+		ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
+		if (ret)
+			goto err;
 
-	if ((bound ^ vma->bound) & GLOBAL_BIND)
-		__i915_vma_set_map_and_fenceable(vma);
+		if ((bound ^ vma->flags) & (GLOBAL_BIND << 4))
+			__i915_vma_set_map_and_fenceable(vma);
+	}
 
 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
 	return 0;
@@ -3447,13 +3452,14 @@ int
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
 			 uint64_t size,
-			 uint32_t alignment,
+			 uint64_t alignment,
 			 uint64_t flags)
 {
 	struct i915_vma *vma;
 	int ret;
 
-	BUG_ON(!view);
+	if (view == NULL)
+		view = &i915_ggtt_view_normal;
 
 	vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
 	if (IS_ERR(vma))
@@ -3465,11 +3471,11 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 
 		WARN(vma->pin_count,
 		     "bo is already pinned in ggtt with incorrect alignment:"
-		     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
+		     " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
 		     " obj->map_and_fenceable=%d\n",
 		     upper_32_bits(vma->node.start),
 		     lower_32_bits(vma->node.start),
-		     alignment,
+		     (long long)alignment,
 		     !!(flags & PIN_MAPPABLE),
 		     obj->map_and_fenceable);
 		ret = i915_vma_unbind(vma);
@@ -3484,13 +3490,7 @@ void
 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
 				const struct i915_ggtt_view *view)
 {
-	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
-
-	GEM_BUG_ON(!vma);
-	WARN_ON(i915_vma_is_pinned(vma));
-	WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
-
-	__i915_vma_unpin(vma);
+	i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
 }
 
 int
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 5ed91406d4e9..c9b8c2c62828 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -722,9 +722,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 		return 0;
 
 	/* Trying to pin first makes error handling easier. */
-	ret = i915_gem_obj_ggtt_pin(to->engine[RCS].state,
-				    to->ggtt_alignment,
-				    0);
+	ret = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0,
+				       to->ggtt_alignment, 0);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index cc9c0e4073ff..69bf73b51df9 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -34,10 +34,10 @@
 #include <linux/dma_remapping.h>
 #include <linux/uaccess.h>
 
-#define  __EXEC_OBJECT_HAS_PIN (1<<31)
-#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
-#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
-#define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
+#define  __EXEC_OBJECT_HAS_PIN (1U<<31)
+#define  __EXEC_OBJECT_HAS_FENCE (1U<<30)
+#define  __EXEC_OBJECT_NEEDS_MAP (1U<<29)
+#define  __EXEC_OBJECT_NEEDS_BIAS (1U<<28)
 
 #define BATCH_OFFSET_BIAS (256*1024)
 
@@ -1263,7 +1263,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
 	if (ret)
 		goto err;
 
-	ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0);
+	ret = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
 	if (ret)
 		goto err;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 2bd8ec7e1948..5655358a60e1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -184,13 +184,30 @@ struct i915_vma {
 
 	struct i915_gem_active last_read[I915_NUM_ENGINES];
 
-	/** Flags and address space this VMA is bound to */
+	union {
+		struct {
+			/**
+			 * How many users have pinned this object in GTT space. The following
+			 * users can each hold at most one reference: pwrite/pread, execbuffer
+			 * (objects are not allowed multiple times for the same batchbuffer),
+			 * and the framebuffer code. When switching/pageflipping, the
+			 * framebuffer code has at most two buffers pinned per crtc.
+			 *
+			 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
+			 * bits with absolutely no headroom. So use 4 bits. */
+			unsigned int pin_count : 4;
+#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
+
+			/** Flags and address space this VMA is bound to */
 #define GLOBAL_BIND	(1<<0)
 #define LOCAL_BIND	(1<<1)
-	unsigned int bound : 4;
-	unsigned int active : I915_NUM_ENGINES;
-	bool is_ggtt : 1;
-	bool closed : 1;
+			unsigned int bound : 4;
+			unsigned int active : I915_NUM_ENGINES;
+			bool is_ggtt : 1;
+			bool closed : 1;
+		};
+		unsigned int flags;
+	};
 
 	/**
 	 * Support different GGTT views into the same object.
@@ -215,39 +232,27 @@ struct i915_vma {
 	struct hlist_node exec_node;
 	unsigned long exec_handle;
 	struct drm_i915_gem_exec_object2 *exec_entry;
-
-	/**
-	 * How many users have pinned this object in GTT space. The following
-	 * users can each hold at most one reference: pwrite/pread, execbuffer
-	 * (objects are not allowed multiple times for the same batchbuffer),
-	 * and the framebuffer code. When switching/pageflipping, the
-	 * framebuffer code has at most two buffers pinned per crtc.
-	 *
-	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
-	 * bits with absolutely no headroom. So use 4 bits. */
-	unsigned int pin_count:4;
-#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
 };
 
 static inline bool i915_vma_is_active(const struct i915_vma *vma)
 {
-	return vma->active;
+	return vma->flags & (((1 << I915_NUM_ENGINES) - 1) << 8);
 }
 
 static inline void i915_vma_set_active(struct i915_vma *vma, unsigned engine)
 {
-	vma->active |= 1 << engine;
+	vma->flags |= 0x100 << engine;
 }
 
 static inline void i915_vma_unset_active(struct i915_vma *vma, unsigned engine)
 {
-	vma->active &= ~(1 << engine);
+	vma->flags &= ~(0x100 << engine);
 }
 
 static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
 					      unsigned engine)
 {
-	return vma->active & (1 << engine);
+	return vma->flags & (0x100 << engine);
 }
 
 struct i915_page_dma {
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index c0abe9a2210f..4cf82697b3db 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -72,7 +72,7 @@ static int render_state_init(struct render_state *so,
 	if (IS_ERR(so->obj))
 		return PTR_ERR(so->obj);
 
-	ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0);
+	ret = i915_gem_object_ggtt_pin(so->obj, NULL, 0, 0, 0);
 	if (ret)
 		goto free_gem;
 
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index cc4792df249d..63ef34c78494 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -613,8 +613,8 @@ static struct drm_i915_gem_object *gem_allocate_guc_obj(struct drm_device *dev,
 		return NULL;
 	}
 
-	if (i915_gem_obj_ggtt_pin(obj, PAGE_SIZE,
-			PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) {
+	if (i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
+				     PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) {
 		i915_gem_object_put(obj);
 		return NULL;
 	}
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 74a5f11a5689..be93b458968a 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -321,7 +321,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
 		return ret;
 	}
 
-	ret = i915_gem_obj_ggtt_pin(guc_fw->guc_fw_obj, 0, 0);
+	ret = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0);
 	if (ret) {
 		DRM_DEBUG_DRIVER("pin failed %d\n", ret);
 		return ret;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 964108cbb9c0..6cdc421fdc37 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -774,8 +774,9 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
 	if (ce->pin_count++)
 		return 0;
 
-	ret = i915_gem_obj_ggtt_pin(ce->state, GEN8_LR_CONTEXT_ALIGN,
-				    PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
+	ret = i915_gem_object_ggtt_pin(ce->state, NULL,
+				       0, GEN8_LR_CONTEXT_ALIGN,
+				       PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
 	if (ret)
 		goto err;
 
@@ -1154,7 +1155,8 @@ static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
 		return ret;
 	}
 
-	ret = i915_gem_obj_ggtt_pin(engine->wa_ctx.obj, PAGE_SIZE, 0);
+	ret = i915_gem_object_ggtt_pin(engine->wa_ctx.obj, NULL,
+				       0, PAGE_SIZE, 0);
 	if (ret) {
 		DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
 				 ret);
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 5f645ad2babd..9b0fb7e23cbb 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -1412,7 +1412,8 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
 		}
 		overlay->flip_addr = reg_bo->phys_handle->busaddr;
 	} else {
-		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE);
+		ret = i915_gem_object_ggtt_pin(reg_bo, NULL,
+					       0, PAGE_SIZE, PIN_MAPPABLE);
 		if (ret) {
 			DRM_ERROR("failed to pin overlay register bo\n");
 			goto out_free_bo;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d63e4fdc60de..f86039455c5a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -648,7 +648,7 @@ int intel_init_pipe_control(struct intel_engine_cs *engine, int size)
 		goto err;
 	}
 
-	ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_HIGH);
+	ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH);
 	if (ret)
 		goto err_unref;
 
@@ -1816,7 +1816,7 @@ static int init_status_page(struct intel_engine_cs *engine)
 			 * actualy map it).
 			 */
 			flags |= PIN_MAPPABLE;
-		ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
+		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags);
 		if (ret) {
 err_unref:
 			i915_gem_object_put(obj);
@@ -1863,7 +1863,7 @@ int intel_ring_pin(struct intel_ring *ring)
 	int ret;
 
 	if (HAS_LLC(dev_priv) && !obj->stolen) {
-		ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, flags);
+		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags);
 		if (ret)
 			return ret;
 
@@ -1877,8 +1877,8 @@ int intel_ring_pin(struct intel_ring *ring)
 			goto err_unpin;
 		}
 	} else {
-		ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE,
-					    flags | PIN_MAPPABLE);
+		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
+					       flags | PIN_MAPPABLE);
 		if (ret)
 			return ret;
 
@@ -2007,7 +2007,8 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
 		return 0;
 
 	if (ce->state) {
-		ret = i915_gem_obj_ggtt_pin(ce->state, ctx->ggtt_alignment, 0);
+		ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0,
+					       ctx->ggtt_alignment, 0);
 		if (ret)
 			goto error;
 	}
@@ -2574,7 +2575,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 				i915.semaphores = 0;
 			} else {
 				i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
-				ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK);
+				ret = i915_gem_object_ggtt_pin(obj, NULL,
+							       0, 0, 0);
 				if (ret != 0) {
 					i915_gem_object_put(obj);
 					DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
-- 
2.8.1


* [PATCH 11/38] drm/i915: Make fb_tracking.lock a spinlock
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (9 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 10/38] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin() Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 12/38] drm/i915: Use atomics to manipulate obj->frontbuffer_bits Chris Wilson
                   ` (27 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

We only need a very lightweight mechanism here, as the locking is only
used to coordinate updates to a bitfield.

Also double check that the object is still pinned to the display plane
before processing the state change.

v2: Move the cheap unlikely tests into the caller
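
The resulting shape (a sketch of what the hunks below implement): the
inline wrapper performs the cheap, unlocked early-out tests, and only
the out-of-line slow path takes fb_tracking.lock for the brief bitfield
update:

	static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
					      bool retire,
					      enum fb_op_origin origin)
	{
		/* cheap unlikely tests live in the caller (v2) */
		if (!obj->frontbuffer_bits || !obj->pin_display)
			return;

		__intel_fb_obj_flush(obj, retire, origin);
	}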

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h          |  2 +-
 drivers/gpu/drm/i915/i915_gem.c          |  2 +-
 drivers/gpu/drm/i915/intel_drv.h         | 29 ++++++++++++++---
 drivers/gpu/drm/i915/intel_frontbuffer.c | 54 ++++++++++++++------------------
 4 files changed, 51 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 861d132b2fe4..59846de3b33d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1650,7 +1650,7 @@ struct intel_pipe_crc {
 };
 
 struct i915_frontbuffer_tracking {
-	struct mutex lock;
+	spinlock_t lock;
 
 	/*
 	 * Tracking bits for delayed frontbuffer flushing du to gpu activity or
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 53776a071ce7..522f379c8d44 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4180,7 +4180,7 @@ i915_gem_load_init(struct drm_device *dev)
 
 	dev_priv->mm.interruptible = true;
 
-	mutex_init(&dev_priv->fb_tracking.lock);
+	spin_lock_init(&dev_priv->fb_tracking.lock);
 }
 
 void i915_gem_load_cleanup(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a29618dc7e98..9410767c97da 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1106,8 +1106,6 @@ void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state);
 uint32_t ddi_signal_levels(struct intel_dp *intel_dp);
 
 /* intel_frontbuffer.c */
-void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
-			     enum fb_op_origin origin);
 void intel_frontbuffer_flip_prepare(struct drm_device *dev,
 				    unsigned frontbuffer_bits);
 void intel_frontbuffer_flip_complete(struct drm_device *dev,
@@ -1118,8 +1116,31 @@ unsigned int intel_fb_align_height(struct drm_device *dev,
 				   unsigned int height,
 				   uint32_t pixel_format,
 				   uint64_t fb_format_modifier);
-void intel_fb_obj_flush(struct drm_i915_gem_object *obj, bool retire,
-			enum fb_op_origin origin);
+
+void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
+			       enum fb_op_origin origin);
+static inline void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
+					   enum fb_op_origin origin)
+{
+	if (!obj->frontbuffer_bits || !obj->pin_display)
+		return;
+
+	__intel_fb_obj_invalidate(obj, origin);
+}
+
+void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
+			  bool retire,
+			  enum fb_op_origin origin);
+static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
+				      bool retire,
+				      enum fb_op_origin origin)
+{
+	if (!obj->frontbuffer_bits || !obj->pin_display)
+		return;
+
+	__intel_fb_obj_flush(obj, retire, origin);
+}
+
 u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv,
 			      uint64_t fb_modifier, uint32_t pixel_format);
 
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
index ac85357010b4..a38ccfe4894a 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
@@ -76,24 +76,19 @@
  * until the rendering completes or a flip on this frontbuffer plane is
  * scheduled.
  */
-void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
-			     enum fb_op_origin origin)
+void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
+			       enum fb_op_origin origin)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-	if (!obj->frontbuffer_bits)
-		return;
-
 	if (origin == ORIGIN_CS) {
-		mutex_lock(&dev_priv->fb_tracking.lock);
-		dev_priv->fb_tracking.busy_bits
-			|= obj->frontbuffer_bits;
-		dev_priv->fb_tracking.flip_bits
-			&= ~obj->frontbuffer_bits;
-		mutex_unlock(&dev_priv->fb_tracking.lock);
+		spin_lock(&dev_priv->fb_tracking.lock);
+		dev_priv->fb_tracking.busy_bits |= obj->frontbuffer_bits;
+		dev_priv->fb_tracking.flip_bits &= ~obj->frontbuffer_bits;
+		spin_unlock(&dev_priv->fb_tracking.lock);
 	}
 
 	intel_psr_invalidate(dev, obj->frontbuffer_bits);
@@ -120,11 +115,11 @@ static void intel_frontbuffer_flush(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
 	/* Delay flushing when rings are still busy.*/
-	mutex_lock(&dev_priv->fb_tracking.lock);
+	spin_lock(&dev_priv->fb_tracking.lock);
 	frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits;
-	mutex_unlock(&dev_priv->fb_tracking.lock);
+	spin_unlock(&dev_priv->fb_tracking.lock);
 
-	if (!frontbuffer_bits)
+	if (frontbuffer_bits == 0)
 		return;
 
 	intel_edp_drrs_flush(dev, frontbuffer_bits);
@@ -142,8 +137,9 @@ static void intel_frontbuffer_flush(struct drm_device *dev,
  * completed and frontbuffer caching can be started again. If @retire is true
  * then any delayed flushes will be unblocked.
  */
-void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
-			bool retire, enum fb_op_origin origin)
+void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
+			  bool retire,
+			  enum fb_op_origin origin)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -151,21 +147,18 @@ void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-	if (!obj->frontbuffer_bits)
-		return;
-
 	frontbuffer_bits = obj->frontbuffer_bits;
 
 	if (retire) {
-		mutex_lock(&dev_priv->fb_tracking.lock);
+		spin_lock(&dev_priv->fb_tracking.lock);
 		/* Filter out new bits since rendering started. */
 		frontbuffer_bits &= dev_priv->fb_tracking.busy_bits;
-
 		dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
-		mutex_unlock(&dev_priv->fb_tracking.lock);
+		spin_unlock(&dev_priv->fb_tracking.lock);
 	}
 
-	intel_frontbuffer_flush(dev, frontbuffer_bits, origin);
+	if (frontbuffer_bits)
+		intel_frontbuffer_flush(dev, frontbuffer_bits, origin);
 }
 
 /**
@@ -185,11 +178,11 @@ void intel_frontbuffer_flip_prepare(struct drm_device *dev,
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
-	mutex_lock(&dev_priv->fb_tracking.lock);
+	spin_lock(&dev_priv->fb_tracking.lock);
 	dev_priv->fb_tracking.flip_bits |= frontbuffer_bits;
 	/* Remove stale busy bits due to the old buffer. */
 	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
-	mutex_unlock(&dev_priv->fb_tracking.lock);
+	spin_unlock(&dev_priv->fb_tracking.lock);
 
 	intel_psr_single_frame_update(dev, frontbuffer_bits);
 }
@@ -209,13 +202,14 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev,
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
-	mutex_lock(&dev_priv->fb_tracking.lock);
+	spin_lock(&dev_priv->fb_tracking.lock);
 	/* Mask any cancelled flips. */
 	frontbuffer_bits &= dev_priv->fb_tracking.flip_bits;
 	dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits;
-	mutex_unlock(&dev_priv->fb_tracking.lock);
+	spin_unlock(&dev_priv->fb_tracking.lock);
 
-	intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
+	if (frontbuffer_bits)
+		intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
 }
 
 /**
@@ -234,10 +228,10 @@ void intel_frontbuffer_flip(struct drm_device *dev,
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
-	mutex_lock(&dev_priv->fb_tracking.lock);
+	spin_lock(&dev_priv->fb_tracking.lock);
 	/* Remove stale busy bits due to the old buffer. */
 	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
-	mutex_unlock(&dev_priv->fb_tracking.lock);
+	spin_unlock(&dev_priv->fb_tracking.lock);
 
 	intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
 }
-- 
2.8.1


* [PATCH 12/38] drm/i915: Use atomics to manipulate obj->frontbuffer_bits
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (10 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 11/38] drm/i915: Make fb_tracking.lock a spinlock Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 13/38] drm/i915: Move obj->active:5 to obj->flags Chris Wilson
                   ` (26 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

The individual bits inside obj->frontbuffer_bits are protected by each
plane->mutex, but the whole bitfield may be accessed by multiple KMS
operations simultaneously, so the read-modify-write cycles need to be
atomic. Furthermore, updating the single field no longer needs to be
done under struct_mutex, one more step towards its removal as the de
facto BKL.
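
A sketch of the resulting access pattern (names as used in the hunks
below):

	/* writers: plane->mutex serialises each bit, the atomics make
	 * the read-modify-write on the shared field safe
	 */
	atomic_or(intel_plane->frontbuffer_bit, &obj->frontbuffer_bits);
	atomic_andnot(intel_plane->frontbuffer_bit, &obj->frontbuffer_bits);

	/* readers take a plain snapshot of the whole bitfield */
	unsigned frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);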

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  6 ++++--
 drivers/gpu/drm/i915/i915_drv.h          |  4 +---
 drivers/gpu/drm/i915/i915_gem.c          | 18 +++++++++++-------
 drivers/gpu/drm/i915/intel_display.c     |  7 ++++---
 drivers/gpu/drm/i915/intel_drv.h         |  4 ++--
 drivers/gpu/drm/i915/intel_frontbuffer.c | 19 +++++++------------
 6 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f4745e0c8d5c..355bbf895c22 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -138,6 +138,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct intel_engine_cs *engine;
 	struct i915_vma *vma;
+	unsigned frontbuffer_bits;
 	int pin_count = 0;
 	enum intel_engine_id id;
 
@@ -204,8 +205,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	if (engine)
 		seq_printf(m, " (%s)", engine->name);
 
-	if (obj->frontbuffer_bits)
-		seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
+	frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
+	if (frontbuffer_bits)
+		seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits);
 }
 
 static int i915_gem_object_list_info(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 59846de3b33d..236ade61cade 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2108,8 +2108,6 @@ struct drm_i915_gem_object_ops {
  */
 #define INTEL_MAX_SPRITE_BITS_PER_PIPE 5
 #define INTEL_FRONTBUFFER_BITS_PER_PIPE 8
-#define INTEL_FRONTBUFFER_BITS \
-	(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES)
 #define INTEL_FRONTBUFFER_PRIMARY(pipe) \
 	(1 << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
 #define INTEL_FRONTBUFFER_CURSOR(pipe) \
@@ -2197,7 +2195,7 @@ struct drm_i915_gem_object {
 	unsigned int cache_level:3;
 	unsigned int cache_dirty:1;
 
-	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
+	atomic_t frontbuffer_bits;
 
 	/** Count of VMA actually bound by this object */
 	unsigned int bind_count;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 522f379c8d44..05425ae7c8a8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3746,7 +3746,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->stolen)
 		i915_gem_object_unpin_pages(obj);
 
-	WARN_ON(obj->frontbuffer_bits);
+	WARN_ON(atomic_read(&obj->frontbuffer_bits));
 
 	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
@@ -4288,16 +4288,20 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 		       struct drm_i915_gem_object *new,
 		       unsigned frontbuffer_bits)
 {
+	/* Control of individual bits within the bitfield are guarded by
+	 * the owning plane->mutex, i.e. we can never see concurrent
+	 * manipulation of individual bits. But since the bitfield as a whole
+	 * is updated using RMW, we need to use atomics in order to update
+	 * the bits.
+	 */
 	if (old) {
-		WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
-		WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
-		old->frontbuffer_bits &= ~frontbuffer_bits;
+		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
+		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
 	}
 
 	if (new) {
-		WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
-		WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
-		new->frontbuffer_bits |= frontbuffer_bits;
+		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
+		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 82533f1da54c..0cfaace38370 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2635,7 +2635,8 @@ valid_fb:
 	primary->fb = primary->state->fb = fb;
 	primary->crtc = primary->state->crtc = &intel_crtc->base;
 	intel_crtc->base.state->plane_mask |= (1 << drm_plane_index(primary));
-	obj->frontbuffer_bits |= to_intel_plane(primary)->frontbuffer_bit;
+	atomic_or(to_intel_plane(primary)->frontbuffer_bit,
+		  &obj->frontbuffer_bits);
 }
 
 static void i9xx_update_primary_plane(struct drm_plane *primary,
@@ -14012,8 +14013,8 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
 		intel_unpin_fb_obj(old_state->fb, old_state->rotation);
 
 	/* prepare_fb aborted? */
-	if ((old_obj && (old_obj->frontbuffer_bits & intel_plane->frontbuffer_bit)) ||
-	    (obj && !(obj->frontbuffer_bits & intel_plane->frontbuffer_bit)))
+	if ((old_obj && (atomic_read(&old_obj->frontbuffer_bits) & intel_plane->frontbuffer_bit)) ||
+	    (obj && !(atomic_read(&obj->frontbuffer_bits) & intel_plane->frontbuffer_bit)))
 		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
 
 	i915_gem_request_assign(&old_intel_state->wait_req, NULL);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 9410767c97da..834646b4cc3f 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1122,7 +1122,7 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
 static inline void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
 					   enum fb_op_origin origin)
 {
-	if (!obj->frontbuffer_bits || !obj->pin_display)
+	if (!atomic_read(&obj->frontbuffer_bits) || !obj->pin_display)
 		return;
 
 	__intel_fb_obj_invalidate(obj, origin);
@@ -1135,7 +1135,7 @@ static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
 				      bool retire,
 				      enum fb_op_origin origin)
 {
-	if (!obj->frontbuffer_bits || !obj->pin_display)
+	if (!atomic_read(&obj->frontbuffer_bits) || !obj->pin_display)
 		return;
 
 	__intel_fb_obj_flush(obj, retire, origin);
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
index a38ccfe4894a..8af291d22589 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
@@ -81,19 +81,18 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-
-	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+	unsigned frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
 
 	if (origin == ORIGIN_CS) {
 		spin_lock(&dev_priv->fb_tracking.lock);
-		dev_priv->fb_tracking.busy_bits |= obj->frontbuffer_bits;
-		dev_priv->fb_tracking.flip_bits &= ~obj->frontbuffer_bits;
+		dev_priv->fb_tracking.busy_bits |= frontbuffer_bits;
+		dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits;
 		spin_unlock(&dev_priv->fb_tracking.lock);
 	}
 
-	intel_psr_invalidate(dev, obj->frontbuffer_bits);
-	intel_edp_drrs_invalidate(dev, obj->frontbuffer_bits);
-	intel_fbc_invalidate(dev_priv, obj->frontbuffer_bits, origin);
+	intel_psr_invalidate(dev, frontbuffer_bits);
+	intel_edp_drrs_invalidate(dev, frontbuffer_bits);
+	intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin);
 }
 
 /**
@@ -143,11 +142,7 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	unsigned frontbuffer_bits;
-
-	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
-
-	frontbuffer_bits = obj->frontbuffer_bits;
+	unsigned frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
 
 	if (retire) {
 		spin_lock(&dev_priv->fb_tracking.lock);
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 13/38] drm/i915: Move obj->active:5 to obj->flags
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (11 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 12/38] drm/i915: Use atomics to manipulate obj->frontbuffer_bits Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-08  9:53   ` Daniel Vetter
  2016-06-03 16:55 ` [PATCH 14/38] drm/i915: Move i915_gem_object_wait_rendering() Chris Wilson
                   ` (25 subsequent siblings)
  38 siblings, 1 reply; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

We are motivated to avoid using a bitfield for obj->active for a couple
of reasons. Firstly, we wish to document our lockless read of obj->active
using READ_ONCE inside i915_gem_busy_ioctl() and that requires an
integral type (i.e. not a bitfield). Secondly, gcc produces abysmal code
when presented with a bitfield and that shows up high on the profiles of
request tracking (mainly due to excess memory traffic as it converts
the bitfield to a register and back, generating frequent AGI
(address-generation interlock) stalls in the process).
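
As an aside (a sketch only, with made-up names, not part of the patch):
READ_ONCE() requires an integral lvalue, which a plain flags word
provides and a bitfield cannot:

        /* hypothetical types for illustration */
        struct obj_flags { unsigned long flags; };
        #define BO_ACTIVE_MASK 0x1f /* one bit per engine */

        static inline unsigned long busy_lockless(const struct obj_flags *o)
        {
                /* a single aligned load; no read-modify-write of a
                 * packed bitfield, and safe to annotate for lockless
                 * readers
                 */
                return READ_ONCE(o->flags) & BO_ACTIVE_MASK;
        }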

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  2 +-
 drivers/gpu/drm/i915/i915_drv.h            | 31 +++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem.c            | 16 +++++++--------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +++++-----
 drivers/gpu/drm/i915/i915_gem_shrinker.c   |  5 +++--
 drivers/gpu/drm/i915/i915_gem_userptr.c    |  2 +-
 6 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 355bbf895c22..9154919fdd56 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -91,7 +91,7 @@ static int i915_capabilities(struct seq_file *m, void *data)
 
 static char get_active_flag(struct drm_i915_gem_object *obj)
 {
-	return obj->active ? '*' : ' ';
+	return i915_gem_object_is_active(obj) ? '*' : ' ';
 }
 
 static char get_pin_flag(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 236ade61cade..e72b7f35a98e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2136,12 +2136,16 @@ struct drm_i915_gem_object {
 
 	struct list_head batch_pool_link;
 
+	unsigned long flags;
 	/**
 	 * This is set if the object is on the active lists (has pending
 	 * rendering and so a non-zero seqno), and is not set if it is on
 	 * inactive (ready to be unbound) list.
 	 */
-	unsigned int active:I915_NUM_ENGINES;
+#define I915_BO_ACTIVE_SHIFT 0
+#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1)
+#define __I915_BO_ACTIVE(bo) \
+	((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
 
 	/**
 	 * This is set if the object has been written to since last bound
@@ -2288,6 +2292,31 @@ i915_gem_object_put_unlocked(struct drm_i915_gem_object *obj)
 }
 __deprecated extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
 
+static inline unsigned long
+i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
+{
+	return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK;
+}
+
+static inline void
+i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
+{
+	obj->flags |= 1 << (engine + I915_BO_ACTIVE_SHIFT);
+}
+
+static inline void
+i915_gem_object_unset_active(struct drm_i915_gem_object *obj, int engine)
+{
+	obj->flags &= ~(1 << (engine + I915_BO_ACTIVE_SHIFT));
+}
+
+static inline bool
+i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
+				  int engine)
+{
+	return obj->flags & (1 << (engine + I915_BO_ACTIVE_SHIFT));
+}
+
 /*
  * Optimised SGL iterator for GEM objects
  */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 05425ae7c8a8..a8279a598c4b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1126,7 +1126,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-	active_mask = obj->active;
+	active_mask = i915_gem_object_is_active(obj);
 	if (!active_mask)
 		return 0;
 
@@ -1165,7 +1165,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 	BUG_ON(!dev_priv->mm.interruptible);
 
-	active_mask = obj->active;
+	active_mask = i915_gem_object_is_active(obj);
 	if (!active_mask)
 		return 0;
 
@@ -2109,10 +2109,10 @@ i915_gem_object_retire__read(struct i915_gem_active *active,
 	struct drm_i915_gem_object *obj =
 		container_of(active, struct drm_i915_gem_object, last_read[ring]);
 
-	GEM_BUG_ON((obj->active & (1 << ring)) == 0);
+	GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, ring));
 
-	obj->active &= ~(1 << ring);
-	if (obj->active)
+	i915_gem_object_unset_active(obj, ring);
+	if (i915_gem_object_is_active(obj))
 		return;
 
 	/* Bump our place on the bound list to keep it roughly in LRU order
@@ -2383,7 +2383,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		return -ENOENT;
 	}
 
-	if (!obj->active)
+	if (!i915_gem_object_is_active(obj))
 		goto out;
 
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -2472,7 +2472,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-	active_mask = obj->active;
+	active_mask = i915_gem_object_is_active(obj);
 	if (!active_mask)
 		return 0;
 
@@ -3516,7 +3516,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	 * become non-busy without any further actions.
 	 */
 	args->busy = 0;
-	if (obj->active) {
+	if (i915_gem_object_is_active(obj)) {
 		struct drm_i915_gem_request *req;
 		int i;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 69bf73b51df9..224265619f00 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -432,7 +432,7 @@ relocate_entry_clflush(struct drm_i915_gem_object *obj,
 
 static bool object_is_idle(struct drm_i915_gem_object *obj)
 {
-	unsigned long active = obj->active;
+	unsigned long active = i915_gem_object_is_active(obj);
 	int idx;
 
 	for_each_active(active, idx) {
@@ -991,7 +991,7 @@ static int
 i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 				struct list_head *vmas)
 {
-	const unsigned other_rings = ~intel_engine_flag(req->engine);
+	const unsigned other_rings = (~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK) << I915_BO_ACTIVE_SHIFT;
 	struct i915_vma *vma;
 	uint32_t flush_domains = 0;
 	bool flush_chipset = false;
@@ -1000,7 +1000,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
-		if (obj->active & other_rings) {
+		if (obj->flags & other_rings) {
 			ret = i915_gem_object_sync(obj, req);
 			if (ret)
 				return ret;
@@ -1159,9 +1159,9 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (obj->active == 0)
+	if (!i915_gem_object_is_active(obj))
 		i915_gem_object_get(obj);
-	obj->active |= 1 << idx;
+	i915_gem_object_set_active(obj, idx);
 	i915_gem_active_set(&obj->last_read[idx], req);
 
 	if (flags & EXEC_OBJECT_WRITE) {
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 71ad58836f48..5cbc4ee52c6d 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -168,7 +168,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 			    !is_vmalloc_addr(obj->mapping))
 				continue;
 
-			if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active)
+			if ((flags & I915_SHRINK_ACTIVE) == 0 &&
+			    i915_gem_object_is_active(obj))
 				continue;
 
 			if (!can_release_pages(obj))
@@ -253,7 +254,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 			count += obj->base.size >> PAGE_SHIFT;
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		if (!obj->active && can_release_pages(obj))
+		if (!i915_gem_object_is_active(obj) && can_release_pages(obj))
 			count += obj->base.size >> PAGE_SHIFT;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index e57521dbddc6..221792632290 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -67,7 +67,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj)
 	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
 	int i, n;
 
-	if (!obj->active)
+	if (!i915_gem_object_is_active(obj))
 		return;
 
 	n = 0;
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 14/38] drm/i915: Move i915_gem_object_wait_rendering()
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (12 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 13/38] drm/i915: Move obj->active:5 to obj->flags Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 15/38] drm/i915: Mark all current requests as complete before resetting them Chris Wilson
                   ` (24 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

Just move it earlier so that we can use the companion nonblocking
version in a couple more callsites without having to add a forward
declaration.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 182 ++++++++++++++++++++--------------------
 1 file changed, 91 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a8279a598c4b..93a874b0ba14 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -278,6 +278,97 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	return ret;
 }
 
+/**
+ * Ensures that all rendering to the object has completed and the object is
+ * safe to unbind from the GTT or access from the CPU.
+ */
+int
+i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
+			       bool readonly)
+{
+	struct i915_gem_active *active;
+	unsigned long active_mask;
+	int idx;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	active_mask = i915_gem_object_is_active(obj);
+	if (!active_mask)
+		return 0;
+
+	if (!readonly) {
+		active = obj->last_read;
+	} else {
+		active_mask = 1;
+		active = &obj->last_write;
+	}
+
+	for_each_active(active_mask, idx) {
+		int ret = i915_gem_active_wait(&active[idx],
+					       &obj->base.dev->struct_mutex);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* A nonblocking variant of the above wait. This is a highly dangerous routine
+ * as the object state may change during this call.
+ */
+static __must_check int
+i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
+					    struct intel_rps_client *rps,
+					    bool readonly)
+{
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
+	struct i915_gem_active *active;
+	unsigned long active_mask;
+	int ret, i, n = 0;
+
+	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
+	BUG_ON(!dev_priv->mm.interruptible);
+
+	active_mask = i915_gem_object_is_active(obj);
+	if (!active_mask)
+		return 0;
+
+	if (!readonly) {
+		active = obj->last_read;
+	} else {
+		active_mask = 1;
+		active = &obj->last_write;
+	}
+
+	for_each_active(active_mask, i) {
+		struct drm_i915_gem_request *req;
+
+		req = i915_gem_active_get(&active[i],
+					  &obj->base.dev->struct_mutex);
+		if (req)
+			requests[n++] = req;
+	}
+
+	mutex_unlock(&dev->struct_mutex);
+	ret = 0;
+	for (i = 0; ret == 0 && i < n; i++)
+		ret = __i915_wait_request(requests[i], true, NULL, rps);
+	mutex_lock(&dev->struct_mutex);
+
+	for (i = 0; i < n; i++)
+		i915_gem_request_put(requests[i]);
+
+	return ret;
+}
+
+static struct intel_rps_client *to_rps_client(struct drm_file *file)
+{
+	struct drm_i915_file_private *fpriv = file->driver_priv;
+	return &fpriv->rps;
+}
+
 int
 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 			    int align)
@@ -1113,97 +1204,6 @@ put_rpm:
 }
 
 /**
- * Ensures that all rendering to the object has completed and the object is
- * safe to unbind from the GTT or access from the CPU.
- */
-int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
-			       bool readonly)
-{
-	struct i915_gem_active *active;
-	unsigned long active_mask;
-	int idx;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	active_mask = i915_gem_object_is_active(obj);
-	if (!active_mask)
-		return 0;
-
-	if (!readonly) {
-		active = obj->last_read;
-	} else {
-		active_mask = 1;
-		active = &obj->last_write;
-	}
-
-	for_each_active(active_mask, idx) {
-		int ret = i915_gem_active_wait(&active[idx],
-					       &obj->base.dev->struct_mutex);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-/* A nonblocking variant of the above wait. This is a highly dangerous routine
- * as the object state may change during this call.
- */
-static __must_check int
-i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
-					    struct intel_rps_client *rps,
-					    bool readonly)
-{
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
-	struct i915_gem_active *active;
-	unsigned long active_mask;
-	int ret, i, n = 0;
-
-	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(!dev_priv->mm.interruptible);
-
-	active_mask = i915_gem_object_is_active(obj);
-	if (!active_mask)
-		return 0;
-
-	if (!readonly) {
-		active = obj->last_read;
-	} else {
-		active_mask = 1;
-		active = &obj->last_write;
-	}
-
-	for_each_active(active_mask, i) {
-		struct drm_i915_gem_request *req;
-
-		req = i915_gem_active_get(&active[i],
-					  &obj->base.dev->struct_mutex);
-		if (req)
-			requests[n++] = req;
-	}
-
-	mutex_unlock(&dev->struct_mutex);
-	ret = 0;
-	for (i = 0; ret == 0 && i < n; i++)
-		ret = __i915_wait_request(requests[i], true, NULL, rps);
-	mutex_lock(&dev->struct_mutex);
-
-	for (i = 0; i < n; i++)
-		i915_gem_request_put(requests[i]);
-
-	return ret;
-}
-
-static struct intel_rps_client *to_rps_client(struct drm_file *file)
-{
-	struct drm_i915_file_private *fpriv = file->driver_priv;
-	return &fpriv->rps;
-}
-
-/**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
  */
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 15/38] drm/i915: Mark all current requests as complete before resetting them
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (13 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 14/38] drm/i915: Move i915_gem_object_wait_rendering() Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 16/38] drm/i915: Enable lockless lookup of request tracking via RCU Chris Wilson
                   ` (23 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

Following a GPU reset upon a hang, we currently retire all the requests
and only then mark them all as complete. If we instead mark them all as
complete first, we both keep the normal retirement order (completed
first, then retired) and provide a small optimisation for concurrent
lookups.
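
The reordering amounts to (a sketch; retire_all_requests() is a
hypothetical stand-in for the cleanup loop):

        /* mark everything complete first, so that a concurrent
         * lockless lookup sees the requests as finished and does
         * not try to wait on them mid-reset...
         */
        intel_engine_init_seqno(engine, engine->last_submitted_seqno);

        /* ...and only then unwind them in the usual retirement order */
        retire_all_requests(engine);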

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 93a874b0ba14..f6f039aad6e2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2200,6 +2200,12 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 {
 	struct intel_ring *ring;
 
+	/* Mark all pending requests as complete so that any concurrent
+	 * (lockless) lookup doesn't try and wait upon the request as we
+	 * reset it.
+	 */
+	intel_engine_init_seqno(engine, engine->last_submitted_seqno);
+
 	/*
 	 * Clear the execlists queue up before freeing the requests, as those
 	 * are the ones that keep the context and ringbuffer backing objects
@@ -2241,8 +2247,6 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 		ring->last_retired_head = ring->tail;
 		intel_ring_update_space(ring);
 	}
-
-	intel_engine_init_seqno(engine, engine->last_submitted_seqno);
 }
 
 void i915_gem_reset(struct drm_device *dev)
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 16/38] drm/i915: Enable lockless lookup of request tracking via RCU
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (14 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 15/38] drm/i915: Mark all current requests as complete before resetting them Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 17/38] drm/i915: Introduce i915_gem_active_wait_unlocked() Chris Wilson
                   ` (22 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx; +Cc: Goel, Akash, Josh Triplett

If we enable RCU for the requests (providing a grace period where we can
inspect a "dead" request before it is freed), we can allow callers to
carefully perform lockless lookup of an active request.

However, by enabling deferred freeing of requests, we can potentially
hog a lot of memory when dealing with tens of thousands of requests per
second - with a quick insertion of a synchronize_rcu() inside our
shrinker callback, that issue disappears.

v2: Currently, it is our responsibility to handle reclaim i.e. to avoid
hogging memory with the delayed slab frees. At the moment, we wait for a
grace period in the shrinker, and block for all RCU callbacks on oom.
Suggested alternatives focus on flushing our RCU callback when we have a
certain number of outstanding request frees, and blocking on that flush
after a second high watermark. (So rather than wait for the system to
run out of memory, we stop issuing requests - both are nondeterministic.)

Paul E. McKenney wrote:

Another approach is synchronize_rcu() after some largish number of
requests.  The advantage of this approach is that it throttles the
production of callbacks at the source.  The corresponding disadvantage
is that it slows things up.

Another approach is to use call_rcu(), but if the previous call_rcu()
is still in flight, block waiting for it.  Yet another approach is
the get_state_synchronize_rcu() / cond_synchronize_rcu() pair.  The
idea is to do something like this:

        cond_synchronize_rcu(cookie);
        cookie = get_state_synchronize_rcu();

You would of course do an initial get_state_synchronize_rcu() to
get things going.  This would not block unless there was less than
one grace period's worth of time between invocations.  But this
assumes a busy system, where there is almost always a grace period
in flight.  But you can make that happen as follows:

        cond_synchronize_rcu(cookie);
        cookie = get_state_synchronize_rcu();
        call_rcu(&my_rcu_head, noop_function);

Note that you need additional code to make sure that the old callback
has completed before doing a new one.  Setting and clearing a flag
with appropriate memory ordering control suffices (e.g,. smp_load_acquire()
and smp_store_release()).
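
A minimal sketch of that last suggestion (illustrative only; the flag
and memory-ordering plumbing mentioned above is omitted):

        static unsigned long gp_cookie;

        static void throttle_request_frees(void)
        {
                /* Blocks only if less than one full grace period has
                 * elapsed since the previous checkpoint, then records
                 * a new one - throttling callback production at the
                 * source without always paying for synchronize_rcu().
                 */
                cond_synchronize_rcu(gp_cookie);
                gp_cookie = get_state_synchronize_rcu();
        }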

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: "Goel, Akash" <akash.goel@intel.com>
Cc: Josh Triplett <josh@joshtriplett.org>
---
 drivers/gpu/drm/i915/i915_gem.c          |   7 +-
 drivers/gpu/drm/i915/i915_gem_request.c  |   2 +-
 drivers/gpu/drm/i915/i915_gem_request.h  | 110 ++++++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  15 +++--
 4 files changed, 113 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f6f039aad6e2..4c0e3632214f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4158,7 +4158,9 @@ i915_gem_load_init(struct drm_device *dev)
 	dev_priv->requests =
 		kmem_cache_create("i915_gem_request",
 				  sizeof(struct drm_i915_gem_request), 0,
-				  SLAB_HWCACHE_ALIGN,
+				  SLAB_HWCACHE_ALIGN |
+				  SLAB_RECLAIM_ACCOUNT |
+				  SLAB_DESTROY_BY_RCU,
 				  NULL);
 
 	INIT_LIST_HEAD(&dev_priv->context_list);
@@ -4194,6 +4196,9 @@ void i915_gem_load_cleanup(struct drm_device *dev)
 	kmem_cache_destroy(dev_priv->requests);
 	kmem_cache_destroy(dev_priv->vmas);
 	kmem_cache_destroy(dev_priv->objects);
+
+	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
+	rcu_barrier();
 }
 
 int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 59afc8e547c4..a0cdd3f10566 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -344,7 +344,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 		prefetchw(next);
 
 		INIT_LIST_HEAD(&active->link);
-		active->__request = NULL;
+		RCU_INIT_POINTER(active->__request, NULL);
 
 		active->retire(active, request);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index e794801baf07..6aa246848894 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -178,6 +178,12 @@ i915_gem_request_get(struct drm_i915_gem_request *req)
 	return to_request(fence_get(&req->fence));
 }
 
+static inline struct drm_i915_gem_request *
+i915_gem_request_get_rcu(struct drm_i915_gem_request *req)
+{
+	return to_request(fence_get_rcu(&req->fence));
+}
+
 static inline void
 i915_gem_request_put(struct drm_i915_gem_request *req)
 {
@@ -276,21 +282,12 @@ static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
  * resource including itself.
  */
 struct i915_gem_active {
-	struct drm_i915_gem_request *__request;
+	struct drm_i915_gem_request __rcu *__request;
 	struct list_head link;
 	void (*retire)(struct i915_gem_active *,
 		       struct drm_i915_gem_request *);
 };
 
-/**
- * i915_gem_active_set - updates the tracker to watch the current request
- * @active - the active tracker
- * @request - the request to watch
- *
- * i915_gem_active_set() watches the given @request for completion. Whilst
- * that @request is busy, the @active reports busy. When that @request is
- * retired, the @active tracker is updated to report idle.
- */
 static inline void
 init_request_active(struct i915_gem_active *active,
 		    void (*func)(struct i915_gem_active *,
@@ -300,18 +297,33 @@ init_request_active(struct i915_gem_active *active,
 	active->retire = func;
 }
 
+/**
+ * i915_gem_active_set - updates the tracker to watch the current request
+ * @active - the active tracker
+ * @request - the request to watch
+ *
+ * i915_gem_active_set() watches the given @request for completion. Whilst
+ * that @request is busy, the @active reports busy. When that @request is
+ * retired, the @active tracker is updated to report idle.
+ */
 static inline void
 i915_gem_active_set(struct i915_gem_active *active,
 		    struct drm_i915_gem_request *request)
 {
 	list_move(&active->link, &request->active_list);
-	active->__request = request;
+	rcu_assign_pointer(active->__request, request);
 }
 
 static inline struct drm_i915_gem_request *
 __i915_gem_active_peek(const struct i915_gem_active *active)
 {
-	return active->__request;
+	/* Inside the error capture (running with the driver in an unknown
+	 * state), we want to bend the rules slightly (a lot).
+	 *
+	 * Work is in progress to make it safer; in the meantime this keeps
+	 * the known issue from spamming the logs.
+	 */
+	return rcu_dereference_protected(active->__request, 1);
 }
 
 /**
@@ -326,8 +338,9 @@ static inline struct drm_i915_gem_request *
 i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
 {
 	struct drm_i915_gem_request *request;
-       
-	request = active->__request;
+
+	request = rcu_dereference_protected(active->__request,
+					    lockdep_is_held(mutex));
 	if (!request || i915_gem_request_completed(request))
 		return NULL;
 
@@ -348,6 +361,72 @@ i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex)
 }
 
 /**
+ * i915_gem_active_get_rcu - return a reference to the active request
+ * @active - the active tracker
+ *
+ * i915_gem_active_get_rcu() returns a reference to the active request, or NULL
+ * if the active tracker is idle. The caller must hold the RCU read lock.
+ */
+static inline struct drm_i915_gem_request *
+i915_gem_active_get_rcu(const struct i915_gem_active *active)
+{
+	/* Performing a lockless retrieval of the active request is super
+	 * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing
+	 * slab of request objects will not be freed whilst we hold the
+	 * RCU read lock. It does not guarantee that the request itself
+	 * will not be freed and then *reused*. Viz,
+	 *
+	 * Thread A			Thread B
+	 *
+	 * req = active.request
+	 * 				retire(req) -> free(req);
+	 * 				(req is now first on the slab freelist)
+	 * 				active.request = NULL
+	 *
+	 * 				req = new submission on a new object
+	 * ref(req)
+	 *
+	 * To prevent the request from being reused whilst the caller
+	 * uses it, we take a reference like normal. Whilst acquiring
+	 * the reference we check that it is not in a destroyed state
+	 * (refcnt == 0). That prevents the request being reallocated
+	 * whilst the caller holds on to it. To check that the request
+	 * was not reallocated as we acquired the reference we have to
+	 * check that our request remains the active request across
+	 * the lookup, in the same manner as a seqlock. The visibility
+	 * of the pointer versus the reference counting is controlled
+	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
+	 *
+	 * In the middle of all that, we inspect whether the request is
+	 * complete. Retiring is lazy so the request may be completed long
+	 * before the active tracker is updated. Querying whether the
+	 * request is complete is far cheaper (as it involves no locked
+	 * instructions setting cachelines to exclusive) than acquiring
+	 * the reference, so we do it first. The RCU read lock ensures the
+	 * pointer dereference is valid, but does not ensure that the
+	 * seqno nor HWS is the right one! However, if the request was
+	 * reallocated, that means the active tracker's request was complete.
+	 * If the new request is also complete, then both are and we can
+	 * just report the active tracker is idle. If the new request is
+	 * incomplete, then we acquire a reference on it and check that
+	 * it remained the active request.
+	 */
+	do {
+		struct drm_i915_gem_request *request;
+
+		request = rcu_dereference(active->__request);
+		if (!request || i915_gem_request_completed(request))
+			return NULL;
+
+		request = i915_gem_request_get_rcu(request);
+		if (!request || request == rcu_dereference(active->__request))
+			return request;
+
+		i915_gem_request_put(request);
+	} while (1);
+}
+
+/**
  * __i915_gem_active_is_busy - report whether the active tracker is assigned
  * @active - the active tracker
  *
@@ -411,7 +490,8 @@ i915_gem_active_retire(const struct i915_gem_active *active,
 {
 	struct drm_i915_gem_request *request;
 
-	request = active->__request;
+	request = rcu_dereference_protected(active->__request,
+					    lockdep_is_held(mutex));
 	if (!request)
 		return 0;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 5cbc4ee52c6d..6eea4abeb9ce 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -191,6 +191,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 		intel_runtime_pm_put(dev_priv);
 
 	i915_gem_retire_requests(dev_priv);
+	/* expedite the RCU grace period to free some request slabs */
+	synchronize_rcu_expedited();
 
 	return count;
 }
@@ -211,10 +213,15 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
  */
 unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 {
-	return i915_gem_shrink(dev_priv, -1UL,
-			       I915_SHRINK_BOUND |
-			       I915_SHRINK_UNBOUND |
-			       I915_SHRINK_ACTIVE);
+	unsigned long freed;
+
+	freed = i915_gem_shrink(dev_priv, -1UL,
+				I915_SHRINK_BOUND |
+				I915_SHRINK_UNBOUND |
+				I915_SHRINK_ACTIVE);
+	rcu_barrier(); /* wait until our RCU delayed slab frees are completed */
+
+	return freed;
 }
 
 static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 17/38] drm/i915: Introduce i915_gem_active_wait_unlocked()
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (15 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 16/38] drm/i915: Enable lockless lookup of request tracking via RCU Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 18/38] drm/i915: Convert non-blocking waits for requests over to using RCU Chris Wilson
                   ` (21 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

It is useful to be able to wait on pending rendering without grabbing
the struct_mutex. We can do this by using the i915_gem_active_get_rcu()
primitive to acquire a reference to the pending request without
requiring struct_mutex, just the RCU read lock, and then call
__i915_wait_request().
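
For example (a hypothetical call site, not from this series), waiting
on an object's last write then becomes a one-liner that never touches
struct_mutex:

        ret = i915_gem_active_wait_unlocked(&obj->last_write,
                                            true, NULL, NULL);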

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 6aa246848894..006f212b7fd6 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -476,6 +476,27 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
 	return i915_wait_request(request);
 }
 
+static inline int
+i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
+			      bool interruptible,
+			      s64 *timeout,
+			      struct intel_rps_client *rps)
+{
+	struct drm_i915_gem_request *request;
+	int ret = 0;
+
+	rcu_read_lock();
+	request = i915_gem_active_get_rcu(active);
+	rcu_read_unlock();
+
+	if (request) {
+		ret = __i915_wait_request(request, interruptible, timeout, rps);
+		i915_gem_request_put(request);
+	}
+
+	return ret;
+}
+
 /**
  * i915_gem_active_retire - waits until the request is retired
  * @active - the active request on which to wait
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 18/38] drm/i915: Convert non-blocking waits for requests over to using RCU
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (16 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 17/38] drm/i915: Introduce i915_gem_active_wait_unlocked() Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 19/38] drm/i915: Convert non-blocking userptr " Chris Wilson
                   ` (20 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

We can completely avoid taking the struct_mutex around the non-blocking
waits by switching over to the RCU request management (trading the mutex
for an RCU read lock and some complex atomic operations). The improvement
is that we gain further contention reduction, and overall the code
becomes simpler due to the reduced mutex dancing.
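
The resulting ioctl shape is (a condensed sketch of the conversion
below, eliding the error handling details):

        obj = i915_gem_object_lookup(file, args->handle); /* holds a reference */
        if (!obj)
                return -ENOENT;

        /* flush the object off the GPU without any locks held */
        ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
        if (ret)
                goto out_unlocked;

        /* only now do we take (and briefly hold) the struct_mutex */
        ret = i915_mutex_lock_interruptible(dev);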

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 113 +++++++++++++++++-----------------------
 1 file changed, 47 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4c0e3632214f..76e5a241c7be 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -313,25 +313,20 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
-/* A nonblocking variant of the above wait. This is a highly dangerous routine
- * as the object state may change during this call.
+/* A nonblocking variant of the above wait. Must be called prior to
+ * acquiring the mutex for the object, as the object state may change
+ * during this call. The caller must hold a reference to the object.
  */
 static __must_check int
-i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
-					    struct intel_rps_client *rps,
-					    bool readonly)
+__unsafe_wait_rendering(struct drm_i915_gem_object *obj,
+			struct intel_rps_client *rps,
+			bool readonly)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
 	struct i915_gem_active *active;
 	unsigned long active_mask;
-	int ret, i, n = 0;
-
-	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(!dev_priv->mm.interruptible);
+	int idx;
 
-	active_mask = i915_gem_object_is_active(obj);
+	active_mask = __I915_BO_ACTIVE(obj);
 	if (!active_mask)
 		return 0;
 
@@ -342,25 +337,16 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
 		active = &obj->last_write;
 	}
 
-	for_each_active(active_mask, i) {
-		struct drm_i915_gem_request *req;
+	for_each_active(active_mask, idx) {
+		int ret;
 
-		req = i915_gem_active_get(&active[i],
-					  &obj->base.dev->struct_mutex);
-		if (req)
-			requests[n++] = req;
+		ret = i915_gem_active_wait_unlocked(&active[idx],
+						    true, NULL, rps);
+		if (ret)
+			return ret;
 	}
 
-	mutex_unlock(&dev->struct_mutex);
-	ret = 0;
-	for (i = 0; ret == 0 && i < n; i++)
-		ret = __i915_wait_request(requests[i], true, NULL, rps);
-	mutex_lock(&dev->struct_mutex);
-
-	for (i = 0; i < n; i++)
-		i915_gem_request_put(requests[i]);
-
-	return ret;
+	return 0;
 }
 
 static struct intel_rps_client *to_rps_client(struct drm_file *file)
@@ -1218,10 +1204,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	int ret;
 
 	/* Only handle setting domains to types used by the CPU. */
-	if (write_domain & I915_GEM_GPU_DOMAINS)
-		return -EINVAL;
-
-	if (read_domains & I915_GEM_GPU_DOMAINS)
+	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
 		return -EINVAL;
 
 	/* Having something in the write domain implies it's in the read
@@ -1230,25 +1213,21 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	if (write_domain != 0 && read_domains != write_domain)
 		return -EINVAL;
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
 	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj) {
-		ret = -ENOENT;
-		goto unlock;
-	}
+	if (!obj)
+		return -ENOENT;
 
 	/* Try to flush the object off the GPU without holding the lock.
 	 * We will repeat the flush holding the lock in the normal manner
 	 * to catch cases where we are gazumped.
 	 */
-	ret = i915_gem_object_wait_rendering__nonblocking(obj,
-							  to_rps_client(file),
-							  !write_domain);
+	ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
+	if (ret)
+		goto out_unlocked;
+
+	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
-		goto unref;
+		goto out_unlocked;
 
 	if (read_domains & I915_GEM_DOMAIN_GTT)
 		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
@@ -1260,11 +1239,13 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 					write_domain == I915_GEM_DOMAIN_GTT ?
 					ORIGIN_GTT : ORIGIN_CPU);
 
-unref:
 	i915_gem_object_put(obj);
-unlock:
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
+
+out_unlocked:
+	i915_gem_object_put_unlocked(obj);
+	return ret;
 }
 
 /**
@@ -1397,6 +1378,15 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	int ret = 0;
 	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
 
+	/* Try to flush the object off the GPU first without holding the lock.
+	 * Upon acquiring the lock, we will perform our sanity checks and then
+	 * repeat the flush holding the lock in the normal manner to catch cases
+	 * where we are gazumped.
+	 */
+	ret = __unsafe_wait_rendering(obj, NULL, !write);
+	if (ret)
+		goto err;
+
 	intel_runtime_pm_get(dev_priv);
 
 	/* We don't use vmf->pgoff since that has the fake offset */
@@ -1405,23 +1395,14 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
-		goto out;
+		goto err_rpm;
 
 	trace_i915_gem_object_fault(obj, page_offset, true, write);
 
-	/* Try to flush the object off the GPU first without holding the lock.
-	 * Upon reacquiring the lock, we will perform our sanity checks and then
-	 * repeat the flush holding the lock in the normal manner to catch cases
-	 * where we are gazumped.
-	 */
-	ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
-	if (ret)
-		goto unlock;
-
 	/* Access to snoopable pages through the GTT is incoherent. */
 	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
 		ret = -EFAULT;
-		goto unlock;
+		goto err_unlock;
 	}
 
 	/* Use a partial view if the object is bigger than the aperture. */
@@ -1442,15 +1423,15 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	/* Now pin it into the GTT if needed */
 	ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
 	if (ret)
-		goto unlock;
+		goto err_unlock;
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, write);
 	if (ret)
-		goto unpin;
+		goto err_unpin;
 
 	ret = i915_gem_object_get_fence(obj);
 	if (ret)
-		goto unpin;
+		goto err_unpin;
 
 	/* Finally, remap it using the new GTT offset */
 	pfn = ggtt->mappable_base +
@@ -1495,11 +1476,13 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 					    (unsigned long)vmf->virtual_address,
 					    pfn + page_offset);
 	}
-unpin:
+err_unpin:
 	i915_gem_object_ggtt_unpin_view(obj, &view);
-unlock:
+err_unlock:
 	mutex_unlock(&dev->struct_mutex);
-out:
+err_rpm:
+	intel_runtime_pm_put(dev_priv);
+err:
 	switch (ret) {
 	case -EIO:
 		/*
@@ -1540,8 +1523,6 @@ out:
 		ret = VM_FAULT_SIGBUS;
 		break;
 	}
-
-	intel_runtime_pm_put(dev_priv);
 	return ret;
 }
 
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 19/38] drm/i915: Convert non-blocking userptr waits for requests over to using RCU
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (17 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 18/38] drm/i915: Convert non-blocking waits for requests over to using RCU Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 20/38] drm/i915/userptr: Remove superfluous interruptible=false on waiting Chris Wilson
                   ` (19 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

We can completely avoid taking the struct_mutex around the non-blocking
waits by switching over to the RCU request management (trading the mutex
for an RCU read lock and some complex atomic operations). The improvement
is that we gain further contention reduction, and overall the code
becomes simpler due to the reduced mutex dancing.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 34 +++++++--------------------------
 1 file changed, 7 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 221792632290..96ab6161903a 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -63,32 +63,12 @@ struct i915_mmu_object {
 
 static void wait_rendering(struct drm_i915_gem_object *obj)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
-	int i, n;
-
-	if (!i915_gem_object_is_active(obj))
-		return;
-
-	n = 0;
-	for (i = 0; i < I915_NUM_ENGINES; i++) {
-		struct drm_i915_gem_request *req;
+	unsigned long active = __I915_BO_ACTIVE(obj);
+	int idx;
 
-		req = i915_gem_active_get(&obj->last_read[i],
-					  &obj->base.dev->struct_mutex);
-		if (req)
-			requests[n++] = req;
-	}
-
-	mutex_unlock(&dev->struct_mutex);
-
-	for (i = 0; i < n; i++)
-		__i915_wait_request(requests[i], false, NULL, NULL);
-
-	mutex_lock(&dev->struct_mutex);
-
-	for (i = 0; i < n; i++)
-		i915_gem_request_put(requests[i]);
+	for_each_active(active, idx)
+		i915_gem_active_wait_unlocked(&obj->last_read[idx],
+					      false, NULL, NULL);
 }
 
 static void cancel_userptr(struct work_struct *work)
@@ -97,6 +77,8 @@ static void cancel_userptr(struct work_struct *work)
 	struct drm_i915_gem_object *obj = mo->obj;
 	struct drm_device *dev = obj->base.dev;
 
+	wait_rendering(obj);
+
 	mutex_lock(&dev->struct_mutex);
 	/* Cancel any active worker and force us to re-evaluate gup */
 	obj->userptr.work = NULL;
@@ -105,8 +87,6 @@ static void cancel_userptr(struct work_struct *work)
 		struct drm_i915_private *dev_priv = to_i915(dev);
 		bool was_interruptible;
 
-		wait_rendering(obj);
-
 		was_interruptible = dev_priv->mm.interruptible;
 		dev_priv->mm.interruptible = false;
 
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 20/38] drm/i915/userptr: Remove superfluous interruptible=false on waiting
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (18 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 19/38] drm/i915: Convert non-blocking userptr " Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 21/38] drm/i915: Avoid requiring struct_mutex during suspend Chris Wilson
                   ` (18 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

Inside the kthread context, we can't be interrupted by signals, so
touching the mm.interruptible flag is pointless and wait-request now
consumes EIO itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 96ab6161903a..57218cca7e05 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -84,16 +84,9 @@ static void cancel_userptr(struct work_struct *work)
 	obj->userptr.work = NULL;
 
 	if (obj->pages != NULL) {
-		struct drm_i915_private *dev_priv = to_i915(dev);
-		bool was_interruptible;
-
-		was_interruptible = dev_priv->mm.interruptible;
-		dev_priv->mm.interruptible = false;
-
+		/* We are inside a kthread context and can't be interrupted */
 		WARN_ON(i915_gem_object_unbind(obj));
 		WARN_ON(i915_gem_object_put_pages(obj));
-
-		dev_priv->mm.interruptible = was_interruptible;
 	}
 
 	i915_gem_object_put(obj);
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 21/38] drm/i915: Avoid requiring struct_mutex during suspend
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (19 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 20/38] drm/i915/userptr: Remove superfluous interruptible=false on waiting Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 22/38] drm/gem/shrinker: Wait before acquiring struct_mutex under oom Chris Wilson
                   ` (17 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

The struct_mutex can have some tricky interactions with other mutexes
(mainly due to using nasty constructs like stop_machine() from within
its confines). This makes it "illegal" (lockdep should generate WARNs)
to acquire from certain paths like suspend, where the locking order may
be inverted. We can extend the RCU request management to track activity
on an engine and thereby wait upon all GPU activity without taking the
struct_mutex.
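
With a per-engine i915_gem_active tracker recording the last request
submitted, "is the GPU busy?" reduces to (a sketch built on the helpers
this patch introduces):

        static bool gpu_is_busy(struct drm_i915_private *dev_priv)
        {
                struct intel_engine_cs *engine;

                for_each_engine(engine, dev_priv)
                        if (intel_engine_is_active(engine))
                                return true;

                return false;
        }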

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c         | 56 +++++++++++++--------------------
 drivers/gpu/drm/i915/i915_gem_evict.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.c |  8 +++--
 drivers/gpu/drm/i915/i915_gem_request.h | 11 +++++++
 drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
 drivers/gpu/drm/i915/i915_irq.c         |  3 +-
 drivers/gpu/drm/i915/intel_lrc.c        |  2 +-
 drivers/gpu/drm/i915/intel_pm.c         |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 30 +++++++++---------
 drivers/gpu/drm/i915/intel_ringbuffer.h | 26 ++++++++-------
 10 files changed, 73 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 76e5a241c7be..c1e91589e7bc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2179,13 +2179,18 @@ static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
 
 static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 {
+	struct drm_i915_gem_request *request;
 	struct intel_ring *ring;
 
+	request = i915_gem_active_peek(&engine->last_request,
+				       &engine->i915->dev->struct_mutex);
+
 	/* Mark all pending requests as complete so that any concurrent
 	 * (lockless) lookup doesn't try and wait upon the request as we
 	 * reset it.
 	 */
-	intel_engine_init_seqno(engine, engine->last_submitted_seqno);
+	if (request)
+		intel_engine_init_seqno(engine, request->fence.seqno);
 
 	/*
 	 * Clear the execlists queue up before freeing the requests, as those
@@ -2207,15 +2212,9 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 	 * implicit references on things like e.g. ppgtt address spaces through
 	 * the request.
 	 */
-	if (!list_empty(&engine->request_list)) {
-		struct drm_i915_gem_request *request;
-
-		request = list_last_entry(&engine->request_list,
-					  struct drm_i915_gem_request,
-					  link);
-
+	if (request)
 		i915_gem_request_retire_upto(request);
-	}
+	GEM_BUG_ON(intel_engine_is_active(engine));
 
 	/* Having flushed all requests from all queues, we know that all
 	 * ringbuffers must now be empty. However, since we do not reclaim
@@ -2614,8 +2613,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv)
 	struct intel_engine_cs *engine;
 	int ret;
 
-	lockdep_assert_held(&dev_priv->dev->struct_mutex);
-
 	for_each_engine(engine, dev_priv) {
 		if (engine->last_context == NULL)
 			continue;
@@ -3787,47 +3784,36 @@ struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
 	return NULL;
 }
 
-static void
-i915_gem_stop_engines(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine;
-
-	for_each_engine(engine, dev_priv)
-		dev_priv->gt.stop_engine(engine);
-}
-
 int
 i915_gem_suspend(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret = 0;
+	int ret;
 
-	mutex_lock(&dev->struct_mutex);
 	ret = i915_gem_wait_for_idle(dev_priv);
 	if (ret)
-		goto err;
-
-	i915_gem_retire_requests(dev_priv);
-
-	i915_gem_stop_engines(dev);
-	i915_gem_context_lost(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
+		return ret;
 
 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
 	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
 	flush_delayed_work(&dev_priv->gt.idle_work);
 
+	mutex_lock(&dev_priv->dev->struct_mutex);
+
 	/* Assert that we successfully flushed all the work and
 	 * reset the GPU back to its idle, low power state.
 	 */
-	WARN_ON(dev_priv->gt.awake);
+	if (dev_priv->gt.awake) {
+		if (INTEL_INFO(dev_priv)->gen >= 6)
+			gen6_rps_idle(dev_priv);
+		intel_runtime_pm_put(dev_priv);
+		dev_priv->gt.awake = false;
+	}
 
-	return 0;
+	i915_gem_context_lost(dev_priv);
+	mutex_unlock(&dev_priv->dev->struct_mutex);
 
-err:
-	mutex_unlock(&dev->struct_mutex);
-	return ret;
+	return 0;
 }
 
 void i915_gem_init_swizzling(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 680365f4c4cd..3ead9359dfa2 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -69,7 +69,7 @@ gpu_is_idle(struct drm_i915_private *dev_priv)
 	struct intel_engine_cs *engine;
 
 	for_each_engine(engine, dev_priv) {
-		if (!list_empty(&engine->request_list))
+		if (intel_engine_is_active(engine))
 			return false;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index a0cdd3f10566..016edc6f2d0b 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -445,6 +445,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 
 	trace_i915_gem_request_add(request);
 
 	request->head = request_start;
 
 	/* Whilst this request exists, batch_obj will be on the
@@ -462,7 +463,8 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	 */
 	request->emitted_jiffies = jiffies;
 	request->previous_seqno = engine->last_submitted_seqno;
-	smp_store_mb(engine->last_submitted_seqno, request->fence.seqno);
+	engine->last_submitted_seqno = request->fence.seqno;
+	i915_gem_active_set(&engine->last_request, request);
 	list_add_tail(&request->link, &engine->request_list);
 
 	/* Record the position of the start of the request so that
@@ -690,7 +692,7 @@ complete:
 	}
 
 	if (!IS_ERR_OR_NULL(rps) &&
-	    req->fence.seqno == req->engine->last_submitted_seqno) {
+	    req == __i915_gem_active_peek(&req->engine->last_request)) {
 		/* The GPU is now idle and this client has stalled.
 		 * Since no other client has submitted a request in the
 		 * meantime, assume that this client is the only one
@@ -757,7 +759,7 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
 
 	for_each_engine(engine, dev_priv) {
 		i915_gem_retire_requests_ring(engine);
-		if (list_empty(&engine->request_list))
+		if (!intel_engine_is_active(engine))
 			dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 006f212b7fd6..8d1225999fae 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -27,6 +27,17 @@
 
 #include <linux/fence.h>
 
+struct intel_wait {
+	struct rb_node node;
+	struct task_struct *task;
+	u32 seqno;
+};
+
+struct intel_signal_node {
+	struct rb_node node;
+	struct intel_wait wait;
+};
+
 /**
  * Request queue structure.
  *
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index cfae2fe1e14f..c2cf5bd57db5 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1003,7 +1003,7 @@ static void i915_record_ring_state(struct drm_i915_private *dev_priv,
 	ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
 	ering->acthd = intel_engine_get_active_head(engine);
 	ering->seqno = intel_engine_get_seqno(engine);
-	ering->last_seqno = engine->last_submitted_seqno;
+	ering->last_seqno = __active_get_seqno(&engine->last_request);
 	ering->start = I915_READ_START(engine);
 	ering->head = I915_READ_HEAD(engine);
 	ering->tail = I915_READ_TAIL(engine);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 1ffc997b19af..3987b7984fd8 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2805,8 +2805,7 @@ static void gen8_disable_vblank(struct drm_device *dev, unsigned int pipe)
 static bool
 ring_idle(struct intel_engine_cs *engine, u32 seqno)
 {
-	return i915_seqno_passed(seqno,
-				 READ_ONCE(engine->last_submitted_seqno));
+	return !intel_engine_is_active(engine);
 }
 
 static bool
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6cdc421fdc37..4bf63af2a282 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1787,7 +1787,6 @@ logical_ring_setup(struct drm_device *dev, enum intel_engine_id id)
 
 	engine->fw_domains = fw_domains;
 
-	INIT_LIST_HEAD(&engine->request_list);
 	INIT_LIST_HEAD(&engine->buffers);
 	INIT_LIST_HEAD(&engine->execlist_queue);
 	spin_lock_init(&engine->execlist_lock);
@@ -1799,6 +1798,7 @@ logical_ring_setup(struct drm_device *dev, enum intel_engine_id id)
 	logical_ring_default_vfuncs(engine);
 	logical_ring_default_irqs(engine, info->irq_shift);
 
+	intel_engine_init_requests(engine);
 	intel_engine_init_hangcheck(engine);
 	i915_gem_batch_pool_init(engine, &engine->batch_pool);
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index c141d3e15eed..45bf830a9b10 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6335,7 +6335,7 @@ bool i915_gpu_busy(void)
 	dev_priv = i915_mch_dev;
 
 	for_each_engine(engine, dev_priv)
-		ret |= !list_empty(&engine->request_list);
+		ret |= intel_engine_is_active(engine);
 
 out_unlock:
 	spin_unlock_irq(&mchdev_lock);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index f86039455c5a..f172ac6a06dc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2058,13 +2058,13 @@ static int intel_init_engine(struct drm_device *dev,
 
 	engine->i915 = dev_priv;
 	engine->fence_context = fence_context_alloc(1);
-	INIT_LIST_HEAD(&engine->request_list);
 	INIT_LIST_HEAD(&engine->execlist_queue);
 	INIT_LIST_HEAD(&engine->buffers);
 	i915_gem_batch_pool_init(engine, &engine->batch_pool);
 	memset(engine->semaphore.sync_seqno, 0,
 	       sizeof(engine->semaphore.sync_seqno));
 
+	intel_engine_init_requests(engine);
 	intel_engine_init_breadcrumbs(engine);
 
 	/* We may need to do things with the shrinker which
@@ -2152,22 +2152,24 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
 	engine->i915 = NULL;
 }
 
-int intel_engine_idle(struct intel_engine_cs *engine)
+static void
+intel_engine_retire(struct i915_gem_active *active,
+		    struct drm_i915_gem_request *rq)
 {
-	struct drm_i915_gem_request *req;
-
-	/* Wait upon the last request to be completed */
-	if (list_empty(&engine->request_list))
-		return 0;
+}
 
-	req = list_entry(engine->request_list.prev,
-			 struct drm_i915_gem_request,
-			 link);
+void intel_engine_init_requests(struct intel_engine_cs *engine)
+{
+	init_request_active(&engine->last_request, intel_engine_retire);
+	INIT_LIST_HEAD(&engine->request_list);
+}
 
-	/* Make sure we do not trigger any retires */
-	return __i915_wait_request(req,
-				   req->i915->mm.interruptible,
-				   NULL, NULL);
+int intel_engine_idle(struct intel_engine_cs *engine)
+{
+	/* Wait upon the last request to be completed */
+	return i915_gem_active_wait_unlocked(&engine->last_request,
+					     engine->i915->mm.interruptible,
+					     NULL, NULL);
 }
 
 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0976e155edc0..d19fb8c24919 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -3,6 +3,7 @@
 
 #include <linux/hashtable.h>
 #include "i915_gem_batch_pool.h"
+#include "i915_gem_request.h"
 
 #define I915_CMD_HASH_ORDER 9
 
@@ -310,6 +311,14 @@ struct intel_engine_cs {
 	 * inspecting request list.
 	 */
 	u32 last_submitted_seqno;
+
+	/* An RCU guarded pointer to the last request. No reference is
+	 * held to the request, users must carefully acquire a reference to
+	 * the request using i915_gem_active_get_request_rcu(), or hold the
+	 * struct_mutex.
+	 */
+	struct i915_gem_active last_request;
+
 	unsigned user_interrupts;
 
 	struct i915_gem_context *last_context;
@@ -455,6 +464,7 @@ void intel_ring_update_space(struct intel_ring *ring);
 
 int __must_check intel_engine_idle(struct intel_engine_cs *engine);
 void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno);
+void intel_engine_init_requests(struct intel_engine_cs *engine);
 
 int intel_init_pipe_control(struct intel_engine_cs *engine, int size);
 void intel_fini_pipe_control(struct intel_engine_cs *engine);
@@ -493,17 +503,6 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 }
 
 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
-struct intel_wait {
-	struct rb_node node;
-	struct task_struct *task;
-	u32 seqno;
-};
-
-struct intel_signal_node {
-	struct rb_node node;
-	struct intel_wait wait;
-};
-
 void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 static inline void intel_wait_init(struct intel_wait *wait, u32 seqno)
 {
@@ -540,4 +539,9 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 unsigned intel_kick_waiters(struct drm_i915_private *i915);
 unsigned intel_kick_signalers(struct drm_i915_private *i915);
 
+static inline bool intel_engine_is_active(struct intel_engine_cs *engine)
+{
+	return __i915_gem_active_is_busy(&engine->last_request);
+}
+
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 22/38] drm/gem/shrinker: Wait before acquiring struct_mutex under oom
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (20 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 21/38] drm/i915: Avoid requiring struct_mutex during suspend Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-08  9:57   ` Daniel Vetter
  2016-06-03 16:55 ` [PATCH 23/38] suspend Chris Wilson
                   ` (16 subsequent siblings)
  38 siblings, 1 reply; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 6eea4abeb9ce..454be9719daa 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -312,9 +312,14 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv,
 	unsigned long timeout = msecs_to_jiffies(timeout_ms) + 1;
 
 	while (!i915_gem_shrinker_lock(dev_priv->dev, &slu->unlock)) {
+		if (i915_gem_wait_for_idle(dev_priv) == 0 &&
+		    i915_gem_shrinker_lock(dev_priv->dev, &slu->unlock))
+			break;
+
 		schedule_timeout_killable(1);
 		if (fatal_signal_pending(current))
 			return false;
+
 		if (--timeout == 0) {
 			pr_err("Unable to lock GPU to purge memory.\n");
 			return false;
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 23/38] suspend
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (21 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 22/38] drm/gem/shrinker: Wait before acquiring struct_mutex under oom Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 24/38] drm/i915: Do a nonblocking wait first in pread/pwrite Chris Wilson
                   ` (15 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

---
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c1e91589e7bc..98aa0a7c91f0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3796,7 +3796,7 @@ i915_gem_suspend(struct drm_device *dev)
 
 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
 	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
-	flush_delayed_work(&dev_priv->gt.idle_work);
+	cancel_delayed_work_sync(&dev_priv->gt.idle_work);
 
 	mutex_lock(&dev_priv->dev->struct_mutex);
 
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 24/38] drm/i915: Do a nonblocking wait first in pread/pwrite
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (22 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 23/38] suspend Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 25/38] drm/i915: Remove (struct_mutex) locking for wait-ioctl Chris Wilson
                   ` (14 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

If we try to read or write an active request, we must first wait
upon the GPU completing that request. Let's do that without holding the
mutex (and so allow someone else to access the GPU whilst we wait). Upon
completion, we will reacquire the mutex and only then start the
operation (i.e. we do not rely on state from before dropping the mutex).
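
As a rough sketch of the resulting pattern (the helper names here are
hypothetical, only the ordering matters):

  static int op_on_object(struct drm_i915_gem_object *obj)
  {
          int ret;

          /* 1. Flush GPU activity without holding struct_mutex. */
          ret = wait_rendering_unlocked(obj);     /* hypothetical */
          if (ret)
                  return ret;

          /* 2. Only now take the mutex for the CPU side of the op. */
          ret = i915_mutex_lock_interruptible(obj->base.dev);
          if (ret)
                  return ret;

          /* 3. Do not trust state gathered before the lock: the object
           * may have become busy again, so the locked path must cope
           * (the unlocked wait is purely an optimisation).
           */
          ret = do_operation(obj);                /* hypothetical */
          mutex_unlock(&obj->base.dev->struct_mutex);
          return ret;
  }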

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 66 +++++++++++++++++++++++------------------
 1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 98aa0a7c91f0..4b17aaaa8dca 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -773,21 +773,15 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 		       args->size))
 		return -EFAULT;
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
 	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj) {
-		ret = -ENOENT;
-		goto unlock;
-	}
+	if (!obj)
+		return -ENOENT;
 
 	/* Bounds check source.  */
 	if (args->offset > obj->base.size ||
 	    args->size > obj->base.size - args->offset) {
 		ret = -EINVAL;
-		goto out;
+		goto out_unlocked;
 	}
 
 	/* prime objects have no backing filp to GEM pread/pwrite
@@ -795,17 +789,27 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 	 */
 	if (!obj->base.filp) {
 		ret = -EINVAL;
-		goto out;
+		goto out_unlocked;
 	}
 
-	trace_i915_gem_object_pread(obj, args->offset, args->size);
+	ret = __unsafe_wait_rendering(obj, to_rps_client(file), true);
+	if (ret)
+		goto out_unlocked;
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		goto out_unlocked;
 
+	trace_i915_gem_object_pread(obj, args->offset, args->size);
 	ret = i915_gem_shmem_pread(dev, obj, args, file);
 
-out:
 	i915_gem_object_put(obj);
-unlock:
 	mutex_unlock(&dev->struct_mutex);
+
+	return ret;
+
+out_unlocked:
+	i915_gem_object_put_unlocked(obj);
 	return ret;
 }
 
@@ -1127,23 +1131,15 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 			return -EFAULT;
 	}
 
-	intel_runtime_pm_get(dev_priv);
-
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		goto put_rpm;
-
 	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj) {
-		ret = -ENOENT;
-		goto unlock;
-	}
+	if (!obj)
+		return -ENOENT;
 
 	/* Bounds check destination. */
 	if (args->offset > obj->base.size ||
 	    args->size > obj->base.size - args->offset) {
 		ret = -EINVAL;
-		goto out;
+		goto out_unlocked;
 	}
 
 	/* prime objects have no backing filp to GEM pread/pwrite
@@ -1151,11 +1147,20 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	 */
 	if (!obj->base.filp) {
 		ret = -EINVAL;
-		goto out;
+		goto out_unlocked;
 	}
 
-	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
+	ret = __unsafe_wait_rendering(obj, to_rps_client(file), false);
+	if (ret)
+		goto out_unlocked;
+
+	intel_runtime_pm_get(dev_priv);
 
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		goto out_rpm;
+
+	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
 	ret = -EFAULT;
 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
 	 * it would end up going through the fenced access, and we'll get
@@ -1179,14 +1184,17 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
 	}
 
-out:
 	i915_gem_object_put(obj);
-unlock:
 	mutex_unlock(&dev->struct_mutex);
-put_rpm:
 	intel_runtime_pm_put(dev_priv);
 
 	return ret;
+
+out_rpm:
+	intel_runtime_pm_put(dev_priv);
+out_unlocked:
+	i915_gem_object_put_unlocked(obj);
+	return ret;
 }
 
 /**
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 25/38] drm/i915: Remove (struct_mutex) locking for wait-ioctl
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (23 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 24/38] drm/i915: Do a nonblocking wait first in pread/pwrite Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 26/38] drm/i915: Remove (struct_mutex) locking for busy-ioctl Chris Wilson
                   ` (13 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

With a bit of care (and leniency) we can iterate over the object and
wait for previous rendering to complete with judicious use of atomic
reference counting. The ABI requires us to ensure that an active object
is eventually flushed (like the busy-ioctl) which is guaranteed by our
management of requests (i.e. everything that is submitted to hardware is
flushed in the same request). All we have to do is ensure that we can
detect when the requests are complete for reporting when the object is
idle (without triggering ETIME) - this is handled by
__i915_wait_request.

The biggest danger in the code is walking the object without holding any
locks. We iterate over the set of last requests and carefully grab a
reference to each. (If the set is changing beneath us, that is the usual
userspace race and even with locking you get the same indeterminate
results.) If the request is unreferenced beneath us, it will be disposed
of into the request cache - so we have to carefully order the retrieval
of the request pointer with its removal, and to do this we employ RCU on
the request cache and upon the last_request pointer tracking.

The impact of this is actually quite small - the return to userspace
following the wait was already lockless. What we achieve here is
completing an already finished wait without hitting the struct_mutex,
our hold is quite short and so we are typically just a victim of
contention rather than a cause.
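
A sketch of that careful acquisition (assuming the request cache is
RCU-safe as described above; the field names follow the diffs in this
series):

  struct drm_i915_gem_request *request;

  rcu_read_lock();
  do {
          request = rcu_dereference(active->__request);
          if (!request)
                  break;          /* idle */

          /* The request may be retired and recycled at any point, so
           * only take a reference if it is still alive...
           */
          if (!kref_get_unless_zero(&request->fence.refcount))
                  continue;       /* died beneath us, reload */

          /* ...and then re-check that it is still the request being
           * tracked; if not, it was reused for someone else.
           */
          if (request == rcu_dereference(active->__request))
                  break;

          i915_gem_request_put(request);
  } while (1);
  rcu_read_unlock();
  /* on exit: request is NULL, or we hold a reference to it */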

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 42 +++++++++++------------------------------
 1 file changed, 11 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4b17aaaa8dca..4af64d864587 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2358,46 +2358,26 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
 	struct drm_i915_gem_wait *args = data;
 	struct drm_i915_gem_object *obj;
-	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
-	int i, n = 0;
-	int ret;
+	unsigned long active;
+	int idx, ret = 0;
 
 	if (args->flags != 0)
 		return -EINVAL;
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
 	obj = i915_gem_object_lookup(file, args->bo_handle);
-	if (!obj) {
-		mutex_unlock(&dev->struct_mutex);
+	if (!obj)
 		return -ENOENT;
-	}
-
-	if (!i915_gem_object_is_active(obj))
-		goto out;
 
-	for (i = 0; i < I915_NUM_ENGINES; i++) {
-		struct drm_i915_gem_request *req;
-
-		req = i915_gem_active_get(&obj->last_read[i],
-					  &obj->base.dev->struct_mutex);
-		if (req)
-			requests[n++] = req;
+	active = __I915_BO_ACTIVE(obj);
+	for_each_active(active, idx) {
+		ret = i915_gem_active_wait_unlocked(&obj->last_read[idx], true,
+						    args->timeout_ns >= 0 ? &args->timeout_ns : NULL,
+						    to_rps_client(file));
+		if (ret)
+			break;
 	}
 
-out:
-	i915_gem_object_put(obj);
-	mutex_unlock(&dev->struct_mutex);
-
-	for (i = 0; i < n; i++) {
-		if (ret == 0)
-			ret = __i915_wait_request(requests[i], true,
-						  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
-						  to_rps_client(file));
-		i915_gem_request_put(requests[i]);
-	}
+	i915_gem_object_put_unlocked(obj);
 	return ret;
 }
 
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 26/38] drm/i915: Remove (struct_mutex) locking for busy-ioctl
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (24 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 25/38] drm/i915: Remove (struct_mutex) locking for wait-ioctl Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 27/38] drm/i915: Reduce locking inside swfinish ioctl Chris Wilson
                   ` (12 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx; +Cc: Akash Goel

By applying the same logic as for wait-ioctl, we can query whether a
request has completed without holding struct_mutex. The biggest impact
system-wide is removing flush_active and the contention it causes.
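
For reference, a sketch of how userspace might decode the result under
this ABI (matching __busy_read_flag()/__busy_write_flag() below; fd and
handle are assumed to exist):

  struct drm_i915_gem_busy busy = { .handle = handle };

  if (ioctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy) == 0) {
          if (busy.busy == 0) {
                  /* idle: safe to read or write immediately */
          } else {
                  /* low 16 bits: exec_id of the engine writing */
                  unsigned int write_id = busy.busy & 0xffff;
                  /* high bits: bitmask of engines reading */
                  unsigned int read_mask = busy.busy >> 16;
          }
  }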

Testcase: igt/gem_busy
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 115 +++++++++++++++++++++++++++++-----------
 1 file changed, 85 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4af64d864587..a4f949038d50 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3463,49 +3463,104 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
 	i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
 }
 
+static __always_inline unsigned
+__busy_read_flag(const struct drm_i915_gem_request *request)
+{
+	return 0x10000 << request->engine->exec_id;
+}
+
+static __always_inline unsigned
+__busy_write_flag(const struct drm_i915_gem_request *request)
+{
+	return request->engine->exec_id;
+}
+
+static __always_inline unsigned
+__busy_flag(const struct i915_gem_active *active,
+	    unsigned (*flag)(const struct drm_i915_gem_request *))
+{
+	do {
+		struct drm_i915_gem_request *request;
+		unsigned busy;
+
+		request = rcu_dereference(active->__request);
+		if (!request || i915_gem_request_completed(request))
+			return 0;
+
+		busy = flag(request);
+		if (request == rcu_dereference(active->__request))
+			return busy;
+	} while (1);
+}
+
+static inline unsigned
+busy_read_flag(const struct i915_gem_active *active)
+{
+	return __busy_flag(active, __busy_read_flag);
+}
+
+static inline unsigned
+busy_write_flag(const struct i915_gem_active *active)
+{
+	return __busy_flag(active, __busy_write_flag);
+}
+
 int
 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 		    struct drm_file *file)
 {
 	struct drm_i915_gem_busy *args = data;
 	struct drm_i915_gem_object *obj;
-	int ret;
-
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
+	unsigned long active;
 
 	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj) {
-		ret = -ENOENT;
-		goto unlock;
-	}
+	if (!obj)
+		return -ENOENT;
 
-	/* Count all active objects as busy, even if they are currently not used
-	 * by the gpu. Users of this interface expect objects to eventually
-	 * become non-busy without any further actions.
-	 */
 	args->busy = 0;
-	if (i915_gem_object_is_active(obj)) {
-		struct drm_i915_gem_request *req;
-		int i;
+	active = __I915_BO_ACTIVE(obj);
+	if (active) {
+		int idx;
 
-		for (i = 0; i < I915_NUM_ENGINES; i++) {
-			req = i915_gem_active_peek(&obj->last_read[i],
-						   &obj->base.dev->struct_mutex);
-			if (req)
-				args->busy |= 1 << (16 + req->engine->exec_id);
-		}
-		req = i915_gem_active_peek(&obj->last_write,
-					   &obj->base.dev->struct_mutex);
-		if (req)
-			args->busy |= req->engine->exec_id;
+		/* Yes, the lookups are intentionally racy.
+		 *
+		 * Even though we guard the pointer lookup by RCU, that only
+		 * guarantees that the pointer and its contents remain
+		 * dereferenceable and does *not* mean that the request we
+		 * have is the same as the one being tracked by the object.
+		 *
+		 * Consider that we lookup the request just as it is being
+		 * retired and freed. We take a local copy of the pointer,
+		 * but before we add its engine into the busy set, the other
+		 * thread reallocates it and assigns it to a task on another
+		 * engine with a fresh and incomplete seqno.
+		 *
+		 * So after we lookup the engine's id, we double check that
+		 * the active request is the same and only then do we add it
+		 * into the busy set.
+		 */
+		rcu_read_lock();
+
+		for_each_active(active, idx)
+			args->busy |= busy_read_flag(&obj->last_read[idx]);
+
+		/* For ABI sanity, we only care that the write engine is in
+		 * the set of read engines. This is ensured by the ordering
+		 * of setting last_read/last_write in i915_vma_move_to_active,
+		 * and then in reverse in retire.
+		 *
+		 * We don't care that the set of active read/write engines
+		 * may change during construction of the result, as it is
+		 * equally liable to change before userspace can inspect
+		 * the result.
+		 */
+		args->busy |= busy_write_flag(&obj->last_write);
+
+		rcu_read_unlock();
 	}
 
-	i915_gem_object_put(obj);
-unlock:
-	mutex_unlock(&dev->struct_mutex);
-	return ret;
+	i915_gem_object_put_unlocked(obj);
+	return 0;
 }
 
 int
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 27/38] drm/i915: Reduce locking inside swfinish ioctl
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (25 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 26/38] drm/i915: Remove (struct_mutex) locking for busy-ioctl Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-08  9:59   ` Daniel Vetter
  2016-06-03 16:55 ` [PATCH 28/38] drm/i915: Remove pinned check from madvise ioctl Chris Wilson
                   ` (11 subsequent siblings)
  38 siblings, 1 reply; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

We only need to take the struct_mutex if the object is pinned to the
display engine and so requires checking for clflush. (The race with
userspace pinning the object to a framebuffer is irrelevant.)

v2: Use ACCESS_ONCE() for compiler hints (or not, as it is a bitfield)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a4f949038d50..b78f9df1894c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1265,25 +1265,28 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_sw_finish *args = data;
 	struct drm_i915_gem_object *obj;
-	int ret = 0;
-
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
+	int ret;
 
 	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj) {
-		ret = -ENOENT;
-		goto unlock;
-	}
+	if (!obj)
+		return -ENOENT;
 
 	/* Pinned buffers may be scanout, so flush the cache */
-	if (obj->pin_display)
+	if (obj->pin_display) {
+		ret = i915_mutex_lock_interruptible(dev);
+		if (ret)
+			goto unref;
+
 		i915_gem_object_flush_cpu_write_domain(obj);
 
-	i915_gem_object_put(obj);
-unlock:
-	mutex_unlock(&dev->struct_mutex);
+		i915_gem_object_put(obj);
+		mutex_unlock(&dev->struct_mutex);
+	} else {
+		ret = 0;
+unref:
+		i915_gem_object_put_unlocked(obj);
+	}
+
 	return ret;
 }
 
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 28/38] drm/i915: Remove pinned check from madvise ioctl
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (26 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 27/38] drm/i915: Reduce locking inside swfinish ioctl Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-08 10:01   ` Daniel Vetter
  2016-06-03 16:55 ` [PATCH 29/38] drm/i915: Remove locking for get_tiling Chris Wilson
                   ` (10 subsequent siblings)
  38 siblings, 1 reply; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

We don't need to incur the overhead of checking whether the object is
pinned prior to changing its madvise. If the object is pinned, the
madvise will not take effect until it is unpinned and so we cannot free
the pages being pointed at by hardware. Marking a pinned object with
allocated pages as DONTNEED will not trigger any undue warnings. The check
is therefore superfluous, and by removing it we can remove a linear walk
over all the vma the object has.
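
For illustration, marking a buffer purgeable from userspace remains as
simple as (fd and handle assumed to exist):

  struct drm_i915_gem_madvise madv = {
          .handle = handle,
          .madv = I915_MADV_DONTNEED,
  };

  if (ioctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0 &&
      !madv.retained) {
          /* pages were already purged: contents are lost and the
           * buffer must be reinitialised before reuse */
  }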

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b78f9df1894c..dad00800aeef 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3600,11 +3600,6 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 		goto unlock;
 	}
 
-	if (i915_gem_obj_is_pinned(obj)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
 	if (obj->pages &&
 	    obj->tiling_mode != I915_TILING_NONE &&
 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
@@ -3623,7 +3618,6 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 
 	args->retained = obj->madv != __I915_MADV_PURGED;
 
-out:
 	i915_gem_object_put(obj);
 unlock:
 	mutex_unlock(&dev->struct_mutex);
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 29/38] drm/i915: Remove locking for get_tiling
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (27 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 28/38] drm/i915: Remove pinned check from madvise ioctl Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-08 10:02   ` Daniel Vetter
  2016-06-03 16:55 ` [PATCH 30/38] drm/i915: Assert that the request hasn't been retired Chris Wilson
                   ` (9 subsequent siblings)
  38 siblings, 1 reply; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

Since we are not concerned with userspace racing itself with set-tiling
(the order is indeterminate even if we take a lock), we can safely
read back the single obj->tiling_mode and do the static lookup of
swizzle mode without having to take a lock.

get-tiling is reasonably frequent due to the back-channel passing around
of tiling parameters in DRI2/DRI3.
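
A sketch of that back-channel query from userspace's side (fd and
handle assumed to exist), which after this patch completes without ever
touching struct_mutex:

  struct drm_i915_gem_get_tiling tiling = { .handle = handle };

  if (ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling) == 0) {
          /* tiling.tiling_mode is a single read of obj->tiling_mode,
           * and tiling.swizzle_mode a static per-device lookup, so no
           * lock is needed for a consistent snapshot.
           */
  }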

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_tiling.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 326de7eae101..d6acd0a27c06 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -302,10 +302,8 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
 	if (!obj)
 		return -ENOENT;
 
-	mutex_lock(&dev->struct_mutex);
-
 	args->tiling_mode = obj->tiling_mode;
-	switch (obj->tiling_mode) {
+	switch (args->tiling_mode) {
 	case I915_TILING_X:
 		args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
 		break;
@@ -329,8 +327,6 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
 	if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
 		args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
 
-	i915_gem_object_put(obj);
-	mutex_unlock(&dev->struct_mutex);
-
+	i915_gem_object_put_unlocked(obj);
 	return 0;
 }
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 30/38] drm/i915: Assert that the request hasn't been retired
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (28 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 29/38] drm/i915: Remove locking for get_tiling Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 31/38] drm/i915: Reduce amount of duplicate buffer information captured on error Chris Wilson
                   ` (8 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

Now that no callers play tricks with dropping the struct_mutex between
waiting and retiring, we can assert that the request is ready to be
retired.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 016edc6f2d0b..8029b37c9eee 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -319,7 +319,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	struct i915_gem_active *active, *next;
 
 	trace_i915_gem_request_retire(request);
-	list_del_init(&request->link);
+	list_del(&request->link);
 
 	/* We know the GPU must have read the request to have
 	 * sent us the seqno + interrupt, so use the position
@@ -367,8 +367,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
 	struct drm_i915_gem_request *tmp;
 
 	lockdep_assert_held(&req->i915->dev->struct_mutex);
-	if (list_empty(&req->link))
-		return;
+	GEM_BUG_ON(list_empty(&req->link));
 
 	do {
 		tmp = list_first_entry(&engine->request_list,
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 31/38] drm/i915: Reduce amount of duplicate buffer information captured on error
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (29 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 30/38] drm/i915: Assert that the request hasn't been retired Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 32/38] drm/i915: Stop the machine whilst capturing the GPU crash dump Chris Wilson
                   ` (7 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

When capturing the error state, we do not need to know about every
address space - just those that are related to the error. We know which
context is active at the time, and therefore which VMs are implicated
in the error. We can then restrict the VMs we report to the
relevant subset.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h       |   9 +-
 drivers/gpu/drm/i915/i915_gpu_error.c | 198 ++++++++++++++--------------------
 2 files changed, 87 insertions(+), 120 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e72b7f35a98e..dbd3c6f3abbc 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -506,6 +506,7 @@ struct drm_i915_error_state {
 		int num_waiters;
 		int hangcheck_score;
 		enum intel_engine_hangcheck_action hangcheck_action;
+		struct i915_address_space *vm;
 		int num_requests;
 
 		/* our own tracking of ring head and tail */
@@ -575,17 +576,15 @@ struct drm_i915_error_state {
 		u32 read_domains;
 		u32 write_domain;
 		s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
-		s32 pinned:2;
 		u32 tiling:2;
 		u32 dirty:1;
 		u32 purgeable:1;
 		u32 userptr:1;
 		s32 ring:4;
 		u32 cache_level:3;
-	} **active_bo, **pinned_bo;
-
-	u32 *active_bo_count, *pinned_bo_count;
-	u32 vm_count;
+	} *active_bo[I915_NUM_ENGINES], *pinned_bo;
+	u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count;
+	struct i915_address_space *active_vm[I915_NUM_ENGINES];
 };
 
 struct intel_connector;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index c2cf5bd57db5..f01f0ca4bb86 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -42,16 +42,6 @@ static const char *ring_str(int ring)
 	}
 }
 
-static const char *pin_flag(int pinned)
-{
-	if (pinned > 0)
-		return " P";
-	else if (pinned < 0)
-		return " p";
-	else
-		return "";
-}
-
 static const char *tiling_flag(int tiling)
 {
 	switch (tiling) {
@@ -189,7 +179,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 {
 	int i;
 
-	err_printf(m, "  %s [%d]:\n", name, count);
+	err_printf(m, "%s [%d]:\n", name, count);
 
 	while (count--) {
 		err_printf(m, "    %08x_%08x %8u %02x %02x [ ",
@@ -202,7 +192,6 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 			err_printf(m, "%02x ", err->rseqno[i]);
 
 		err_printf(m, "] %02x", err->wseqno);
-		err_puts(m, pin_flag(err->pinned));
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
@@ -417,18 +406,25 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	for (i = 0; i < ARRAY_SIZE(error->ring); i++)
 		i915_ring_error_state(m, dev, error, i);
 
-	for (i = 0; i < error->vm_count; i++) {
-		err_printf(m, "vm[%d]\n", i);
+	for (i = 0; i < I915_NUM_ENGINES; i++) {
+		if (error->active_vm[i] == NULL)
+			break;
 
-		print_error_buffers(m, "Active",
+		err_printf(m, "Active vm[%d]\n", i);
+		for (j = 0; j < I915_NUM_ENGINES; j++) {
+			if (error->ring[j].vm == error->active_vm[i])
+				err_printf(m, "    %s\n",
+					   dev_priv->engine[j].name);
+		}
+		print_error_buffers(m, "  Buffers",
 				    error->active_bo[i],
 				    error->active_bo_count[i]);
-
-		print_error_buffers(m, "Pinned",
-				    error->pinned_bo[i],
-				    error->pinned_bo_count[i]);
 	}
 
+	print_error_buffers(m, "Pinned (global)",
+			    error->pinned_bo,
+			    error->pinned_bo_count);
+
 	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
 		obj = error->ring[i].batchbuffer;
 		if (obj) {
@@ -624,13 +620,10 @@ static void i915_error_state_free(struct kref *error_ref)
 
 	i915_error_object_free(error->semaphore_obj);
 
-	for (i = 0; i < error->vm_count; i++)
+	for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
 		kfree(error->active_bo[i]);
-
-	kfree(error->active_bo);
-	kfree(error->active_bo_count);
 	kfree(error->pinned_bo);
-	kfree(error->pinned_bo_count);
+
 	kfree(error->overlay);
 	kfree(error->display);
 	kfree(error);
@@ -776,9 +769,6 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->read_domains = obj->base.read_domains;
 	err->write_domain = obj->base.write_domain;
 	err->fence_reg = obj->fence_reg;
-	err->pinned = 0;
-	if (i915_gem_obj_is_pinned(obj))
-		err->pinned = 1;
 	err->tiling = obj->tiling_mode;
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
@@ -786,13 +776,17 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->cache_level = obj->cache_level;
 }
 
-static u32 capture_active_bo(struct drm_i915_error_buffer *err,
-			     int count, struct list_head *head)
+static u32 capture_error_bo(struct drm_i915_error_buffer *err,
+			    int count, struct list_head *head,
+			    bool pinned_only)
 {
 	struct i915_vma *vma;
 	int i = 0;
 
 	list_for_each_entry(vma, head, vm_link) {
+		if (pinned_only && !i915_vma_is_pinned(vma))
+			continue;
+
 		capture_bo(err++, vma);
 		if (++i == count)
 			break;
@@ -801,28 +795,6 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
 	return i;
 }
 
-static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
-			     int count, struct list_head *head,
-			     struct i915_address_space *vm)
-{
-	struct drm_i915_gem_object *obj;
-	struct drm_i915_error_buffer * const first = err;
-	struct drm_i915_error_buffer * const last = err + count;
-
-	list_for_each_entry(obj, head, global_list) {
-		struct i915_vma *vma;
-
-		if (err == last)
-			break;
-
-		list_for_each_entry(vma, &obj->vma_list, obj_link)
-			if (vma->vm == vm && i915_vma_is_pinned(vma))
-				capture_bo(err++, vma);
-	}
-
-	return err - first;
-}
-
 /* Generate a semi-unique error code. The code is not meant to have meaning, The
  * code's only purpose is to try to prevent false duplicated bug reports by
  * grossly estimating a GPU error state.
@@ -1063,7 +1035,6 @@ static void i915_record_ring_state(struct drm_i915_private *dev_priv,
 	}
 }
 
-
 static void i915_gem_record_active_context(struct intel_engine_cs *engine,
 					   struct drm_i915_error_state *error,
 					   struct drm_i915_error_ring *ering)
@@ -1111,9 +1082,10 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 			struct i915_address_space *vm;
 			struct intel_ring *ring;
 
-			vm = request->ctx && request->ctx->ppgtt ?
+			vm = request->ctx->ppgtt ?
 				&request->ctx->ppgtt->base :
 				&ggtt->base;
+			error->ring[i].vm = vm;
 
 			/* We need to copy these to an anonymous buffer
 			 * as the simplest method to avoid being overwritten
@@ -1205,89 +1177,83 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 	}
 }
 
-/* FIXME: Since pin count/bound list is global, we duplicate what we capture per
- * VM.
- */
 static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
 				struct drm_i915_error_state *error,
 				struct i915_address_space *vm,
 				const int ndx)
 {
-	struct drm_i915_error_buffer *active_bo = NULL, *pinned_bo = NULL;
-	struct drm_i915_gem_object *obj;
+	struct drm_i915_error_buffer *active_bo;
 	struct i915_vma *vma;
 	int i;
 
 	i = 0;
 	list_for_each_entry(vma, &vm->active_list, vm_link)
 		i++;
-	error->active_bo_count[ndx] = i;
-
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		list_for_each_entry(vma, &obj->vma_list, obj_link)
-			if (vma->vm == vm && i915_vma_is_pinned(vma))
-				i++;
-	}
-	error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
 
-	if (i) {
+	active_bo = NULL;
+	if (i)
 		active_bo = kcalloc(i, sizeof(*active_bo), GFP_ATOMIC);
-		if (active_bo)
-			pinned_bo = active_bo + error->active_bo_count[ndx];
-	}
-
 	if (active_bo)
-		error->active_bo_count[ndx] =
-			capture_active_bo(active_bo,
-					  error->active_bo_count[ndx],
-					  &vm->active_list);
-
-	if (pinned_bo)
-		error->pinned_bo_count[ndx] =
-			capture_pinned_bo(pinned_bo,
-					  error->pinned_bo_count[ndx],
-					  &dev_priv->mm.bound_list, vm);
+		i = capture_error_bo(active_bo, i, &vm->active_list, false);
+	else
+		i = 0;
+
 	error->active_bo[ndx] = active_bo;
-	error->pinned_bo[ndx] = pinned_bo;
+	error->active_bo_count[ndx] = i;
+	error->active_vm[ndx] = vm;
 }
 
-static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
-				     struct drm_i915_error_state *error)
+static void i915_capture_active_buffers(struct drm_i915_private *dev_priv,
+					struct drm_i915_error_state *error)
 {
-	struct i915_address_space *vm;
-	int cnt = 0, i = 0;
-
-	list_for_each_entry(vm, &dev_priv->vm_list, global_link)
-		cnt++;
-
-	error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC);
-	error->pinned_bo = kcalloc(cnt, sizeof(*error->pinned_bo), GFP_ATOMIC);
-	error->active_bo_count = kcalloc(cnt, sizeof(*error->active_bo_count),
-					 GFP_ATOMIC);
-	error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count),
-					 GFP_ATOMIC);
-
-	if (error->active_bo == NULL ||
-	    error->pinned_bo == NULL ||
-	    error->active_bo_count == NULL ||
-	    error->pinned_bo_count == NULL) {
-		kfree(error->active_bo);
-		kfree(error->active_bo_count);
-		kfree(error->pinned_bo);
-		kfree(error->pinned_bo_count);
-
-		error->active_bo = NULL;
-		error->active_bo_count = NULL;
-		error->pinned_bo = NULL;
-		error->pinned_bo_count = NULL;
-	} else {
-		list_for_each_entry(vm, &dev_priv->vm_list, global_link)
-			i915_gem_capture_vm(dev_priv, error, vm, i++);
+	int cnt = 0, i, j;
+
+	BUILD_BUG_ON(ARRAY_SIZE(error->ring) > ARRAY_SIZE(error->active_bo));
+	BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm));
+	BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count));
+
+	for (i = 0; i < I915_NUM_ENGINES; i++) {
+		if (error->ring[i].vm == NULL)
+			continue;
+
+		for (j = 0; j < i; j++)
+			if (error->ring[j].vm == error->ring[i].vm)
+				break;
+		if (j != i)
+			continue;
 
-		error->vm_count = cnt;
+		i915_gem_capture_vm(dev_priv, error, error->ring[i].vm, cnt++);
 	}
 }
 
+static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv,
+					struct drm_i915_error_state *error)
+{
+	struct i915_address_space *vm = &dev_priv->ggtt.base;
+	struct drm_i915_error_buffer *bo;
+	struct i915_vma *vma;
+	int i, j;
+
+	i = 0;
+	list_for_each_entry(vma, &vm->active_list, vm_link)
+		i++;
+
+	j = 0;
+	list_for_each_entry(vma, &vm->inactive_list, vm_link)
+		j++;
+
+	bo = NULL;
+	if (i + j)
+		bo = kcalloc(i + j, sizeof(*bo), GFP_ATOMIC);
+	if (bo == NULL)
+		return;
+
+	i = capture_error_bo(bo, i, &vm->active_list, true);
+	j = capture_error_bo(bo + i, j, &vm->inactive_list, true);
+	error->pinned_bo_count = i + j;
+	error->pinned_bo = bo;
+}
+
 /* Capture all registers which don't fit into another category. */
 static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
 				   struct drm_i915_error_state *error)
@@ -1431,10 +1397,12 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
 
 	i915_capture_gen_state(dev_priv, error);
 	i915_capture_reg_state(dev_priv, error);
-	i915_gem_capture_buffers(dev_priv, error);
 	i915_gem_record_fences(dev_priv, error);
 	i915_gem_record_rings(dev_priv, error);
 
+	i915_capture_active_buffers(dev_priv, error);
+	i915_capture_pinned_buffers(dev_priv, error);
+
 	do_gettimeofday(&error->time);
 
 	error->overlay = intel_overlay_capture_error_state(dev_priv);
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 32/38] drm/i915: Stop the machine whilst capturing the GPU crash dump
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (30 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 31/38] drm/i915: Reduce amount of duplicate buffer information captured on error Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-08 10:06   ` Daniel Vetter
  2016-06-03 16:55 ` [PATCH 33/38] drm/i915: Scan GGTT active list for context object Chris Wilson
                   ` (6 subsequent siblings)
  38 siblings, 1 reply; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

The error state is purposefully racy as we expect it to be called at any
time and so have avoided any locking whilst capturing the crash dump.
However, with multi-engine GPUs and multiple CPUs, those races can
manifest into OOPSes as we attempt to chase dangling pointers freed on
other CPUs. Under discussion are lots of ways to slow down normal
operation in order to protect the post-mortem error capture, but what it
we take the opposite approach and freeze the machine whilst the error
capture runs (note the GPU may still running, but as long as we don't
process any of the results the driver's bookkeeping will be static).

Note that by of itself, this is not a complete fix. It also depends on
the compiler barriers in list_add/list_del to prevent traversing the
lists into the void.

v2: Avoid drm_clflush_pages() inside stop_machine() as it may use
stop_machine() itself for its wbinvd fallback.
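
A minimal sketch of the stop_machine() idiom relied upon here (the
callback name and body are illustrative; capture() in the diff below is
the real one):

  #include <linux/stop_machine.h>

  static int capture_cb(void *data)
  {
          struct drm_i915_error_state *error = data;

          /* Every other CPU is spinning with interrupts disabled, so
           * the driver's lists cannot change while we walk them here.
           */
          record_error_state(error);      /* hypothetical */
          return 0;
  }

  /* at the capture site: quiesce all CPUs for the duration */
  stop_machine(capture_cb, error, NULL);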

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Kconfig          |  1 +
 drivers/gpu/drm/i915/i915_drv.h       |  2 ++
 drivers/gpu/drm/i915/i915_gpu_error.c | 48 +++++++++++++++++++++--------------
 3 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 29a32b11953b..9398a4d06c0e 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -4,6 +4,7 @@ config DRM_I915
 	depends on X86 && PCI
 	select INTEL_GTT
 	select INTERVAL_TREE
+	select STOP_MACHINE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dbd3c6f3abbc..77564f378771 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -470,6 +470,8 @@ struct drm_i915_error_state {
 	struct kref ref;
 	struct timeval time;
 
+	struct drm_i915_private *i915;
+
 	char error_msg[128];
 	bool simulated;
 	int iommu;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f01f0ca4bb86..ab2ba76a2a3b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -28,6 +28,7 @@
  */
 
 #include <generated/utsrelease.h>
+#include <linux/stop_machine.h>
 #include "i915_drv.h"
 
 static const char *ring_str(int ring)
@@ -682,14 +683,12 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 
 	dst->page_count = num_pages;
 	while (num_pages--) {
-		unsigned long flags;
 		void *d;
 
 		d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
 		if (d == NULL)
 			goto unwind;
 
-		local_irq_save(flags);
 		if (use_ggtt) {
 			void __iomem *s;
 
@@ -708,15 +707,10 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 
 			page = i915_gem_object_get_page(src, i);
 
-			drm_clflush_pages(&page, 1);
-
 			s = kmap_atomic(page);
 			memcpy(d, s, PAGE_SIZE);
 			kunmap_atomic(s);
-
-			drm_clflush_pages(&page, 1);
 		}
-		local_irq_restore(flags);
 
 		dst->pages[i++] = d;
 		reloc_offset += PAGE_SIZE;
@@ -1366,6 +1360,32 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
 	error->suspend_count = dev_priv->suspend_count;
 }
 
+static int capture(void *data)
+{
+	struct drm_i915_error_state *error = data;
+
+	/* Ensure that what we readback from memory matches what the GPU sees */
+	wbinvd();
+
+	i915_capture_gen_state(error->i915, error);
+	i915_capture_reg_state(error->i915, error);
+	i915_gem_record_fences(error->i915, error);
+	i915_gem_record_rings(error->i915, error);
+
+	i915_capture_active_buffers(error->i915, error);
+	i915_capture_pinned_buffers(error->i915, error);
+
+	do_gettimeofday(&error->time);
+
+	error->overlay = intel_overlay_capture_error_state(error->i915);
+	error->display = intel_display_capture_error_state(error->i915);
+
+	/* And make sure we don't leave trash in the CPU cache */
+	wbinvd();
+
+	return 0;
+}
+
 /**
  * i915_capture_error_state - capture an error record for later analysis
  * @dev: drm device
@@ -1394,19 +1414,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
 	}
 
 	kref_init(&error->ref);
+	error->i915 = dev_priv;
 
-	i915_capture_gen_state(dev_priv, error);
-	i915_capture_reg_state(dev_priv, error);
-	i915_gem_record_fences(dev_priv, error);
-	i915_gem_record_rings(dev_priv, error);
-
-	i915_capture_active_buffers(dev_priv, error);
-	i915_capture_pinned_buffers(dev_priv, error);
-
-	do_gettimeofday(&error->time);
-
-	error->overlay = intel_overlay_capture_error_state(dev_priv);
-	error->display = intel_display_capture_error_state(dev_priv);
+	stop_machine(capture, error, NULL);
 
 	i915_error_capture_msg(dev_priv, error, engine_mask, error_msg);
 	DRM_INFO("%s\n", error->error_msg);
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 33/38] drm/i915: Scan GGTT active list for context object
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (31 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 32/38] drm/i915: Stop the machine whilst capturing the GPU crash dump Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 34/38] drm/i915: Move setting of request->batch into its single callsite Chris Wilson
                   ` (5 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index ab2ba76a2a3b..367b8b2ce5f2 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1034,18 +1034,17 @@ static void i915_gem_record_active_context(struct intel_engine_cs *engine,
 					   struct drm_i915_error_ring *ering)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
 
 	/* Currently render ring is the only HW context user */
 	if (engine->id != RCS || !error->ccid)
 		return;
 
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		if (!i915_gem_obj_ggtt_bound(obj))
-			continue;
-
-		if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) {
-			ering->ctx = i915_error_ggtt_object_create(dev_priv, obj);
+	list_for_each_entry(vma, &dev_priv->ggtt.base.active_list, vm_link) {
+		if ((error->ccid & PAGE_MASK) == vma->node.start) {
+			ering->ctx = i915_error_object_create(dev_priv,
+							      vma->obj,
+							      vma->vm);
 			break;
 		}
 	}
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 34/38] drm/i915: Move setting of request->batch into its single callsite
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (32 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 33/38] drm/i915: Scan GGTT active list for context object Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 35/38] drm/i915: Mark unmappable GGTT entries as PIN_HIGH Chris Wilson
                   ` (4 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

request->batch_obj is only set by execbuffer for the convenience of
debugging hangs. By moving that operation to the callsite, we can
simplify all other callers and future patches. We also move the
complications of reference handling of the request->batch_obj next to
where the active tracking is set up for the request.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +++++++++-
 drivers/gpu/drm/i915/i915_gem_request.c    | 12 +-----------
 drivers/gpu/drm/i915/i915_gem_request.h    |  8 +++-----
 3 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 224265619f00..b89e9d2b33c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1656,6 +1656,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		goto err_batch_unpin;
 	}
 
+	/* Whilst this request exists, batch_obj will be on the
+	 * active_list, and so will hold the active reference. Only when this
+	 * request is retired will the batch_obj be moved onto the
+	 * inactive_list and lose its active reference. Hence we do not need
+	 * to explicitly hold another reference here.
+	 */
+	params->request->batch_obj = params->batch_vma->obj;
+
 	ret = i915_gem_request_add_to_client(params->request, file);
 	if (ret)
 		goto err_request;
@@ -1674,7 +1682,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	ret = execbuf_submit(params, args, &eb->vmas);
 err_request:
-	__i915_add_request(params->request, params->batch_vma->obj, ret == 0);
+	__i915_add_request(params->request, ret == 0);
 
 err_batch_unpin:
 	/*
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 8029b37c9eee..8101d9169027 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -402,9 +402,7 @@ static void i915_gem_mark_busy(struct drm_i915_private *dev_priv,
  * request is not being tracked for completion but the work itself is
  * going to happen on the hardware. This would be a Bad Thing(tm).
  */
-void __i915_add_request(struct drm_i915_gem_request *request,
-			struct drm_i915_gem_object *obj,
-			bool flush_caches)
+void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 {
 	struct intel_engine_cs *engine;
 	struct intel_ring *ring;
@@ -447,14 +445,6 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	trace_i915_gem_request_add(request);
 	request->head = request_start;
 
-	/* Whilst this request exists, batch_obj will be on the
-	 * active_list, and so will hold the active reference. Only when this
-	 * request is retired will the the batch_obj be moved onto the
-	 * inactive_list and lose its active reference. Hence we do not need
-	 * to explicitly hold another reference here.
-	 */
-	request->batch_obj = obj;
-
 	/* Seal the request and mark it as pending execution. Note that
 	 * we may inspect this state, without holding any locks, during
 	 * hangcheck. Hence we apply the barrier to ensure that we do not
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 8d1225999fae..87e055267904 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -213,13 +213,11 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
 	*pdst = src;
 }
 
-void __i915_add_request(struct drm_i915_gem_request *req,
-			struct drm_i915_gem_object *batch_obj,
-			bool flush_caches);
+void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 #define i915_add_request(req) \
-	__i915_add_request(req, NULL, true)
+	__i915_add_request(req, true)
 #define i915_add_request_no_flush(req) \
-	__i915_add_request(req, NULL, false)
+	__i915_add_request(req, false)
 
 struct intel_rps_client;
 #define NO_WAITBOOST ERR_PTR(-1)
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 35/38] drm/i915: Mark unmappable GGTT entries as PIN_HIGH
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (33 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 34/38] drm/i915: Move setting of request->batch into its single callsite Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 36/38] drm/i915: Track pinned vma inside guc Chris Wilson
                   ` (3 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

We allocate a few objects into the GGTT that we never need to access via
the mappable aperture (such as contexts and status pages). We can request
that these be bound high in the VM to increase the amount of mappable
aperture available. However, for anything that may be frequently pinned
(such as logical contexts), we still want to use the fast search & insert.
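
A minimal sketch of a caller asking for a high binding (assuming the
i915_gem_object_ggtt_pin() signature at this point in the series, where
it still returns an errno rather than the VMA):

	/* Bind at the top of the GGTT: the object is only read by the
	 * GPU, so keep the low, CPU-mappable range free for others.
	 */
	ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, PIN_HIGH);
	if (ret)
		return ret;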

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index f172ac6a06dc..c8211913f2d6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2008,7 +2008,7 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
 
 	if (ce->state) {
 		ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0,
-					       ctx->ggtt_alignment, 0);
+					       ctx->ggtt_alignment, PIN_HIGH);
 		if (ret)
 			goto error;
 	}
@@ -2578,7 +2578,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 			} else {
 				i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 				ret = i915_gem_object_ggtt_pin(obj, NULL,
-							       0, 0, 0);
+							       0, 0,
+							       PIN_HIGH);
 				if (ret != 0) {
 					i915_gem_object_put(obj);
 					DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 36/38] drm/i915: Track pinned vma inside guc
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (34 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 35/38] drm/i915: Mark unmappable GGTT entries as PIN_HIGH Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-03 16:55 ` [PATCH 37/38] drm/i915: Track pinned VMA Chris Wilson
                   ` (2 subsequent siblings)
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

Since the guc allocates and pins an object into the GGTT for its usage,
it is more natural to use that pinned VMA as our resource cookie.
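
As a rough sketch of the resulting lifecycle (guc_alloc_example() is a
made-up name for illustration; guc_allocate_vma() and guc_release_vma()
are the helpers introduced below):

	static u64 guc_alloc_example(struct intel_guc *guc, u32 size)
	{
		struct i915_vma *vma;
		u64 offset;

		/* Create the object and pin it into the GGTT in one step;
		 * the returned VMA is the cookie we hold from now on.
		 */
		vma = guc_allocate_vma(guc, size);
		if (IS_ERR(vma))
			return 0;

		/* The GGTT address is read straight off the VMA. */
		offset = vma->node.start;

		/* Unpin the binding and drop the object reference. */
		guc_release_vma(vma);
		return offset;
	}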

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  10 +--
 drivers/gpu/drm/i915/i915_guc_submission.c | 131 ++++++++++++++---------------
 drivers/gpu/drm/i915/intel_guc.h           |   9 +-
 drivers/gpu/drm/i915/intel_guc_loader.c    |   7 +-
 4 files changed, 73 insertions(+), 84 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 9154919fdd56..485fc23893d6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2601,15 +2601,15 @@ static int i915_guc_log_dump(struct seq_file *m, void *data)
 	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *log_obj = dev_priv->guc.log_obj;
-	u32 *log;
+	struct drm_i915_gem_object *obj;
 	int i = 0, pg;
 
-	if (!log_obj)
+	if (dev_priv->guc.log == NULL)
 		return 0;
 
-	for (pg = 0; pg < log_obj->base.size / PAGE_SIZE; pg++) {
-		log = kmap_atomic(i915_gem_object_get_page(log_obj, pg));
+	obj = dev_priv->guc.log->obj;
+	for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
+		u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
 
 		for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
 			seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 63ef34c78494..1c92c4c6b0e1 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -357,8 +357,8 @@ static void guc_init_proc_desc(struct intel_guc *guc,
 static void guc_init_ctx_desc(struct intel_guc *guc,
 			      struct i915_guc_client *client)
 {
-	struct drm_i915_gem_object *client_obj = client->client_obj;
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct drm_i915_gem_object *client_obj = client->client->obj;
 	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx = client->owner;
 	struct guc_context_desc desc;
@@ -412,7 +412,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 	 * The doorbell, process descriptor, and workqueue are all parts
 	 * of the client object, which the GuC will reference via the GGTT
 	 */
-	gfx_addr = i915_gem_obj_ggtt_offset(client_obj);
+	gfx_addr = client->client->node.start;
 	desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) +
 				client->doorbell_offset;
 	desc.db_trigger_cpu = (uintptr_t)client->client_base +
@@ -429,7 +429,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 	desc.desc_private = (uintptr_t)client;
 
 	/* Pool context is pinned already */
-	sg = guc->ctx_pool_obj->pages;
+	sg = guc->ctx_pool->obj->pages;
 	sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
 			     sizeof(desc) * client->ctx_index);
 }
@@ -442,7 +442,7 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
 
 	memset(&desc, 0, sizeof(desc));
 
-	sg = guc->ctx_pool_obj->pages;
+	sg = guc->ctx_pool->obj->pages;
 	sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
 			     sizeof(desc) * client->ctx_index);
 }
@@ -524,7 +524,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
 	/* WQ starts from the page after doorbell / process_desc */
 	wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
 	wq_off &= PAGE_SIZE - 1;
-	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, wq_page));
+	base = kmap_atomic(i915_gem_object_get_page(gc->client->obj, wq_page));
 	wqi = (struct guc_wq_item *)((char *)base + wq_off);
 
 	/* Now fill in the 4-word work queue item */
@@ -588,8 +588,8 @@ void i915_guc_submit(struct drm_i915_gem_request *rq)
  */
 
 /**
- * gem_allocate_guc_obj() - Allocate gem object for GuC usage
- * @dev:	drm device
+ * guc_allocate_vma() - Allocate and pin a GGTT VMA for GuC usage
+ * @guc:	the guc
  * @size:	size of object
  *
  * This is a wrapper to create a gem obj. In order to use it inside GuC, the
@@ -598,46 +598,40 @@ void i915_guc_submit(struct drm_i915_gem_request *rq)
  *
  * Return:	A pinned i915_vma if successful, otherwise an ERR_PTR.
  */
-static struct drm_i915_gem_object *gem_allocate_guc_obj(struct drm_device *dev,
-							u32 size)
+static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	struct drm_i915_gem_object *obj;
+	int ret;
 
-	obj = i915_gem_object_create(dev, size);
+	obj = i915_gem_object_create(dev_priv->dev, size);
 	if (IS_ERR(obj))
-		return NULL;
+		return ERR_CAST(obj);
 
-	if (i915_gem_object_get_pages(obj)) {
+	ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
+				       PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
+	if (ret) {
 		i915_gem_object_put(obj);
-		return NULL;
-	}
-
-	if (i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
-				     PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) {
-		i915_gem_object_put(obj);
-		return NULL;
+		return ERR_PTR(ret);
 	}
 
 	/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
 	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
 
-	return obj;
+	return i915_gem_obj_to_ggtt(obj);
 }
 
 /**
- * gem_release_guc_obj() - Release gem object allocated for GuC usage
- * @obj:	gem obj to be released
+ * guc_release_vma() - Release gem object allocated for GuC usage
+ * @vma:	pinned vma to be released (may be NULL)
  */
-static void gem_release_guc_obj(struct drm_i915_gem_object *obj)
+static void guc_release_vma(struct i915_vma *vma)
 {
-	if (!obj)
+	if (vma == NULL)
 		return;
 
-	if (i915_gem_obj_is_pinned(obj))
-		i915_gem_object_ggtt_unpin(obj);
-
-	i915_gem_object_put(obj);
+	i915_vma_unpin(vma);
+	i915_gem_object_put(vma->obj);
 }
 
 static void guc_client_free(struct drm_device *dev,
@@ -671,7 +665,7 @@ static void guc_client_free(struct drm_device *dev,
 		kunmap(kmap_to_page(client->client_base));
 	}
 
-	gem_release_guc_obj(client->client_obj);
+	guc_release_vma(client->client);
 
 	if (client->ctx_index != GUC_INVALID_CTX_ID) {
 		guc_fini_ctx_desc(guc, client);
@@ -700,7 +694,7 @@ static struct i915_guc_client *guc_client_alloc(struct drm_device *dev,
 	struct i915_guc_client *client;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_guc *guc = &dev_priv->guc;
-	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
 
 	client = kzalloc(sizeof(*client), GFP_KERNEL);
 	if (!client)
@@ -719,13 +713,13 @@ static struct i915_guc_client *guc_client_alloc(struct drm_device *dev,
 	}
 
 	/* The first page is doorbell/proc_desc. Two followed pages are wq. */
-	obj = gem_allocate_guc_obj(dev, GUC_DB_SIZE + GUC_WQ_SIZE);
-	if (!obj)
+	vma = guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE);
+	if (IS_ERR(vma))
 		goto err;
 
 	/* We'll keep just the first (doorbell/proc) page permanently kmap'd. */
-	client->client_obj = obj;
-	client->client_base = kmap(i915_gem_object_get_page(obj, 0));
+	client->client = vma;
+	client->client_base = kmap(i915_gem_object_get_page(vma->obj, 0));
 	client->wq_offset = GUC_DB_SIZE;
 	client->wq_size = GUC_WQ_SIZE;
 
@@ -769,8 +763,7 @@ err:
 
 static void guc_create_log(struct intel_guc *guc)
 {
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
 	unsigned long offset;
 	uint32_t size, flags;
 
@@ -786,16 +779,16 @@ static void guc_create_log(struct intel_guc *guc)
 		GUC_LOG_ISR_PAGES + 1 +
 		GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
 
-	obj = guc->log_obj;
-	if (!obj) {
-		obj = gem_allocate_guc_obj(dev_priv->dev, size);
-		if (!obj) {
+	vma = guc->log;
+	if (vma == NULL) {
+		vma = guc_allocate_vma(guc, size);
+		if (IS_ERR(vma)) {
 			/* logging will be off */
 			i915.guc_log_level = -1;
 			return;
 		}
 
-		guc->log_obj = obj;
+		guc->log = vma;
 	}
 
 	/* each allocated unit is a page */
@@ -804,7 +797,7 @@ static void guc_create_log(struct intel_guc *guc)
 		(GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) |
 		(GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT);
 
-	offset = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT; /* in pages */
+	offset = vma->node.start >> PAGE_SHIFT; /* in pages */
 	guc->log_flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
 }
 
@@ -833,7 +826,7 @@ static void init_guc_policies(struct guc_policies *policies)
 static void guc_create_ads(struct intel_guc *guc)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
 	struct guc_ads *ads;
 	struct guc_policies *policies;
 	struct guc_mmio_reg_state *reg_state;
@@ -846,16 +839,16 @@ static void guc_create_ads(struct intel_guc *guc)
 			sizeof(struct guc_mmio_reg_state) +
 			GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE;
 
-	obj = guc->ads_obj;
-	if (!obj) {
-		obj = gem_allocate_guc_obj(dev_priv->dev, PAGE_ALIGN(size));
-		if (!obj)
+	vma = guc->ads;
+	if (vma == NULL) {
+		vma = guc_allocate_vma(guc, PAGE_ALIGN(size));
+		if (IS_ERR(vma))
 			return;
 
-		guc->ads_obj = obj;
+		guc->ads = vma;
 	}
 
-	page = i915_gem_object_get_page(obj, 0);
+	page = i915_gem_object_get_page(vma->obj, 0);
 	ads = kmap(page);
 
 	/*
@@ -875,8 +868,7 @@ static void guc_create_ads(struct intel_guc *guc)
 	policies = (void *)ads + sizeof(struct guc_ads);
 	init_guc_policies(policies);
 
-	ads->scheduler_policies = i915_gem_obj_ggtt_offset(obj) +
-			sizeof(struct guc_ads);
+	ads->scheduler_policies = vma->node.start + sizeof(struct guc_ads);
 
 	/* MMIO reg state */
 	reg_state = (void *)policies + sizeof(struct guc_policies);
@@ -904,22 +896,22 @@ static void guc_create_ads(struct intel_guc *guc)
  */
 int i915_guc_submission_init(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	const size_t ctxsize = sizeof(struct guc_context_desc);
-	const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize;
-	const size_t gemsize = round_up(poolsize, PAGE_SIZE);
-	struct intel_guc *guc = &dev_priv->guc;
+	struct intel_guc *guc = &to_i915(dev)->guc;
+	struct i915_vma *vma;
+	u32 size;
 
 	if (!i915.enable_guc_submission)
 		return 0; /* not enabled  */
 
-	if (guc->ctx_pool_obj)
+	if (guc->ctx_pool)
 		return 0; /* already allocated */
 
-	guc->ctx_pool_obj = gem_allocate_guc_obj(dev_priv->dev, gemsize);
-	if (!guc->ctx_pool_obj)
-		return -ENOMEM;
+	size = PAGE_ALIGN(GUC_MAX_GPU_CONTEXTS * sizeof(struct guc_context_desc));
+	vma = guc_allocate_vma(guc, size);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
 
+	guc->ctx_pool = vma;
 	ida_init(&guc->ctx_ids);
 
 	guc_create_log(guc);
@@ -962,19 +954,18 @@ void i915_guc_submission_disable(struct drm_device *dev)
 
 void i915_guc_submission_fini(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_guc *guc = &dev_priv->guc;
+	struct intel_guc *guc = &to_i915(dev)->guc;
 
-	gem_release_guc_obj(dev_priv->guc.ads_obj);
-	guc->ads_obj = NULL;
+	guc_release_vma(guc->ads);
+	guc->ads = NULL;
 
-	gem_release_guc_obj(dev_priv->guc.log_obj);
-	guc->log_obj = NULL;
+	guc_release_vma(guc->log);
+	guc->log = NULL;
 
-	if (guc->ctx_pool_obj)
+	if (guc->ctx_pool)
 		ida_destroy(&guc->ctx_ids);
-	gem_release_guc_obj(guc->ctx_pool_obj);
-	guc->ctx_pool_obj = NULL;
+	guc_release_vma(guc->ctx_pool);
+	guc->ctx_pool = NULL;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 7f9063385258..3b65500d4509 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -62,7 +62,7 @@ struct drm_i915_gem_request;
  *   retcode: errno from last guc_submit()
  */
 struct i915_guc_client {
-	struct drm_i915_gem_object *client_obj;
+	struct i915_vma *client;
 	void *client_base;		/* first page (only) of above	*/
 	struct i915_gem_context *owner;
 	struct intel_guc *guc;
@@ -124,11 +124,10 @@ struct intel_guc_fw {
 struct intel_guc {
 	struct intel_guc_fw guc_fw;
 	uint32_t log_flags;
-	struct drm_i915_gem_object *log_obj;
+	struct i915_vma *log;
 
-	struct drm_i915_gem_object *ads_obj;
-
-	struct drm_i915_gem_object *ctx_pool_obj;
+	struct i915_vma *ads;
+	struct i915_vma *ctx_pool;
 	struct ida ctx_ids;
 
 	struct i915_guc_client *execbuf_client;
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index be93b458968a..1ecf88fd0b10 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -179,16 +179,15 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv)
 			i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT;
 	}
 
-	if (guc->ads_obj) {
-		u32 ads = (u32)i915_gem_obj_ggtt_offset(guc->ads_obj)
-				>> PAGE_SHIFT;
+	if (guc->ads) {
+		u32 ads = (u32)guc->ads->node.start >> PAGE_SHIFT;
 		params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT;
 		params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED;
 	}
 
 	/* If GuC submission is enabled, set up additional parameters here */
 	if (i915.enable_guc_submission) {
-		u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj);
+		u32 pgs = dev_priv->guc.ctx_pool->node.start;
 		u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16;
 
 		pgs >>= PAGE_SHIFT;
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 37/38] drm/i915: Track pinned VMA
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (35 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 36/38] drm/i915: Track pinned vma inside guc Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-08 10:08   ` Daniel Vetter
  2016-06-03 16:55 ` [PATCH 38/38] drm/i915/overlay: Use VMA as the primary tracker for images Chris Wilson
  2016-06-06 10:42 ` ✗ Ro.CI.BAT: failure for series starting with [01/38] drm/i915: Combine loops within i915_gem_evict_something Patchwork
  38 siblings, 1 reply; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

Treat the VMA as the primary struct responsible for tracking bindings
into the GPU's VM. That is, we want to treat the VMA returned after we
pin an object into the VM as the cookie we hold, and eventually release
when unpinning. Doing so eliminates the ambiguity of pinning the object
and then having to search for the relevant pin later.
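
Sketched against the converted API below (i915_gem_object_ggtt_pin() now
returns the pinned VMA instead of an errno), the pattern becomes:

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, alignment, flags);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/* ... use vma->node.start as the GGTT address ... */

	i915_vma_unpin(vma); /* release exactly the binding we pinned */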

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c          |  75 +++++-----
 drivers/gpu/drm/i915/i915_drv.h              |  64 +++------
 drivers/gpu/drm/i915/i915_gem.c              | 200 ++++++---------------------
 drivers/gpu/drm/i915/i915_gem_context.c      |  43 +++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  60 ++++----
 drivers/gpu/drm/i915/i915_gem_fence.c        |  64 ++++-----
 drivers/gpu/drm/i915/i915_gem_gtt.c          |  58 +++++---
 drivers/gpu/drm/i915/i915_gem_gtt.h          |  14 --
 drivers/gpu/drm/i915/i915_gem_render_state.c |  31 ++---
 drivers/gpu/drm/i915/i915_gem_render_state.h |   2 +-
 drivers/gpu/drm/i915/i915_gem_request.c      |  10 +-
 drivers/gpu/drm/i915/i915_gem_request.h      |   2 +-
 drivers/gpu/drm/i915/i915_gem_stolen.c       |   2 +-
 drivers/gpu/drm/i915/i915_gem_tiling.c       |  42 +++---
 drivers/gpu/drm/i915/i915_gpu_error.c        |  55 +++-----
 drivers/gpu/drm/i915/i915_guc_submission.c   |  28 ++--
 drivers/gpu/drm/i915/intel_display.c         |  57 +++++---
 drivers/gpu/drm/i915/intel_drv.h             |   5 +-
 drivers/gpu/drm/i915/intel_fbc.c             |   2 +-
 drivers/gpu/drm/i915/intel_fbdev.c           |  19 ++-
 drivers/gpu/drm/i915/intel_guc_loader.c      |  29 ++--
 drivers/gpu/drm/i915/intel_lrc.c             | 113 ++++++++-------
 drivers/gpu/drm/i915/intel_overlay.c         |  44 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.c      | 194 ++++++++++++++------------
 drivers/gpu/drm/i915/intel_ringbuffer.h      |  20 +--
 drivers/gpu/drm/i915/intel_sprite.c          |   8 +-
 26 files changed, 549 insertions(+), 692 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 485fc23893d6..938a95df8a11 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -111,7 +111,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj)
 
 static char get_global_flag(struct drm_i915_gem_object *obj)
 {
-	return i915_gem_obj_to_ggtt(obj) ? 'g' : ' ';
+	return i915_gem_object_to_ggtt(obj, NULL) ? 'g' : ' ';
 }
 
 static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
@@ -278,7 +278,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
-	u64 total_obj_size, total_gtt_size;
+	u64 total_obj_size;
 	LIST_HEAD(stolen);
 	int count, ret;
 
@@ -286,7 +286,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
 	if (ret)
 		return ret;
 
-	total_obj_size = total_gtt_size = count = 0;
+	total_obj_size = count = 0;
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
 		if (obj->stolen == NULL)
 			continue;
@@ -294,7 +294,6 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
 		list_add(&obj->obj_exec_link, &stolen);
 
 		total_obj_size += obj->base.size;
-		total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
 		count++;
 	}
 	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
@@ -317,8 +316,8 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
 	}
 	mutex_unlock(&dev->struct_mutex);
 
-	seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
-		   count, total_obj_size, total_gtt_size);
+	seq_printf(m, "Total %d objects, %llu bytes\n",
+		   count, total_obj_size);
 	return 0;
 }
 
@@ -327,7 +326,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
 		size += i915_gem_obj_total_ggtt_size(obj); \
 		++count; \
 		if (obj->map_and_fenceable) { \
-			mappable_size += i915_gem_obj_ggtt_size(obj); \
+			mappable_size += obj->base.size; \
 			++mappable_count; \
 		} \
 	} \
@@ -451,10 +450,10 @@ static void print_context_stats(struct seq_file *m,
 
 #define count_vmas(list, member) do { \
 	list_for_each_entry(vma, list, member) { \
-		size += i915_gem_obj_total_ggtt_size(vma->obj); \
+		size += vma->size; \
 		++count; \
 		if (vma->obj->map_and_fenceable) { \
-			mappable_size += i915_gem_obj_ggtt_size(vma->obj); \
+			mappable_size += vma->size; \
 			++mappable_count; \
 		} \
 	} \
@@ -517,11 +516,11 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
 	size = count = mappable_size = mappable_count = 0;
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
 		if (obj->fault_mappable) {
-			size += i915_gem_obj_ggtt_size(obj);
+			size += obj->base.size;
 			++count;
 		}
 		if (obj->pin_display) {
-			mappable_size += i915_gem_obj_ggtt_size(obj);
+			mappable_size += obj->base.size;
 			++mappable_count;
 		}
 		if (obj->madv == I915_MADV_DONTNEED) {
@@ -589,30 +588,29 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data)
 	uintptr_t list = (uintptr_t) node->info_ent->data;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
-	u64 total_obj_size, total_gtt_size;
+	u64 total_obj_size;
 	int count, ret;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
 		return ret;
 
-	total_obj_size = total_gtt_size = count = 0;
+	total_obj_size = count = 0;
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		if (list == PINNED_LIST && !i915_gem_obj_is_pinned(obj))
+		if (list == PINNED_LIST && !obj->pin_display)
 			continue;
 
 		seq_puts(m, "   ");
 		describe_obj(m, obj);
 		seq_putc(m, '\n');
 		total_obj_size += obj->base.size;
-		total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
 		count++;
 	}
 
 	mutex_unlock(&dev->struct_mutex);
 
-	seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
-		   count, total_obj_size, total_gtt_size);
+	seq_printf(m, "Total %d objects, %llu bytes\n",
+		   count, total_obj_size);
 
 	return 0;
 }
@@ -2075,38 +2073,35 @@ static void i915_dump_lrc_obj(struct seq_file *m,
 			      struct i915_gem_context *ctx,
 			      struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
+	struct drm_i915_gem_object *obj = ctx->engine[engine->id].state;
+	struct i915_vma *vma = ctx->engine[engine->id].vma;
 	struct page *page;
-	uint32_t *reg_state;
 	int j;
-	unsigned long ggtt_offset = 0;
 
 	seq_printf(m, "CONTEXT: %s %u\n", engine->name, ctx->hw_id);
-
-	if (ctx_obj == NULL) {
-		seq_puts(m, "\tNot allocated\n");
-		return;
-	}
-
-	if (!i915_gem_obj_ggtt_bound(ctx_obj))
+	if (vma == NULL) {
 		seq_puts(m, "\tNot bound in GGTT\n");
-	else
-		ggtt_offset = i915_gem_obj_ggtt_offset(ctx_obj);
+	} else {
+		seq_printf(m, "\tBound in GGTT at %x\n",
+			   lower_32_bits(vma->node.start));
+	}
 
-	if (i915_gem_object_get_pages(ctx_obj)) {
-		seq_puts(m, "\tFailed to get pages for context object\n");
+	if (i915_gem_object_get_pages(obj)) {
+		seq_puts(m, "\tFailed to get pages for context object\n\n");
 		return;
 	}
 
-	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
-	if (!WARN_ON(page == NULL)) {
-		reg_state = kmap_atomic(page);
-
+	page = i915_gem_object_get_page(obj, LRC_STATE_PN);
+	if (page != NULL) {
+		uint32_t *reg_state = kmap_atomic(page);
 		for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) {
-			seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n",
-				   ggtt_offset + 4096 + (j * 4),
-				   reg_state[j], reg_state[j + 1],
-				   reg_state[j + 2], reg_state[j + 3]);
+			seq_printf(m,
+				   "\t[0x%08x] 0x%08x 0x%08x 0x%08x 0x%08x\n",
+				   j * 4,
+				   reg_state[j],
+				   reg_state[j + 1],
+				   reg_state[j + 2],
+				   reg_state[j + 3]);
 		}
 		kunmap_atomic(reg_state);
 	}
@@ -3210,7 +3205,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused)
 		struct page *page;
 		uint64_t *seqno;
 
-		page = i915_gem_object_get_page(dev_priv->semaphore_obj, 0);
+		page = i915_gem_object_get_page(dev_priv->semaphore_vma->obj, 0);
 
 		seqno = (uint64_t *)kmap_atomic(page);
 		for_each_engine_id(engine, dev_priv, id) {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 77564f378771..ed968deb36aa 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -880,8 +880,8 @@ struct i915_gem_context {
 
 	struct intel_context {
 		struct drm_i915_gem_object *state;
+		struct i915_vma *vma;
 		struct intel_ring *ring;
-		struct i915_vma *lrc_vma;
 		uint32_t *lrc_reg_state;
 		u64 lrc_desc;
 		int pin_count;
@@ -1736,7 +1736,7 @@ struct drm_i915_private {
 	struct pci_dev *bridge_dev;
 	struct i915_gem_context *kernel_context;
 	struct intel_engine_cs engine[I915_NUM_ENGINES];
-	struct drm_i915_gem_object *semaphore_obj;
+	struct i915_vma *semaphore_vma;
 	uint32_t next_seqno;
 
 	struct drm_dma_handle *status_page_dmah;
@@ -2996,7 +2996,7 @@ static inline void i915_vma_unpin(struct i915_vma *vma)
 	__i915_vma_unpin(vma);
 }
 
-int __must_check
+struct i915_vma * __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
 			 uint64_t size,
@@ -3174,12 +3174,11 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
 				  bool write);
 int __must_check
 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
+struct i915_vma * __must_check
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     u32 alignment,
 				     const struct i915_ggtt_view *view);
-void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
-					      const struct i915_ggtt_view *view);
+void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
 int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 				int align);
 int i915_gem_open(struct drm_device *dev, struct drm_file *file);
@@ -3200,63 +3199,34 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 				struct drm_gem_object *gem_obj, int flags);
 
-u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
-				  const struct i915_ggtt_view *view);
-u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
-			struct i915_address_space *vm);
-static inline u64
-i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o)
-{
-	return i915_gem_obj_ggtt_offset_view(o, &i915_ggtt_view_normal);
-}
-
-bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
-				  const struct i915_ggtt_view *view);
-bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
-			struct i915_address_space *vm);
-
 struct i915_vma *
 i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
-		    struct i915_address_space *vm);
-struct i915_vma *
-i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
-			  const struct i915_ggtt_view *view);
+		     struct i915_address_space *vm,
+		     const struct i915_ggtt_view *view);
 
 struct i915_vma *
 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
-				  struct i915_address_space *vm);
-struct i915_vma *
-i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
-				       const struct i915_ggtt_view *view);
-
-static inline struct i915_vma *
-i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
-{
-	return i915_gem_obj_to_ggtt_view(obj, &i915_ggtt_view_normal);
-}
-bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj);
+				  struct i915_address_space *vm,
+				  const struct i915_ggtt_view *view);
 
-/* Some GGTT VM helpers */
 static inline struct i915_hw_ppgtt *
 i915_vm_to_ppgtt(struct i915_address_space *vm)
 {
 	return container_of(vm, struct i915_hw_ppgtt, base);
 }
 
-static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
+static inline struct i915_vma *
+i915_gem_object_to_ggtt(struct drm_i915_gem_object *obj,
+			const struct i915_ggtt_view *view)
 {
-	return i915_gem_obj_ggtt_bound_view(obj, &i915_ggtt_view_normal);
+	return i915_gem_obj_to_vma(obj, &to_i915(obj->base.dev)->ggtt.base, view);
 }
 
-unsigned long
-i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj);
-
-void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
-				     const struct i915_ggtt_view *view);
-static inline void
-i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj)
+static inline unsigned long
+i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o,
+			    const struct i915_ggtt_view *view)
 {
-	i915_gem_object_ggtt_unpin_view(obj, &i915_ggtt_view_normal);
+	return i915_gem_object_to_ggtt(o, view)->node.start;
 }
 
 /* i915_gem_fence.c */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dad00800aeef..e0db9b02ee04 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -848,16 +848,18 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct i915_vma *vma;
 	ssize_t remain;
 	loff_t offset, page_base;
 	char __user *user_data;
 	int page_offset, page_length, ret;
 
-	ret = i915_gem_object_ggtt_pin(obj, NULL,
-				       0, 0,
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 				       PIN_MAPPABLE | PIN_NONBLOCK);
-	if (ret)
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
 		goto out;
+	}
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 	if (ret)
@@ -870,7 +872,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 	user_data = u64_to_user_ptr(args->data_ptr);
 	remain = args->size;
 
-	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
+	offset = vma->node.start + args->offset;
 
 	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
 
@@ -905,7 +907,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 out_flush:
 	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
 out_unpin:
-	i915_gem_object_ggtt_unpin(obj);
+	i915_vma_unpin(vma);
 out:
 	return ret;
 }
@@ -1382,8 +1384,8 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	struct i915_ggtt_view view = i915_ggtt_view_normal;
+	struct i915_vma *ggtt;
 	pgoff_t page_offset;
 	unsigned long pfn;
 	int ret = 0;
@@ -1417,7 +1419,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	/* Use a partial view if the object is bigger than the aperture. */
-	if (obj->base.size >= ggtt->mappable_end &&
+	if (obj->base.size >= dev_priv->ggtt.mappable_end &&
 	    obj->tiling_mode == I915_TILING_NONE) {
 		static const unsigned int chunk_size = 256; // 1 MiB
 
@@ -1432,9 +1434,11 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	/* Now pin it into the GTT if needed */
-	ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
-	if (ret)
+	ggtt = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
+	if (IS_ERR(ggtt)) {
+		ret = PTR_ERR(ggtt);
 		goto err_unlock;
+	}
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, write);
 	if (ret)
@@ -1445,8 +1449,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		goto err_unpin;
 
 	/* Finally, remap it using the new GTT offset */
-	pfn = ggtt->mappable_base +
-		i915_gem_obj_ggtt_offset_view(obj, &view);
+	pfn = dev_priv->ggtt.mappable_base + ggtt->node.start;
 	pfn >>= PAGE_SHIFT;
 
 	if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
@@ -1488,7 +1491,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 					    pfn + page_offset);
 	}
 err_unpin:
-	i915_gem_object_ggtt_unpin_view(obj, &view);
+	__i915_vma_unpin(ggtt);
 err_unlock:
 	mutex_unlock(&dev->struct_mutex);
 err_rpm:
@@ -2925,7 +2928,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 					    old_write_domain);
 
 	/* And bump the LRU for this access */
-	vma = i915_gem_obj_to_ggtt(obj);
+	vma = i915_gem_object_to_ggtt(obj, NULL);
 	if (vma &&
 	    drm_mm_node_allocated(&vma->node) &&
 	    !i915_vma_is_active(vma))
@@ -3149,11 +3152,12 @@ rpm_put:
  * Can be called from an uninterruptible phase (modesetting) and allows
  * any flushes to be pipelined (for pageflips).
  */
-int
+struct i915_vma *
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     u32 alignment,
 				     const struct i915_ggtt_view *view)
 {
+	struct i915_vma *vma;
 	u32 old_read_domains, old_write_domain;
 	int ret;
 
@@ -3173,19 +3177,23 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	 */
 	ret = i915_gem_object_set_cache_level(obj,
 					      HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
-	if (ret)
+	if (ret) {
+		vma = ERR_PTR(ret);
 		goto err_unpin_display;
+	}
 
 	/* As the user may map the buffer once pinned in the display plane
 	 * (e.g. libkms for the bootup splash), we have to ensure that we
 	 * always use map_and_fenceable for all scanout buffers.
 	 */
-	ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
+	vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
 				       view->type == I915_GGTT_VIEW_NORMAL ?
 				       PIN_MAPPABLE : 0);
-	if (ret)
+	if (IS_ERR(vma))
 		goto err_unpin_display;
 
+	WARN_ON(obj->pin_display > vma->pin_count);
+
 	i915_gem_object_flush_cpu_write_domain(obj);
 
 	old_write_domain = obj->base.write_domain;
@@ -3204,24 +3212,24 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	/* Increment the pages_pin_count to guard against the shrinker */
 	obj->pages_pin_count++;
 
-	return 0;
+	return vma;
 
 err_unpin_display:
 	obj->pin_display--;
-	return ret;
+	return vma;
 }
 
 void
-i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
-					 const struct i915_ggtt_view *view)
+i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 {
-	if (WARN_ON(obj->pin_display == 0))
+	if (WARN_ON(vma->obj->pin_display == 0))
 		return;
 
-	i915_gem_object_ggtt_unpin_view(obj, view);
+	vma->obj->pin_display--;
+	vma->obj->pages_pin_count--;
 
-	obj->pages_pin_count--;
-	obj->pin_display--;
+	i915_vma_unpin(vma);
+	WARN_ON(vma->obj->pin_display > vma->pin_count);
 }
 
 /**
@@ -3421,26 +3429,24 @@ err:
 	return ret;
 }
 
-int
+struct i915_vma *
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
-			 const struct i915_ggtt_view *view,
+			 const struct i915_ggtt_view *ggtt_view,
 			 uint64_t size,
 			 uint64_t alignment,
 			 uint64_t flags)
 {
+	struct i915_address_space *vm = &to_i915(obj->base.dev)->ggtt.base;
 	struct i915_vma *vma;
 	int ret;
 
-	if (view == NULL)
-		view = &i915_ggtt_view_normal;
-
-	vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
+	vma = i915_gem_obj_lookup_or_create_vma(obj, vm, ggtt_view);
 	if (IS_ERR(vma))
-		return PTR_ERR(vma);
+		return vma;
 
 	if (i915_vma_misplaced(vma, size, alignment, flags)) {
 		if (flags & PIN_NONBLOCK && (vma->pin_count | vma->active))
-			return -ENOSPC;
+			return ERR_PTR(-ENOSPC);
 
 		WARN(vma->pin_count,
 		     "bo is already pinned in ggtt with incorrect alignment:"
@@ -3453,17 +3459,14 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 		     obj->map_and_fenceable);
 		ret = i915_vma_unbind(vma);
 		if (ret)
-			return ret;
+			return ERR_PTR(ret);
 	}
 
-	return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
-}
+	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
+	if (ret)
+		return ERR_PTR(ret);
 
-void
-i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
-				const struct i915_ggtt_view *view)
-{
-	i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
+	return vma;
 }
 
 static __always_inline unsigned
@@ -3799,31 +3802,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	intel_runtime_pm_put(dev_priv);
 }
 
-struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
-				     struct i915_address_space *vm)
-{
-	struct i915_vma *vma;
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
-		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
-		    vma->vm == vm)
-			return vma;
-	}
-	return NULL;
-}
-
-struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
-					   const struct i915_ggtt_view *view)
-{
-	struct i915_vma *vma;
-
-	GEM_BUG_ON(!view);
-
-	list_for_each_entry(vma, &obj->vma_list, obj_link)
-		if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
-			return vma;
-	return NULL;
-}
-
 int
 i915_gem_suspend(struct drm_device *dev)
 {
@@ -4321,96 +4299,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 	}
 }
 
-/* All the new VM stuff */
-u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
-			struct i915_address_space *vm)
-{
-	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
-	struct i915_vma *vma;
-
-	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
-
-	list_for_each_entry(vma, &o->vma_list, obj_link) {
-		if (vma->is_ggtt &&
-		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
-			continue;
-		if (vma->vm == vm)
-			return vma->node.start;
-	}
-
-	WARN(1, "%s vma for this object not found.\n",
-	     i915_is_ggtt(vm) ? "global" : "ppgtt");
-	return -1;
-}
-
-u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
-				  const struct i915_ggtt_view *view)
-{
-	struct i915_vma *vma;
-
-	list_for_each_entry(vma, &o->vma_list, obj_link)
-		if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
-			return vma->node.start;
-
-	WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
-	return -1;
-}
-
-bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
-			struct i915_address_space *vm)
-{
-	struct i915_vma *vma;
-
-	list_for_each_entry(vma, &o->vma_list, obj_link) {
-		if (vma->is_ggtt &&
-		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
-			continue;
-		if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
-			return true;
-	}
-
-	return false;
-}
-
-bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
-				  const struct i915_ggtt_view *view)
-{
-	struct i915_vma *vma;
-
-	list_for_each_entry(vma, &o->vma_list, obj_link)
-		if (vma->is_ggtt &&
-		    i915_ggtt_view_equal(&vma->ggtt_view, view) &&
-		    drm_mm_node_allocated(&vma->node))
-			return true;
-
-	return false;
-}
-
-unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
-{
-	struct i915_vma *vma;
-
-	GEM_BUG_ON(list_empty(&o->vma_list));
-
-	list_for_each_entry(vma, &o->vma_list, obj_link) {
-		if (vma->is_ggtt &&
-		    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
-			return vma->node.size;
-	}
-
-	return 0;
-}
-
-bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
-{
-	struct i915_vma *vma;
-	list_for_each_entry(vma, &obj->vma_list, obj_link)
-		if (i915_vma_is_pinned(vma))
-			return true;
-
-	return false;
-}
-
 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
 struct page *
 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index c9b8c2c62828..0ed8a4a7321a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -360,8 +360,8 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx,
 	} else {
 		struct intel_context *ce = &ctx->engine[engine->id];
 
-		if (ce->state)
-			i915_gem_object_ggtt_unpin(ce->state);
+		if (ce->vma)
+			i915_vma_unpin(ce->vma);
 
 		i915_gem_context_put(ctx);
 	}
@@ -580,9 +580,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_emit(ring, MI_SET_CONTEXT);
-	intel_ring_emit(ring,
-			i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) |
-			flags);
+	intel_ring_emit(ring, req->ctx->engine[RCS].vma->node.start | flags);
 	/*
 	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
 	 * WaMiSetContext_Hang:snb,ivb,vlv
@@ -610,7 +608,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 					MI_STORE_REGISTER_MEM |
 					MI_SRM_LRM_GLOBAL_GTT);
 			intel_ring_emit_reg(ring, last_reg);
-			intel_ring_emit(ring, req->engine->scratch.gtt_offset);
+			intel_ring_emit(ring, req->engine->scratch->node.start);
 			intel_ring_emit(ring, MI_NOOP);
 		}
 		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
@@ -715,6 +713,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 	struct intel_engine_cs *engine = req->engine;
 	struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
 	struct i915_gem_context *from;
+	struct i915_vma *vma;
 	u32 hw_flags;
 	int ret, i;
 
@@ -722,10 +721,17 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 		return 0;
 
 	/* Trying to pin first makes error handling easier. */
-	ret = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0,
+	vma = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0,
 				       to->ggtt_alignment, 0);
-	if (ret)
-		return ret;
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	to->engine[RCS].vma = vma;
+
+	if (WARN_ON(!(vma->bound & GLOBAL_BIND))) {
+		ret = -ENODEV;
+		goto unpin_vma;
+	}
 
 	/*
 	 * Pin can switch back to the default context if we end up calling into
@@ -746,7 +752,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 	 */
 	ret = i915_gem_object_set_to_gtt_domain(to->engine[RCS].state, false);
 	if (ret)
-		goto unpin_out;
+		goto unpin_vma;
 
 	if (needs_pd_load_pre(ppgtt, engine, to)) {
 		/* Older GENs and non render rings still want the load first,
@@ -756,7 +762,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 		trace_switch_mm(engine, to);
 		ret = ppgtt->switch_mm(ppgtt, req);
 		if (ret)
-			goto unpin_out;
+			goto unpin_vma;
 	}
 
 	if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
@@ -773,7 +779,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
 		ret = mi_set_context(req, hw_flags);
 		if (ret)
-			goto unpin_out;
+			goto unpin_vma;
 	}
 
 	/* The backing object for the context is done after switching to the
@@ -783,8 +789,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 	 * MI_SET_CONTEXT instead of when the next seqno has completed.
 	 */
 	if (from != NULL) {
-		struct drm_i915_gem_object *obj = from->engine[RCS].state;
-
 		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
 		 * whole damn pipeline, we don't need to explicitly mark the
 		 * object dirty. The only exception is that the context must be
@@ -792,11 +796,10 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 		 * able to defer doing this until we know the object would be
 		 * swapped, but there is no way to do that yet.
 		 */
-		obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-		i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), req, 0);
-
+		i915_vma_move_to_active(from->engine[RCS].vma, req, 0);
 		/* obj is kept alive until the next request by its active ref */
-		i915_gem_object_ggtt_unpin(obj);
+		i915_vma_unpin(from->engine[RCS].vma);
+
 		i915_gem_context_put(from);
 	}
 	engine->last_context = i915_gem_context_get(to);
@@ -841,8 +844,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 
 	return 0;
 
-unpin_out:
-	i915_gem_object_ggtt_unpin(to->engine[RCS].state);
+unpin_vma:
+	i915_vma_unpin(vma);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index b89e9d2b33c4..a29c4b6fea28 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -174,8 +174,8 @@ eb_lookup_vmas(struct eb_vmas *eb,
 		 * from the (obj, vm) we don't run the risk of creating
 		 * duplicated vmas for the same vm.
 		 */
-		vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
-		if (IS_ERR(vma)) {
+		vma = i915_gem_obj_lookup_or_create_vma(obj, vm, NULL);
+		if (unlikely(IS_ERR(vma))) {
 			DRM_DEBUG("Failed to lookup VMA\n");
 			ret = PTR_ERR(vma);
 			goto err;
@@ -343,30 +343,34 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
 		   struct drm_i915_gem_relocation_entry *reloc,
 		   uint64_t target_offset)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct i915_vma *vma;
 	uint64_t delta = relocation_target(reloc, target_offset);
 	uint64_t offset;
 	void __iomem *reloc_page;
 	int ret;
 
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 	if (ret)
-		return ret;
+		goto unpin;
 
 	ret = i915_gem_object_put_fence(obj);
 	if (ret)
-		return ret;
+		goto unpin;
 
 	/* Map the page containing the relocation we're going to perform.  */
-	offset = i915_gem_obj_ggtt_offset(obj);
+	offset = vma->node.start;
 	offset += reloc->offset;
 	reloc_page = io_mapping_map_atomic_wc(ggtt->mappable,
 					      offset & PAGE_MASK);
 	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
 
-	if (INTEL_INFO(dev)->gen >= 8) {
+	if (INTEL_GEN(dev_priv) >= 8) {
 		offset += sizeof(uint32_t);
 
 		if (offset_in_page(offset) == 0) {
@@ -382,7 +386,9 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
 
 	io_mapping_unmap_atomic(reloc_page);
 
-	return 0;
+unpin:
+	i915_vma_unpin(vma);
+	return ret;
 }
 
 static void
@@ -1236,7 +1242,7 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
 	return 0;
 }
 
-static struct i915_vma*
+static struct i915_vma *
 i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
 			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
 			  struct drm_i915_gem_object *batch_obj,
@@ -1260,31 +1266,30 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
 			      batch_start_offset,
 			      batch_len,
 			      is_master);
-	if (ret)
+	if (ret) {
+		if (ret == -EACCES) /* unhandled chained batch */
+			vma = NULL;
+		else
+			vma = ERR_PTR(ret);
 		goto err;
+	}
 
-	ret = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
-	if (ret)
+	vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
 		goto err;
-
-	i915_gem_object_unpin_pages(shadow_batch_obj);
+	}
 
 	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
 
-	vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
 	vma->exec_entry = shadow_exec_entry;
 	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
 	i915_gem_object_get(shadow_batch_obj);
 	list_add_tail(&vma->exec_list, &eb->vmas);
 
-	return vma;
-
 err:
 	i915_gem_object_unpin_pages(shadow_batch_obj);
-	if (ret == -EACCES) /* unhandled chained batch */
-		return NULL;
-	else
-		return ERR_PTR(ret);
+	return vma;
 }
 
 static int
@@ -1631,6 +1636,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	 * hsw should have this fixed, but bdw mucks it up again. */
 	if (dispatch_flags & I915_DISPATCH_SECURE) {
 		struct drm_i915_gem_object *obj = params->batch_vma->obj;
+		struct i915_vma *vma;
 
 		/*
 		 * So on first glance it looks freaky that we pin the batch here
@@ -1642,11 +1648,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		 *   fitting due to fragmentation.
 		 * So this is actually safe.
 		 */
-		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
-		if (ret)
+		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
+		if (IS_ERR(vma)) {
+			ret = PTR_ERR(vma);
 			goto err;
+		}
 
-		params->batch_vma = i915_gem_obj_to_ggtt(obj);
+		params->batch_vma = vma;
 	}
 
 	/* Allocate a request for this batch buffer nice and early. */
@@ -1662,7 +1670,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	 * inactive_list and lose its active reference. Hence we do not need
 	 * to explicitly hold another reference here.
 	 */
-	params->request->batch_obj = params->batch_vma->obj;
+	params->request->batch = params->batch_vma;
 
 	ret = i915_gem_request_add_to_client(params->request, file);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index ee91705734bc..187611eafa99 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -85,20 +85,14 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg,
 	POSTING_READ(fence_reg_lo);
 
 	if (obj) {
-		u32 size = i915_gem_obj_ggtt_size(obj);
-		uint64_t val;
-
-		/* Adjust fence size to match tiled area */
-		if (obj->tiling_mode != I915_TILING_NONE) {
-			uint32_t row_size = obj->stride *
-				(obj->tiling_mode == I915_TILING_Y ? 32 : 8);
-			size = (size / row_size) * row_size;
-		}
-
-		val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
-				 0xfffff000) << 32;
-		val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
-		val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
+		struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
+		u32 row_size = obj->stride * (obj->tiling_mode == I915_TILING_Y ? 32 : 8);
+		u32 size = (u32)vma->node.size / row_size * row_size;
+		u64 val;
+
+		val = ((vma->node.start + size - 4096) & 0xfffff000) << 32;
+		val |= vma->node.start & 0xfffff000;
+		val |= (u64)((obj->stride / 128) - 1) << fence_pitch_shift;
 		if (obj->tiling_mode == I915_TILING_Y)
 			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
 		val |= I965_FENCE_REG_VALID;
@@ -121,15 +115,17 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
 	u32 val;
 
 	if (obj) {
-		u32 size = i915_gem_obj_ggtt_size(obj);
+		struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
 		int pitch_val;
 		int tile_width;
 
-		WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
-		     (size & -size) != size ||
-		     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
-		     "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
-		     i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
+		WARN((vma->node.start & ~I915_FENCE_START_MASK) ||
+		     !is_power_of_2(vma->node.size) ||
+		     (vma->node.start & (vma->node.size - 1)),
+		     "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08lx) aligned\n",
+		     (long)vma->node.start,
+		     obj->map_and_fenceable,
+		     (long)vma->node.size);
 
 		if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
 			tile_width = 128;
@@ -140,10 +136,10 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
 		pitch_val = obj->stride / tile_width;
 		pitch_val = ffs(pitch_val) - 1;
 
-		val = i915_gem_obj_ggtt_offset(obj);
+		val = vma->node.start;
 		if (obj->tiling_mode == I915_TILING_Y)
 			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
-		val |= I915_FENCE_SIZE_BITS(size);
+		val |= I915_FENCE_SIZE_BITS(vma->node.size);
 		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
 		val |= I830_FENCE_REG_VALID;
 	} else
@@ -160,22 +156,22 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
 	uint32_t val;
 
 	if (obj) {
-		u32 size = i915_gem_obj_ggtt_size(obj);
+		struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
 		uint32_t pitch_val;
 
-		WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
-		     (size & -size) != size ||
-		     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
-		     "object 0x%08llx not 512K or pot-size 0x%08x aligned\n",
-		     i915_gem_obj_ggtt_offset(obj), size);
+		WARN((vma->node.start & ~I830_FENCE_START_MASK) ||
+		     !is_power_of_2(vma->node.size) ||
+		     (vma->node.start & (vma->node.size - 1)),
+		     "object 0x%08lx not 512K or pot-size 0x%08lx aligned\n",
+		     (long)vma->node.start, (long)vma->node.size);
 
 		pitch_val = obj->stride / 128;
 		pitch_val = ffs(pitch_val) - 1;
 
-		val = i915_gem_obj_ggtt_offset(obj);
+		val = vma->node.start;
 		if (obj->tiling_mode == I915_TILING_Y)
 			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
-		val |= I830_FENCE_SIZE_BITS(size);
+		val |= I830_FENCE_SIZE_BITS(vma->node.size);
 		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
 		val |= I830_FENCE_REG_VALID;
 	} else
@@ -426,13 +422,7 @@ bool
 i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
 {
 	if (obj->fence_reg != I915_FENCE_REG_NONE) {
-		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-		struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);
-
-		WARN_ON(!ggtt_vma ||
-			dev_priv->fence_regs[obj->fence_reg].pin_count >
-			ggtt_vma->pin_count);
-		dev_priv->fence_regs[obj->fence_reg].pin_count++;
+		to_i915(obj->base.dev)->fence_regs[obj->fence_reg].pin_count++;
 		return true;
 	} else
 		return false;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c7a77e0f18c2..775b5a4e8a5b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3325,14 +3325,10 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
 
 	GEM_BUG_ON(vm->closed);
 
-	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
-		return ERR_PTR(-EINVAL);
-
 	vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	INIT_LIST_HEAD(&vma->obj_link);
 	INIT_LIST_HEAD(&vma->exec_list);
 	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
 		init_request_active(&vma->last_read[i], i915_vma_retire);
@@ -3342,49 +3338,69 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
 	vma->size = obj->base.size;
 	vma->is_ggtt = i915_is_ggtt(vm);
 
-	if (i915_is_ggtt(vm)) {
+	if (ggtt_view) {
 		vma->ggtt_view = *ggtt_view;
 		if (ggtt_view->type == I915_GGTT_VIEW_PARTIAL)
 			vma->size = ggtt_view->params.partial.size << PAGE_SHIFT;
 		else if (ggtt_view->type == I915_GGTT_VIEW_ROTATED)
 			vma->size = intel_rotation_info_size(&ggtt_view->params.rotated) << PAGE_SHIFT;
 	} else
+
+	if (!vma->is_ggtt)
 		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
 
 	list_add_tail(&vma->obj_link, &obj->vma_list);
-
 	return vma;
 }
 
+static inline bool vma_matches(struct i915_vma *vma,
+			       struct i915_address_space *vm,
+			       const struct i915_ggtt_view *view)
+{
+	if (vma->vm != vm)
+		return false;
+
+	if (!vma->is_ggtt)
+		return true;
+
+	if (view == NULL)
+		return vma->ggtt_view.type == 0;
+
+	if (vma->ggtt_view.type != view->type)
+		return false;
+
+	return memcmp(&vma->ggtt_view.params,
+		      &view->params,
+		      sizeof(view->params)) == 0;
+}
+
 struct i915_vma *
-i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
-				  struct i915_address_space *vm)
+i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
+		    struct i915_address_space *vm,
+		    const struct i915_ggtt_view *view)
 {
 	struct i915_vma *vma;
 
-	vma = i915_gem_obj_to_vma(obj, vm);
-	if (!vma)
-		vma = __i915_gem_vma_create(obj, vm,
-					    i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
+	list_for_each_entry_reverse(vma, &obj->vma_list, obj_link)
+		if (vma_matches(vma, vm, view))
+			return vma;
 
-	return vma;
+	return NULL;
 }
 
 struct i915_vma *
-i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
-				       const struct i915_ggtt_view *view)
+i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
+				  struct i915_address_space *vm,
+				  const struct i915_ggtt_view *view)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
+	struct i915_vma *vma;
 
+	vma = i915_gem_obj_to_vma(obj, vm, view);
 	if (!vma)
-		vma = __i915_gem_vma_create(obj, &ggtt->base, view);
+		vma = __i915_gem_vma_create(obj, vm, view);
 
 	GEM_BUG_ON(vma->closed);
 	return vma;
-
 }
 
 static struct scatterlist *
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 5655358a60e1..5b28dc251e60 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -590,20 +590,6 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev);
 int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
 
-static inline bool
-i915_ggtt_view_equal(const struct i915_ggtt_view *a,
-                     const struct i915_ggtt_view *b)
-{
-	if (WARN_ON(!a || !b))
-		return false;
-
-	if (a->type != b->type)
-		return false;
-	if (a->type != I915_GGTT_VIEW_NORMAL)
-		return !memcmp(&a->params, &b->params, sizeof(a->params));
-	return true;
-}
-
 /**
  * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture
  * @vma: VMA to iomap
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 4cf82697b3db..6e6eac43db19 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -31,7 +31,7 @@
 struct render_state {
 	const struct intel_renderstate_rodata *rodata;
 	struct drm_i915_gem_object *obj;
-	u64 ggtt_offset;
+	struct i915_vma *vma;
 	int gen;
 	u32 aux_batch_size;
 	u32 aux_batch_offset;
@@ -57,10 +57,9 @@ render_state_get_rodata(const int gen)
 static int render_state_init(struct render_state *so,
 			     struct drm_i915_private *dev_priv)
 {
-	int ret;
+	struct i915_vma *vma;
 
 	so->gen = INTEL_GEN(dev_priv);
-	so->ggtt_offset = 0;
 	so->rodata = render_state_get_rodata(so->gen);
 	if (so->rodata == NULL)
 		return 0;
@@ -72,16 +71,14 @@ static int render_state_init(struct render_state *so,
 	if (IS_ERR(so->obj))
 		return PTR_ERR(so->obj);
 
-	ret = i915_gem_object_ggtt_pin(so->obj, NULL, 0, 0, 0);
-	if (ret)
-		goto free_gem;
+	vma = i915_gem_object_ggtt_pin(so->obj, NULL, 0, 0, 0);
+	if (IS_ERR(vma)) {
+		i915_gem_object_put(so->obj);
+		return PTR_ERR(vma);
+	}
 
-	so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj);
+	so->vma = vma;
 	return 0;
-
-free_gem:
-	i915_gem_object_put(so->obj);
-	return ret;
 }
 
 /*
@@ -121,7 +118,7 @@ static int render_state_setup(struct render_state *so)
 		u32 s = rodata->batch[i];
 
 		if (i * 4  == rodata->reloc[reloc_index]) {
-			u64 r = s + so->ggtt_offset;
+			u64 r = s + so->vma->node.start;
 			s = lower_32_bits(r);
 			if (so->gen >= 8) {
 				if (i + 1 >= rodata->batch_items ||
@@ -176,7 +173,7 @@ err_out:
 
 static void render_state_fini(struct render_state *so)
 {
-	i915_gem_object_ggtt_unpin(so->obj);
+	i915_vma_unpin(so->vma);
 	i915_gem_object_put(so->obj);
 }
 
@@ -209,14 +206,15 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
 	struct render_state so;
 	int ret;
 
-	ret = render_state_prepare(req->engine, &so);
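+	/* memset() returns its first argument: pass &so already zeroed */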
+	ret = render_state_prepare(req->engine, memset(&so, 0, sizeof(so)));
 	if (ret)
 		return ret;
 
 	if (so.rodata == NULL)
 		return 0;
 
-	ret = req->engine->emit_bb_start(req, so.ggtt_offset,
+	ret = req->engine->emit_bb_start(req, so.vma->node.start,
 					 so.rodata->batch_items * 4,
 					 I915_DISPATCH_SECURE);
 	if (ret)
@@ -224,7 +221,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
 
 	if (so.aux_batch_size > 8) {
 		ret = req->engine->emit_bb_start(req,
-						 (so.ggtt_offset +
+						 (so.vma->node.start +
 						  so.aux_batch_offset),
 						 so.aux_batch_size,
 						 I915_DISPATCH_SECURE);
@@ -232,7 +229,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
 			goto out;
 	}
 
-	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req, 0);
+	i915_vma_move_to_active(so.vma, req, 0);
 out:
 	render_state_fini(&so);
 	return ret;
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h
index c44fca8599bb..18cce3f06e9c 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.h
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
@@ -24,7 +24,7 @@
 #ifndef _I915_GEM_RENDER_STATE_H_
 #define _I915_GEM_RENDER_STATE_H_
 
-#include <linux/types.h>
+struct drm_i915_gem_request;
 
 int i915_gem_render_state_init(struct drm_i915_gem_request *req);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 8101d9169027..5a3d81e5458b 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -404,18 +404,12 @@ static void i915_gem_mark_busy(struct drm_i915_private *dev_priv,
  */
 void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 {
-	struct intel_engine_cs *engine;
-	struct intel_ring *ring;
+	struct intel_engine_cs *engine = request->engine;
+	struct intel_ring *ring = request->ring;
 	u32 request_start;
 	u32 reserved_tail;
 	int ret;
 
-	if (WARN_ON(request == NULL))
-		return;
-
-	engine = request->engine;
-	ring = request->ring;
-
 	/*
 	 * To ensure that this call will not fail, space for its emissions
 	 * should already have been reserved in the ring buffer. Let the ring
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 87e055267904..a8e228f5ceb4 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -111,7 +111,7 @@ struct drm_i915_gem_request {
 
 	/** Batch buffer related to this request if any (used for
 	 * error state dump only) */
-	struct drm_i915_gem_object *batch_obj;
+	struct i915_vma *batch;
 	struct list_head active_list;
 
 	/** Time at which this request was emitted, in jiffies. */
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index a881c243fca2..415fa04d5232 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -683,7 +683,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
 	if (gtt_offset == I915_GTT_OFFSET_NONE)
 		return obj;
 
-	vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base);
+	vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base, NULL);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto err;
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index d6acd0a27c06..29fc4dfd1947 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -114,33 +114,45 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
 }
 
 /* Is the current GTT allocation valid for the change in tiling? */
-static bool
+static int
 i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
 {
+	struct i915_vma *vma;
 	u32 size;
 
 	if (tiling_mode == I915_TILING_NONE)
-		return true;
+		return 0;
 
-	if (INTEL_INFO(obj->base.dev)->gen >= 4)
-		return true;
+	if (INTEL_GEN(obj->base.dev) >= 4)
+		return 0;
+
+	vma = i915_gem_object_to_ggtt(obj, NULL);
+	if (vma == NULL)
+		return 0;
+
+	if (!obj->map_and_fenceable)
+		return 0;
 
 	if (IS_GEN3(obj->base.dev)) {
-		if (i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK)
-			return false;
+		if (vma->node.start & ~I915_FENCE_START_MASK)
+			goto bad;
 	} else {
-		if (i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK)
-			return false;
+		if (vma->node.start & ~I830_FENCE_START_MASK)
+			goto bad;
 	}
 
 	size = i915_gem_get_gtt_size(obj->base.dev, obj->base.size, tiling_mode);
-	if (i915_gem_obj_ggtt_size(obj) != size)
-		return false;
+	if (vma->node.size < size)
+		goto bad;
 
-	if (i915_gem_obj_ggtt_offset(obj) & (size - 1))
-		return false;
+	if (vma->node.start & (size - 1))
+		goto bad;
 
-	return true;
+	return 0;
+
+bad:
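+	/* The current GGTT node cannot satisfy the fence constraints */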
+	return i915_vma_unbind(vma);
 }
 
 /**
@@ -227,10 +238,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 		 * has to also include the unfenced register the GPU uses
 		 * whilst executing a fenced command for an untiled object.
 		 */
-		if (obj->map_and_fenceable &&
-		    !i915_gem_object_fence_ok(obj, args->tiling_mode))
-			ret = i915_vma_unbind(i915_gem_obj_to_ggtt(obj));
-
+		ret = i915_gem_object_fence_ok(obj, args->tiling_mode);
 		if (ret == 0) {
 			if (obj->pages &&
 			    obj->madv == I915_MADV_WILLNEED &&
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 367b8b2ce5f2..3e42705e2fa4 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -632,18 +632,21 @@ static void i915_error_state_free(struct kref *error_ref)
 
 static struct drm_i915_error_object *
 i915_error_object_create(struct drm_i915_private *dev_priv,
-			 struct drm_i915_gem_object *src,
-			 struct i915_address_space *vm)
+			 struct i915_vma *vma)
 {
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct drm_i915_gem_object *src;
 	struct drm_i915_error_object *dst;
-	struct i915_vma *vma = NULL;
 	int num_pages;
 	bool use_ggtt;
 	int i = 0;
 	u64 reloc_offset;
 
-	if (src == NULL || src->pages == NULL)
+	if (vma == NULL)
+		return NULL;
+
+	src = vma->obj;
+	if (src->pages == NULL)
 		return NULL;
 
 	num_pages = src->base.size >> PAGE_SHIFT;
@@ -652,26 +655,19 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 	if (dst == NULL)
 		return NULL;
 
-	if (i915_gem_obj_bound(src, vm))
-		dst->gtt_offset = i915_gem_obj_offset(src, vm);
-	else
-		dst->gtt_offset = -1;
-
-	reloc_offset = dst->gtt_offset;
-	if (i915_is_ggtt(vm))
-		vma = i915_gem_obj_to_ggtt(src);
+	reloc_offset = dst->gtt_offset = vma->node.start;
 	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
-		   vma && (vma->bound & GLOBAL_BIND) &&
+		   (vma->bound & GLOBAL_BIND) &&
 		   reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
 
 	/* Cannot access stolen address directly, try to use the aperture */
 	if (src->stolen) {
 		use_ggtt = true;
 
-		if (!(vma && vma->bound & GLOBAL_BIND))
+		if (!(vma->bound & GLOBAL_BIND))
 			goto unwind;
 
-		reloc_offset = i915_gem_obj_ggtt_offset(src);
+		reloc_offset = vma->node.start;
 		if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end)
 			goto unwind;
 	}
@@ -724,8 +720,6 @@ unwind:
 	kfree(dst);
 	return NULL;
 }
-#define i915_error_ggtt_object_create(dev_priv, src) \
-	i915_error_object_create((dev_priv), (src), &(dev_priv)->ggtt.base)
 
 /* The error capture is special as tries to run underneath the normal
  * locking rules - so we use the raw version of the i915_gem_active lookup.
@@ -851,10 +845,10 @@ static void gen8_record_semaphore_state(struct drm_i915_private *dev_priv,
 	if (!i915.semaphores)
 		return;
 
-	if (!error->semaphore_obj)
+	if (!error->semaphore_obj && dev_priv->semaphore_vma)
 		error->semaphore_obj =
-			i915_error_ggtt_object_create(dev_priv,
-						      dev_priv->semaphore_obj);
+			i915_error_object_create(dev_priv,
+						 dev_priv->semaphore_vma);
 
 	for_each_engine_id(to, dev_priv, id) {
 		int idx;
@@ -1042,9 +1036,7 @@ static void i915_gem_record_active_context(struct intel_engine_cs *engine,
 
 	list_for_each_entry(vma, &dev_priv->ggtt.base.active_list, vm_link) {
 		if ((error->ccid & PAGE_MASK) == vma->node.start) {
-			ering->ctx = i915_error_object_create(dev_priv,
-							      vma->obj,
-							      vma->vm);
+			ering->ctx = i915_error_object_create(dev_priv, vma);
 			break;
 		}
 	}
@@ -1086,13 +1078,12 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 			 */
 			error->ring[i].batchbuffer =
 				i915_error_object_create(dev_priv,
-							 request->batch_obj,
-							 vm);
+							 request->batch);
 
 			if (HAS_BROKEN_CS_TLB(dev_priv))
 				error->ring[i].wa_batchbuffer =
-					i915_error_ggtt_object_create(dev_priv,
-								      engine->scratch.obj);
+					i915_error_object_create(dev_priv,
+								 engine->scratch);
 
 			if (request->pid) {
 				struct task_struct *task;
@@ -1112,17 +1103,15 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 			error->ring[i].cpu_ring_head = ring->head;
 			error->ring[i].cpu_ring_tail = ring->tail;
 			error->ring[i].ringbuffer =
-				i915_error_ggtt_object_create(dev_priv,
-							      ring->obj);
+				i915_error_object_create(dev_priv, ring->vma);
 		}
 
 		error->ring[i].hws_page =
-			i915_error_ggtt_object_create(dev_priv,
-						      engine->status_page.obj);
+			i915_error_object_create(dev_priv,
+						 engine->status_page.vma);
 
 		error->ring[i].wa_ctx =
-			i915_error_ggtt_object_create(dev_priv,
-						      engine->wa_ctx.obj);
+			i915_error_object_create(dev_priv, engine->wa_ctx.vma);
 
 		i915_gem_record_active_context(engine, error, &error->ring[i]);
 
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 1c92c4c6b0e1..90db9a88fddc 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -375,7 +375,6 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 	for_each_engine(engine, dev_priv) {
 		struct intel_context *ce = &ctx->engine[engine->id];
 		struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id];
-		struct drm_i915_gem_object *obj;
 
 		/* TODO: We have a design issue to be solved here. Only when we
 		 * receive the first batch, we know which engine is used by the
@@ -384,23 +383,20 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 		 * for now who owns a GuC client. But for future owner of GuC
 		 * client, need to make sure lrc is pinned prior to enter here.
 		 */
-		if (!ce->state)
+		if (!ce->vma)
 			break;	/* XXX: continue? */
 
 		lrc->context_desc = lower_32_bits(ce->lrc_desc);
 
 		/* The state page is after PPHWSP */
-		gfx_addr = i915_gem_obj_ggtt_offset(ce->state);
+		gfx_addr = ce->vma->node.start;
 		lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
 		lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
 				(engine->guc_id << GUC_ELC_ENGINE_OFFSET);
 
-		obj = ce->ring->obj;
-		gfx_addr = i915_gem_obj_ggtt_offset(obj);
-
-		lrc->ring_begin = gfx_addr;
-		lrc->ring_end = gfx_addr + obj->base.size - 1;
-		lrc->ring_next_free_location = gfx_addr;
+		lrc->ring_begin = ce->ring->vma->node.start;
+		lrc->ring_end = lrc->ring_begin + ce->ring->vma->node.size - 1;
+		lrc->ring_next_free_location = lrc->ring_begin;
 		lrc->ring_current_tail_pointer_value = 0;
 
 		desc.engines_used |= (1 << engine->guc_id);
@@ -602,23 +598,24 @@ static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	struct drm_i915_gem_object *obj;
-	int ret;
+	struct i915_vma *vma;
 
 	obj = i915_gem_object_create(dev_priv->dev, size);
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
-	ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
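+	/* GuC cannot access addresses below GUC_WOPCM_TOP, so bias the pin */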
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
 				       PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
-	if (ret) {
+	if (IS_ERR(vma)) {
 		i915_gem_object_put(obj);
-		return ERR_PTR(ret);
+		return vma;
 	}
 
 	/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
 	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
 
-	return i915_gem_obj_to_ggtt(obj);
+	return vma;
 }
 
 /**
@@ -988,7 +984,7 @@ int intel_guc_suspend(struct drm_device *dev)
 	/* any value greater than GUC_POWER_D0 */
 	data[1] = GUC_POWER_D1;
 	/* first page is shared data with GuC */
-	data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state);
+	data[2] = ctx->engine[RCS].vma->node.start;
 
 	return host2guc_action(guc, data, ARRAY_SIZE(data));
 }
@@ -1013,7 +1009,7 @@ int intel_guc_resume(struct drm_device *dev)
 	data[0] = HOST2GUC_ACTION_EXIT_S_STATE;
 	data[1] = GUC_POWER_D0;
 	/* first page is shared data with GuC */
-	data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state);
+	data[2] = ctx->engine[RCS].vma->node.start;
 
 	return host2guc_action(guc, data, ARRAY_SIZE(data));
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 0cfaace38370..cc6f7a49bf58 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2240,14 +2240,14 @@ static unsigned int intel_surf_alignment(const struct drm_i915_private *dev_priv
 	}
 }
 
-int
-intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
-			   unsigned int rotation)
+struct i915_vma *
+intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 {
 	struct drm_device *dev = fb->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	struct i915_ggtt_view view;
+	struct i915_vma *vma;
 	u32 alignment;
 	int ret;
 
@@ -2274,10 +2274,11 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 	 */
 	intel_runtime_pm_get(dev_priv);
 
-	ret = i915_gem_object_pin_to_display_plane(obj, alignment,
-						   &view);
-	if (ret)
+	vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
 		goto err_pm;
+	}
 
 	/* Install a fence for tiled scan-out. Pre-i965 always needs a
 	 * fence, whereas 965+ only requires a fence if using
@@ -2304,19 +2305,20 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 	}
 
 	intel_runtime_pm_put(dev_priv);
-	return 0;
+	return vma;
 
 err_unpin:
-	i915_gem_object_unpin_from_display_plane(obj, &view);
+	i915_gem_object_unpin_from_display_plane(vma);
 err_pm:
 	intel_runtime_pm_put(dev_priv);
-	return ret;
+	return ERR_PTR(ret);
 }
 
 void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 {
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	struct i915_ggtt_view view;
+	struct i915_vma *vma;
 
 	WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
 
@@ -2325,7 +2327,8 @@ void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 	if (view.type == I915_GGTT_VIEW_NORMAL)
 		i915_gem_object_unpin_fence(obj);
 
-	i915_gem_object_unpin_from_display_plane(obj, &view);
+	vma = i915_gem_object_to_ggtt(obj, &view);
+	i915_gem_object_unpin_from_display_plane(vma);
 }
 
 /*
@@ -2587,7 +2590,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 			continue;
 
 		obj = intel_fb_obj(fb);
-		if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) {
+		if (i915_gem_object_ggtt_offset(obj, NULL) == plane_config->base) {
 			drm_framebuffer_reference(fb);
 			goto valid_fb;
 		}
@@ -2745,11 +2748,11 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
 	I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
 	if (INTEL_INFO(dev)->gen >= 4) {
 		I915_WRITE(DSPSURF(plane),
-			   i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
+			   i915_gem_object_ggtt_offset(obj, NULL) + intel_crtc->dspaddr_offset);
 		I915_WRITE(DSPTILEOFF(plane), (y << 16) | x);
 		I915_WRITE(DSPLINOFF(plane), linear_offset);
 	} else
-		I915_WRITE(DSPADDR(plane), i915_gem_obj_ggtt_offset(obj) + linear_offset);
+		I915_WRITE(DSPADDR(plane), i915_gem_object_ggtt_offset(obj, NULL) + linear_offset);
 	POSTING_READ(reg);
 }
 
@@ -2849,7 +2852,7 @@ static void ironlake_update_primary_plane(struct drm_plane *primary,
 
 	I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
 	I915_WRITE(DSPSURF(plane),
-		   i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
+		   i915_gem_object_ggtt_offset(obj, NULL) + intel_crtc->dspaddr_offset);
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
 		I915_WRITE(DSPOFFSET(plane), (y << 16) | x);
 	} else {
@@ -2882,7 +2885,7 @@ u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
 	intel_fill_fb_ggtt_view(&view, intel_plane->base.state->fb,
 				intel_plane->base.state->rotation);
 
-	vma = i915_gem_obj_to_ggtt_view(obj, &view);
+	vma = i915_gem_object_to_ggtt(obj, &view);
 	if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n",
 		view.type))
 		return -1;
@@ -11385,7 +11388,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 			intel_ring_emit(ring, MI_STORE_REGISTER_MEM |
 					      MI_SRM_LRM_GLOBAL_GTT);
 		intel_ring_emit_reg(ring, DERRMR);
-		intel_ring_emit(ring, req->engine->scratch.gtt_offset + 256);
+		intel_ring_emit(ring, req->engine->scratch->node.start + 256);
 		if (IS_GEN8(dev)) {
 			intel_ring_emit(ring, 0);
 			intel_ring_emit(ring, MI_NOOP);
@@ -11634,6 +11637,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	struct intel_engine_cs *engine;
 	bool mmio_flip;
 	struct drm_i915_gem_request *request;
+	struct i915_vma *vma;
 	int ret;
 
 	/*
@@ -11739,9 +11743,11 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 
 	mmio_flip = use_mmio_flip(engine, obj);
 
-	ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
-	if (ret)
+	vma = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
 		goto cleanup_pending;
+	}
 
 	work->gtt_offset = intel_plane_obj_offset(to_intel_plane(primary),
 						  obj, 0);
@@ -13965,7 +13971,11 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		if (ret)
 			DRM_DEBUG_KMS("failed to attach phys object\n");
 	} else {
-		ret = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
+		struct i915_vma *vma;
+
+		vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
+		if (IS_ERR(vma))
+			ret = PTR_ERR(vma);
 	}
 
 	if (ret == 0) {
@@ -14334,7 +14344,7 @@ intel_update_cursor_plane(struct drm_plane *plane,
 	if (!obj)
 		addr = 0;
 	else if (!INTEL_INFO(dev)->cursor_needs_physical)
-		addr = i915_gem_obj_ggtt_offset(obj);
+		addr = i915_gem_object_ggtt_offset(obj, NULL);
 	else
 		addr = obj->phys_handle->busaddr;
 
@@ -16160,7 +16170,6 @@ void intel_modeset_gem_init(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_crtc *c;
 	struct drm_i915_gem_object *obj;
-	int ret;
 
 	intel_init_gt_powersave(dev_priv);
 
@@ -16174,15 +16183,17 @@ void intel_modeset_gem_init(struct drm_device *dev)
 	 * for this.
 	 */
 	for_each_crtc(dev, c) {
+		struct i915_vma *vma;
+
 		obj = intel_fb_obj(c->primary->fb);
 		if (obj == NULL)
 			continue;
 
 		mutex_lock(&dev->struct_mutex);
-		ret = intel_pin_and_fence_fb_obj(c->primary->fb,
+		vma = intel_pin_and_fence_fb_obj(c->primary->fb,
 						 c->primary->state->rotation);
 		mutex_unlock(&dev->struct_mutex);
-		if (ret) {
+		if (IS_ERR(vma)) {
 			DRM_ERROR("failed to pin boot fb on pipe %d\n",
 				  to_intel_crtc(c)->pipe);
 			drm_framebuffer_unreference(c->primary->fb);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 834646b4cc3f..30ef29873571 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -159,6 +159,7 @@ struct intel_framebuffer {
 struct intel_fbdev {
 	struct drm_fb_helper helper;
 	struct intel_framebuffer *fb;
+	struct i915_vma *vma;
 	async_cookie_t cookie;
 	int preferred_bpp;
 };
@@ -1207,8 +1208,8 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
 void intel_release_load_detect_pipe(struct drm_connector *connector,
 				    struct intel_load_detect_pipe *old,
 				    struct drm_modeset_acquire_ctx *ctx);
-int intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
-			       unsigned int rotation);
+struct i915_vma *
+intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation);
 void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation);
 struct drm_framebuffer *
 __intel_framebuffer_create(struct drm_device *dev,
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index 45ee07b888a0..e1eb96b50ec1 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -742,7 +742,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc)
 	/* FIXME: We lack the proper locking here, so only run this on the
 	 * platforms that need. */
 	if (IS_GEN(dev_priv, 5, 6))
-		cache->fb.ilk_ggtt_offset = i915_gem_obj_ggtt_offset(obj);
+		cache->fb.ilk_ggtt_offset = i915_gem_object_ggtt_offset(obj, NULL);
 	cache->fb.pixel_format = fb->pixel_format;
 	cache->fb.stride = fb->pitches[0];
 	cache->fb.fence_reg = obj->fence_reg;
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 10600975fe8d..e76d18f7c733 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -187,7 +187,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	struct fb_info *info;
 	struct drm_framebuffer *fb;
 	struct i915_vma *vma;
-	struct drm_i915_gem_object *obj;
 	bool prealloc = false;
 	void *vaddr;
 	int ret;
@@ -215,17 +214,17 @@ static int intelfb_create(struct drm_fb_helper *helper,
 		sizes->fb_height = intel_fb->base.height;
 	}
 
-	obj = intel_fb->obj;
-
 	mutex_lock(&dev->struct_mutex);
 
 	/* Pin the GGTT vma for our access via info->screen_base.
 	 * This also validates that any existing fb inherited from the
 	 * BIOS is suitable for own access.
 	 */
-	ret = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0));
-	if (ret)
+	vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0));
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
 		goto out_unlock;
+	}
 
 	info = drm_fb_helper_alloc_fbi(helper);
 	if (IS_ERR(info)) {
@@ -245,8 +244,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT;
 	info->fbops = &intelfb_ops;
 
-	vma = i915_gem_obj_to_ggtt(obj);
-
 	/* setup aperture base/size for vesafb takeover */
 	info->apertures->ranges[0].base = dev->mode_config.fb_base;
 	info->apertures->ranges[0].size = ggtt->mappable_end;
@@ -273,14 +270,14 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	 * If the object is stolen however, it will be full of whatever
 	 * garbage was left in there.
 	 */
-	if (ifbdev->fb->obj->stolen && !prealloc)
+	if (intel_fb->obj->stolen && !prealloc)
 		memset_io(info->screen_base, 0, info->screen_size);
 
 	/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
 
-	DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08llx, bo %p\n",
-		      fb->width, fb->height,
-		      i915_gem_obj_ggtt_offset(obj), obj);
+	DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08llx\n",
+		      fb->width, fb->height, vma->node.start);
+	ifbdev->vma = vma;
 
 	mutex_unlock(&dev->struct_mutex);
 	vga_switcheroo_client_fb_set(dev->pdev, info);
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 1ecf88fd0b10..64f57c07afcc 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -235,12 +235,12 @@ static inline bool guc_ucode_response(struct drm_i915_private *dev_priv,
  * Note that GuC needs the CSS header plus uKernel code to be copied by the
  * DMA engine in one operation, whereas the RSA signature is loaded via MMIO.
  */
-static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv)
+static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
+			      struct i915_vma *vma)
 {
 	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
-	struct drm_i915_gem_object *fw_obj = guc_fw->guc_fw_obj;
 	unsigned long offset;
-	struct sg_table *sg = fw_obj->pages;
+	struct sg_table *sg = vma->obj->pages;
 	u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT];
 	int i, ret = 0;
 
@@ -257,7 +257,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv)
 	I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size);
 
 	/* Set the source address for the new blob */
-	offset = i915_gem_obj_ggtt_offset(fw_obj) + guc_fw->header_offset;
+	offset = vma->node.start + guc_fw->header_offset;
 	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
 	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
 
@@ -312,6 +312,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
 {
 	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
 	struct drm_device *dev = dev_priv->dev;
+	struct i915_vma *vma;
 	int ret;
 
 	ret = i915_gem_object_set_to_gtt_domain(guc_fw->guc_fw_obj, false);
@@ -320,10 +321,10 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
 		return ret;
 	}
 
-	ret = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0);
-	if (ret) {
-		DRM_DEBUG_DRIVER("pin failed %d\n", ret);
-		return ret;
+	vma = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0);
+	if (IS_ERR(vma)) {
+		DRM_DEBUG_DRIVER("pin failed %d\n", (int)PTR_ERR(vma));
+		return PTR_ERR(vma);
 	}
 
 	/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
@@ -364,7 +365,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
 
 	set_guc_init_params(dev_priv);
 
-	ret = guc_ucode_xfer_dma(dev_priv);
+	ret = guc_ucode_xfer_dma(dev_priv, vma);
 
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
@@ -372,7 +373,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
 	 * We keep the object pages for reuse during resume. But we can unpin it
 	 * now that DMA has completed, so it doesn't continue to take up space.
 	 */
-	i915_gem_object_ggtt_unpin(guc_fw->guc_fw_obj);
+	i915_vma_unpin(vma);
 
 	return ret;
 }
@@ -653,12 +654,8 @@ fail:
 	DRM_ERROR("Failed to fetch GuC firmware from %s (error %d)\n",
 		  guc_fw->guc_fw_path, err);
 
-	mutex_lock(&dev->struct_mutex);
-	obj = guc_fw->guc_fw_obj;
-	if (obj)
-		i915_gem_object_put(obj);
+	i915_gem_object_put_unlocked(guc_fw->guc_fw_obj);
 	guc_fw->guc_fw_obj = NULL;
-	mutex_unlock(&dev->struct_mutex);
 
 	release_firmware(fw);		/* OK even if fw is NULL */
 	guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_FAIL;
@@ -737,7 +734,7 @@ void intel_guc_fini(struct drm_device *dev)
 
 	i915_gem_object_put(guc_fw->guc_fw_obj);
 	guc_fw->guc_fw_obj = NULL;
-	mutex_unlock(&dev->struct_mutex);
 
 	guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_NONE;
+	mutex_unlock(&dev->struct_mutex);
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4bf63af2a282..49e7bf170a04 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -326,7 +326,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
 	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<<GEN8_CTX_ID_WIDTH));
 
 	desc = engine->ctx_desc_template;			/* bits  0-11 */
-	desc |= ce->lrc_vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE;
+	desc |= ce->vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE;
 								/* bits 12-31 */
 	desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;		/* bits 32-52 */
 
@@ -765,6 +765,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
 {
 	struct drm_i915_private *dev_priv = ctx->i915;
 	struct intel_context *ce = &ctx->engine[engine->id];
+	struct i915_vma *vma;
 	void *vaddr;
 	u32 *lrc_reg_state;
 	int ret;
@@ -774,16 +775,18 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
 	if (ce->pin_count++)
 		return 0;
 
-	ret = i915_gem_object_ggtt_pin(ce->state, NULL,
+	vma = i915_gem_object_ggtt_pin(ce->state, NULL,
 				       0, GEN8_LR_CONTEXT_ALIGN,
 				       PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
-	if (ret)
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
 		goto err;
+	}
 
-	vaddr = i915_gem_object_pin_map(ce->state);
+	vaddr = i915_gem_object_pin_map(vma->obj);
 	if (IS_ERR(vaddr)) {
 		ret = PTR_ERR(vaddr);
-		goto unpin_ctx_obj;
+		goto unpin_vma;
 	}
 
 	lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
@@ -792,12 +795,12 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
 	if (ret)
 		goto unpin_map;
 
-	ce->lrc_vma = i915_gem_obj_to_ggtt(ce->state);
+	ce->vma = vma;
 	intel_lr_context_descriptor_update(ctx, engine);
 
 	lrc_reg_state[CTX_RING_BUFFER_START+1] = ce->ring->vma->node.start;
 	ce->lrc_reg_state = lrc_reg_state;
-	ce->state->dirty = true;
+	vma->obj->dirty = true;
 
 	/* Invalidate GuC TLB. */
 	if (i915.enable_guc_submission)
@@ -807,9 +810,9 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
 	return 0;
 
 unpin_map:
-	i915_gem_object_unpin_map(ce->state);
-unpin_ctx_obj:
-	i915_gem_object_ggtt_unpin(ce->state);
+	i915_gem_object_unpin_map(vma->obj);
+unpin_vma:
+	__i915_vma_unpin(vma);
 err:
 	ce->pin_count = 0;
 	return ret;
@@ -829,9 +832,9 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx,
 	intel_ring_unpin(ce->ring);
 
 	i915_gem_object_unpin_map(ce->state);
-	i915_gem_object_ggtt_unpin(ce->state);
+	i915_vma_unpin(ce->vma);
 
-	ce->lrc_vma = NULL;
+	ce->vma = NULL;
 	ce->lrc_desc = 0;
 	ce->lrc_reg_state = NULL;
 
@@ -921,7 +924,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
 	wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
 				   MI_SRM_LRM_GLOBAL_GTT));
 	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
-	wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256);
+	wa_ctx_emit(batch, index, engine->scratch->node.start + 256);
 	wa_ctx_emit(batch, index, 0);
 
 	wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
@@ -939,7 +942,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
 	wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
 				   MI_SRM_LRM_GLOBAL_GTT));
 	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
-	wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256);
+	wa_ctx_emit(batch, index, engine->scratch->node.start + 256);
 	wa_ctx_emit(batch, index, 0);
 
 	return index;
@@ -1013,7 +1016,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
 
 	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
 	/* Actual scratch location is at 128 bytes offset */
-	scratch_addr = engine->scratch.gtt_offset + 2*CACHELINE_BYTES;
+	scratch_addr = engine->scratch->node.start + 2*CACHELINE_BYTES;
 
 	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
 	wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
@@ -1142,47 +1145,41 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine,
 	return wa_ctx_end(wa_ctx, *offset = index, 1);
 }
 
-static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
+static struct i915_vma *
+lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
 {
-	int ret;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
 
-	engine->wa_ctx.obj = i915_gem_object_create(engine->i915->dev,
-						   PAGE_ALIGN(size));
-	if (IS_ERR(engine->wa_ctx.obj)) {
-		DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
-		ret = PTR_ERR(engine->wa_ctx.obj);
-		engine->wa_ctx.obj = NULL;
-		return ret;
-	}
+	obj = i915_gem_object_create(engine->i915->dev, PAGE_ALIGN(size));
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
 
-	ret = i915_gem_object_ggtt_pin(engine->wa_ctx.obj, NULL,
-				       0, PAGE_SIZE, 0);
-	if (ret) {
-		DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
-				 ret);
-		i915_gem_object_put(engine->wa_ctx.obj);
-		return ret;
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, 0);
+	if (IS_ERR(vma)) {
+		i915_gem_object_put(obj);
+		return vma;
 	}
 
-	return 0;
+	return vma;
 }
 
 static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine)
 {
-	if (engine->wa_ctx.obj) {
-		i915_gem_object_ggtt_unpin(engine->wa_ctx.obj);
-		i915_gem_object_put(engine->wa_ctx.obj);
-		engine->wa_ctx.obj = NULL;
+	if (engine->wa_ctx.vma) {
+		i915_vma_unpin(engine->wa_ctx.vma);
+		i915_gem_object_put(engine->wa_ctx.vma->obj);
+		engine->wa_ctx.vma = NULL;
 	}
 }
 
 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 {
-	int ret;
+	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
 	uint32_t *batch;
 	uint32_t offset;
 	struct page *page;
-	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
+	int ret;
 
 	WARN_ON(engine->id != RCS);
 
@@ -1194,20 +1191,23 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 	}
 
 	/* some WA perform writes to scratch page, ensure it is valid */
-	if (engine->scratch.obj == NULL) {
+	if (engine->scratch == NULL) {
 		DRM_ERROR("scratch page not allocated for %s\n", engine->name);
 		return -EINVAL;
 	}
 
-	ret = lrc_setup_wa_ctx_obj(engine, PAGE_SIZE);
-	if (ret) {
+	wa_ctx->vma = lrc_setup_wa_ctx_obj(engine, PAGE_SIZE);
+	if (IS_ERR(wa_ctx->vma)) {
+		ret = PTR_ERR(wa_ctx->vma);
+		wa_ctx->vma = NULL;
 		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
 		return ret;
 	}
 
-	page = i915_gem_object_get_dirty_page(wa_ctx->obj, 0);
+	page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
 	batch = kmap_atomic(page);
 	offset = 0;
+	ret = 0;
 
 	if (IS_GEN8(engine->i915)) {
 		ret = gen8_init_indirectctx_bb(engine,
@@ -1464,7 +1463,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
 {
 	struct intel_ring *ring = request->ring;
 	struct intel_engine_cs *engine = request->engine;
-	u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = engine->scratch->node.start + 2 * CACHELINE_BYTES;
 	bool vf_flush_wa = false;
 	u32 flags = 0;
 	int ret;
@@ -1650,9 +1649,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 
 	intel_engine_fini_breadcrumbs(engine);
 
-	if (engine->status_page.obj) {
-		i915_gem_object_unpin_map(engine->status_page.obj);
-		engine->status_page.obj = NULL;
+	if (engine->status_page.vma) {
+		i915_gem_object_unpin_map(engine->status_page.vma->obj);
+		engine->status_page.vma = NULL;
 	}
 	intel_lr_context_unpin(dev_priv->kernel_context, engine);
 
@@ -1692,19 +1691,19 @@ logical_ring_default_irqs(struct intel_engine_cs *engine, unsigned shift)
 }
 
 static int
-lrc_setup_hws(struct intel_engine_cs *engine,
-	      struct drm_i915_gem_object *dctx_obj)
+lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma)
 {
+#define HWS_OFFSET (LRC_PPHWSP_PN * PAGE_SIZE)
 	void *hws;
 
 	/* The HWSP is part of the default context object in LRC mode. */
-	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) +
-				       LRC_PPHWSP_PN * PAGE_SIZE;
-	hws = i915_gem_object_pin_map(dctx_obj);
+	hws = i915_gem_object_pin_map(vma->obj);
 	if (IS_ERR(hws))
 		return PTR_ERR(hws);
-	engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE;
-	engine->status_page.obj = dctx_obj;
+
+	engine->status_page.page_addr = hws + HWS_OFFSET;
+	engine->status_page.gfx_addr = vma->node.start + HWS_OFFSET;
+	engine->status_page.vma = vma;
 
 	return 0;
 }
@@ -1828,7 +1827,7 @@ logical_ring_init(struct intel_engine_cs *engine)
 	}
 
 	/* And setup the hardware status page. */
-	ret = lrc_setup_hws(engine, dctx->engine[engine->id].state);
+	ret = lrc_setup_hws(engine, dctx->engine[engine->id].vma);
 	if (ret) {
 		DRM_ERROR("Failed to set up hws %s: %d\n", engine->name, ret);
 		goto error;
@@ -2109,9 +2108,9 @@ populate_lr_context(struct i915_gem_context *ctx,
 			       RING_INDIRECT_CTX(engine->mmio_base), 0);
 		ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET,
 			       RING_INDIRECT_CTX_OFFSET(engine->mmio_base), 0);
-		if (engine->wa_ctx.obj) {
+		if (engine->wa_ctx.vma) {
 			struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
-			uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj);
+			uint32_t ggtt_offset = wa_ctx->vma->node.start;
 
 			reg_state[CTX_RCS_INDIRECT_CTX+1] =
 				(ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) |
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 9b0fb7e23cbb..75bdd335d565 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -170,8 +170,8 @@ struct overlay_registers {
 struct intel_overlay {
 	struct drm_i915_private *i915;
 	struct intel_crtc *crtc;
-	struct drm_i915_gem_object *vid_bo;
-	struct drm_i915_gem_object *old_vid_bo;
+	struct drm_i915_gem_object *vid_bo, *old_vid_bo;
+	struct i915_vma *vid_vma, *old_vid_vma;
 	bool active;
 	bool pfit_active;
 	u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
@@ -316,7 +316,7 @@ static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
 {
 	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
 
-	i915_gem_object_ggtt_unpin(obj);
+	i915_gem_object_unpin_from_display_plane(overlay->old_vid_vma);
 	i915_gem_object_put(obj);
 
 	overlay->old_vid_bo = NULL;
@@ -324,14 +324,13 @@ static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
 
 static void intel_overlay_off_tail(struct intel_overlay *overlay)
 {
-	struct drm_i915_gem_object *obj = overlay->vid_bo;
-
 	/* never have the overlay hw on without showing a frame */
-	if (WARN_ON(!obj))
+	if (WARN_ON(!overlay->vid_vma))
 		return;
 
-	i915_gem_object_ggtt_unpin(obj);
-	i915_gem_object_put(obj);
+	i915_gem_object_unpin_from_display_plane(overlay->vid_vma);
+	i915_gem_object_put(overlay->vid_bo);
+	overlay->vid_vma = NULL;
 	overlay->vid_bo = NULL;
 
 	overlay->crtc->overlay = NULL;
@@ -751,6 +750,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	struct drm_i915_private *dev_priv = overlay->i915;
 	u32 swidth, swidthsw, sheight, ostride;
 	enum pipe pipe = overlay->crtc->pipe;
+	struct i915_vma *vma;
 
 	lockdep_assert_held(&dev_priv->dev->struct_mutex);
 	WARN_ON(!drm_modeset_is_locked(&dev_priv->dev->mode_config.connection_mutex));
@@ -759,10 +759,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	if (ret != 0)
 		return ret;
 
-	ret = i915_gem_object_pin_to_display_plane(new_bo, 0,
+	vma = i915_gem_object_pin_to_display_plane(new_bo, 0,
 						   &i915_ggtt_view_normal);
-	if (ret != 0)
-		return ret;
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
 
 	ret = i915_gem_object_put_fence(new_bo);
 	if (ret)
@@ -805,7 +805,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	swidth = params->src_w;
 	swidthsw = calc_swidthsw(dev_priv, params->offset_Y, tmp_width);
 	sheight = params->src_h;
-	iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_Y, &regs->OBUF_0Y);
+	iowrite32(vma->node.start + params->offset_Y, &regs->OBUF_0Y);
 	ostride = params->stride_Y;
 
 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
@@ -819,8 +819,8 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 				      params->src_w/uv_hscale);
 		swidthsw |= max_t(u32, tmp_U, tmp_V) << 16;
 		sheight |= (params->src_h/uv_vscale) << 16;
-		iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_U, &regs->OBUF_0U);
-		iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_V, &regs->OBUF_0V);
+		iowrite32(vma->node.start + params->offset_U, &regs->OBUF_0U);
+		iowrite32(vma->node.start + params->offset_V, &regs->OBUF_0V);
 		ostride |= params->stride_UV << 16;
 	}
 
@@ -845,14 +845,16 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 			  INTEL_FRONTBUFFER_OVERLAY(pipe));
 
 	overlay->old_vid_bo = overlay->vid_bo;
+	overlay->old_vid_vma = overlay->vid_vma;
 	overlay->vid_bo = new_bo;
+	overlay->vid_vma = vma;
 
 	intel_frontbuffer_flip(dev_priv->dev, INTEL_FRONTBUFFER_OVERLAY(pipe));
 
 	return 0;
 
 out_unpin:
-	i915_gem_object_ggtt_unpin(new_bo);
+	i915_gem_object_unpin_from_display_plane(vma);
 	return ret;
 }
 
@@ -1380,6 +1382,7 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
 	struct intel_overlay *overlay;
 	struct drm_i915_gem_object *reg_bo;
 	struct overlay_registers __iomem *regs;
+	struct i915_vma *vma = NULL;
 	int ret;
 
 	if (!HAS_OVERLAY(dev_priv))
@@ -1412,13 +1415,14 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
 		}
 		overlay->flip_addr = reg_bo->phys_handle->busaddr;
 	} else {
-		ret = i915_gem_object_ggtt_pin(reg_bo, NULL,
+		vma = i915_gem_object_ggtt_pin(reg_bo, NULL,
 					       0, PAGE_SIZE, PIN_MAPPABLE);
-		if (ret) {
+		if (IS_ERR(vma)) {
 			DRM_ERROR("failed to pin overlay register bo\n");
+			ret = PTR_ERR(vma);
 			goto out_free_bo;
 		}
-		overlay->flip_addr = i915_gem_obj_ggtt_offset(reg_bo);
+		overlay->flip_addr = vma->node.start;
 
 		ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
 		if (ret) {
@@ -1450,8 +1454,8 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
 	return;
 
 out_unpin_bo:
-	if (!OVERLAY_NEEDS_PHYSICAL(dev_priv))
-		i915_gem_object_ggtt_unpin(reg_bo);
+	if (vma)
+		i915_vma_unpin(vma);
 out_free_bo:
 	i915_gem_object_put(reg_bo);
 out_free:
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c8211913f2d6..32add39ee9dd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -182,7 +182,7 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
 {
 	struct intel_ring *ring = req->ring;
 	u32 scratch_addr =
-	       	req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+		req->engine->scratch->node.start + 2 * CACHELINE_BYTES;
 	int ret;
 
 	ret = intel_ring_begin(req, 6);
@@ -219,7 +219,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req,
 {
 	struct intel_ring *ring = req->ring;
 	u32 scratch_addr =
-	       	req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+		req->engine->scratch->node.start + 2 * CACHELINE_BYTES;
 	u32 flags = 0;
 	int ret;
 
@@ -294,7 +294,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req,
 {
 	struct intel_ring *ring = req->ring;
 	u32 scratch_addr =
-	       	req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+		req->engine->scratch->node.start + 2 * CACHELINE_BYTES;
 	u32 flags = 0;
 	int ret;
 
@@ -379,7 +379,8 @@ static int
 gen8_render_ring_flush(struct drm_i915_gem_request *req,
 		       u32 invalidate_domains, u32 flush_domains)
 {
-	u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+	u32 scratch_addr =
+		req->engine->scratch->node.start + 2 * CACHELINE_BYTES;
 	u32 flags = 0;
 	int ret;
 
@@ -540,7 +541,7 @@ static int init_ring_common(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct intel_ring *ring = engine->buffer;
-	struct drm_i915_gem_object *obj = ring->obj;
+	struct i915_vma *vma = ring->vma;
 	int ret = 0;
 
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
@@ -580,7 +581,7 @@ static int init_ring_common(struct intel_engine_cs *engine)
 	 * registers with the above sequence (the readback of the HEAD registers
 	 * also enforces ordering), otherwise the hw might lose the new ring
 	 * register values. */
-	I915_WRITE_START(engine, i915_gem_obj_ggtt_offset(obj));
+	I915_WRITE_START(engine, vma->node.start);
 
 	/* WaClearRingBufHeadRegAtInit:ctg,elk */
 	if (I915_READ_HEAD(engine))
@@ -595,16 +596,15 @@ static int init_ring_common(struct intel_engine_cs *engine)
 
 	/* If the head is still not zero, the ring is dead */
 	if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 &&
-		     I915_READ_START(engine) == i915_gem_obj_ggtt_offset(obj) &&
+		     I915_READ_START(engine) == vma->node.start &&
 		     (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) {
 		DRM_ERROR("%s initialization failed "
-			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
+			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08x]\n",
 			  engine->name,
 			  I915_READ_CTL(engine),
 			  I915_READ_CTL(engine) & RING_VALID,
 			  I915_READ_HEAD(engine), I915_READ_TAIL(engine),
-			  I915_READ_START(engine),
-			  (unsigned long)i915_gem_obj_ggtt_offset(obj));
+			  I915_READ_START(engine), (u32)vma->node.start);
 		ret = -EIO;
 		goto out;
 	}
@@ -624,20 +624,21 @@ out:
 
 void intel_fini_pipe_control(struct intel_engine_cs *engine)
 {
-	if (engine->scratch.obj == NULL)
+	if (!engine->scratch)
 		return;
 
-	i915_gem_object_ggtt_unpin(engine->scratch.obj);
-	i915_gem_object_put(engine->scratch.obj);
-	engine->scratch.obj = NULL;
+	i915_vma_unpin(engine->scratch);
+	i915_gem_object_put(engine->scratch->obj);
+	engine->scratch = NULL;
 }
 
 int intel_init_pipe_control(struct intel_engine_cs *engine, int size)
 {
 	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
 	int ret;
 
-	WARN_ON(engine->scratch.obj);
+	WARN_ON(engine->scratch);
 
 	obj = i915_gem_object_create_stolen(engine->i915->dev, size);
 	if (obj == NULL)
@@ -648,18 +649,19 @@ int intel_init_pipe_control(struct intel_engine_cs *engine, int size)
 		goto err;
 	}
 
-	ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH);
-	if (ret)
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
 		goto err_unref;
+	}
 
-	engine->scratch.obj = obj;
-	engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
-	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
-			 engine->name, engine->scratch.gtt_offset);
+	engine->scratch = vma;
+	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08llx\n",
+			 engine->name, (unsigned long long)vma->node.start);
 	return 0;
 
 err_unref:
-	i915_gem_object_put(engine->scratch.obj);
+	i915_gem_object_put(obj);
 err:
 	return ret;
 }
@@ -1217,10 +1219,13 @@ static void render_ring_cleanup(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 
-	if (dev_priv->semaphore_obj) {
-		i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
-		i915_gem_object_put(dev_priv->semaphore_obj);
-		dev_priv->semaphore_obj = NULL;
+	if (dev_priv->semaphore_vma) {
+		struct drm_i915_gem_object *obj = dev_priv->semaphore_vma->obj;
+
+		i915_vma_unpin(dev_priv->semaphore_vma);
+		dev_priv->semaphore_vma = NULL;
+
+		i915_gem_object_put(obj);
 	}
 
 	intel_fini_pipe_control(engine);
@@ -1684,7 +1689,7 @@ i830_emit_bb_start(struct drm_i915_gem_request *req,
 		   unsigned dispatch_flags)
 {
 	struct intel_ring *ring = req->ring;
-	u32 cs_offset = req->engine->scratch.gtt_offset;
+	u32 cs_offset = req->engine->scratch->node.start;
 	int ret;
 
 	ret = intel_ring_begin(req, 6);
@@ -1773,67 +1778,68 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine)
 
 static void cleanup_status_page(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
 
-	obj = engine->status_page.obj;
-	if (obj == NULL)
+	vma = engine->status_page.vma;
+	if (vma == NULL)
 		return;
+	engine->status_page.vma = NULL;
 
-	kunmap(sg_page(obj->pages->sgl));
-	i915_gem_object_ggtt_unpin(obj);
-	i915_gem_object_put(obj);
-	engine->status_page.obj = NULL;
+	kunmap(sg_page(vma->obj->pages->sgl));
+	i915_vma_unpin(vma);
 }
 
 static int init_status_page(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_object *obj = engine->status_page.obj;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	unsigned flags;
+	int ret;
 
-	if (obj == NULL) {
-		unsigned flags;
-		int ret;
+	if (engine->status_page.vma)
+		return 0;
 
-		obj = i915_gem_object_create(engine->i915->dev, 4096);
-		if (IS_ERR(obj)) {
-			DRM_ERROR("Failed to allocate status page\n");
-			return PTR_ERR(obj);
-		}
+	obj = i915_gem_object_create(engine->i915->dev, 4096);
+	if (IS_ERR(obj)) {
+		DRM_ERROR("Failed to allocate status page\n");
+		return PTR_ERR(obj);
+	}
 
-		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
-		if (ret)
-			goto err_unref;
-
-		flags = 0;
-		if (!HAS_LLC(engine->i915))
-			/* On g33, we cannot place HWS above 256MiB, so
-			 * restrict its pinning to the low mappable arena.
-			 * Though this restriction is not documented for
-			 * gen4, gen5, or byt, they also behave similarly
-			 * and hang if the HWS is placed at the top of the
-			 * GTT. To generalise, it appears that all !llc
-			 * platforms have issues with us placing the HWS
-			 * above the mappable region (even though we never
-			 * actualy map it).
-			 */
-			flags |= PIN_MAPPABLE;
-		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags);
-		if (ret) {
-err_unref:
-			i915_gem_object_put(obj);
-			return ret;
-		}
+	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
+	if (ret)
+		goto err_unref;
 
-		engine->status_page.obj = obj;
+	flags = 0;
+	if (!HAS_LLC(engine->i915))
+		/* On g33, we cannot place HWS above 256MiB, so
+		 * restrict its pinning to the low mappable arena.
+		 * Though this restriction is not documented for
+		 * gen4, gen5, or byt, they also behave similarly
+		 * and hang if the HWS is placed at the top of the
+		 * GTT. To generalise, it appears that all !llc
+		 * platforms have issues with us placing the HWS
+		 * above the mappable region (even though we never
+		 * actualy map it).
+		 */
+		flags |= PIN_MAPPABLE;
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unref;
 	}
 
-	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
+	engine->status_page.vma = vma;
+	engine->status_page.gfx_addr = vma->node.start;
 	engine->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
-	memset(engine->status_page.page_addr, 0, PAGE_SIZE);
 
 	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
 			engine->name, engine->status_page.gfx_addr);
 
 	return 0;
+
+err_unref:
+	i915_gem_object_put(obj);
+	return ret;
 }
 
 static int init_phys_status_page(struct intel_engine_cs *engine)
@@ -1857,15 +1863,16 @@ int intel_ring_pin(struct intel_ring *ring)
 {
 	struct drm_i915_private *dev_priv = ring->engine->i915;
 	struct drm_i915_gem_object *obj = ring->obj;
+	struct i915_vma *vma;
 	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
 	unsigned flags = PIN_OFFSET_BIAS | 4096;
 	void *addr;
 	int ret;
 
 	if (HAS_LLC(dev_priv) && !obj->stolen) {
-		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags);
-		if (ret)
-			return ret;
+		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags);
+		if (IS_ERR(vma))
+			return PTR_ERR(vma);
 
 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
 		if (ret)
@@ -1877,10 +1884,10 @@ int intel_ring_pin(struct intel_ring *ring)
 			goto err_unpin;
 		}
 	} else {
-		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
+		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
 					       flags | PIN_MAPPABLE);
-		if (ret)
-			return ret;
+		if (IS_ERR(vma))
+			return PTR_ERR(vma);
 
 		ret = i915_gem_object_set_to_gtt_domain(obj, true);
 		if (ret)
@@ -1889,7 +1896,7 @@ int intel_ring_pin(struct intel_ring *ring)
 		/* Access through the GTT requires the device to be awake. */
 		assert_rpm_wakelock_held(dev_priv);
 
-		addr = i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj));
+		addr = i915_vma_pin_iomap(vma);
 		if (IS_ERR(addr)) {
 			ret = PTR_ERR(addr);
 			goto err_unpin;
@@ -1897,11 +1904,11 @@ int intel_ring_pin(struct intel_ring *ring)
 	}
 
 	ring->vaddr = addr;
-	ring->vma = i915_gem_obj_to_ggtt(obj);
+	ring->vma = vma;
 	return 0;
 
 err_unpin:
-	i915_gem_object_ggtt_unpin(obj);
+	i915_vma_unpin(vma);
 	return ret;
 }
 
@@ -1916,7 +1923,7 @@ void intel_ring_unpin(struct intel_ring *ring)
 		i915_vma_unpin_iomap(ring->vma);
 	ring->vaddr = NULL;
 
-	i915_gem_object_ggtt_unpin(ring->obj);
+	i915_vma_unpin(ring->vma);
 	ring->vma = NULL;
 }
 
@@ -2007,10 +2014,16 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
 		return 0;
 
 	if (ce->state) {
-		ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0,
+		struct i915_vma *vma;
+
+		vma = i915_gem_object_ggtt_pin(ce->state, NULL, 0,
 					       ctx->ggtt_alignment, PIN_HIGH);
-		if (ret)
+		if (IS_ERR(vma)) {
+			ret = PTR_ERR(vma);
 			goto error;
+		}
+
+		ce->vma = vma;
 	}
 
 	/* The kernel context is only used as a placeholder for flushing the
@@ -2041,8 +2052,8 @@ static void intel_ring_context_unpin(struct i915_gem_context *ctx,
 	if (--ce->pin_count)
 		return;
 
-	if (ce->state)
-		i915_gem_object_ggtt_unpin(ce->state);
+	if (ce->vma)
+		i915_vma_unpin(ce->vma);
 
 	i915_gem_context_put(ctx);
 }
@@ -2335,8 +2346,8 @@ void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno)
 		if (HAS_VEBOX(dev_priv))
 			I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
 	}
-	if (dev_priv->semaphore_obj) {
-		struct drm_i915_gem_object *obj = dev_priv->semaphore_obj;
+	if (dev_priv->semaphore_vma) {
+		struct drm_i915_gem_object *obj = dev_priv->semaphore_vma->obj;
 		struct page *page = i915_gem_object_get_dirty_page(obj, 0);
 		void *semaphores = kmap(page);
 		memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
@@ -2576,16 +2587,20 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 				DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
 				i915.semaphores = 0;
 			} else {
+				struct i915_vma *vma;
+
 				i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
-				ret = i915_gem_object_ggtt_pin(obj, NULL,
+				vma = i915_gem_object_ggtt_pin(obj, NULL,
 							       0, 0,
 							       PIN_HIGH);
-				if (ret != 0) {
+				if (IS_ERR(vma)) {
 					i915_gem_object_put(obj);
 					DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
 					i915.semaphores = 0;
-				} else
-					dev_priv->semaphore_obj = obj;
+					vma = NULL;
+				}
+
+				dev_priv->semaphore_vma = vma;
 			}
 		}
 
@@ -2596,7 +2611,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		engine->irq_disable = gen8_ring_disable_irq;
 		engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
 		if (i915.semaphores) {
-			WARN_ON(!dev_priv->semaphore_obj);
 			engine->semaphore.sync_to = gen8_ring_sync;
 			engine->semaphore.signal = gen8_rcs_signal;
 			GEN8_RING_SEMAPHORE_INIT(engine);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index d19fb8c24919..934d5722dc27 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -26,10 +26,10 @@
  */
 #define I915_RING_FREE_SPACE 64
 
-struct  intel_hw_status_page {
+struct intel_hw_status_page {
 	u32		*page_addr;
 	unsigned int	gfx_addr;
-	struct		drm_i915_gem_object *obj;
+	struct		i915_vma *vma;
 };
 
 #define I915_READ_TAIL(ring) I915_READ(RING_TAIL((ring)->mmio_base))
@@ -57,16 +57,13 @@ struct  intel_hw_status_page {
 #define GEN8_SEMAPHORE_OFFSET(__from, __to)			     \
 	(((__from) * I915_NUM_ENGINES  + (__to)) * gen8_semaphore_seqno_size)
 #define GEN8_SIGNAL_OFFSET(__ring, to)			     \
-	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
+	(dev_priv->semaphore_vma->node.start + \
 	 GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
 #define GEN8_WAIT_OFFSET(__ring, from)			     \
-	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
+	(dev_priv->semaphore_vma->node.start + \
 	 GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
 
 #define GEN8_RING_SEMAPHORE_INIT(e) do { \
-	if (!dev_priv->semaphore_obj) { \
-		break; \
-	} \
 	(e)->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET((e), RCS); \
 	(e)->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET((e), VCS); \
 	(e)->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET((e), BCS); \
@@ -97,8 +94,8 @@ struct intel_engine_hangcheck {
 
 struct intel_ring {
 	struct drm_i915_gem_object *obj;
-	void *vaddr;
 	struct i915_vma *vma;
+	void *vaddr;
 
 	struct intel_engine_cs *engine;
 	struct list_head link;
@@ -139,7 +136,7 @@ struct  i915_ctx_workarounds {
 		u32 offset;
 		u32 size;
 	} indirect_ctx, per_ctx;
-	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
 };
 
 struct drm_i915_gem_request;
@@ -325,10 +322,7 @@ struct intel_engine_cs {
 
 	struct intel_engine_hangcheck hangcheck;
 
-	struct {
-		struct drm_i915_gem_object *obj;
-		u32 gtt_offset;
-	} scratch;
+	struct i915_vma *scratch;
 
 	bool needs_cmd_parser;
 
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 324ccb06397d..99bdbb9e4037 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -462,8 +462,8 @@ vlv_update_plane(struct drm_plane *dplane,
 
 	I915_WRITE(SPSIZE(pipe, plane), (crtc_h << 16) | crtc_w);
 	I915_WRITE(SPCNTR(pipe, plane), sprctl);
-	I915_WRITE(SPSURF(pipe, plane), i915_gem_obj_ggtt_offset(obj) +
-		   sprsurf_offset);
+	I915_WRITE(SPSURF(pipe, plane),
+		   i915_gem_object_ggtt_offset(obj, NULL) + sprsurf_offset);
 	POSTING_READ(SPSURF(pipe, plane));
 }
 
@@ -602,7 +602,7 @@ ivb_update_plane(struct drm_plane *plane,
 		I915_WRITE(SPRSCALE(pipe), sprscale);
 	I915_WRITE(SPRCTL(pipe), sprctl);
 	I915_WRITE(SPRSURF(pipe),
-		   i915_gem_obj_ggtt_offset(obj) + sprsurf_offset);
+		   i915_gem_object_ggtt_offset(obj, NULL) + sprsurf_offset);
 	POSTING_READ(SPRSURF(pipe));
 }
 
@@ -731,7 +731,7 @@ ilk_update_plane(struct drm_plane *plane,
 	I915_WRITE(DVSSCALE(pipe), dvsscale);
 	I915_WRITE(DVSCNTR(pipe), dvscntr);
 	I915_WRITE(DVSSURF(pipe),
-		   i915_gem_obj_ggtt_offset(obj) + dvssurf_offset);
+		   i915_gem_object_ggtt_offset(obj, NULL) + dvssurf_offset);
 	POSTING_READ(DVSSURF(pipe));
 }
 
-- 
2.8.1
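
For reference, the caller-side convention the series converges on can be
sketched as below; this is assembled from the hunks above, with 'obj',
'ce' and 'alignment' as placeholder names, not code from any one patch:

	struct i915_vma *vma;

	/* Pinning now hands back the VMA itself (ERR_PTR-encoded on
	 * failure), so the caller owns both the pin and the pointer
	 * used to track it ... */
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, alignment, PIN_HIGH);
	if (IS_ERR(vma))
		return PTR_ERR(vma);
	ce->vma = vma;

	/* ... and later releases exactly the pin it took, with no
	 * guessing about which of the object's VMAs was meant. */
	i915_vma_unpin(ce->vma);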


* [PATCH 38/38] drm/i915/overlay: Use VMA as the primary tracker for images
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (36 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 37/38] drm/i915: Track pinned VMA Chris Wilson
@ 2016-06-03 16:55 ` Chris Wilson
  2016-06-06 10:42 ` ✗ Ro.CI.BAT: failure for series starting with [01/38] drm/i915: Combine loops within i915_gem_evict_something Patchwork
  38 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-03 16:55 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_overlay.c | 32 +++++++++++++-------------------
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 75bdd335d565..ad57149f4809 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -170,8 +170,7 @@ struct overlay_registers {
 struct intel_overlay {
 	struct drm_i915_private *i915;
 	struct intel_crtc *crtc;
-	struct drm_i915_gem_object *vid_bo, *old_vid_bo;
-	struct i915_vma *vid_vma, *old_vid_vma;
+	struct i915_vma *vma, *old_vma;
 	bool active;
 	bool pfit_active;
 	u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
@@ -314,24 +313,21 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 
 static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
 {
-	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
+	i915_gem_object_unpin_from_display_plane(overlay->old_vma);
+	i915_gem_object_put(overlay->old_vma->obj);
 
-	i915_gem_object_unpin_from_display_plane(overlay->old_vid_vma);
-	i915_gem_object_put(obj);
-
-	overlay->old_vid_bo = NULL;
+	overlay->old_vma = NULL;
 }
 
 static void intel_overlay_off_tail(struct intel_overlay *overlay)
 {
 	/* never have the overlay hw on without showing a frame */
-	if (WARN_ON(!overlay->vid_vma))
+	if (WARN_ON(!overlay->vma))
 		return;
 
-	i915_gem_object_unpin_from_display_plane(overlay->vid_vma);
-	i915_gem_object_put(overlay->vid_bo);
-	overlay->vid_vma = NULL;
-	overlay->vid_bo = NULL;
+	i915_gem_object_unpin_from_display_plane(overlay->vma);
+	i915_gem_object_put(overlay->vma->obj);
+	overlay->vma = NULL;
 
 	overlay->crtc->overlay = NULL;
 	overlay->crtc = NULL;
@@ -422,7 +418,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 	/* Only wait if there is actually an old frame to release to
 	 * guarantee forward progress.
 	 */
-	if (!overlay->old_vid_bo)
+	if (!overlay->old_vma)
 		return 0;
 
 	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
@@ -455,7 +451,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 	intel_overlay_release_old_vid_tail(overlay);
 
 
-	i915_gem_track_fb(overlay->old_vid_bo, NULL,
+	i915_gem_track_fb(overlay->old_vma->obj, NULL,
 			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
 	return 0;
 }
@@ -841,13 +837,11 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	if (ret)
 		goto out_unpin;
 
-	i915_gem_track_fb(overlay->vid_bo, new_bo,
+	i915_gem_track_fb(overlay->vma->obj, new_bo,
 			  INTEL_FRONTBUFFER_OVERLAY(pipe));
 
-	overlay->old_vid_bo = overlay->vid_bo;
-	overlay->old_vid_vma = overlay->vid_vma;
-	overlay->vid_bo = new_bo;
-	overlay->vid_vma = vma;
+	overlay->old_vma = overlay->vma;
+	overlay->vma = vma;
 
 	intel_frontbuffer_flip(dev_priv->dev, INTEL_FRONTBUFFER_OVERLAY(pipe));
 
-- 
2.8.1
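
The handover protocol that the vma/old_vma pair implements can be
sketched as follows (names from the patch, sequencing per the functions
above; this is a summary, not an additional hunk):

	/* do_put_image: the previous frame must stay pinned until the
	 * hardware has latched the new one, so park it in old_vma */
	overlay->old_vma = overlay->vma;
	overlay->vma = vma;

	/* release_old_vid_tail: once the flip is known to be complete,
	 * drop the display pin and the object reference together */
	i915_gem_object_unpin_from_display_plane(overlay->old_vma);
	i915_gem_object_put(overlay->old_vma->obj);
	overlay->old_vma = NULL;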


* ✗ Ro.CI.BAT: failure for series starting with [01/38] drm/i915: Combine loops within i915_gem_evict_something
  2016-06-03 16:55 Tracking VMA Chris Wilson
                   ` (37 preceding siblings ...)
  2016-06-03 16:55 ` [PATCH 38/38] drm/i915/overlay: Use VMA as the primary tracker for images Chris Wilson
@ 2016-06-06 10:42 ` Patchwork
  38 siblings, 0 replies; 58+ messages in thread
From: Patchwork @ 2016-06-06 10:42 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/38] drm/i915: Combine loops within i915_gem_evict_something
URL   : https://patchwork.freedesktop.org/series/8250/
State : failure

== Summary ==

Applying: drm/i915: Combine loops within i915_gem_evict_something
fatal: sha1 information is lacking or useless (drivers/gpu/drm/i915/i915_gem_evict.c).
error: could not build fake ancestor
Patch failed at 0001 drm/i915: Combine loops within i915_gem_evict_something
The copy of the patch that failed is found in: .git/rebase-apply/patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


* Re: [PATCH 04/38] drm/i915: Remove request retirement before each batch
  2016-06-03 16:55 ` [PATCH 04/38] drm/i915: Remove request retirement before each batch Chris Wilson
@ 2016-06-06 13:40   ` Mika Kuoppala
  0 siblings, 0 replies; 58+ messages in thread
From: Mika Kuoppala @ 2016-06-06 13:40 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> This reimplements the denial-of-service protection against igt from
>
> commit 227f782e4667fc622810bce8be8ccdeee45f89c2
> Author: Chris Wilson <chris@chris-wilson.co.uk>
> Date:   Thu May 15 10:41:42 2014 +0100
>
>     drm/i915: Retire requests before creating a new one
>
> and transfers the stall from before each batch into get_pages().
> The issue is that the stall is increasing latency between batches which
> is detrimental in some cases (especially coupled with execlists) to
> keeping the GPU well fed. 

I didn't check the claims about the latency benefits, but overall,
retiring when we need the pages instead of retiring on each execbuffer
makes sense to me, so

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> Also, we have observed that retiring requests can itself free objects
> (and requests) and therefore makes a good first step when shrinking.
>
> v2: Recycle objects prior to i915_gem_object_get_pages()
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            | 1 -
>  drivers/gpu/drm/i915/i915_gem.c            | 9 ++++++---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 --
>  drivers/gpu/drm/i915/i915_gem_request.c    | 2 +-
>  4 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 09f6f0eecd96..a065325580d8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3065,7 +3065,6 @@ struct drm_i915_gem_request *
>  i915_gem_find_active_request(struct intel_engine_cs *engine);
>  
>  void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
> -void i915_gem_retire_requests_ring(struct intel_engine_cs *engine);
>  
>  static inline u32 i915_reset_counter(struct i915_gpu_error *error)
>  {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a7aa465cb76d..19b8d2ea7698 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1989,8 +1989,7 @@ err_pages:
>  int
>  i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>  {
> -	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> -	const struct drm_i915_gem_object_ops *ops = obj->ops;
> +	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>  	int ret;
>  
>  	if (obj->pages)
> @@ -2003,7 +2002,10 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>  
>  	BUG_ON(obj->pages_pin_count);
>  
> -	ret = ops->get_pages(obj);
> +	/* Recycle as many active objects as possible first */
> +	i915_gem_retire_requests(dev_priv);
> +
> +	ret = obj->ops->get_pages(obj);
>  	if (ret)
>  		return ret;
>  
> @@ -4161,6 +4163,7 @@ i915_gem_cleanup_engines(struct drm_device *dev)
>  static void
>  init_engine_lists(struct intel_engine_cs *engine)
>  {
> +	/* Early initialisation so that core GEM works during engine setup */
>  	INIT_LIST_HEAD(&engine->request_list);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 7b381358512e..1b19a36adedc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -751,8 +751,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
>  	bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
>  	int retry;
>  
> -	i915_gem_retire_requests_ring(engine);
> -
>  	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
>  
>  	INIT_LIST_HEAD(&ordered_vmas);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 38e5daecd8f5..59afc8e547c4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -734,7 +734,7 @@ int i915_wait_request(struct drm_i915_gem_request *req)
>  	return 0;
>  }
>  
> -void i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
> +static void i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_gem_request *request, *next;
>  
> -- 
> 2.8.1
>
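
The resulting shape of the allocation path, reassembled from the hunks
above with the motivation written out as a comment (the comment is an
annotation, not part of the patch):

	int
	i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
	{
		struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
		int ret;

		if (obj->pages)
			return 0;
		...
		/* Recycle as many active objects as possible first:
		 * retiring completed requests drops their references and
		 * can free both objects and requests, exactly when we
		 * are about to allocate backing pages and memory
		 * pressure matters most. */
		i915_gem_retire_requests(dev_priv);

		ret = obj->ops->get_pages(obj);
		if (ret)
			return ret;
		...
	}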

* Re: [PATCH 05/38] drm/i915: Remove i915_gem_execbuffer_retire_commands()
  2016-06-03 16:55 ` [PATCH 05/38] drm/i915: Remove i915_gem_execbuffer_retire_commands() Chris Wilson
@ 2016-06-06 14:26   ` Mika Kuoppala
  0 siblings, 0 replies; 58+ messages in thread
From: Mika Kuoppala @ 2016-06-06 14:26 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Move the single line to the callsite as the name is now misleading, and
> the purpose is solely to add the request to the execution queue.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +--------
>  1 file changed, 1 insertion(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 1b19a36adedc..40937a09855d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1177,13 +1177,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>  	}
>  }
>  
> -static void
> -i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
> -{
> -	/* Add a breadcrumb for the completion of the batch buffer */
> -	__i915_add_request(params->request, params->batch_obj, true);
> -}
> -
>  static int
>  i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
>  {
> @@ -1677,7 +1670,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  
>  	ret = execbuf_submit(params, args, &eb->vmas);
>  err_request:
> -	i915_gem_execbuffer_retire_commands(params);
> +	__i915_add_request(params->request, params->batch_obj, ret == 0);
>  
>  err_batch_unpin:
>  	/*
> -- 
> 2.8.1
>

* Re: [PATCH 06/38] drm/i915: Pad GTT views of exec objects up to user specified size
  2016-06-03 16:55 ` [PATCH 06/38] drm/i915: Pad GTT views of exec objects up to user specified size Chris Wilson
@ 2016-06-08  9:41   ` Daniel Vetter
  2016-06-08 10:08     ` Chris Wilson
  0 siblings, 1 reply; 58+ messages in thread
From: Daniel Vetter @ 2016-06-08  9:41 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Jun 03, 2016 at 05:55:21PM +0100, Chris Wilson wrote:
> Our GPUs impose certain requirements upon buffers that depend upon how
> exactly they are used. Typically this is expressed as that they require
> a larger surface than would be naively computed by pitch * height.
> Normally such requirements are hidden away in the userspace driver, but
> when we accept pointers from strangers and later impose extra conditions
> on them, the original client allocator has no idea about the
> monstrosities in the GPU and we require the userspace driver to inform
> the kernel how many padding pages are required beyond the client
> allocation.
> 
> v2: Long time, no see
> v3: Try an anonymous union for uapi struct compatibility
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Hm, where's the userspace for this? Commit message should elaborate imo a
bit more on what's going on here ...
-Daniel
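
For what it's worth, a hypothetical userspace usage would look roughly
like this -- the flag and fields come from the uapi hunk below, while
the handle and size names are made up for illustration:

	struct drm_i915_gem_exec_object2 entry = {
		.handle = bo_handle,	/* buffer allocated by the client */
		.flags = EXEC_OBJECT_PAD_TO_SIZE,
		/* Reserve GTT space beyond the object's own size; the
		 * value must be page-aligned or execbuf returns -EINVAL
		 * (see validate_exec_list() below). */
		.pad_to_size = surface_size + gpu_required_padding,
	};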

> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  6 ++-
>  drivers/gpu/drm/i915/i915_gem.c            | 82 +++++++++++++++---------------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++-
>  include/uapi/drm/i915_drm.h                |  8 ++-
>  4 files changed, 65 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a065325580d8..9520adba33f6 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2945,11 +2945,13 @@ void i915_gem_free_object(struct drm_gem_object *obj);
>  int __must_check
>  i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  		    struct i915_address_space *vm,
> +		    uint64_t size,
>  		    uint32_t alignment,
>  		    uint64_t flags);
>  int __must_check
>  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  			 const struct i915_ggtt_view *view,
> +			 uint64_t size,
>  			 uint32_t alignment,
>  			 uint64_t flags);
>  
> @@ -3209,8 +3211,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
>  	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>  	struct i915_ggtt *ggtt = &dev_priv->ggtt;
>  
> -	return i915_gem_object_pin(obj, &ggtt->base,
> -				   alignment, flags | PIN_GLOBAL);
> +	return i915_gem_object_pin(obj, &ggtt->base, 0, alignment,
> +				   flags | PIN_GLOBAL);
>  }
>  
>  void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 19b8d2ea7698..0f0101300b2b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1438,7 +1438,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	}
>  
>  	/* Now pin it into the GTT if needed */
> -	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
> +	ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
>  	if (ret)
>  		goto unlock;
>  
> @@ -2678,21 +2678,20 @@ static struct i915_vma *
>  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  			   struct i915_address_space *vm,
>  			   const struct i915_ggtt_view *ggtt_view,
> +			   uint64_t size,
>  			   unsigned alignment,
>  			   uint64_t flags)
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	struct drm_i915_private *dev_priv = to_i915(dev);
> -	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> -	u32 fence_alignment, unfenced_alignment;
> -	u32 search_flag, alloc_flag;
>  	u64 start, end;
> -	u64 size, fence_size;
> +	u32 search_flag, alloc_flag;
>  	struct i915_vma *vma;
>  	int ret;
>  
>  	if (i915_is_ggtt(vm)) {
> -		u32 view_size;
> +		u32 fence_size, fence_alignment, unfenced_alignment;
> +		u64 view_size;
>  
>  		if (WARN_ON(!ggtt_view))
>  			return ERR_PTR(-EINVAL);
> @@ -2710,48 +2709,39 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  								view_size,
>  								obj->tiling_mode,
>  								false);
> -		size = flags & PIN_MAPPABLE ? fence_size : view_size;
> +		size = max(size, view_size);
> +		if (flags & PIN_MAPPABLE)
> +			size = max_t(u64, size, fence_size);
> +
> +		if (alignment == 0)
> +			alignment = flags & PIN_MAPPABLE ? fence_alignment :
> +				unfenced_alignment;
> +		if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
> +			DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
> +				  ggtt_view ? ggtt_view->type : 0,
> +				  alignment);
> +			return ERR_PTR(-EINVAL);
> +		}
>  	} else {
> -		fence_size = i915_gem_get_gtt_size(dev,
> -						   obj->base.size,
> -						   obj->tiling_mode);
> -		fence_alignment = i915_gem_get_gtt_alignment(dev,
> -							     obj->base.size,
> -							     obj->tiling_mode,
> -							     true);
> -		unfenced_alignment =
> -			i915_gem_get_gtt_alignment(dev,
> -						   obj->base.size,
> -						   obj->tiling_mode,
> -						   false);
> -		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
> +		size = max_t(u64, size, obj->base.size);
> +		alignment = 4096;
>  	}
>  
>  	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
>  	end = vm->total;
>  	if (flags & PIN_MAPPABLE)
> -		end = min_t(u64, end, ggtt->mappable_end);
> +		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
>  	if (flags & PIN_ZONE_4G)
>  		end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
>  
> -	if (alignment == 0)
> -		alignment = flags & PIN_MAPPABLE ? fence_alignment :
> -						unfenced_alignment;
> -	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
> -		DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
> -			  ggtt_view ? ggtt_view->type : 0,
> -			  alignment);
> -		return ERR_PTR(-EINVAL);
> -	}
> -
>  	/* If binding the object/GGTT view requires more space than the entire
>  	 * aperture has, reject it early before evicting everything in a vain
>  	 * attempt to find space.
>  	 */
>  	if (size > end) {
> -		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
> +		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
>  			  ggtt_view ? ggtt_view->type : 0,
> -			  size,
> +			  size, obj->base.size,
>  			  flags & PIN_MAPPABLE ? "mappable" : "total",
>  			  end);
>  		return ERR_PTR(-E2BIG);
> @@ -3243,7 +3233,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	 * (e.g. libkms for the bootup splash), we have to ensure that we
>  	 * always use map_and_fenceable for all scanout buffers.
>  	 */
> -	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
> +	ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
>  				       view->type == I915_GGTT_VIEW_NORMAL ?
>  				       PIN_MAPPABLE : 0);
>  	if (ret)
> @@ -3393,12 +3383,17 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>  }
>  
>  static bool
> -i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
> +i915_vma_misplaced(struct i915_vma *vma,
> +		   uint64_t size,
> +		   uint32_t alignment,
> +		   uint64_t flags)
>  {
>  	struct drm_i915_gem_object *obj = vma->obj;
>  
> -	if (alignment &&
> -	    vma->node.start & (alignment - 1))
> +	if (vma->node.size < size)
> +		return true;
> +
> +	if (alignment && vma->node.start & (alignment - 1))
>  		return true;
>  
>  	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
> @@ -3442,6 +3437,7 @@ static int
>  i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  		       struct i915_address_space *vm,
>  		       const struct i915_ggtt_view *ggtt_view,
> +		       uint64_t size,
>  		       uint32_t alignment,
>  		       uint64_t flags)
>  {
> @@ -3469,7 +3465,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
>  			return -EBUSY;
>  
> -		if (i915_vma_misplaced(vma, alignment, flags)) {
> +		if (i915_vma_misplaced(vma, size, alignment, flags)) {
>  			WARN(vma->pin_count,
>  			     "bo is already pinned in %s with incorrect alignment:"
>  			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
> @@ -3490,8 +3486,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  
>  	bound = vma ? vma->bound : 0;
>  	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
> -		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
> -						 flags);
> +		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
> +						 size, alignment, flags);
>  		if (IS_ERR(vma))
>  			return PTR_ERR(vma);
>  	} else {
> @@ -3513,17 +3509,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  int
>  i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  		    struct i915_address_space *vm,
> +		    uint64_t size,
>  		    uint32_t alignment,
>  		    uint64_t flags)
>  {
>  	return i915_gem_object_do_pin(obj, vm,
>  				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
> -				      alignment, flags);
> +				      size, alignment, flags);
>  }
>  
>  int
>  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  			 const struct i915_ggtt_view *view,
> +			 uint64_t size,
>  			 uint32_t alignment,
>  			 uint64_t flags)
>  {
> @@ -3534,7 +3532,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  	BUG_ON(!view);
>  
>  	return i915_gem_object_do_pin(obj, &ggtt->base, view,
> -				      alignment, flags | PIN_GLOBAL);
> +				      size, alignment, flags | PIN_GLOBAL);
>  }
>  
>  void
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 40937a09855d..c1e7ee212e7e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -652,10 +652,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  			flags |= PIN_HIGH;
>  	}
>  
> -	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
> +	ret = i915_gem_object_pin(obj, vma->vm,
> +				  entry->pad_to_size,
> +				  entry->alignment,
> +				  flags);
>  	if ((ret == -ENOSPC  || ret == -E2BIG) &&
>  	    only_mappable_for_reloc(entry->flags))
>  		ret = i915_gem_object_pin(obj, vma->vm,
> +					  entry->pad_to_size,
>  					  entry->alignment,
>  					  flags & ~PIN_MAPPABLE);
>  	if (ret)
> @@ -718,6 +722,9 @@ eb_vma_misplaced(struct i915_vma *vma)
>  	    vma->node.start & (entry->alignment - 1))
>  		return true;
>  
> +	if (vma->node.size < entry->pad_to_size)
> +		return true;
> +
>  	if (entry->flags & EXEC_OBJECT_PINNED &&
>  	    vma->node.start != entry->offset)
>  		return true;
> @@ -1058,6 +1065,13 @@ validate_exec_list(struct drm_device *dev,
>  		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
>  			return -EINVAL;
>  
> +		/* pad_to_size was once a reserved field, so sanitize it */
> +		if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
> +			if (offset_in_page(exec[i].pad_to_size))
> +				return -EINVAL;
> +		} else
> +			exec[i].pad_to_size = 0;
> +
>  		/* First check for malicious input causing overflow in
>  		 * the worst case where we need to allocate the entire
>  		 * relocation tree as a single array.
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index d6c668e58426..3b861746ba7a 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -701,10 +701,14 @@ struct drm_i915_gem_exec_object2 {
>  #define EXEC_OBJECT_WRITE	(1<<2)
>  #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
>  #define EXEC_OBJECT_PINNED	(1<<4)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
> +#define EXEC_OBJECT_PAD_TO_SIZE	(1<<5)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
>  	__u64 flags;
>  
> -	__u64 rsvd1;
> +	union {
> +		__u64 rsvd1;
> +		__u64 pad_to_size;
> +	};
>  	__u64 rsvd2;
>  };
>  
> -- 
> 2.8.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: [PATCH 10/38] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin()
  2016-06-03 16:55 ` [PATCH 10/38] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin() Chris Wilson
@ 2016-06-08  9:43   ` Daniel Vetter
  2016-06-08 12:58     ` Chris Wilson
  0 siblings, 1 reply; 58+ messages in thread
From: Daniel Vetter @ 2016-06-08  9:43 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Jun 03, 2016 at 05:55:25PM +0100, Chris Wilson wrote:
> Since i915_gem_obj_ggtt_pin() is an idiom-breaking curry function for
> i915_gem_object_ggtt_pin(), spare us the confusion and remove it.
> Removing it now simplifies later patches to change the i915_vma_pin()
> (and friends) interface.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

The diff looks like it accidentally squashed in a speed-up to help gcc
along with the bitfields in vma. It needs to be unsquashed.
-Daniel
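
For anyone untangling the squashed half: the union packs the old fields
into a single word, as far as can be reconstructed from the magic
constants in the hunks below (the macro names here are invented for
illustration, they are not in the patch):

	/* Implied vma->flags layout:
	 *   bits 0-3  pin_count -- vma->flags++/-- pins and unpins
	 *   bits 4-7  bound     -- GLOBAL_BIND/LOCAL_BIND shifted by 4
	 *   bits 8+   active    -- one bit per engine
	 */
	#define VMA_PIN_MASK		0x0000000f
	#define VMA_BOUND_SHIFT		4
	#define VMA_ACTIVE_SHIFT	8	/* the 0x100 << engine users */
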
> ---
>  drivers/gpu/drm/i915/i915_drv.h              | 35 ++++++++-------------
>  drivers/gpu/drm/i915/i915_gem.c              | 46 +++++++++++++--------------
>  drivers/gpu/drm/i915/i915_gem_context.c      |  5 ++-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c   | 10 +++---
>  drivers/gpu/drm/i915/i915_gem_gtt.h          | 47 +++++++++++++++-------------
>  drivers/gpu/drm/i915/i915_gem_render_state.c |  2 +-
>  drivers/gpu/drm/i915/i915_guc_submission.c   |  4 +--
>  drivers/gpu/drm/i915/intel_guc_loader.c      |  2 +-
>  drivers/gpu/drm/i915/intel_lrc.c             |  8 +++--
>  drivers/gpu/drm/i915/intel_overlay.c         |  3 +-
>  drivers/gpu/drm/i915/intel_ringbuffer.c      | 16 +++++-----
>  11 files changed, 89 insertions(+), 89 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f537d8fc5e0f..861d132b2fe4 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2934,32 +2934,32 @@ void i915_gem_free_object(struct drm_gem_object *obj);
>  int __must_check
>  i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
>  /* Flags used by pin/bind&friends. */
> -#define PIN_MAPPABLE	(1<<0)
> -#define PIN_NONBLOCK	(1<<1)
> -#define PIN_GLOBAL	(1<<2)
> -#define PIN_OFFSET_BIAS	(1<<3)
> -#define PIN_USER	(1<<4)
> -#define PIN_UPDATE	(1<<5)
> -#define PIN_ZONE_4G	(1<<6)
> -#define PIN_HIGH	(1<<7)
> -#define PIN_OFFSET_FIXED	(1<<8)
> +#define PIN_GLOBAL	(1<<0)
> +#define PIN_USER	(1<<1)
> +#define PIN_UPDATE	(1<<2)
> +#define PIN_MAPPABLE	(1<<3)
> +#define PIN_ZONE_4G	(1<<4)
> +#define PIN_NONBLOCK	(1<<5)
> +#define PIN_HIGH	(1<<6)
> +#define PIN_OFFSET_BIAS	(1<<7)
> +#define PIN_OFFSET_FIXED (1<<8)
>  #define PIN_OFFSET_MASK (~4095)
>  
>  static inline void __i915_vma_pin(struct i915_vma *vma)
>  {
>  	GEM_BUG_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
> -	vma->pin_count++;
> +	vma->flags++;
>  }
>  
>  static inline bool i915_vma_is_pinned(struct i915_vma *vma)
>  {
> -	return vma->pin_count;
> +	return vma->flags & DRM_I915_GEM_OBJECT_MAX_PIN_COUNT;
>  }
>  
>  static inline void __i915_vma_unpin(struct i915_vma *vma)
>  {
>  	GEM_BUG_ON(!i915_vma_is_pinned(vma));
> -	vma->pin_count--;
> +	vma->flags--;
>  }
>  
>  static inline void i915_vma_unpin(struct i915_vma *vma)
> @@ -2972,7 +2972,7 @@ int __must_check
>  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  			 const struct i915_ggtt_view *view,
>  			 uint64_t size,
> -			 uint32_t alignment,
> +			 uint64_t alignment,
>  			 uint64_t flags);
>  
>  int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
> @@ -3223,15 +3223,6 @@ static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
>  unsigned long
>  i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj);
>  
> -static inline int __must_check
> -i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
> -		      uint32_t alignment,
> -		      unsigned flags)
> -{
> -	return i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal,
> -					0, alignment, flags);
> -}
> -
>  void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
>  				     const struct i915_ggtt_view *view);
>  static inline void
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 71a32a9f9858..53776a071ce7 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -772,7 +772,9 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
>  	char __user *user_data;
>  	int page_offset, page_length, ret;
>  
> -	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
> +	ret = i915_gem_object_ggtt_pin(obj, NULL,
> +				       0, 0,
> +				       PIN_MAPPABLE | PIN_NONBLOCK);
>  	if (ret)
>  		goto out;
>  
> @@ -3408,32 +3410,35 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
>  int
>  i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>  {
> -	unsigned bound = vma->bound;
> +	unsigned bound;
>  	int ret;
>  
>  	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
>  	GEM_BUG_ON((flags & PIN_GLOBAL) && !vma->is_ggtt);
>  
> -	if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
> -		return -EBUSY;
> -
>  	/* Pin early to prevent the shrinker/eviction logic from destroying
>  	 * our vma as we insert and bind.
>  	 */
> -	__i915_vma_pin(vma);
> +	bound = vma->flags++;
> +	if (WARN_ON((bound & 0xf) == (DRM_I915_GEM_OBJECT_MAX_PIN_COUNT-1))) {
> +		ret = -EBUSY;
> +		goto err;
> +	}
>  
> -	if (!bound) {
> +	if ((bound & 0xff) == 0) {
>  		ret = i915_vma_insert(vma, size, alignment, flags);
>  		if (ret)
>  			goto err;
>  	}
>  
> -	ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
> -	if (ret)
> -		goto err;
> +	if (~(bound >> 4) & (flags & (GLOBAL_BIND | LOCAL_BIND))) {
> +		ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
> +		if (ret)
> +			goto err;
>  
> -	if ((bound ^ vma->bound) & GLOBAL_BIND)
> -		__i915_vma_set_map_and_fenceable(vma);
> +		if ((bound ^ vma->flags) & (GLOBAL_BIND << 4))
> +			__i915_vma_set_map_and_fenceable(vma);
> +	}
>  
>  	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
>  	return 0;
> @@ -3447,13 +3452,14 @@ int
>  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  			 const struct i915_ggtt_view *view,
>  			 uint64_t size,
> -			 uint32_t alignment,
> +			 uint64_t alignment,
>  			 uint64_t flags)
>  {
>  	struct i915_vma *vma;
>  	int ret;
>  
> -	BUG_ON(!view);
> +	if (view == NULL)
> +		view = &i915_ggtt_view_normal;
>  
>  	vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
>  	if (IS_ERR(vma))
> @@ -3465,11 +3471,11 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  
>  		WARN(vma->pin_count,
>  		     "bo is already pinned in ggtt with incorrect alignment:"
> -		     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
> +		     " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
>  		     " obj->map_and_fenceable=%d\n",
>  		     upper_32_bits(vma->node.start),
>  		     lower_32_bits(vma->node.start),
> -		     alignment,
> +		     (long long)alignment,
>  		     !!(flags & PIN_MAPPABLE),
>  		     obj->map_and_fenceable);
>  		ret = i915_vma_unbind(vma);
> @@ -3484,13 +3490,7 @@ void
>  i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
>  				const struct i915_ggtt_view *view)
>  {
> -	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
> -
> -	GEM_BUG_ON(!vma);
> -	WARN_ON(i915_vma_is_pinned(vma));
> -	WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
> -
> -	__i915_vma_unpin(vma);
> +	i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
>  }
>  
>  int
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 5ed91406d4e9..c9b8c2c62828 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -722,9 +722,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>  		return 0;
>  
>  	/* Trying to pin first makes error handling easier. */
> -	ret = i915_gem_obj_ggtt_pin(to->engine[RCS].state,
> -				    to->ggtt_alignment,
> -				    0);
> +	ret = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0,
> +				       to->ggtt_alignment, 0);
>  	if (ret)
>  		return ret;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index cc9c0e4073ff..69bf73b51df9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -34,10 +34,10 @@
>  #include <linux/dma_remapping.h>
>  #include <linux/uaccess.h>
>  
> -#define  __EXEC_OBJECT_HAS_PIN (1<<31)
> -#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
> -#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
> -#define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
> +#define  __EXEC_OBJECT_HAS_PIN (1U<<31)
> +#define  __EXEC_OBJECT_HAS_FENCE (1U<<30)
> +#define  __EXEC_OBJECT_NEEDS_MAP (1U<<29)
> +#define  __EXEC_OBJECT_NEEDS_BIAS (1U<<28)
>  
>  #define BATCH_OFFSET_BIAS (256*1024)
>  
> @@ -1263,7 +1263,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
>  	if (ret)
>  		goto err;
>  
> -	ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0);
> +	ret = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
>  	if (ret)
>  		goto err;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 2bd8ec7e1948..5655358a60e1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -184,13 +184,30 @@ struct i915_vma {
>  
>  	struct i915_gem_active last_read[I915_NUM_ENGINES];
>  
> -	/** Flags and address space this VMA is bound to */
> +	union {
> +		struct {
> +			/**
> +			 * How many users have pinned this object in GTT space. The following
> +			 * users can each hold at most one reference: pwrite/pread, execbuffer
> +			 * (objects are not allowed multiple times for the same batchbuffer),
> +			 * and the framebuffer code. When switching/pageflipping, the
> +			 * framebuffer code has at most two buffers pinned per crtc.
> +			 *
> +			 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
> +			 * bits with absolutely no headroom. So use 4 bits. */
> +			unsigned int pin_count : 4;
> +#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
> +
> +			/** Flags and address space this VMA is bound to */
>  #define GLOBAL_BIND	(1<<0)
>  #define LOCAL_BIND	(1<<1)
> -	unsigned int bound : 4;
> -	unsigned int active : I915_NUM_ENGINES;
> -	bool is_ggtt : 1;
> -	bool closed : 1;
> +			unsigned int bound : 4;
> +			unsigned int active : I915_NUM_ENGINES;
> +			bool is_ggtt : 1;
> +			bool closed : 1;
> +		};
> +		unsigned int flags;
> +	};
>  
>  	/**
>  	 * Support different GGTT views into the same object.
> @@ -215,39 +232,27 @@ struct i915_vma {
>  	struct hlist_node exec_node;
>  	unsigned long exec_handle;
>  	struct drm_i915_gem_exec_object2 *exec_entry;
> -
> -	/**
> -	 * How many users have pinned this object in GTT space. The following
> -	 * users can each hold at most one reference: pwrite/pread, execbuffer
> -	 * (objects are not allowed multiple times for the same batchbuffer),
> -	 * and the framebuffer code. When switching/pageflipping, the
> -	 * framebuffer code has at most two buffers pinned per crtc.
> -	 *
> -	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
> -	 * bits with absolutely no headroom. So use 4 bits. */
> -	unsigned int pin_count:4;
> -#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
>  };
>  
>  static inline bool i915_vma_is_active(const struct i915_vma *vma)
>  {
> -	return vma->active;
> +	return vma->flags & (((1 << I915_NUM_ENGINES) - 1) << 8);
>  }
>  
>  static inline void i915_vma_set_active(struct i915_vma *vma, unsigned engine)
>  {
> -	vma->active |= 1 << engine;
> +	vma->flags |= 0x100 << engine;
>  }
>  
>  static inline void i915_vma_unset_active(struct i915_vma *vma, unsigned engine)
>  {
> -	vma->active &= ~(1 << engine);
> +	vma->flags &= ~(0x100 << engine);
>  }
>  
>  static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
>  					      unsigned engine)
>  {
> -	return vma->active & (1 << engine);
> +	return vma->flags & (0x100 << engine);
>  }
>  
>  struct i915_page_dma {
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index c0abe9a2210f..4cf82697b3db 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -72,7 +72,7 @@ static int render_state_init(struct render_state *so,
>  	if (IS_ERR(so->obj))
>  		return PTR_ERR(so->obj);
>  
> -	ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0);
> +	ret = i915_gem_object_ggtt_pin(so->obj, NULL, 0, 0, 0);
>  	if (ret)
>  		goto free_gem;
>  
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index cc4792df249d..63ef34c78494 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -613,8 +613,8 @@ static struct drm_i915_gem_object *gem_allocate_guc_obj(struct drm_device *dev,
>  		return NULL;
>  	}
>  
> -	if (i915_gem_obj_ggtt_pin(obj, PAGE_SIZE,
> -			PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) {
> +	if (i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
> +				     PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) {
>  		i915_gem_object_put(obj);
>  		return NULL;
>  	}
> diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
> index 74a5f11a5689..be93b458968a 100644
> --- a/drivers/gpu/drm/i915/intel_guc_loader.c
> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
> @@ -321,7 +321,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
>  		return ret;
>  	}
>  
> -	ret = i915_gem_obj_ggtt_pin(guc_fw->guc_fw_obj, 0, 0);
> +	ret = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0);
>  	if (ret) {
>  		DRM_DEBUG_DRIVER("pin failed %d\n", ret);
>  		return ret;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 964108cbb9c0..6cdc421fdc37 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -774,8 +774,9 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
>  	if (ce->pin_count++)
>  		return 0;
>  
> -	ret = i915_gem_obj_ggtt_pin(ce->state, GEN8_LR_CONTEXT_ALIGN,
> -				    PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
> +	ret = i915_gem_object_ggtt_pin(ce->state, NULL,
> +				       0, GEN8_LR_CONTEXT_ALIGN,
> +				       PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
>  	if (ret)
>  		goto err;
>  
> @@ -1154,7 +1155,8 @@ static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
>  		return ret;
>  	}
>  
> -	ret = i915_gem_obj_ggtt_pin(engine->wa_ctx.obj, PAGE_SIZE, 0);
> +	ret = i915_gem_object_ggtt_pin(engine->wa_ctx.obj, NULL,
> +				       0, PAGE_SIZE, 0);
>  	if (ret) {
>  		DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
>  				 ret);
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index 5f645ad2babd..9b0fb7e23cbb 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -1412,7 +1412,8 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
>  		}
>  		overlay->flip_addr = reg_bo->phys_handle->busaddr;
>  	} else {
> -		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE);
> +		ret = i915_gem_object_ggtt_pin(reg_bo, NULL,
> +					       0, PAGE_SIZE, PIN_MAPPABLE);
>  		if (ret) {
>  			DRM_ERROR("failed to pin overlay register bo\n");
>  			goto out_free_bo;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index d63e4fdc60de..f86039455c5a 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -648,7 +648,7 @@ int intel_init_pipe_control(struct intel_engine_cs *engine, int size)
>  		goto err;
>  	}
>  
> -	ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_HIGH);
> +	ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH);
>  	if (ret)
>  		goto err_unref;
>  
> @@ -1816,7 +1816,7 @@ static int init_status_page(struct intel_engine_cs *engine)
>  			 * actualy map it).
>  			 */
>  			flags |= PIN_MAPPABLE;
> -		ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
> +		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags);
>  		if (ret) {
>  err_unref:
>  			i915_gem_object_put(obj);
> @@ -1863,7 +1863,7 @@ int intel_ring_pin(struct intel_ring *ring)
>  	int ret;
>  
>  	if (HAS_LLC(dev_priv) && !obj->stolen) {
> -		ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, flags);
> +		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags);
>  		if (ret)
>  			return ret;
>  
> @@ -1877,8 +1877,8 @@ int intel_ring_pin(struct intel_ring *ring)
>  			goto err_unpin;
>  		}
>  	} else {
> -		ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE,
> -					    flags | PIN_MAPPABLE);
> +		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
> +					       flags | PIN_MAPPABLE);
>  		if (ret)
>  			return ret;
>  
> @@ -2007,7 +2007,8 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
>  		return 0;
>  
>  	if (ce->state) {
> -		ret = i915_gem_obj_ggtt_pin(ce->state, ctx->ggtt_alignment, 0);
> +		ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0,
> +					       ctx->ggtt_alignment, 0);
>  		if (ret)
>  			goto error;
>  	}
> @@ -2574,7 +2575,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  				i915.semaphores = 0;
>  			} else {
>  				i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> -				ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK);
> +				ret = i915_gem_object_ggtt_pin(obj, NULL,
> +							       0, 0, 0);
>  				if (ret != 0) {
>  					i915_gem_object_put(obj);
>  					DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
> -- 
> 2.8.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: [PATCH 13/38] drm/i915: Move obj->active:5 to obj->flags
  2016-06-03 16:55 ` [PATCH 13/38] drm/i915: Move obj->active:5 to obj->flags Chris Wilson
@ 2016-06-08  9:53   ` Daniel Vetter
  0 siblings, 0 replies; 58+ messages in thread
From: Daniel Vetter @ 2016-06-08  9:53 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Jun 03, 2016 at 05:55:28PM +0100, Chris Wilson wrote:
> We are motivated to avoid using a bitfield for obj->active for a couple
> of reasons. Firstly, we wish to document our lockless read of obj->active
> using READ_ONCE inside i915_gem_busy_ioctl() and that requires an
> integral type (i.e. not a bitfield). Secondly, gcc produces abysmal code
> when presented with a bitfield and that shows up high on the profiles of
> request tracking (mainly due to excess memory traffic as it converts
> the bitfield to a register and back and generates frequent AGI in the
> process).

AGI = address generator interlock, I guess gcc first dynamically computes
the address, then loads the right blocks, frobs them and stores the
blocks? Please elaborate (in the commit message), but if it's indeed this
silly, this makes sense.
-Daniel
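
On the first motivation: READ_ONCE() needs an addressable scalar
lvalue, which a bitfield member is not, so the lockless read in
i915_gem_busy_ioctl() has to go through an integer word, which is what
the new helper provides:

	/* Lockless snapshot of the active-engine mask, from the patch: */
	unsigned long active = __I915_BO_ACTIVE(obj);
	/* expands to (READ_ONCE(obj->flags) >> I915_BO_ACTIVE_SHIFT) &
	 * I915_BO_ACTIVE_MASK, which cannot be expressed against the old
	 * 'unsigned int active:I915_NUM_ENGINES' bitfield at all. */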
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c        |  2 +-
>  drivers/gpu/drm/i915/i915_drv.h            | 31 +++++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_gem.c            | 16 +++++++--------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +++++-----
>  drivers/gpu/drm/i915/i915_gem_shrinker.c   |  5 +++--
>  drivers/gpu/drm/i915/i915_gem_userptr.c    |  2 +-
>  6 files changed, 48 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 355bbf895c22..9154919fdd56 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -91,7 +91,7 @@ static int i915_capabilities(struct seq_file *m, void *data)
>  
>  static char get_active_flag(struct drm_i915_gem_object *obj)
>  {
> -	return obj->active ? '*' : ' ';
> +	return i915_gem_object_is_active(obj) ? '*' : ' ';
>  }
>  
>  static char get_pin_flag(struct drm_i915_gem_object *obj)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 236ade61cade..e72b7f35a98e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2136,12 +2136,16 @@ struct drm_i915_gem_object {
>  
>  	struct list_head batch_pool_link;
>  
> +	unsigned long flags;
>  	/**
>  	 * This is set if the object is on the active lists (has pending
>  	 * rendering and so a non-zero seqno), and is not set if it i s on
>  	 * inactive (ready to be unbound) list.
>  	 */
> -	unsigned int active:I915_NUM_ENGINES;
> +#define I915_BO_ACTIVE_SHIFT 0
> +#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1)
> +#define __I915_BO_ACTIVE(bo) \
> +	((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
>  
>  	/**
>  	 * This is set if the object has been written to since last bound
> @@ -2288,6 +2292,31 @@ i915_gem_object_put_unlocked(struct drm_i915_gem_object *obj)
>  }
>  __deprecated extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
>  
> +static inline unsigned long
> +i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
> +{
> +	return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK;
> +}
> +
> +static inline void
> +i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
> +{
> +	obj->flags |= 1 << (engine + I915_BO_ACTIVE_SHIFT);
> +}
> +
> +static inline void
> +i915_gem_object_unset_active(struct drm_i915_gem_object *obj, int engine)
> +{
> +	obj->flags &= ~(1 << (engine + I915_BO_ACTIVE_SHIFT));
> +}
> +
> +static inline bool
> +i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
> +				  int engine)
> +{
> +	return obj->flags & (1 << (engine + I915_BO_ACTIVE_SHIFT));
> +}
> +
>  /*
>   * Optimised SGL iterator for GEM objects
>   */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 05425ae7c8a8..a8279a598c4b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1126,7 +1126,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>  
>  	lockdep_assert_held(&obj->base.dev->struct_mutex);
>  
> -	active_mask = obj->active;
> +	active_mask = i915_gem_object_is_active(obj);
>  	if (!active_mask)
>  		return 0;
>  
> @@ -1165,7 +1165,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
>  	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
>  	BUG_ON(!dev_priv->mm.interruptible);
>  
> -	active_mask = obj->active;
> +	active_mask = i915_gem_object_is_active(obj);
>  	if (!active_mask)
>  		return 0;
>  
> @@ -2109,10 +2109,10 @@ i915_gem_object_retire__read(struct i915_gem_active *active,
>  	struct drm_i915_gem_object *obj =
>  		container_of(active, struct drm_i915_gem_object, last_read[ring]);
>  
> -	GEM_BUG_ON((obj->active & (1 << ring)) == 0);
> +	GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, ring));
>  
> -	obj->active &= ~(1 << ring);
> -	if (obj->active)
> +	i915_gem_object_unset_active(obj, ring);
> +	if (i915_gem_object_is_active(obj))
>  		return;
>  
>  	/* Bump our place on the bound list to keep it roughly in LRU order
> @@ -2383,7 +2383,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  		return -ENOENT;
>  	}
>  
> -	if (!obj->active)
> +	if (!i915_gem_object_is_active(obj))
>  		goto out;
>  
>  	for (i = 0; i < I915_NUM_ENGINES; i++) {
> @@ -2472,7 +2472,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
>  
>  	lockdep_assert_held(&obj->base.dev->struct_mutex);
>  
> -	active_mask = obj->active;
> +	active_mask = i915_gem_object_is_active(obj);
>  	if (!active_mask)
>  		return 0;
>  
> @@ -3516,7 +3516,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>  	 * become non-busy without any further actions.
>  	 */
>  	args->busy = 0;
> -	if (obj->active) {
> +	if (i915_gem_object_is_active(obj)) {
>  		struct drm_i915_gem_request *req;
>  		int i;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 69bf73b51df9..224265619f00 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -432,7 +432,7 @@ relocate_entry_clflush(struct drm_i915_gem_object *obj,
>  
>  static bool object_is_idle(struct drm_i915_gem_object *obj)
>  {
> -	unsigned long active = obj->active;
> +	unsigned long active = i915_gem_object_is_active(obj);
>  	int idx;
>  
>  	for_each_active(active, idx) {
> @@ -991,7 +991,7 @@ static int
>  i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>  				struct list_head *vmas)
>  {
> -	const unsigned other_rings = ~intel_engine_flag(req->engine);
> +	const unsigned other_rings = (~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK) << I915_BO_ACTIVE_SHIFT;
>  	struct i915_vma *vma;
>  	uint32_t flush_domains = 0;
>  	bool flush_chipset = false;
> @@ -1000,7 +1000,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>  	list_for_each_entry(vma, vmas, exec_list) {
>  		struct drm_i915_gem_object *obj = vma->obj;
>  
> -		if (obj->active & other_rings) {
> +		if (obj->flags & other_rings) {
>  			ret = i915_gem_object_sync(obj, req);
>  			if (ret)
>  				return ret;
> @@ -1159,9 +1159,9 @@ void i915_vma_move_to_active(struct i915_vma *vma,
>  	 * add the active reference first and queue for it to be dropped
>  	 * *last*.
>  	 */
> -	if (obj->active == 0)
> +	if (!i915_gem_object_is_active(obj))
>  		i915_gem_object_get(obj);
> -	obj->active |= 1 << idx;
> +	i915_gem_object_set_active(obj, idx);
>  	i915_gem_active_set(&obj->last_read[idx], req);
>  
>  	if (flags & EXEC_OBJECT_WRITE) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 71ad58836f48..5cbc4ee52c6d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -168,7 +168,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>  			    !is_vmalloc_addr(obj->mapping))
>  				continue;
>  
> -			if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active)
> +			if ((flags & I915_SHRINK_ACTIVE) == 0 &&
> +			    i915_gem_object_is_active(obj))
>  				continue;
>  
>  			if (!can_release_pages(obj))
> @@ -253,7 +254,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
>  			count += obj->base.size >> PAGE_SHIFT;
>  
>  	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> -		if (!obj->active && can_release_pages(obj))
> +		if (!i915_gem_object_is_active(obj) && can_release_pages(obj))
>  			count += obj->base.size >> PAGE_SHIFT;
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index e57521dbddc6..221792632290 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -67,7 +67,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj)
>  	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
>  	int i, n;
>  
> -	if (!obj->active)
> +	if (!i915_gem_object_is_active(obj))
>  		return;
>  
>  	n = 0;
> -- 
> 2.8.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: [PATCH 22/38] drm/gem/shrinker: Wait before acquiring struct_mutex under oom
  2016-06-03 16:55 ` [PATCH 22/38] drm/gem/shrinker: Wait before acquiring struct_mutex under oom Chris Wilson
@ 2016-06-08  9:57   ` Daniel Vetter
  2016-06-08 10:04     ` Chris Wilson
  0 siblings, 1 reply; 58+ messages in thread
From: Daniel Vetter @ 2016-06-08  9:57 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Jun 03, 2016 at 05:55:37PM +0100, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Shouldn't we only do this as a last resort, i.e. in the oom notifier?
Commit message is a bit sparse on the motivation here ;-)
-Daniel
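
One reading of the new loop, with the apparent motivation written out
as a comment (the annotation is an interpretation, not from the patch):

	while (!i915_gem_shrinker_lock(dev_priv->dev, &slu->unlock)) {
		/* Whoever holds struct_mutex is most likely blocked on
		 * the GPU; idling it gives them a chance to drop the
		 * lock, and the immediate retry avoids sleeping a full
		 * tick per attempt. */
		if (i915_gem_wait_for_idle(dev_priv) == 0 &&
		    i915_gem_shrinker_lock(dev_priv->dev, &slu->unlock))
			break;

		schedule_timeout_killable(1);
		if (fatal_signal_pending(current))
			return false;
		...
	}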

> ---
>  drivers/gpu/drm/i915/i915_gem_shrinker.c | 5 +++++
>  1 file changed, 5 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 6eea4abeb9ce..454be9719daa 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -312,9 +312,14 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv,
>  	unsigned long timeout = msecs_to_jiffies(timeout_ms) + 1;
>  
>  	while (!i915_gem_shrinker_lock(dev_priv->dev, &slu->unlock)) {
> +		if (i915_gem_wait_for_idle(dev_priv) == 0 &&
> +		    i915_gem_shrinker_lock(dev_priv->dev, &slu->unlock))
> +			break;
> +
>  		schedule_timeout_killable(1);
>  		if (fatal_signal_pending(current))
>  			return false;
> +
>  		if (--timeout == 0) {
>  			pr_err("Unable to lock GPU to purge memory.\n");
>  			return false;
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 27/38] drm/i915: Reduce locking inside swfinish ioctl
  2016-06-03 16:55 ` [PATCH 27/38] drm/i915: Reduce locking inside swfinish ioctl Chris Wilson
@ 2016-06-08  9:59   ` Daniel Vetter
  2016-06-08 10:03     ` Chris Wilson
  0 siblings, 1 reply; 58+ messages in thread
From: Daniel Vetter @ 2016-06-08  9:59 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx

On Fri, Jun 03, 2016 at 05:55:42PM +0100, Chris Wilson wrote:
> We only need to take the struct_mutex if the object is pinned to the
> display engine and so requires checking for clflush. (The race with
> userspace pinning the object to a framebuffer is irrelevant.)
> 
> v2: Use access once for compiler hints (or not as it is a bitfield)
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 29 ++++++++++++++++-------------
>  1 file changed, 16 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a4f949038d50..b78f9df1894c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1265,25 +1265,28 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
>  {
>  	struct drm_i915_gem_sw_finish *args = data;
>  	struct drm_i915_gem_object *obj;
> -	int ret = 0;
> -
> -	ret = i915_mutex_lock_interruptible(dev);
> -	if (ret)
> -		return ret;
> +	int ret;
>  
>  	obj = i915_gem_object_lookup(file, args->handle);
> -	if (!obj) {
> -		ret = -ENOENT;
> -		goto unlock;
> -	}
> +	if (!obj)
> +		return -ENOENT;
>  
>  	/* Pinned buffers may be scanout, so flush the cache */
> -	if (obj->pin_display)
> +	if (obj->pin_display) {

READ_ONCE(obj->pin_display)?

> +		ret = i915_mutex_lock_interruptible(dev);
> +		if (ret)
> +			goto unref;
> +
>  		i915_gem_object_flush_cpu_write_domain(obj);
>  
> -	i915_gem_object_put(obj);
> -unlock:
> -	mutex_unlock(&dev->struct_mutex);
> +		i915_gem_object_put(obj);
> +		mutex_unlock(&dev->struct_mutex);
> +	} else {
> +		ret = 0;
> +unref:
> +		i915_gem_object_put_unlocked(obj);
> +	}
> +
>  	return ret;
>  }
>  
> -- 
> 2.8.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 28/38] drm/i915: Remove pinned check from madvise ioctl
  2016-06-03 16:55 ` [PATCH 28/38] drm/i915: Remove pinned check from madvise ioctl Chris Wilson
@ 2016-06-08 10:01   ` Daniel Vetter
  0 siblings, 0 replies; 58+ messages in thread
From: Daniel Vetter @ 2016-06-08 10:01 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Jun 03, 2016 at 05:55:43PM +0100, Chris Wilson wrote:
> We don't need to incur the overhead of checking whether the object is
> pinned prior to changing its madvise. If the object is pinned, the
> madvise will not take effect until it is unpinned and so we cannot free
> the pages being pointed at by hardware. Marking a pinned object with
> allocated pages as DONTNEED will not trigger any undue warnings. The check
> is therefore superfluous, and by removing it we can remove a linear walk
> over all the vma the object has.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Needs lots of testing, but in principle I've become accustomed to this
idea meanwhile ;-)

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
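
(For the record, why this is safe: the madvise hint only takes effect in
put_pages, and that path already refuses to drop pinned pages. A sketch
from memory of the relevant gate:

	static int
	i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
	{
		...
		/* pinned pages are owned by hardware; keep them */
		if (obj->pages_pin_count)
			return -EBUSY;
		...
	}

so a pinned DONTNEED object simply keeps its pages until it is unpinned.)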

> ---
>  drivers/gpu/drm/i915/i915_gem.c | 6 ------
>  1 file changed, 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b78f9df1894c..dad00800aeef 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3600,11 +3600,6 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
>  		goto unlock;
>  	}
>  
> -	if (i915_gem_obj_is_pinned(obj)) {
> -		ret = -EINVAL;
> -		goto out;
> -	}
> -
>  	if (obj->pages &&
>  	    obj->tiling_mode != I915_TILING_NONE &&
>  	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
> @@ -3623,7 +3618,6 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
>  
>  	args->retained = obj->madv != __I915_MADV_PURGED;
>  
> -out:
>  	i915_gem_object_put(obj);
>  unlock:
>  	mutex_unlock(&dev->struct_mutex);
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 29/38] drm/i915: Remove locking for get_tiling
  2016-06-03 16:55 ` [PATCH 29/38] drm/i915: Remove locking for get_tiling Chris Wilson
@ 2016-06-08 10:02   ` Daniel Vetter
  2016-06-08 10:11     ` Chris Wilson
  0 siblings, 1 reply; 58+ messages in thread
From: Daniel Vetter @ 2016-06-08 10:02 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Jun 03, 2016 at 05:55:44PM +0100, Chris Wilson wrote:
> Since we are not concerned with userspace racing itself with set-tiling
> (the order is indeterminate even if we take a lock), we can safely
> read back the single obj->tiling_mode and do the static lookup of
> swizzle mode without having to take a lock.
> 
> get-tiling is reasonably frequent due to the back-channel passing around
> of tiling parameters in DRI2/DRI3.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_tiling.c | 8 ++------
>  1 file changed, 2 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index 326de7eae101..d6acd0a27c06 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -302,10 +302,8 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
>  	if (!obj)
>  		return -ENOENT;
>  
> -	mutex_lock(&dev->struct_mutex);
> -
>  	args->tiling_mode = obj->tiling_mode;

READ_ONCE here. With that Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
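
Untested, but I mean something along the lines of

	args->tiling_mode = READ_ONCE(obj->tiling_mode);

just to document the deliberately unlocked read.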

> -	switch (obj->tiling_mode) {
> +	switch (args->tiling_mode) {
>  	case I915_TILING_X:
>  		args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
>  		break;
> @@ -329,8 +327,6 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
>  	if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
>  		args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
>  
> -	i915_gem_object_put(obj);
> -	mutex_unlock(&dev->struct_mutex);
> -
> +	i915_gem_object_put_unlocked(obj);
>  	return 0;
>  }
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 27/38] drm/i915: Reduce locking inside swfinish ioctl
  2016-06-08  9:59   ` Daniel Vetter
@ 2016-06-08 10:03     ` Chris Wilson
  0 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-08 10:03 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Daniel Vetter, intel-gfx

On Wed, Jun 08, 2016 at 11:59:25AM +0200, Daniel Vetter wrote:
> On Fri, Jun 03, 2016 at 05:55:42PM +0100, Chris Wilson wrote:
> > We only need to take the struct_mutex if the object is pinned to the
> > display engine and so requires checking for clflush. (The race with
> > userspace pinning the object to a framebuffer is irrelevant.)
> > 
> > v2: Use access once for compiler hints (or not as it is a bitfield)
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c | 29 ++++++++++++++++-------------
> >  1 file changed, 16 insertions(+), 13 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index a4f949038d50..b78f9df1894c 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -1265,25 +1265,28 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
> >  {
> >  	struct drm_i915_gem_sw_finish *args = data;
> >  	struct drm_i915_gem_object *obj;
> > -	int ret = 0;
> > -
> > -	ret = i915_mutex_lock_interruptible(dev);
> > -	if (ret)
> > -		return ret;
> > +	int ret;
> >  
> >  	obj = i915_gem_object_lookup(file, args->handle);
> > -	if (!obj) {
> > -		ret = -ENOENT;
> > -		goto unlock;
> > -	}
> > +	if (!obj)
> > +		return -ENOENT;
> >  
> >  	/* Pinned buffers may be scanout, so flush the cache */
> > -	if (obj->pin_display)
> > +	if (obj->pin_display) {
> 
> READ_ONCE(obj->pin_display)?

 v2: Use access once for compiler hints (or not as it is a bitfield)

Now v3, it is no longer a bitfield. Sigh.
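
So for v3 the obvious form should work (a sketch, assuming pin_display
is now a plain unsigned int):

	/* Racy by design: the hint only decides whether we bother
	 * taking struct_mutex to flush for scanout.
	 */
	if (READ_ONCE(obj->pin_display)) {
		ret = i915_mutex_lock_interruptible(dev);
		...
	}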
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 22/38] drm/gem/shrinker: Wait before acquiring struct_mutex under oom
  2016-06-08  9:57   ` Daniel Vetter
@ 2016-06-08 10:04     ` Chris Wilson
  0 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-08 10:04 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, Jun 08, 2016 at 11:57:21AM +0200, Daniel Vetter wrote:
> On Fri, Jun 03, 2016 at 05:55:37PM +0100, Chris Wilson wrote:
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Shouldn't we only do this as a last resort, i.e. in the oom notifier?
> Commit message is a bit sparse on the motivation here ;-)

i915_gem_shrinker_lock_uninterruptible is the last resort.

Motivation is a bit lacking, since there isn't much more we can do to
prevent oom right now.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 32/38] drm/i915: Stop the machine whilst capturing the GPU crash dump
  2016-06-03 16:55 ` [PATCH 32/38] drm/i915: Stop the machine whilst capturing the GPU crash dump Chris Wilson
@ 2016-06-08 10:06   ` Daniel Vetter
  2016-06-08 11:37     ` Chris Wilson
  0 siblings, 1 reply; 58+ messages in thread
From: Daniel Vetter @ 2016-06-08 10:06 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Jun 03, 2016 at 05:55:47PM +0100, Chris Wilson wrote:
> The error state is purposefully racy as we expect it to be called at any
> time and so have avoided any locking whilst capturing the crash dump.
> However, with multi-engine GPUs and multiple CPUs, those races can
> manifest as OOPSes as we attempt to chase dangling pointers freed on
> other CPUs. Under discussion are lots of ways to slow down normal
> operation in order to protect the post-mortem error capture, but what if
> we take the opposite approach and freeze the machine whilst the error
> capture runs (note the GPU may still be running, but as long as we don't
> process any of the results, the driver's bookkeeping will be static).
> 
> Note that by itself, this is not a complete fix. It also depends on
> the compiler barriers in list_add/list_del to prevent traversing the
> lists into the void.
> 
> v2: Avoid drm_clflush_pages() inside stop_machine() as it may use
> stop_machine() itself for its wbinvd fallback.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

rt folks will hate us for this I think. But yeah the only other option is
mass-rcu-ifying everything, which is much more fragile. Ack on the general
idea at least, need to look at what's all needed for list manipulation
still.
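
For the list side, what we are leaning on is that list_add() publishes
the new entry last, so a stopped-machine reader walking forwards never
sees a half-linked node (a sketch of the pattern in list.h, from memory):

	static inline void __list_add(struct list_head *new,
				      struct list_head *prev,
				      struct list_head *next)
	{
		next->prev = new;
		new->next = next;
		new->prev = prev;
		/* publish last: forward walkers see either old or new */
		WRITE_ONCE(prev->next, new);
	}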
-Daniel

> ---
>  drivers/gpu/drm/i915/Kconfig          |  1 +
>  drivers/gpu/drm/i915/i915_drv.h       |  2 ++
>  drivers/gpu/drm/i915/i915_gpu_error.c | 48 +++++++++++++++++++++--------------
>  3 files changed, 32 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 29a32b11953b..9398a4d06c0e 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -4,6 +4,7 @@ config DRM_I915
>  	depends on X86 && PCI
>  	select INTEL_GTT
>  	select INTERVAL_TREE
> +	select STOP_MACHINE
>  	# we need shmfs for the swappable backing store, and in particular
>  	# the shmem_readpage() which depends upon tmpfs
>  	select SHMEM
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index dbd3c6f3abbc..77564f378771 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -470,6 +470,8 @@ struct drm_i915_error_state {
>  	struct kref ref;
>  	struct timeval time;
>  
> +	struct drm_i915_private *i915;
> +
>  	char error_msg[128];
>  	bool simulated;
>  	int iommu;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index f01f0ca4bb86..ab2ba76a2a3b 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -28,6 +28,7 @@
>   */
>  
>  #include <generated/utsrelease.h>
> +#include <linux/stop_machine.h>
>  #include "i915_drv.h"
>  
>  static const char *ring_str(int ring)
> @@ -682,14 +683,12 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
>  
>  	dst->page_count = num_pages;
>  	while (num_pages--) {
> -		unsigned long flags;
>  		void *d;
>  
>  		d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
>  		if (d == NULL)
>  			goto unwind;
>  
> -		local_irq_save(flags);
>  		if (use_ggtt) {
>  			void __iomem *s;
>  
> @@ -708,15 +707,10 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
>  
>  			page = i915_gem_object_get_page(src, i);
>  
> -			drm_clflush_pages(&page, 1);
> -
>  			s = kmap_atomic(page);
>  			memcpy(d, s, PAGE_SIZE);
>  			kunmap_atomic(s);
> -
> -			drm_clflush_pages(&page, 1);
>  		}
> -		local_irq_restore(flags);
>  
>  		dst->pages[i++] = d;
>  		reloc_offset += PAGE_SIZE;
> @@ -1366,6 +1360,32 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
>  	error->suspend_count = dev_priv->suspend_count;
>  }
>  
> +static int capture(void *data)
> +{
> +	struct drm_i915_error_state *error = data;
> +
> +	/* Ensure that what we readback from memory matches what the GPU sees */
> +	wbinvd();
> +
> +	i915_capture_gen_state(error->i915, error);
> +	i915_capture_reg_state(error->i915, error);
> +	i915_gem_record_fences(error->i915, error);
> +	i915_gem_record_rings(error->i915, error);
> +
> +	i915_capture_active_buffers(error->i915, error);
> +	i915_capture_pinned_buffers(error->i915, error);
> +
> +	do_gettimeofday(&error->time);
> +
> +	error->overlay = intel_overlay_capture_error_state(error->i915);
> +	error->display = intel_display_capture_error_state(error->i915);
> +
> +	/* And make sure we don't leave trash in the CPU cache */
> +	wbinvd();
> +
> +	return 0;
> +}
> +
>  /**
>   * i915_capture_error_state - capture an error record for later analysis
>   * @dev: drm device
> @@ -1394,19 +1414,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
>  	}
>  
>  	kref_init(&error->ref);
> +	error->i915 = dev_priv;
>  
> -	i915_capture_gen_state(dev_priv, error);
> -	i915_capture_reg_state(dev_priv, error);
> -	i915_gem_record_fences(dev_priv, error);
> -	i915_gem_record_rings(dev_priv, error);
> -
> -	i915_capture_active_buffers(dev_priv, error);
> -	i915_capture_pinned_buffers(dev_priv, error);
> -
> -	do_gettimeofday(&error->time);
> -
> -	error->overlay = intel_overlay_capture_error_state(dev_priv);
> -	error->display = intel_display_capture_error_state(dev_priv);
> +	stop_machine(capture, error, NULL);
>  
>  	i915_error_capture_msg(dev_priv, error, engine_mask, error_msg);
>  	DRM_INFO("%s\n", error->error_msg);
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/38] drm/i915: Pad GTT views of exec objects up to user specified size
  2016-06-08  9:41   ` Daniel Vetter
@ 2016-06-08 10:08     ` Chris Wilson
  0 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-08 10:08 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, Jun 08, 2016 at 11:41:01AM +0200, Daniel Vetter wrote:
> On Fri, Jun 03, 2016 at 05:55:21PM +0100, Chris Wilson wrote:
> > Our GPUs impose certain requirements upon buffers that depend upon how
> > exactly they are used. Typically this is expressed as that they require
> > a larger surface than would be naively computed by pitch * height.
> > Normally such requirements are hidden away in the userspace driver, but
> > when we accept pointers from strangers and later impose extra conditions
> > on them, the original client allocator has no idea about the
> > monstrosities in the GPU and we require the userspace driver to inform
> > the kernel how many padding pages are required beyond the client
> > allocation.
> > 
> > v2: Long time, no see
> > v3: Try an anonymous union for uapi struct compatibility
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Hm, where's the userspace for this? Commit message should elaborate imo a
> bit more on what's going on here ...

ddx, igt both posted.

For dri3 the client passes us a buffer with one size that may not match
all uses (but is sufficient for its intended use). At the moment we reject
it, but I could allow it through and pad the missing pages in the GTT
instead (a la lazy fencing).

The earliest motivation for this was OpenCL wrapping blobs of userspace
memory and trying to manage a similar problem: the client memory may not
match the actual requirements of the GPU.
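
The uapi delta is just reusing one of the reserved fields (a sketch;
field naming as in the posted series, iirc):

	struct drm_i915_gem_exec_object2 {
		...
		__u64 flags; /* gains an EXEC_OBJECT_PAD_TO_SIZE bit */
		union {
			__u64 rsvd1;
			__u64 pad_to_size; /* only used with the new flag */
		};
		__u64 rsvd2;
	};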
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 37/38] drm/i915: Track pinned VMA
  2016-06-03 16:55 ` [PATCH 37/38] drm/i915: Track pinned VMA Chris Wilson
@ 2016-06-08 10:08   ` Daniel Vetter
  0 siblings, 0 replies; 58+ messages in thread
From: Daniel Vetter @ 2016-06-08 10:08 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Jun 03, 2016 at 05:55:52PM +0100, Chris Wilson wrote:
> Treat the VMA as the primary struct responsible for tracking bindings
> into the GPU's VM. That is we want to treat the VMA returned after we
> pin an object into the VM as the cookie we hold and eventually release
> when unpinning. Doing so eliminates the ambiguity in pinning the object
> and then searching for the relevant pin later.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Imo would be good to split this up a bit more, like you already extracted
the overlay parts. I know there are probably a few chicken&eggs in here,
but imo it's still needed.
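
At least the calling convention everyone converts to reads cleanly (a
minimal sketch of the pattern, per the diff below):

	vma = i915_gem_object_ggtt_pin(obj, view, size, alignment, flags);
	if (IS_ERR(vma))
		return PTR_ERR(vma);
	/* ... use vma->node.start while we hold the pin ... */
	i915_vma_unpin(vma); /* drop exactly the pin we took */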
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c          |  75 +++++-----
>  drivers/gpu/drm/i915/i915_drv.h              |  64 +++------
>  drivers/gpu/drm/i915/i915_gem.c              | 200 ++++++---------------------
>  drivers/gpu/drm/i915/i915_gem_context.c      |  43 +++---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  60 ++++----
>  drivers/gpu/drm/i915/i915_gem_fence.c        |  64 ++++-----
>  drivers/gpu/drm/i915/i915_gem_gtt.c          |  58 +++++---
>  drivers/gpu/drm/i915/i915_gem_gtt.h          |  14 --
>  drivers/gpu/drm/i915/i915_gem_render_state.c |  31 ++---
>  drivers/gpu/drm/i915/i915_gem_render_state.h |   2 +-
>  drivers/gpu/drm/i915/i915_gem_request.c      |  10 +-
>  drivers/gpu/drm/i915/i915_gem_request.h      |   2 +-
>  drivers/gpu/drm/i915/i915_gem_stolen.c       |   2 +-
>  drivers/gpu/drm/i915/i915_gem_tiling.c       |  42 +++---
>  drivers/gpu/drm/i915/i915_gpu_error.c        |  55 +++-----
>  drivers/gpu/drm/i915/i915_guc_submission.c   |  28 ++--
>  drivers/gpu/drm/i915/intel_display.c         |  57 +++++---
>  drivers/gpu/drm/i915/intel_drv.h             |   5 +-
>  drivers/gpu/drm/i915/intel_fbc.c             |   2 +-
>  drivers/gpu/drm/i915/intel_fbdev.c           |  19 ++-
>  drivers/gpu/drm/i915/intel_guc_loader.c      |  29 ++--
>  drivers/gpu/drm/i915/intel_lrc.c             | 113 ++++++++-------
>  drivers/gpu/drm/i915/intel_overlay.c         |  44 +++---
>  drivers/gpu/drm/i915/intel_ringbuffer.c      | 194 ++++++++++++++------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h      |  20 +--
>  drivers/gpu/drm/i915/intel_sprite.c          |   8 +-
>  26 files changed, 549 insertions(+), 692 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 485fc23893d6..938a95df8a11 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -111,7 +111,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj)
>  
>  static char get_global_flag(struct drm_i915_gem_object *obj)
>  {
> -	return i915_gem_obj_to_ggtt(obj) ? 'g' : ' ';
> +	return i915_gem_object_to_ggtt(obj, NULL) ?  'g' : ' ';
>  }
>  
>  static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
> @@ -278,7 +278,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
>  	struct drm_device *dev = node->minor->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_gem_object *obj;
> -	u64 total_obj_size, total_gtt_size;
> +	u64 total_obj_size;
>  	LIST_HEAD(stolen);
>  	int count, ret;
>  
> @@ -286,7 +286,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
>  	if (ret)
>  		return ret;
>  
> -	total_obj_size = total_gtt_size = count = 0;
> +	total_obj_size = count = 0;
>  	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
>  		if (obj->stolen == NULL)
>  			continue;
> @@ -294,7 +294,6 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
>  		list_add(&obj->obj_exec_link, &stolen);
>  
>  		total_obj_size += obj->base.size;
> -		total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
>  		count++;
>  	}
>  	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
> @@ -317,8 +316,8 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
>  	}
>  	mutex_unlock(&dev->struct_mutex);
>  
> -	seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
> -		   count, total_obj_size, total_gtt_size);
> +	seq_printf(m, "Total %d objects, %llu bytes\n",
> +		   count, total_obj_size);
>  	return 0;
>  }
>  
> @@ -327,7 +326,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
>  		size += i915_gem_obj_total_ggtt_size(obj); \
>  		++count; \
>  		if (obj->map_and_fenceable) { \
> -			mappable_size += i915_gem_obj_ggtt_size(obj); \
> +			mappable_size += obj->base.size; \
>  			++mappable_count; \
>  		} \
>  	} \
> @@ -451,10 +450,10 @@ static void print_context_stats(struct seq_file *m,
>  
>  #define count_vmas(list, member) do { \
>  	list_for_each_entry(vma, list, member) { \
> -		size += i915_gem_obj_total_ggtt_size(vma->obj); \
> +		size += vma->size; \
>  		++count; \
>  		if (vma->obj->map_and_fenceable) { \
> -			mappable_size += i915_gem_obj_ggtt_size(vma->obj); \
> +			mappable_size += vma->size; \
>  			++mappable_count; \
>  		} \
>  	} \
> @@ -517,11 +516,11 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
>  	size = count = mappable_size = mappable_count = 0;
>  	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
>  		if (obj->fault_mappable) {
> -			size += i915_gem_obj_ggtt_size(obj);
> +			size += obj->base.size;
>  			++count;
>  		}
>  		if (obj->pin_display) {
> -			mappable_size += i915_gem_obj_ggtt_size(obj);
> +			mappable_size += obj->base.size;
>  			++mappable_count;
>  		}
>  		if (obj->madv == I915_MADV_DONTNEED) {
> @@ -589,30 +588,29 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data)
>  	uintptr_t list = (uintptr_t) node->info_ent->data;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_gem_object *obj;
> -	u64 total_obj_size, total_gtt_size;
> +	u64 total_obj_size;
>  	int count, ret;
>  
>  	ret = mutex_lock_interruptible(&dev->struct_mutex);
>  	if (ret)
>  		return ret;
>  
> -	total_obj_size = total_gtt_size = count = 0;
> +	total_obj_size = count = 0;
>  	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> -		if (list == PINNED_LIST && !i915_gem_obj_is_pinned(obj))
> +		if (list == PINNED_LIST && !obj->pin_display)
>  			continue;
>  
>  		seq_puts(m, "   ");
>  		describe_obj(m, obj);
>  		seq_putc(m, '\n');
>  		total_obj_size += obj->base.size;
> -		total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
>  		count++;
>  	}
>  
>  	mutex_unlock(&dev->struct_mutex);
>  
> -	seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
> -		   count, total_obj_size, total_gtt_size);
> +	seq_printf(m, "Total %d objects, %llu bytes\n",
> +		   count, total_obj_size);
>  
>  	return 0;
>  }
> @@ -2075,38 +2073,35 @@ static void i915_dump_lrc_obj(struct seq_file *m,
>  			      struct i915_gem_context *ctx,
>  			      struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
> +	struct drm_i915_gem_object *obj = ctx->engine[engine->id].state;
> +	struct i915_vma *vma = ctx->engine[engine->id].vma;
>  	struct page *page;
> -	uint32_t *reg_state;
>  	int j;
> -	unsigned long ggtt_offset = 0;
>  
>  	seq_printf(m, "CONTEXT: %s %u\n", engine->name, ctx->hw_id);
> -
> -	if (ctx_obj == NULL) {
> -		seq_puts(m, "\tNot allocated\n");
> -		return;
> -	}
> -
> -	if (!i915_gem_obj_ggtt_bound(ctx_obj))
> +	if (vma == NULL) {
>  		seq_puts(m, "\tNot bound in GGTT\n");
> -	else
> -		ggtt_offset = i915_gem_obj_ggtt_offset(ctx_obj);
> +	} else {
> +		seq_printf(m, "\tBound in GGTT at %x\n",
> +			   lower_32_bits(vma->node.start));
> +	}
>  
> -	if (i915_gem_object_get_pages(ctx_obj)) {
> -		seq_puts(m, "\tFailed to get pages for context object\n");
> +	if (i915_gem_object_get_pages(obj)) {
> +		seq_puts(m, "\tFailed to get pages for context object\n\n");
>  		return;
>  	}
>  
> -	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
> -	if (!WARN_ON(page == NULL)) {
> -		reg_state = kmap_atomic(page);
> -
> +	page = i915_gem_object_get_page(obj, LRC_STATE_PN);
> +	if (page != NULL) {
> +		uint32_t *reg_state = kmap_atomic(page);
>  		for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) {
> -			seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n",
> -				   ggtt_offset + 4096 + (j * 4),
> -				   reg_state[j], reg_state[j + 1],
> -				   reg_state[j + 2], reg_state[j + 3]);
> +			seq_printf(m,
> +				   "\t[0x%08x] 0x%08x 0x%08x 0x%08x 0x%08x\n",
> +				   j * 4,
> +				   reg_state[j],
> +				   reg_state[j + 1],
> +				   reg_state[j + 2],
> +				   reg_state[j + 3]);
>  		}
>  		kunmap_atomic(reg_state);
>  	}
> @@ -3210,7 +3205,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused)
>  		struct page *page;
>  		uint64_t *seqno;
>  
> -		page = i915_gem_object_get_page(dev_priv->semaphore_obj, 0);
> +		page = i915_gem_object_get_page(dev_priv->semaphore_vma->obj, 0);
>  
>  		seqno = (uint64_t *)kmap_atomic(page);
>  		for_each_engine_id(engine, dev_priv, id) {
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 77564f378771..ed968deb36aa 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -880,8 +880,8 @@ struct i915_gem_context {
>  
>  	struct intel_context {
>  		struct drm_i915_gem_object *state;
> +		struct i915_vma *vma;
>  		struct intel_ring *ring;
> -		struct i915_vma *lrc_vma;
>  		uint32_t *lrc_reg_state;
>  		u64 lrc_desc;
>  		int pin_count;
> @@ -1736,7 +1736,7 @@ struct drm_i915_private {
>  	struct pci_dev *bridge_dev;
>  	struct i915_gem_context *kernel_context;
>  	struct intel_engine_cs engine[I915_NUM_ENGINES];
> -	struct drm_i915_gem_object *semaphore_obj;
> +	struct i915_vma *semaphore_vma;
>  	uint32_t next_seqno;
>  
>  	struct drm_dma_handle *status_page_dmah;
> @@ -2996,7 +2996,7 @@ static inline void i915_vma_unpin(struct i915_vma *vma)
>  	__i915_vma_unpin(vma);
>  }
>  
> -int __must_check
> +struct i915_vma * __must_check
>  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  			 const struct i915_ggtt_view *view,
>  			 uint64_t size,
> @@ -3174,12 +3174,11 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
>  				  bool write);
>  int __must_check
>  i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
> -int __must_check
> +struct i915_vma * __must_check
>  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  				     u32 alignment,
>  				     const struct i915_ggtt_view *view);
> -void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
> -					      const struct i915_ggtt_view *view);
> +void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
>  int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>  				int align);
>  int i915_gem_open(struct drm_device *dev, struct drm_file *file);
> @@ -3200,63 +3199,34 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>  struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
>  				struct drm_gem_object *gem_obj, int flags);
>  
> -u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
> -				  const struct i915_ggtt_view *view);
> -u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
> -			struct i915_address_space *vm);
> -static inline u64
> -i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o)
> -{
> -	return i915_gem_obj_ggtt_offset_view(o, &i915_ggtt_view_normal);
> -}
> -
> -bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
> -				  const struct i915_ggtt_view *view);
> -bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
> -			struct i915_address_space *vm);
> -
>  struct i915_vma *
>  i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
> -		    struct i915_address_space *vm);
> -struct i915_vma *
> -i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
> -			  const struct i915_ggtt_view *view);
> +		     struct i915_address_space *vm,
> +		     const struct i915_ggtt_view *view);
>  
>  struct i915_vma *
>  i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
> -				  struct i915_address_space *vm);
> -struct i915_vma *
> -i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
> -				       const struct i915_ggtt_view *view);
> -
> -static inline struct i915_vma *
> -i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
> -{
> -	return i915_gem_obj_to_ggtt_view(obj, &i915_ggtt_view_normal);
> -}
> -bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj);
> +				  struct i915_address_space *vm,
> +				  const struct i915_ggtt_view *view);
>  
> -/* Some GGTT VM helpers */
>  static inline struct i915_hw_ppgtt *
>  i915_vm_to_ppgtt(struct i915_address_space *vm)
>  {
>  	return container_of(vm, struct i915_hw_ppgtt, base);
>  }
>  
> -static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
> +static inline struct i915_vma *
> +i915_gem_object_to_ggtt(struct drm_i915_gem_object *obj,
> +			const struct i915_ggtt_view *view)
>  {
> -	return i915_gem_obj_ggtt_bound_view(obj, &i915_ggtt_view_normal);
> +	return i915_gem_obj_to_vma(obj, &to_i915(obj->base.dev)->ggtt.base, view);
>  }
>  
> -unsigned long
> -i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj);
> -
> -void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
> -				     const struct i915_ggtt_view *view);
> -static inline void
> -i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj)
> +static inline unsigned long
> +i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o,
> +			    const struct i915_ggtt_view *view)
>  {
> -	i915_gem_object_ggtt_unpin_view(obj, &i915_ggtt_view_normal);
> +	return i915_gem_object_to_ggtt(o, view)->node.start;
>  }
>  
>  /* i915_gem_fence.c */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index dad00800aeef..e0db9b02ee04 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -848,16 +848,18 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
>  {
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> +	struct i915_vma *vma;
>  	ssize_t remain;
>  	loff_t offset, page_base;
>  	char __user *user_data;
>  	int page_offset, page_length, ret;
>  
> -	ret = i915_gem_object_ggtt_pin(obj, NULL,
> -				       0, 0,
> +	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 
>  				       PIN_MAPPABLE | PIN_NONBLOCK);
> -	if (ret)
> +	if (IS_ERR(vma)) {
> +		ret = PTR_ERR(vma);
>  		goto out;
> +	}
>  
>  	ret = i915_gem_object_set_to_gtt_domain(obj, true);
>  	if (ret)
> @@ -870,7 +872,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
>  	user_data = u64_to_user_ptr(args->data_ptr);
>  	remain = args->size;
>  
> -	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
> +	offset = vma->node.start + args->offset;
>  
>  	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
>  
> @@ -905,7 +907,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
>  out_flush:
>  	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
>  out_unpin:
> -	i915_gem_object_ggtt_unpin(obj);
> +	i915_vma_unpin(vma);
>  out:
>  	return ret;
>  }
> @@ -1382,8 +1384,8 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
>  	struct drm_device *dev = obj->base.dev;
>  	struct drm_i915_private *dev_priv = to_i915(dev);
> -	struct i915_ggtt *ggtt = &dev_priv->ggtt;
>  	struct i915_ggtt_view view = i915_ggtt_view_normal;
> +	struct i915_vma *ggtt;
>  	pgoff_t page_offset;
>  	unsigned long pfn;
>  	int ret = 0;
> @@ -1417,7 +1419,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	}
>  
>  	/* Use a partial view if the object is bigger than the aperture. */
> -	if (obj->base.size >= ggtt->mappable_end &&
> +	if (obj->base.size >= dev_priv->ggtt.mappable_end &&
>  	    obj->tiling_mode == I915_TILING_NONE) {
>  		static const unsigned int chunk_size = 256; // 1 MiB
>  
> @@ -1432,9 +1434,11 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	}
>  
>  	/* Now pin it into the GTT if needed */
> -	ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
> -	if (ret)
> +	ggtt = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
> +	if (IS_ERR(ggtt)) {
> +		ret = PTR_ERR(ggtt);
>  		goto err_unlock;
> +	}
>  
>  	ret = i915_gem_object_set_to_gtt_domain(obj, write);
>  	if (ret)
> @@ -1445,8 +1449,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  		goto err_unpin;
>  
>  	/* Finally, remap it using the new GTT offset */
> -	pfn = ggtt->mappable_base +
> -		i915_gem_obj_ggtt_offset_view(obj, &view);
> +	pfn = dev_priv->ggtt.mappable_base + ggtt->node.start;
>  	pfn >>= PAGE_SHIFT;
>  
>  	if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
> @@ -1488,7 +1491,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  					    pfn + page_offset);
>  	}
>  err_unpin:
> -	i915_gem_object_ggtt_unpin_view(obj, &view);
> +	__i915_vma_unpin(ggtt);
>  err_unlock:
>  	mutex_unlock(&dev->struct_mutex);
>  err_rpm:
> @@ -2925,7 +2928,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  					    old_write_domain);
>  
>  	/* And bump the LRU for this access */
> -	vma = i915_gem_obj_to_ggtt(obj);
> +	vma = i915_gem_object_to_ggtt(obj, NULL);
>  	if (vma &&
>  	    drm_mm_node_allocated(&vma->node) &&
>  	    !i915_vma_is_active(vma))
> @@ -3149,11 +3152,12 @@ rpm_put:
>   * Can be called from an uninterruptible phase (modesetting) and allows
>   * any flushes to be pipelined (for pageflips).
>   */
> -int
> +struct i915_vma *
>  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  				     u32 alignment,
>  				     const struct i915_ggtt_view *view)
>  {
> +	struct i915_vma *vma;
>  	u32 old_read_domains, old_write_domain;
>  	int ret;
>  
> @@ -3173,19 +3177,23 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	 */
>  	ret = i915_gem_object_set_cache_level(obj,
>  					      HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
> -	if (ret)
> +	if (ret) {
> +		vma = ERR_PTR(ret);
>  		goto err_unpin_display;
> +	}
>  
>  	/* As the user may map the buffer once pinned in the display plane
>  	 * (e.g. libkms for the bootup splash), we have to ensure that we
>  	 * always use map_and_fenceable for all scanout buffers.
>  	 */
> -	ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
> +	vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
>  				       view->type == I915_GGTT_VIEW_NORMAL ?
>  				       PIN_MAPPABLE : 0);
> -	if (ret)
> +	if (IS_ERR(vma))
>  		goto err_unpin_display;
>  
> +	WARN_ON(obj->pin_display > vma->pin_count);
> +
>  	i915_gem_object_flush_cpu_write_domain(obj);
>  
>  	old_write_domain = obj->base.write_domain;
> @@ -3204,24 +3212,24 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	/* Increment the pages_pin_count to guard against the shrinker */
>  	obj->pages_pin_count++;
>  
> -	return 0;
> +	return vma;
>  
>  err_unpin_display:
>  	obj->pin_display--;
> -	return ret;
> +	return vma;
>  }
>  
>  void
> -i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
> -					 const struct i915_ggtt_view *view)
> +i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
>  {
> -	if (WARN_ON(obj->pin_display == 0))
> +	if (WARN_ON(vma->obj->pin_display == 0))
>  		return;
>  
> -	i915_gem_object_ggtt_unpin_view(obj, view);
> +	vma->obj->pin_display--;
> +	vma->obj->pages_pin_count--;
>  
> -	obj->pages_pin_count--;
> -	obj->pin_display--;
> +	i915_vma_unpin(vma);
> +	WARN_ON(vma->obj->pin_display > vma->pin_count);
>  }
>  
>  /**
> @@ -3421,26 +3429,24 @@ err:
>  	return ret;
>  }
>  
> -int
> +struct i915_vma *
>  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
> -			 const struct i915_ggtt_view *view,
> +			 const struct i915_ggtt_view *ggtt_view,
>  			 uint64_t size,
>  			 uint64_t alignment,
>  			 uint64_t flags)
>  {
> +	struct i915_address_space *vm = &to_i915(obj->base.dev)->ggtt.base;
>  	struct i915_vma *vma;
>  	int ret;
>  
> -	if (view == NULL)
> -		view = &i915_ggtt_view_normal;
> -
> -	vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
> +	vma = i915_gem_obj_lookup_or_create_vma(obj, vm, ggtt_view);
>  	if (IS_ERR(vma))
> -		return PTR_ERR(vma);
> +		return vma;
>  
>  	if (i915_vma_misplaced(vma, size, alignment, flags)) {
>  		if (flags & PIN_NONBLOCK && (vma->pin_count | vma->active))
> -			return -ENOSPC;
> +			return ERR_PTR(-ENOSPC);
>  
>  		WARN(vma->pin_count,
>  		     "bo is already pinned in ggtt with incorrect alignment:"
> @@ -3453,17 +3459,14 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  		     obj->map_and_fenceable);
>  		ret = i915_vma_unbind(vma);
>  		if (ret)
> -			return ret;
> +			return ERR_PTR(ret);
>  	}
>  
> -	return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
> -}
> +	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
> +	if (ret)
> +		return ERR_PTR(ret);
>  
> -void
> -i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
> -				const struct i915_ggtt_view *view)
> -{
> -	i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
> +	return vma;
>  }
>  
>  static __always_inline unsigned
> @@ -3799,31 +3802,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	intel_runtime_pm_put(dev_priv);
>  }
>  
> -struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
> -				     struct i915_address_space *vm)
> -{
> -	struct i915_vma *vma;
> -	list_for_each_entry(vma, &obj->vma_list, obj_link) {
> -		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
> -		    vma->vm == vm)
> -			return vma;
> -	}
> -	return NULL;
> -}
> -
> -struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
> -					   const struct i915_ggtt_view *view)
> -{
> -	struct i915_vma *vma;
> -
> -	GEM_BUG_ON(!view);
> -
> -	list_for_each_entry(vma, &obj->vma_list, obj_link)
> -		if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
> -			return vma;
> -	return NULL;
> -}
> -
>  int
>  i915_gem_suspend(struct drm_device *dev)
>  {
> @@ -4321,96 +4299,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
>  	}
>  }
>  
> -/* All the new VM stuff */
> -u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
> -			struct i915_address_space *vm)
> -{
> -	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
> -	struct i915_vma *vma;
> -
> -	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
> -
> -	list_for_each_entry(vma, &o->vma_list, obj_link) {
> -		if (vma->is_ggtt &&
> -		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
> -			continue;
> -		if (vma->vm == vm)
> -			return vma->node.start;
> -	}
> -
> -	WARN(1, "%s vma for this object not found.\n",
> -	     i915_is_ggtt(vm) ? "global" : "ppgtt");
> -	return -1;
> -}
> -
> -u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
> -				  const struct i915_ggtt_view *view)
> -{
> -	struct i915_vma *vma;
> -
> -	list_for_each_entry(vma, &o->vma_list, obj_link)
> -		if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
> -			return vma->node.start;
> -
> -	WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
> -	return -1;
> -}
> -
> -bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
> -			struct i915_address_space *vm)
> -{
> -	struct i915_vma *vma;
> -
> -	list_for_each_entry(vma, &o->vma_list, obj_link) {
> -		if (vma->is_ggtt &&
> -		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
> -			continue;
> -		if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
> -			return true;
> -	}
> -
> -	return false;
> -}
> -
> -bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
> -				  const struct i915_ggtt_view *view)
> -{
> -	struct i915_vma *vma;
> -
> -	list_for_each_entry(vma, &o->vma_list, obj_link)
> -		if (vma->is_ggtt &&
> -		    i915_ggtt_view_equal(&vma->ggtt_view, view) &&
> -		    drm_mm_node_allocated(&vma->node))
> -			return true;
> -
> -	return false;
> -}
> -
> -unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
> -{
> -	struct i915_vma *vma;
> -
> -	GEM_BUG_ON(list_empty(&o->vma_list));
> -
> -	list_for_each_entry(vma, &o->vma_list, obj_link) {
> -		if (vma->is_ggtt &&
> -		    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
> -			return vma->node.size;
> -	}
> -
> -	return 0;
> -}
> -
> -bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
> -{
> -	struct i915_vma *vma;
> -	list_for_each_entry(vma, &obj->vma_list, obj_link)
> -		if (i915_vma_is_pinned(vma))
> -			return true;
> -
> -	return false;
> -}
> -
>  /* Like i915_gem_object_get_page(), but mark the returned page dirty */
>  struct page *
>  i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index c9b8c2c62828..0ed8a4a7321a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -360,8 +360,8 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx,
>  	} else {
>  		struct intel_context *ce = &ctx->engine[engine->id];
>  
> -		if (ce->state)
> -			i915_gem_object_ggtt_unpin(ce->state);
> +		if (ce->vma)
> +			i915_vma_unpin(ce->vma);
>  
>  		i915_gem_context_put(ctx);
>  	}
> @@ -580,9 +580,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
>  
>  	intel_ring_emit(ring, MI_NOOP);
>  	intel_ring_emit(ring, MI_SET_CONTEXT);
> -	intel_ring_emit(ring,
> -			i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) |
> -			flags);
> +	intel_ring_emit(ring, req->ctx->engine[RCS].vma->node.start | flags);
>  	/*
>  	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
>  	 * WaMiSetContext_Hang:snb,ivb,vlv
> @@ -610,7 +608,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
>  					MI_STORE_REGISTER_MEM |
>  					MI_SRM_LRM_GLOBAL_GTT);
>  			intel_ring_emit_reg(ring, last_reg);
> -			intel_ring_emit(ring, req->engine->scratch.gtt_offset);
> +			intel_ring_emit(ring, req->engine->scratch->node.start);
>  			intel_ring_emit(ring, MI_NOOP);
>  		}
>  		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
> @@ -715,6 +713,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>  	struct intel_engine_cs *engine = req->engine;
>  	struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
>  	struct i915_gem_context *from;
> +	struct i915_vma *vma;
>  	u32 hw_flags;
>  	int ret, i;
>  
> @@ -722,10 +721,17 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>  		return 0;
>  
>  	/* Trying to pin first makes error handling easier. */
> -	ret = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0,
> +	vma = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0,
>  				       to->ggtt_alignment, 0);
> -	if (ret)
> -		return ret;
> +	if (IS_ERR(vma))
> +		return PTR_ERR(vma);
> +
> +	to->engine[RCS].vma = vma;
> +
> +	if (WARN_ON(!(vma->bound & GLOBAL_BIND))) {
> +		ret = -ENODEV;
> +		goto unpin_vma;
> +	}
>  
>  	/*
>  	 * Pin can switch back to the default context if we end up calling into
> @@ -746,7 +752,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>  	 */
>  	ret = i915_gem_object_set_to_gtt_domain(to->engine[RCS].state, false);
>  	if (ret)
> -		goto unpin_out;
> +		goto unpin_vma;
>  
>  	if (needs_pd_load_pre(ppgtt, engine, to)) {
>  		/* Older GENs and non render rings still want the load first,
> @@ -756,7 +762,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>  		trace_switch_mm(engine, to);
>  		ret = ppgtt->switch_mm(ppgtt, req);
>  		if (ret)
> -			goto unpin_out;
> +			goto unpin_vma;
>  	}
>  
>  	if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
> @@ -773,7 +779,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>  	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
>  		ret = mi_set_context(req, hw_flags);
>  		if (ret)
> -			goto unpin_out;
> +			goto unpin_vma;
>  	}
>  
>  	/* The backing object for the context is done after switching to the
> @@ -783,8 +789,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>  	 * MI_SET_CONTEXT instead of when the next seqno has completed.
>  	 */
>  	if (from != NULL) {
> -		struct drm_i915_gem_object *obj = from->engine[RCS].state;
> -
>  		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
>  		 * whole damn pipeline, we don't need to explicitly mark the
>  		 * object dirty. The only exception is that the context must be
> @@ -792,11 +796,10 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>  		 * able to defer doing this until we know the object would be
>  		 * swapped, but there is no way to do that yet.
>  		 */
> -		obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> -		i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), req, 0);
> -
> +		i915_vma_move_to_active(from->engine[RCS].vma, req, 0);
>  		/* obj is kept alive until the next request by its active ref */
> -		i915_gem_object_ggtt_unpin(obj);
> +		i915_vma_unpin(from->engine[RCS].vma);
> +
>  		i915_gem_context_put(from);
>  	}
>  	engine->last_context = i915_gem_context_get(to);
> @@ -841,8 +844,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>  
>  	return 0;
>  
> -unpin_out:
> -	i915_gem_object_ggtt_unpin(to->engine[RCS].state);
> +unpin_vma:
> +	i915_vma_unpin(vma);
>  	return ret;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index b89e9d2b33c4..a29c4b6fea28 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -174,8 +174,8 @@ eb_lookup_vmas(struct eb_vmas *eb,
>  		 * from the (obj, vm) we don't run the risk of creating
>  		 * duplicated vmas for the same vm.
>  		 */
> -		vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
> -		if (IS_ERR(vma)) {
> +		vma = i915_gem_obj_lookup_or_create_vma(obj, vm, NULL);
> +		if (unlikely(IS_ERR(vma))) {
>  			DRM_DEBUG("Failed to lookup VMA\n");
>  			ret = PTR_ERR(vma);
>  			goto err;
> @@ -343,30 +343,34 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
>  		   struct drm_i915_gem_relocation_entry *reloc,
>  		   uint64_t target_offset)
>  {
> -	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>  	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> +	struct i915_vma *vma;
>  	uint64_t delta = relocation_target(reloc, target_offset);
>  	uint64_t offset;
>  	void __iomem *reloc_page;
>  	int ret;
>  
> +	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
> +	if (IS_ERR(vma))
> +		return PTR_ERR(vma);
> +
>  	ret = i915_gem_object_set_to_gtt_domain(obj, true);
>  	if (ret)
> -		return ret;
> +		goto unpin;
>  
>  	ret = i915_gem_object_put_fence(obj);
>  	if (ret)
> -		return ret;
> +		goto unpin;
>  
>  	/* Map the page containing the relocation we're going to perform.  */
> -	offset = i915_gem_obj_ggtt_offset(obj);
> +	offset = vma->node.start;
>  	offset += reloc->offset;
>  	reloc_page = io_mapping_map_atomic_wc(ggtt->mappable,
>  					      offset & PAGE_MASK);
>  	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
>  
> -	if (INTEL_INFO(dev)->gen >= 8) {
> +	if (INTEL_GEN(dev_priv) >= 8) {
>  		offset += sizeof(uint32_t);
>  
>  		if (offset_in_page(offset) == 0) {
> @@ -382,7 +386,9 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
>  
>  	io_mapping_unmap_atomic(reloc_page);
>  
> -	return 0;
> +unpin:
> +	i915_vma_unpin(vma);
> +	return ret;
>  }
>  
>  static void
> @@ -1236,7 +1242,7 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
>  	return 0;
>  }
>  
> -static struct i915_vma*
> +static struct i915_vma *
>  i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
>  			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
>  			  struct drm_i915_gem_object *batch_obj,
> @@ -1260,31 +1266,30 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
>  			      batch_start_offset,
>  			      batch_len,
>  			      is_master);
> -	if (ret)
> +	if (ret) {
> +		if (ret == -EACCES) /* unhandled chained batch */
> +			vma = NULL;
> +		else
> +			vma = ERR_PTR(ret);
>  		goto err;
> +	}
>  
> -	ret = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
> -	if (ret)
> +	vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
> +	if (IS_ERR(vma)) {
> +		ret = PTR_ERR(vma);
>  		goto err;
> -
> -	i915_gem_object_unpin_pages(shadow_batch_obj);
> +	}
>  
>  	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
>  
> -	vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
>  	vma->exec_entry = shadow_exec_entry;
>  	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
>  	i915_gem_object_get(shadow_batch_obj);
>  	list_add_tail(&vma->exec_list, &eb->vmas);
>  
> -	return vma;
> -
>  err:
>  	i915_gem_object_unpin_pages(shadow_batch_obj);
> -	if (ret == -EACCES) /* unhandled chained batch */
> -		return NULL;
> -	else
> -		return ERR_PTR(ret);
> +	return vma;
>  }
>  
>  static int
> @@ -1631,6 +1636,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	 * hsw should have this fixed, but bdw mucks it up again. */
>  	if (dispatch_flags & I915_DISPATCH_SECURE) {
>  		struct drm_i915_gem_object *obj = params->batch_vma->obj;
> +		struct i915_vma *vma;
>  
>  		/*
>  		 * So on first glance it looks freaky that we pin the batch here
> @@ -1642,11 +1648,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  		 *   fitting due to fragmentation.
>  		 * So this is actually safe.
>  		 */
> -		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
> -		if (ret)
> +		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
> +		if (IS_ERR(vma)) {
> +			ret = PTR_ERR(vma);
>  			goto err;
> +		}
>  
> -		params->batch_vma = i915_gem_obj_to_ggtt(obj);
> +		params->batch_vma = vma;
>  	}
>  
>  	/* Allocate a request for this batch buffer nice and early. */
> @@ -1662,7 +1670,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	 * inactive_list and lose its active reference. Hence we do not need
>  	 * to explicitly hold another reference here.
>  	 */
> -	params->request->batch_obj = params->batch_vma->obj;
> +	params->request->batch = params->batch_vma;
>  
>  	ret = i915_gem_request_add_to_client(params->request, file);
>  	if (ret)
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
> index ee91705734bc..187611eafa99 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence.c
> @@ -85,20 +85,14 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg,
>  	POSTING_READ(fence_reg_lo);
>  
>  	if (obj) {
> -		u32 size = i915_gem_obj_ggtt_size(obj);
> -		uint64_t val;
> -
> -		/* Adjust fence size to match tiled area */
> -		if (obj->tiling_mode != I915_TILING_NONE) {
> -			uint32_t row_size = obj->stride *
> -				(obj->tiling_mode == I915_TILING_Y ? 32 : 8);
> -			size = (size / row_size) * row_size;
> -		}
> -
> -		val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
> -				 0xfffff000) << 32;
> -		val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
> -		val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
> +		struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
> +		u32 row_size = obj->stride * (obj->tiling_mode == I915_TILING_Y  ? 32 : 8);
> +		u32 size = (u32)vma->node.size / row_size * row_size;
> +		u64 val;
> +
> +		val = ((vma->node.start + size - 4096) & 0xfffff000) << 32;
> +		val |= vma->node.start & 0xfffff000;
> +		val |= (u64)((obj->stride / 128) - 1) << fence_pitch_shift;
>  		if (obj->tiling_mode == I915_TILING_Y)
>  			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
>  		val |= I965_FENCE_REG_VALID;
> @@ -121,15 +115,17 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
>  	u32 val;
>  
>  	if (obj) {
> -		u32 size = i915_gem_obj_ggtt_size(obj);
> +		struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
>  		int pitch_val;
>  		int tile_width;
>  
> -		WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
> -		     (size & -size) != size ||
> -		     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
> -		     "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
> -		     i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
> +		WARN((vma->node.start & ~I915_FENCE_START_MASK) ||
> +		     !is_power_of_2(vma->node.size) ||
> +		     (vma->node.start & (vma->node.size - 1)),
> +		     "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08lx) aligned\n",
> +		     (long)vma->node.start,
> +		     obj->map_and_fenceable,
> +		     (long)vma->node.size);
>  
>  		if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
>  			tile_width = 128;
> @@ -140,10 +136,10 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
>  		pitch_val = obj->stride / tile_width;
>  		pitch_val = ffs(pitch_val) - 1;
>  
> -		val = i915_gem_obj_ggtt_offset(obj);
> +		val = vma->node.start;
>  		if (obj->tiling_mode == I915_TILING_Y)
>  			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
> -		val |= I915_FENCE_SIZE_BITS(size);
> +		val |= I915_FENCE_SIZE_BITS(vma->node.size);
>  		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
>  		val |= I830_FENCE_REG_VALID;
>  	} else
> @@ -160,22 +156,22 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
>  	uint32_t val;
>  
>  	if (obj) {
> -		u32 size = i915_gem_obj_ggtt_size(obj);
> +		struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
>  		uint32_t pitch_val;
>  
> -		WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
> -		     (size & -size) != size ||
> -		     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
> -		     "object 0x%08llx not 512K or pot-size 0x%08x aligned\n",
> -		     i915_gem_obj_ggtt_offset(obj), size);
> +		WARN((vma->node.start & ~I830_FENCE_START_MASK) ||
> +		     !is_power_of_2(vma->node.size) ||
> +		     (vma->node.start & (vma->node.size - 1)),
> +		     "object 0x%08lx not 512K or pot-size 0x%08lx aligned\n",
> +		     (long)vma->node.start, (long)vma->node.size);
>  
>  		pitch_val = obj->stride / 128;
>  		pitch_val = ffs(pitch_val) - 1;
>  
> -		val = i915_gem_obj_ggtt_offset(obj);
> +		val = vma->node.start;
>  		if (obj->tiling_mode == I915_TILING_Y)
>  			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
> -		val |= I830_FENCE_SIZE_BITS(size);
> +		val |= I830_FENCE_SIZE_BITS(vma->node.size);
>  		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
>  		val |= I830_FENCE_REG_VALID;
>  	} else
> @@ -426,13 +422,7 @@ bool
>  i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
>  {
>  	if (obj->fence_reg != I915_FENCE_REG_NONE) {
> -		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> -		struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);
> -
> -		WARN_ON(!ggtt_vma ||
> -			dev_priv->fence_regs[obj->fence_reg].pin_count >
> -			ggtt_vma->pin_count);
> -		dev_priv->fence_regs[obj->fence_reg].pin_count++;
> +		to_i915(obj->base.dev)->fence_regs[obj->fence_reg].pin_count++;
>  		return true;
>  	} else
>  		return false;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index c7a77e0f18c2..775b5a4e8a5b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -3325,14 +3325,10 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
>  
>  	GEM_BUG_ON(vm->closed);
>  
> -	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
> -		return ERR_PTR(-EINVAL);
> -
>  	vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
>  	if (vma == NULL)
>  		return ERR_PTR(-ENOMEM);
>  
> -	INIT_LIST_HEAD(&vma->obj_link);
>  	INIT_LIST_HEAD(&vma->exec_list);
>  	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
>  		init_request_active(&vma->last_read[i], i915_vma_retire);
> @@ -3342,49 +3338,69 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
>  	vma->size = obj->base.size;
>  	vma->is_ggtt = i915_is_ggtt(vm);
>  
> -	if (i915_is_ggtt(vm)) {
> +	if (ggtt_view) {
>  		vma->ggtt_view = *ggtt_view;
>  		if (ggtt_view->type == I915_GGTT_VIEW_PARTIAL)
>  			vma->size = ggtt_view->params.partial.size << PAGE_SHIFT;
>  		else if (ggtt_view->type == I915_GGTT_VIEW_ROTATED)
>  			vma->size = intel_rotation_info_size(&ggtt_view->params.rotated) << PAGE_SHIFT;
>  	} else
> +
> +	if (!vma->is_ggtt)
>  		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
>  
>  	list_add_tail(&vma->obj_link, &obj->vma_list);
> -
>  	return vma;
>  }
>  
> +static inline bool vma_matches(struct i915_vma *vma,
> +			       struct i915_address_space *vm,
> +			       const struct i915_ggtt_view *view)
> +{
> +	if (vma->vm != vm)
> +		return false;
> +
> +	if (!vma->is_ggtt)
> +		return true;
> +
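> +	/* A NULL view is shorthand for the normal (linear) GGTT view */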
> +	if (view == NULL)
> +		return vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL;
> +
> +	if (vma->ggtt_view.type != view->type)
> +		return false;
> +
> +	return memcmp(&vma->ggtt_view.params,
> +		      &view->params,
> +		      sizeof(view->params)) == 0;
> +}
> +
>  struct i915_vma *
> -i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
> -				  struct i915_address_space *vm)
> +i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
> +		    struct i915_address_space *vm,
> +		    const struct i915_ggtt_view *view)
>  {
>  	struct i915_vma *vma;
>  
> -	vma = i915_gem_obj_to_vma(obj, vm);
> -	if (!vma)
> -		vma = __i915_gem_vma_create(obj, vm,
> -					    i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
> +	list_for_each_entry_reverse(vma, &obj->vma_list, obj_link)
> +		if (vma_matches(vma, vm, view))
> +			return vma;
>  
> -	return vma;
> +	return NULL;
>  }
>  
>  struct i915_vma *
> -i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
> -				       const struct i915_ggtt_view *view)
> +i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
> +				  struct i915_address_space *vm,
> +				  const struct i915_ggtt_view *view)
>  {
> -	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> -	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
> +	struct i915_vma *vma;
>  
> +	vma = i915_gem_obj_to_vma(obj, vm, view);
>  	if (!vma)
> -		vma = __i915_gem_vma_create(obj, &ggtt->base, view);
> +		vma = __i915_gem_vma_create(obj, vm, view);
>  
>  	GEM_BUG_ON(vma->closed);
>  	return vma;
> -
>  }
>  
>  static struct scatterlist *
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 5655358a60e1..5b28dc251e60 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -590,20 +590,6 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev);
>  int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
>  void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
>  
> -static inline bool
> -i915_ggtt_view_equal(const struct i915_ggtt_view *a,
> -                     const struct i915_ggtt_view *b)
> -{
> -	if (WARN_ON(!a || !b))
> -		return false;
> -
> -	if (a->type != b->type)
> -		return false;
> -	if (a->type != I915_GGTT_VIEW_NORMAL)
> -		return !memcmp(&a->params, &b->params, sizeof(a->params));
> -	return true;
> -}
> -
>  /**
>   * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture
>   * @vma: VMA to iomap
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index 4cf82697b3db..6e6eac43db19 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -31,7 +31,7 @@
>  struct render_state {
>  	const struct intel_renderstate_rodata *rodata;
>  	struct drm_i915_gem_object *obj;
> -	u64 ggtt_offset;
> +	struct i915_vma *vma;
>  	int gen;
>  	u32 aux_batch_size;
>  	u32 aux_batch_offset;
> @@ -57,10 +57,9 @@ render_state_get_rodata(const int gen)
>  static int render_state_init(struct render_state *so,
>  			     struct drm_i915_private *dev_priv)
>  {
> -	int ret;
> +	struct i915_vma *vma;
>  
>  	so->gen = INTEL_GEN(dev_priv);
> -	so->ggtt_offset = 0;
>  	so->rodata = render_state_get_rodata(so->gen);
>  	if (so->rodata == NULL)
>  		return 0;
> @@ -72,16 +71,14 @@ static int render_state_init(struct render_state *so,
>  	if (IS_ERR(so->obj))
>  		return PTR_ERR(so->obj);
>  
> -	ret = i915_gem_object_ggtt_pin(so->obj, NULL, 0, 0, 0);
> -	if (ret)
> -		goto free_gem;
> +	vma = i915_gem_object_ggtt_pin(so->obj, NULL, 0, 0, 0);
> +	if (IS_ERR(vma)) {
> +		i915_gem_object_put(so->obj);
> +		return PTR_ERR(vma);
> +	}
>  
> -	so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj);
> +	so->vma = vma;
>  	return 0;
> -
> -free_gem:
> -	i915_gem_object_put(so->obj);
> -	return ret;
>  }
>  
>  /*
> @@ -121,7 +118,7 @@ static int render_state_setup(struct render_state *so)
>  		u32 s = rodata->batch[i];
>  
>  		if (i * 4  == rodata->reloc[reloc_index]) {
> -			u64 r = s + so->ggtt_offset;
> +			u64 r = s + so->vma->node.start;
>  			s = lower_32_bits(r);
>  			if (so->gen >= 8) {
>  				if (i + 1 >= rodata->batch_items ||
> @@ -176,7 +173,7 @@ err_out:
>  
>  static void render_state_fini(struct render_state *so)
>  {
> -	i915_gem_object_ggtt_unpin(so->obj);
> +	i915_vma_unpin(so->vma);
>  	i915_gem_object_put(so->obj);
>  }
>  
> @@ -209,14 +206,14 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
>  	struct render_state so;
>  	int ret;
>  
> -	ret = render_state_prepare(req->engine, &so);
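> +	/* memset() returns its first argument, zeroing 'so' in passing */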
> +	ret = render_state_prepare(req->engine, memset(&so, 0, sizeof(so)));
>  	if (ret)
>  		return ret;
>  
>  	if (so.rodata == NULL)
>  		return 0;
>  
> -	ret = req->engine->emit_bb_start(req, so.ggtt_offset,
> +	ret = req->engine->emit_bb_start(req, so.vma->node.start,
>  					 so.rodata->batch_items * 4,
>  					 I915_DISPATCH_SECURE);
>  	if (ret)
> @@ -224,7 +221,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
>  
>  	if (so.aux_batch_size > 8) {
>  		ret = req->engine->emit_bb_start(req,
> -						 (so.ggtt_offset +
> +						 (so.vma->node.start +
>  						  so.aux_batch_offset),
>  						 so.aux_batch_size,
>  						 I915_DISPATCH_SECURE);
> @@ -232,7 +229,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
>  			goto out;
>  	}
>  
> -	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req, 0);
> +	i915_vma_move_to_active(so.vma, req, 0);
>  out:
>  	render_state_fini(&so);
>  	return ret;
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h
> index c44fca8599bb..18cce3f06e9c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.h
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
> @@ -24,7 +24,7 @@
>  #ifndef _I915_GEM_RENDER_STATE_H_
>  #define _I915_GEM_RENDER_STATE_H_
>  
> -#include <linux/types.h>
> +struct drm_i915_gem_request;
>  
>  int i915_gem_render_state_init(struct drm_i915_gem_request *req);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 8101d9169027..5a3d81e5458b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -404,18 +404,12 @@ static void i915_gem_mark_busy(struct drm_i915_private *dev_priv,
>   */
>  void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>  {
> -	struct intel_engine_cs *engine;
> -	struct intel_ring *ring;
> +	struct intel_engine_cs *engine = request->engine;
> +	struct intel_ring *ring = request->ring;
>  	u32 request_start;
>  	u32 reserved_tail;
>  	int ret;
>  
> -	if (WARN_ON(request == NULL))
> -		return;
> -
> -	engine = request->engine;
> -	ring = request->ring;
> -
>  	/*
>  	 * To ensure that this call will not fail, space for its emissions
>  	 * should already have been reserved in the ring buffer. Let the ring
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 87e055267904..a8e228f5ceb4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -111,7 +111,7 @@ struct drm_i915_gem_request {
>  
>  	/** Batch buffer related to this request if any (used for
>  	 * error state dump only) */
> -	struct drm_i915_gem_object *batch_obj;
> +	struct i915_vma *batch;
>  	struct list_head active_list;
>  
>  	/** Time at which this request was emitted, in jiffies. */
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index a881c243fca2..415fa04d5232 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -683,7 +683,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
>  	if (gtt_offset == I915_GTT_OFFSET_NONE)
>  		return obj;
>  
> -	vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base);
> +	vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base, NULL);
>  	if (IS_ERR(vma)) {
>  		ret = PTR_ERR(vma);
>  		goto err;
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index d6acd0a27c06..29fc4dfd1947 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -114,33 +114,44 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
>  }
>  
>  /* Is the current GTT allocation valid for the change in tiling? */
> -static bool
> +static int
>  i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
>  {
> +	struct i915_vma *vma;
>  	u32 size;
>  
>  	if (tiling_mode == I915_TILING_NONE)
> -		return true;
> +		return 0;
>  
> -	if (INTEL_INFO(obj->base.dev)->gen >= 4)
> -		return true;
> +	if (INTEL_GEN(obj->base.dev) >= 4)
> +		return 0;
> +
> +	vma = i915_gem_object_to_ggtt(obj, NULL);
> +	if (vma == NULL)
> +		return 0;
> +
> +	if (!obj->map_and_fenceable)
> +		return 0;
>  
>  	if (IS_GEN3(obj->base.dev)) {
> -		if (i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK)
> -			return false;
> +		if (vma->node.start & ~I915_FENCE_START_MASK)
> +			goto bad;
>  	} else {
> -		if (i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK)
> -			return false;
> +		if (vma->node.start & ~I830_FENCE_START_MASK)
> +			goto bad;
>  	}
>  
>  	size = i915_gem_get_gtt_size(obj->base.dev, obj->base.size, tiling_mode);
> -	if (i915_gem_obj_ggtt_size(obj) != size)
> -		return false;
> +	if (vma->node.size < size)
> +		goto bad;
>  
> -	if (i915_gem_obj_ggtt_offset(obj) & (size - 1))
> -		return false;
> +	if (vma->node.start & (size - 1))
> +		goto bad;
>  
> -	return true;
> +	return 0;
> +
> +bad:
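> +	/* Unbind so that the next bind can pick a fenceable offset and size */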
> +	return i915_vma_unbind(vma);
>  }
>  
>  /**
> @@ -227,10 +238,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
>  		 * has to also include the unfenced register the GPU uses
>  		 * whilst executing a fenced command for an untiled object.
>  		 */
> -		if (obj->map_and_fenceable &&
> -		    !i915_gem_object_fence_ok(obj, args->tiling_mode))
> -			ret = i915_vma_unbind(i915_gem_obj_to_ggtt(obj));
> -
> +		ret = i915_gem_object_fence_ok(obj, args->tiling_mode);
>  		if (ret == 0) {
>  			if (obj->pages &&
>  			    obj->madv == I915_MADV_WILLNEED &&
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 367b8b2ce5f2..3e42705e2fa4 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -632,18 +632,21 @@ static void i915_error_state_free(struct kref *error_ref)
>  
>  static struct drm_i915_error_object *
>  i915_error_object_create(struct drm_i915_private *dev_priv,
> -			 struct drm_i915_gem_object *src,
> -			 struct i915_address_space *vm)
> +			 struct i915_vma *vma)
>  {
>  	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> +	struct drm_i915_gem_object *src;
>  	struct drm_i915_error_object *dst;
> -	struct i915_vma *vma = NULL;
>  	int num_pages;
>  	bool use_ggtt;
>  	int i = 0;
>  	u64 reloc_offset;
>  
> -	if (src == NULL || src->pages == NULL)
> +	if (vma == NULL)
> +		return NULL;
> +
> +	src = vma->obj;
> +	if (src->pages == NULL)
>  		return NULL;
>  
>  	num_pages = src->base.size >> PAGE_SHIFT;
> @@ -652,26 +655,19 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
>  	if (dst == NULL)
>  		return NULL;
>  
> -	if (i915_gem_obj_bound(src, vm))
> -		dst->gtt_offset = i915_gem_obj_offset(src, vm);
> -	else
> -		dst->gtt_offset = -1;
> -
> -	reloc_offset = dst->gtt_offset;
> -	if (i915_is_ggtt(vm))
> -		vma = i915_gem_obj_to_ggtt(src);
> +	reloc_offset = dst->gtt_offset = vma->node.start;
>  	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
> -		   vma && (vma->bound & GLOBAL_BIND) &&
> +		   (vma->bound & GLOBAL_BIND) &&
>  		   reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
>  
>  	/* Cannot access stolen address directly, try to use the aperture */
>  	if (src->stolen) {
>  		use_ggtt = true;
>  
> -		if (!(vma && vma->bound & GLOBAL_BIND))
> +		if (!(vma->bound & GLOBAL_BIND))
>  			goto unwind;
>  
> -		reloc_offset = i915_gem_obj_ggtt_offset(src);
> +		reloc_offset = vma->node.start;
>  		if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end)
>  			goto unwind;
>  	}
> @@ -724,8 +720,6 @@ unwind:
>  	kfree(dst);
>  	return NULL;
>  }
> -#define i915_error_ggtt_object_create(dev_priv, src) \
> -	i915_error_object_create((dev_priv), (src), &(dev_priv)->ggtt.base)
>  
>  /* The error capture is special as it tries to run underneath the normal
>   * locking rules - so we use the raw version of the i915_gem_active lookup.
> @@ -851,10 +845,10 @@ static void gen8_record_semaphore_state(struct drm_i915_private *dev_priv,
>  	if (!i915.semaphores)
>  		return;
>  
> -	if (!error->semaphore_obj)
> +	if (!error->semaphore_obj && dev_priv->semaphore_vma)
>  		error->semaphore_obj =
> -			i915_error_ggtt_object_create(dev_priv,
> -						      dev_priv->semaphore_obj);
> +			i915_error_object_create(dev_priv,
> +						 dev_priv->semaphore_vma);
>  
>  	for_each_engine_id(to, dev_priv, id) {
>  		int idx;
> @@ -1042,9 +1036,7 @@ static void i915_gem_record_active_context(struct intel_engine_cs *engine,
>  
>  	list_for_each_entry(vma, &dev_priv->ggtt.base.active_list, vm_link) {
>  		if ((error->ccid & PAGE_MASK) == vma->node.start) {
> -			ering->ctx = i915_error_object_create(dev_priv,
> -							      vma->obj,
> -							      vma->vm);
> +			ering->ctx = i915_error_object_create(dev_priv, vma);
>  			break;
>  		}
>  	}
> @@ -1086,13 +1078,12 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
>  			 */
>  			error->ring[i].batchbuffer =
>  				i915_error_object_create(dev_priv,
> -							 request->batch_obj,
> -							 vm);
> +							 request->batch);
>  
>  			if (HAS_BROKEN_CS_TLB(dev_priv))
>  				error->ring[i].wa_batchbuffer =
> -					i915_error_ggtt_object_create(dev_priv,
> -								      engine->scratch.obj);
> +					i915_error_object_create(dev_priv,
> +								 engine->scratch);
>  
>  			if (request->pid) {
>  				struct task_struct *task;
> @@ -1112,17 +1103,15 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
>  			error->ring[i].cpu_ring_head = ring->head;
>  			error->ring[i].cpu_ring_tail = ring->tail;
>  			error->ring[i].ringbuffer =
> -				i915_error_ggtt_object_create(dev_priv,
> -							      ring->obj);
> +				i915_error_object_create(dev_priv, ring->vma);
>  		}
>  
>  		error->ring[i].hws_page =
> -			i915_error_ggtt_object_create(dev_priv,
> -						      engine->status_page.obj);
> +			i915_error_object_create(dev_priv,
> +						 engine->status_page.vma);
>  
>  		error->ring[i].wa_ctx =
> -			i915_error_ggtt_object_create(dev_priv,
> -						      engine->wa_ctx.obj);
> +			i915_error_object_create(dev_priv, engine->wa_ctx.vma);
>  
>  		i915_gem_record_active_context(engine, error, &error->ring[i]);
>  
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 1c92c4c6b0e1..90db9a88fddc 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -375,7 +375,6 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
>  	for_each_engine(engine, dev_priv) {
>  		struct intel_context *ce = &ctx->engine[engine->id];
>  		struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id];
> -		struct drm_i915_gem_object *obj;
>  
>  		/* TODO: We have a design issue to be solved here. Only when we
>  		 * receive the first batch, we know which engine is used by the
> @@ -384,23 +383,20 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
>  		 * for now who owns a GuC client. But for future owner of GuC
>  		 * client, need to make sure lrc is pinned prior to enter here.
>  		 */
> -		if (!ce->state)
> +		if (!ce->vma)
>  			break;	/* XXX: continue? */
>  
>  		lrc->context_desc = lower_32_bits(ce->lrc_desc);
>  
>  		/* The state page is after PPHWSP */
> -		gfx_addr = i915_gem_obj_ggtt_offset(ce->state);
> +		gfx_addr = ce->vma->node.start;
>  		lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
>  		lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
>  				(engine->guc_id << GUC_ELC_ENGINE_OFFSET);
>  
> -		obj = ce->ring->obj;
> -		gfx_addr = i915_gem_obj_ggtt_offset(obj);
> -
> -		lrc->ring_begin = gfx_addr;
> -		lrc->ring_end = gfx_addr + obj->base.size - 1;
> -		lrc->ring_next_free_location = gfx_addr;
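> +		/* The ring addresses handed to the GuC are GGTT offsets */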
> +		lrc->ring_begin = ce->ring->vma->node.start;
> +		lrc->ring_end = lrc->ring_begin + ce->ring->vma->node.size - 1;
> +		lrc->ring_next_free_location = lrc->ring_begin;
>  		lrc->ring_current_tail_pointer_value = 0;
>  
>  		desc.engines_used |= (1 << engine->guc_id);
> @@ -602,23 +598,23 @@ static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size)
>  {
>  	struct drm_i915_private *dev_priv = guc_to_i915(guc);
>  	struct drm_i915_gem_object *obj;
> -	int ret;
> +	struct i915_vma *vma;
>  
>  	obj = i915_gem_object_create(dev_priv->dev, size);
>  	if (IS_ERR(obj))
>  		return ERR_CAST(obj);
>  
> -	ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
> +	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
>  				       PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
> -	if (ret) {
> +	if (IS_ERR(vma)) {
>  		i915_gem_object_put(obj);
> -		return ERR_PTR(ret);
> +		return vma;
>  	}
>  
>  	/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
>  	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
>  
> -	return i915_gem_obj_to_ggtt(obj);
> +	return vma;
>  }
>  
>  /**
> @@ -988,7 +984,7 @@ int intel_guc_suspend(struct drm_device *dev)
>  	/* any value greater than GUC_POWER_D0 */
>  	data[1] = GUC_POWER_D1;
>  	/* first page is shared data with GuC */
> -	data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state);
> +	data[2] = ctx->engine[RCS].vma->node.start;
>  
>  	return host2guc_action(guc, data, ARRAY_SIZE(data));
>  }
> @@ -1013,7 +1009,7 @@ int intel_guc_resume(struct drm_device *dev)
>  	data[0] = HOST2GUC_ACTION_EXIT_S_STATE;
>  	data[1] = GUC_POWER_D0;
>  	/* first page is shared data with GuC */
> -	data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state);
> +	data[2] = ctx->engine[RCS].vma->node.start;
>  
>  	return host2guc_action(guc, data, ARRAY_SIZE(data));
>  }
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 0cfaace38370..cc6f7a49bf58 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -2240,14 +2240,14 @@ static unsigned int intel_surf_alignment(const struct drm_i915_private *dev_priv
>  	}
>  }
>  
> -int
> -intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
> -			   unsigned int rotation)
> +struct i915_vma *
> +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
>  {
>  	struct drm_device *dev = fb->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
>  	struct i915_ggtt_view view;
> +	struct i915_vma *vma;
>  	u32 alignment;
>  	int ret;
>  
> @@ -2274,10 +2274,11 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
>  	 */
>  	intel_runtime_pm_get(dev_priv);
>  
> -	ret = i915_gem_object_pin_to_display_plane(obj, alignment,
> -						   &view);
> -	if (ret)
> +	vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
> +	if (IS_ERR(vma)) {
> +		ret = PTR_ERR(vma);
>  		goto err_pm;
> +	}
>  
>  	/* Install a fence for tiled scan-out. Pre-i965 always needs a
>  	 * fence, whereas 965+ only requires a fence if using
> @@ -2304,19 +2305,20 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
>  	}
>  
>  	intel_runtime_pm_put(dev_priv);
> -	return 0;
> +	return vma;
>  
>  err_unpin:
> -	i915_gem_object_unpin_from_display_plane(obj, &view);
> +	i915_gem_object_unpin_from_display_plane(vma);
>  err_pm:
>  	intel_runtime_pm_put(dev_priv);
> -	return ret;
> +	return ERR_PTR(ret);
>  }
>  
>  void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
>  {
>  	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
>  	struct i915_ggtt_view view;
> +	struct i915_vma *vma;
>  
>  	WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
>  
> @@ -2325,7 +2327,8 @@ void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
>  	if (view.type == I915_GGTT_VIEW_NORMAL)
>  		i915_gem_object_unpin_fence(obj);
>  
> -	i915_gem_object_unpin_from_display_plane(obj, &view);
> +	vma = i915_gem_object_to_ggtt(obj, &view);
> +	i915_gem_object_unpin_from_display_plane(vma);
>  }
>  
>  /*
> @@ -2587,7 +2590,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
>  			continue;
>  
>  		obj = intel_fb_obj(fb);
> -		if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) {
> +		if (i915_gem_object_ggtt_offset(obj, NULL) == plane_config->base) {
>  			drm_framebuffer_reference(fb);
>  			goto valid_fb;
>  		}
> @@ -2745,11 +2748,11 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
>  	I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
>  	if (INTEL_INFO(dev)->gen >= 4) {
>  		I915_WRITE(DSPSURF(plane),
> -			   i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
> +			   i915_gem_object_ggtt_offset(obj, NULL) + intel_crtc->dspaddr_offset);
>  		I915_WRITE(DSPTILEOFF(plane), (y << 16) | x);
>  		I915_WRITE(DSPLINOFF(plane), linear_offset);
>  	} else
> -		I915_WRITE(DSPADDR(plane), i915_gem_obj_ggtt_offset(obj) + linear_offset);
> +		I915_WRITE(DSPADDR(plane), i915_gem_object_ggtt_offset(obj, NULL) + linear_offset);
>  	POSTING_READ(reg);
>  }
>  
> @@ -2849,7 +2852,7 @@ static void ironlake_update_primary_plane(struct drm_plane *primary,
>  
>  	I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
>  	I915_WRITE(DSPSURF(plane),
> -		   i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
> +		   i915_gem_object_ggtt_offset(obj, NULL) + intel_crtc->dspaddr_offset);
>  	if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
>  		I915_WRITE(DSPOFFSET(plane), (y << 16) | x);
>  	} else {
> @@ -2882,7 +2885,7 @@ u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
>  	intel_fill_fb_ggtt_view(&view, intel_plane->base.state->fb,
>  				intel_plane->base.state->rotation);
>  
> -	vma = i915_gem_obj_to_ggtt_view(obj, &view);
> +	vma = i915_gem_object_to_ggtt(obj, &view);
>  	if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n",
>  		view.type))
>  		return -1;
> @@ -11385,7 +11388,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>  			intel_ring_emit(ring, MI_STORE_REGISTER_MEM |
>  					      MI_SRM_LRM_GLOBAL_GTT);
>  		intel_ring_emit_reg(ring, DERRMR);
> -		intel_ring_emit(ring, req->engine->scratch.gtt_offset + 256);
> +		intel_ring_emit(ring, req->engine->scratch->node.start + 256);
>  		if (IS_GEN8(dev)) {
>  			intel_ring_emit(ring, 0);
>  			intel_ring_emit(ring, MI_NOOP);
> @@ -11634,6 +11637,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  	struct intel_engine_cs *engine;
>  	bool mmio_flip;
>  	struct drm_i915_gem_request *request;
> +	struct i915_vma *vma;
>  	int ret;
>  
>  	/*
> @@ -11739,9 +11743,11 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  
>  	mmio_flip = use_mmio_flip(engine, obj);
>  
> -	ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
> -	if (ret)
> +	vma = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
> +	if (IS_ERR(vma)) {
> +		ret = PTR_ERR(vma);
>  		goto cleanup_pending;
> +	}
>  
>  	work->gtt_offset = intel_plane_obj_offset(to_intel_plane(primary),
>  						  obj, 0);
> @@ -13965,7 +13971,11 @@ intel_prepare_plane_fb(struct drm_plane *plane,
>  		if (ret)
>  			DRM_DEBUG_KMS("failed to attach phys object\n");
>  	} else {
> -		ret = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
> +		struct i915_vma *vma;
> +
> +		vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
> +		if (IS_ERR(vma))
> +			ret = PTR_ERR(vma);
>  	}
>  
>  	if (ret == 0) {
> @@ -14334,7 +14344,7 @@ intel_update_cursor_plane(struct drm_plane *plane,
>  	if (!obj)
>  		addr = 0;
>  	else if (!INTEL_INFO(dev)->cursor_needs_physical)
> -		addr = i915_gem_obj_ggtt_offset(obj);
> +		addr = i915_gem_object_ggtt_offset(obj, NULL);
>  	else
>  		addr = obj->phys_handle->busaddr;
>  
> @@ -16160,7 +16170,6 @@ void intel_modeset_gem_init(struct drm_device *dev)
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_crtc *c;
>  	struct drm_i915_gem_object *obj;
> -	int ret;
>  
>  	intel_init_gt_powersave(dev_priv);
>  
> @@ -16174,15 +16183,17 @@ void intel_modeset_gem_init(struct drm_device *dev)
>  	 * for this.
>  	 */
>  	for_each_crtc(dev, c) {
> +		struct i915_vma *vma;
> +
>  		obj = intel_fb_obj(c->primary->fb);
>  		if (obj == NULL)
>  			continue;
>  
>  		mutex_lock(&dev->struct_mutex);
> -		ret = intel_pin_and_fence_fb_obj(c->primary->fb,
> +		vma = intel_pin_and_fence_fb_obj(c->primary->fb,
>  						 c->primary->state->rotation);
>  		mutex_unlock(&dev->struct_mutex);
> -		if (ret) {
> +		if (IS_ERR(vma)) {
>  			DRM_ERROR("failed to pin boot fb on pipe %d\n",
>  				  to_intel_crtc(c)->pipe);
>  			drm_framebuffer_unreference(c->primary->fb);
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 834646b4cc3f..30ef29873571 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -159,6 +159,7 @@ struct intel_framebuffer {
>  struct intel_fbdev {
>  	struct drm_fb_helper helper;
>  	struct intel_framebuffer *fb;
> +	struct i915_vma *vma;
>  	async_cookie_t cookie;
>  	int preferred_bpp;
>  };
> @@ -1207,8 +1208,8 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
>  void intel_release_load_detect_pipe(struct drm_connector *connector,
>  				    struct intel_load_detect_pipe *old,
>  				    struct drm_modeset_acquire_ctx *ctx);
> -int intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
> -			       unsigned int rotation);
> +struct i915_vma *
> +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation);
>  void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation);
>  struct drm_framebuffer *
>  __intel_framebuffer_create(struct drm_device *dev,
> diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
> index 45ee07b888a0..e1eb96b50ec1 100644
> --- a/drivers/gpu/drm/i915/intel_fbc.c
> +++ b/drivers/gpu/drm/i915/intel_fbc.c
> @@ -742,7 +742,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc)
>  	/* FIXME: We lack the proper locking here, so only run this on the
>  	 * platforms that need. */
>  	if (IS_GEN(dev_priv, 5, 6))
> -		cache->fb.ilk_ggtt_offset = i915_gem_obj_ggtt_offset(obj);
> +		cache->fb.ilk_ggtt_offset = i915_gem_object_ggtt_offset(obj, NULL);
>  	cache->fb.pixel_format = fb->pixel_format;
>  	cache->fb.stride = fb->pitches[0];
>  	cache->fb.fence_reg = obj->fence_reg;
> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> index 10600975fe8d..e76d18f7c733 100644
> --- a/drivers/gpu/drm/i915/intel_fbdev.c
> +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> @@ -187,7 +187,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	struct fb_info *info;
>  	struct drm_framebuffer *fb;
>  	struct i915_vma *vma;
> -	struct drm_i915_gem_object *obj;
>  	bool prealloc = false;
>  	void *vaddr;
>  	int ret;
> @@ -215,17 +214,17 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  		sizes->fb_height = intel_fb->base.height;
>  	}
>  
> -	obj = intel_fb->obj;
> -
>  	mutex_lock(&dev->struct_mutex);
>  
>  	/* Pin the GGTT vma for our access via info->screen_base.
>  	 * This also validates that any existing fb inherited from the
>  	 * BIOS is suitable for own access.
>  	 */
> -	ret = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0));
> -	if (ret)
> +	vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0));
> +	if (IS_ERR(vma)) {
> +		ret = PTR_ERR(vma);
>  		goto out_unlock;
> +	}
>  
>  	info = drm_fb_helper_alloc_fbi(helper);
>  	if (IS_ERR(info)) {
> @@ -245,8 +244,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT;
>  	info->fbops = &intelfb_ops;
>  
> -	vma = i915_gem_obj_to_ggtt(obj);
> -
>  	/* setup aperture base/size for vesafb takeover */
>  	info->apertures->ranges[0].base = dev->mode_config.fb_base;
>  	info->apertures->ranges[0].size = ggtt->mappable_end;
> @@ -273,14 +270,14 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	 * If the object is stolen however, it will be full of whatever
>  	 * garbage was left in there.
>  	 */
> -	if (ifbdev->fb->obj->stolen && !prealloc)
> +	if (intel_fb->obj->stolen && !prealloc)
>  		memset_io(info->screen_base, 0, info->screen_size);
>  
>  	/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
>  
> -	DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08llx, bo %p\n",
> -		      fb->width, fb->height,
> -		      i915_gem_obj_ggtt_offset(obj), obj);
> +	DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08llx\n",
> +		      fb->width, fb->height, vma->node.start);
> +	ifbdev->vma = vma;
>  
>  	mutex_unlock(&dev->struct_mutex);
>  	vga_switcheroo_client_fb_set(dev->pdev, info);
> diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
> index 1ecf88fd0b10..64f57c07afcc 100644
> --- a/drivers/gpu/drm/i915/intel_guc_loader.c
> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
> @@ -235,12 +235,12 @@ static inline bool guc_ucode_response(struct drm_i915_private *dev_priv,
>   * Note that GuC needs the CSS header plus uKernel code to be copied by the
>   * DMA engine in one operation, whereas the RSA signature is loaded via MMIO.
>   */
> -static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv)
> +static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
> +			      struct i915_vma *vma)
>  {
>  	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
> -	struct drm_i915_gem_object *fw_obj = guc_fw->guc_fw_obj;
>  	unsigned long offset;
> -	struct sg_table *sg = fw_obj->pages;
> +	struct sg_table *sg = vma->obj->pages;
>  	u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT];
>  	int i, ret = 0;
>  
> @@ -257,7 +257,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv)
>  	I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size);
>  
>  	/* Set the source address for the new blob */
> -	offset = i915_gem_obj_ggtt_offset(fw_obj) + guc_fw->header_offset;
> +	offset = vma->node.start + guc_fw->header_offset;
>  	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
>  	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
>  
> @@ -312,6 +312,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
>  {
>  	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
>  	struct drm_device *dev = dev_priv->dev;
> +	struct i915_vma *vma;
>  	int ret;
>  
>  	ret = i915_gem_object_set_to_gtt_domain(guc_fw->guc_fw_obj, false);
> @@ -320,10 +321,10 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
>  		return ret;
>  	}
>  
> -	ret = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0);
> -	if (ret) {
> -		DRM_DEBUG_DRIVER("pin failed %d\n", ret);
> -		return ret;
> +	vma = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0);
> +	if (IS_ERR(vma)) {
> +		DRM_DEBUG_DRIVER("pin failed %d\n", (int)PTR_ERR(vma));
> +		return PTR_ERR(vma);
>  	}
>  
>  	/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
> @@ -364,7 +365,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
>  
>  	set_guc_init_params(dev_priv);
>  
> -	ret = guc_ucode_xfer_dma(dev_priv);
> +	ret = guc_ucode_xfer_dma(dev_priv, vma);
>  
>  	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>  
> @@ -372,7 +373,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
>  	 * We keep the object pages for reuse during resume. But we can unpin it
>  	 * now that DMA has completed, so it doesn't continue to take up space.
>  	 */
> -	i915_gem_object_ggtt_unpin(guc_fw->guc_fw_obj);
> +	i915_vma_unpin(vma);
>  
>  	return ret;
>  }
> @@ -653,12 +654,8 @@ fail:
>  	DRM_ERROR("Failed to fetch GuC firmware from %s (error %d)\n",
>  		  guc_fw->guc_fw_path, err);
>  
> -	mutex_lock(&dev->struct_mutex);
> -	obj = guc_fw->guc_fw_obj;
> -	if (obj)
> -		i915_gem_object_put(obj);
> +	i915_gem_object_put_unlocked(guc_fw->guc_fw_obj);
>  	guc_fw->guc_fw_obj = NULL;
> -	mutex_unlock(&dev->struct_mutex);
>  
>  	release_firmware(fw);		/* OK even if fw is NULL */
>  	guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_FAIL;
> @@ -737,7 +734,7 @@ void intel_guc_fini(struct drm_device *dev)
>  
>  	i915_gem_object_put(guc_fw->guc_fw_obj);
>  	guc_fw->guc_fw_obj = NULL;
> -	mutex_unlock(&dev->struct_mutex);
>  
>  	guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_NONE;
> +	mutex_unlock(&dev->struct_mutex);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 4bf63af2a282..49e7bf170a04 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -326,7 +326,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
>  	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<<GEN8_CTX_ID_WIDTH));
>  
>  	desc = engine->ctx_desc_template;			/* bits  0-11 */
> -	desc |= ce->lrc_vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE;
> +	desc |= ce->vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE;
>  								/* bits 12-31 */
>  	desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;		/* bits 32-52 */
>  
> @@ -765,6 +765,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
>  {
>  	struct drm_i915_private *dev_priv = ctx->i915;
>  	struct intel_context *ce = &ctx->engine[engine->id];
> +	struct i915_vma *vma;
>  	void *vaddr;
>  	u32 *lrc_reg_state;
>  	int ret;
> @@ -774,16 +775,18 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
>  	if (ce->pin_count++)
>  		return 0;
>  
> -	ret = i915_gem_object_ggtt_pin(ce->state, NULL,
> +	vma = i915_gem_object_ggtt_pin(ce->state, NULL,
>  				       0, GEN8_LR_CONTEXT_ALIGN,
>  				       PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
> -	if (ret)
> +	if (IS_ERR(vma)) {
> +		ret = PTR_ERR(vma);
>  		goto err;
> +	}
>  
> -	vaddr = i915_gem_object_pin_map(ce->state);
> +	vaddr = i915_gem_object_pin_map(vma->obj);
>  	if (IS_ERR(vaddr)) {
>  		ret = PTR_ERR(vaddr);
> -		goto unpin_ctx_obj;
> +		goto unpin_vma;
>  	}
>  
>  	lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
> @@ -792,12 +795,12 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
>  	if (ret)
>  		goto unpin_map;
>  
> -	ce->lrc_vma = i915_gem_obj_to_ggtt(ce->state);
> +	ce->vma = vma;
>  	intel_lr_context_descriptor_update(ctx, engine);
>  
>  	lrc_reg_state[CTX_RING_BUFFER_START+1] = ce->ring->vma->node.start;
>  	ce->lrc_reg_state = lrc_reg_state;
> -	ce->state->dirty = true;
> +	vma->obj->dirty = true;
>  
>  	/* Invalidate GuC TLB. */
>  	if (i915.enable_guc_submission)
> @@ -807,9 +810,9 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
>  	return 0;
>  
>  unpin_map:
> -	i915_gem_object_unpin_map(ce->state);
> -unpin_ctx_obj:
> -	i915_gem_object_ggtt_unpin(ce->state);
> +	i915_gem_object_unpin_map(vma->obj);
> +unpin_vma:
> +	__i915_vma_unpin(vma);
>  err:
>  	ce->pin_count = 0;
>  	return ret;
> @@ -829,9 +832,9 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx,
>  	intel_ring_unpin(ce->ring);
>  
>  	i915_gem_object_unpin_map(ce->state);
> -	i915_gem_object_ggtt_unpin(ce->state);
> +	i915_vma_unpin(ce->vma);
>  
> -	ce->lrc_vma = NULL;
> +	ce->vma = NULL;
>  	ce->lrc_desc = 0;
>  	ce->lrc_reg_state = NULL;
>  
> @@ -921,7 +924,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
>  	wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
>  				   MI_SRM_LRM_GLOBAL_GTT));
>  	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
> -	wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256);
> +	wa_ctx_emit(batch, index, engine->scratch->node.start + 256);
>  	wa_ctx_emit(batch, index, 0);
>  
>  	wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
> @@ -939,7 +942,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
>  	wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
>  				   MI_SRM_LRM_GLOBAL_GTT));
>  	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
> -	wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256);
> +	wa_ctx_emit(batch, index, engine->scratch->node.start + 256);
>  	wa_ctx_emit(batch, index, 0);
>  
>  	return index;
> @@ -1013,7 +1016,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
>  
>  	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
>  	/* Actual scratch location is at 128 bytes offset */
> -	scratch_addr = engine->scratch.gtt_offset + 2*CACHELINE_BYTES;
> +	scratch_addr = engine->scratch->node.start + 2*CACHELINE_BYTES;
>  
>  	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
>  	wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
> @@ -1142,47 +1145,41 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine,
>  	return wa_ctx_end(wa_ctx, *offset = index, 1);
>  }
>  
> -static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
> +static struct i915_vma *
> +lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
>  {
> -	int ret;
> +	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
>  
> -	engine->wa_ctx.obj = i915_gem_object_create(engine->i915->dev,
> -						   PAGE_ALIGN(size));
> -	if (IS_ERR(engine->wa_ctx.obj)) {
> -		DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
> -		ret = PTR_ERR(engine->wa_ctx.obj);
> -		engine->wa_ctx.obj = NULL;
> -		return ret;
> -	}
> +	obj = i915_gem_object_create(engine->i915->dev, PAGE_ALIGN(size));
> +	if (IS_ERR(obj))
> +		return ERR_CAST(obj);
>  
> -	ret = i915_gem_object_ggtt_pin(engine->wa_ctx.obj, NULL,
> -				       0, PAGE_SIZE, 0);
> -	if (ret) {
> -		DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
> -				 ret);
> -		i915_gem_object_put(engine->wa_ctx.obj);
> -		return ret;
> +	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, 0);
> +	if (IS_ERR(vma)) {
> +		i915_gem_object_put(obj);
> +		return vma;
>  	}
>  
> -	return 0;
> +	return vma;
>  }
>  
>  static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine)
>  {
> -	if (engine->wa_ctx.obj) {
> -		i915_gem_object_ggtt_unpin(engine->wa_ctx.obj);
> -		i915_gem_object_put(engine->wa_ctx.obj);
> -		engine->wa_ctx.obj = NULL;
> +	if (engine->wa_ctx.vma) {
> +		i915_vma_unpin(engine->wa_ctx.vma);
> +		i915_gem_object_put(engine->wa_ctx.vma->obj);
> +		engine->wa_ctx.vma = NULL;
>  	}
>  }
>  
>  static int intel_init_workaround_bb(struct intel_engine_cs *engine)
>  {
> -	int ret;
> +	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
>  	uint32_t *batch;
>  	uint32_t offset;
>  	struct page *page;
> -	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
> +	int ret;
>  
>  	WARN_ON(engine->id != RCS);
>  
> @@ -1194,20 +1191,22 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
>  	}
>  
>  	/* some WA perform writes to scratch page, ensure it is valid */
> -	if (engine->scratch.obj == NULL) {
> +	if (engine->scratch == NULL) {
>  		DRM_ERROR("scratch page not allocated for %s\n", engine->name);
>  		return -EINVAL;
>  	}
>  
> -	ret = lrc_setup_wa_ctx_obj(engine, PAGE_SIZE);
> -	if (ret) {
> +	wa_ctx->vma = lrc_setup_wa_ctx_obj(engine, PAGE_SIZE);
> +	if (IS_ERR(wa_ctx->vma)) {
> +		ret = PTR_ERR(wa_ctx->vma);
>  		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
>  		return ret;
>  	}
>  
> -	page = i915_gem_object_get_dirty_page(wa_ctx->obj, 0);
> +	page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
>  	batch = kmap_atomic(page);
>  	offset = 0;
> +	ret = 0;
>  
>  	if (IS_GEN8(engine->i915)) {
>  		ret = gen8_init_indirectctx_bb(engine,
> @@ -1464,7 +1463,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
>  {
>  	struct intel_ring *ring = request->ring;
>  	struct intel_engine_cs *engine = request->engine;
> -	u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
> +	u32 scratch_addr = engine->scratch->node.start + 2 * CACHELINE_BYTES;
>  	bool vf_flush_wa = false;
>  	u32 flags = 0;
>  	int ret;
> @@ -1650,9 +1649,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
>  
>  	intel_engine_fini_breadcrumbs(engine);
>  
> -	if (engine->status_page.obj) {
> -		i915_gem_object_unpin_map(engine->status_page.obj);
> -		engine->status_page.obj = NULL;
> +	if (engine->status_page.vma) {
> +		i915_gem_object_unpin_map(engine->status_page.vma->obj);
> +		engine->status_page.vma = NULL;
>  	}
>  	intel_lr_context_unpin(dev_priv->kernel_context, engine);
>  
> @@ -1692,19 +1691,19 @@ logical_ring_default_irqs(struct intel_engine_cs *engine, unsigned shift)
>  }
>  
>  static int
> -lrc_setup_hws(struct intel_engine_cs *engine,
> -	      struct drm_i915_gem_object *dctx_obj)
> +lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma)
>  {
> +#define HWS_OFFSET (LRC_PPHWSP_PN * PAGE_SIZE)
>  	void *hws;
>  
>  	/* The HWSP is part of the default context object in LRC mode. */
> -	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) +
> -				       LRC_PPHWSP_PN * PAGE_SIZE;
> -	hws = i915_gem_object_pin_map(dctx_obj);
> +	hws = i915_gem_object_pin_map(vma->obj);
>  	if (IS_ERR(hws))
>  		return PTR_ERR(hws);
> -	engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE;
> -	engine->status_page.obj = dctx_obj;
> +
> +	engine->status_page.page_addr = hws + HWS_OFFSET;
> +	engine->status_page.gfx_addr = vma->node.start + HWS_OFFSET;
> +	engine->status_page.vma = vma;
>  
>  	return 0;
>  }
> @@ -1828,7 +1827,7 @@ logical_ring_init(struct intel_engine_cs *engine)
>  	}
>  
>  	/* And setup the hardware status page. */
> -	ret = lrc_setup_hws(engine, dctx->engine[engine->id].state);
> +	ret = lrc_setup_hws(engine, dctx->engine[engine->id].vma);
>  	if (ret) {
>  		DRM_ERROR("Failed to set up hws %s: %d\n", engine->name, ret);
>  		goto error;
> @@ -2109,9 +2108,9 @@ populate_lr_context(struct i915_gem_context *ctx,
>  			       RING_INDIRECT_CTX(engine->mmio_base), 0);
>  		ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET,
>  			       RING_INDIRECT_CTX_OFFSET(engine->mmio_base), 0);
> -		if (engine->wa_ctx.obj) {
> +		if (engine->wa_ctx.vma) {
>  			struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
> -			uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj);
> +			uint32_t ggtt_offset = wa_ctx->vma->node.start;
>  
>  			reg_state[CTX_RCS_INDIRECT_CTX+1] =
>  				(ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) |
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index 9b0fb7e23cbb..75bdd335d565 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -170,8 +170,8 @@ struct overlay_registers {
>  struct intel_overlay {
>  	struct drm_i915_private *i915;
>  	struct intel_crtc *crtc;
> -	struct drm_i915_gem_object *vid_bo;
> -	struct drm_i915_gem_object *old_vid_bo;
> +	struct drm_i915_gem_object *vid_bo, *old_vid_bo;
> +	struct i915_vma *vid_vma, *old_vid_vma;
>  	bool active;
>  	bool pfit_active;
>  	u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
> @@ -316,7 +316,7 @@ static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
>  {
>  	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
>  
> -	i915_gem_object_ggtt_unpin(obj);
> +	i915_gem_object_unpin_from_display_plane(overlay->old_vid_vma);
>  	i915_gem_object_put(obj);
>  
>  	overlay->old_vid_bo = NULL;
> +	overlay->old_vid_vma = NULL;
> @@ -324,14 +324,13 @@ static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
>  
>  static void intel_overlay_off_tail(struct intel_overlay *overlay)
>  {
> -	struct drm_i915_gem_object *obj = overlay->vid_bo;
> -
>  	/* never have the overlay hw on without showing a frame */
> -	if (WARN_ON(!obj))
> +	if (WARN_ON(!overlay->vid_vma))
>  		return;
>  
> -	i915_gem_object_ggtt_unpin(obj);
> -	i915_gem_object_put(obj);
> +	i915_gem_object_unpin_from_display_plane(overlay->vid_vma);
> +	i915_gem_object_put(overlay->vid_bo);
> +	overlay->vid_vma = NULL;
>  	overlay->vid_bo = NULL;
>  
>  	overlay->crtc->overlay = NULL;
> @@ -751,6 +750,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
>  	struct drm_i915_private *dev_priv = overlay->i915;
>  	u32 swidth, swidthsw, sheight, ostride;
>  	enum pipe pipe = overlay->crtc->pipe;
> +	struct i915_vma *vma;
>  
>  	lockdep_assert_held(&dev_priv->dev->struct_mutex);
>  	WARN_ON(!drm_modeset_is_locked(&dev_priv->dev->mode_config.connection_mutex));
> @@ -759,10 +759,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
>  	if (ret != 0)
>  		return ret;
>  
> -	ret = i915_gem_object_pin_to_display_plane(new_bo, 0,
> +	vma = i915_gem_object_pin_to_display_plane(new_bo, 0,
>  						   &i915_ggtt_view_normal);
> -	if (ret != 0)
> -		return ret;
> +	if (IS_ERR(vma))
> +		return PTR_ERR(vma);
>  
>  	ret = i915_gem_object_put_fence(new_bo);
>  	if (ret)
> @@ -805,7 +805,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
>  	swidth = params->src_w;
>  	swidthsw = calc_swidthsw(dev_priv, params->offset_Y, tmp_width);
>  	sheight = params->src_h;
> -	iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_Y, &regs->OBUF_0Y);
> +	iowrite32(vma->node.start + params->offset_Y, &regs->OBUF_0Y);
>  	ostride = params->stride_Y;
>  
>  	if (params->format & I915_OVERLAY_YUV_PLANAR) {
> @@ -819,8 +819,8 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
>  				      params->src_w/uv_hscale);
>  		swidthsw |= max_t(u32, tmp_U, tmp_V) << 16;
>  		sheight |= (params->src_h/uv_vscale) << 16;
> -		iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_U, &regs->OBUF_0U);
> -		iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_V, &regs->OBUF_0V);
> +		iowrite32(vma->node.start + params->offset_U, &regs->OBUF_0U);
> +		iowrite32(vma->node.start + params->offset_V, &regs->OBUF_0V);
>  		ostride |= params->stride_UV << 16;
>  	}
>  
> @@ -845,14 +845,16 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
>  			  INTEL_FRONTBUFFER_OVERLAY(pipe));
>  
>  	overlay->old_vid_bo = overlay->vid_bo;
> +	overlay->old_vid_vma = overlay->vid_vma;
>  	overlay->vid_bo = new_bo;
> +	overlay->vid_vma = vma;
>  
>  	intel_frontbuffer_flip(dev_priv->dev, INTEL_FRONTBUFFER_OVERLAY(pipe));
>  
>  	return 0;
>  
>  out_unpin:
> -	i915_gem_object_ggtt_unpin(new_bo);
> +	i915_gem_object_unpin_from_display_plane(vma);
>  	return ret;
>  }
>  
> @@ -1380,6 +1382,7 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
>  	struct intel_overlay *overlay;
>  	struct drm_i915_gem_object *reg_bo;
>  	struct overlay_registers __iomem *regs;
> +	struct i915_vma *vma = NULL;
>  	int ret;
>  
>  	if (!HAS_OVERLAY(dev_priv))
> @@ -1412,13 +1415,14 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
>  		}
>  		overlay->flip_addr = reg_bo->phys_handle->busaddr;
>  	} else {
> -		ret = i915_gem_object_ggtt_pin(reg_bo, NULL,
> +		vma = i915_gem_object_ggtt_pin(reg_bo, NULL,
>  					       0, PAGE_SIZE, PIN_MAPPABLE);
> -		if (ret) {
> +		if (IS_ERR(vma)) {
>  			DRM_ERROR("failed to pin overlay register bo\n");
> +			ret = PTR_ERR(vma);
>  			goto out_free_bo;
>  		}
> -		overlay->flip_addr = i915_gem_obj_ggtt_offset(reg_bo);
> +		overlay->flip_addr = vma->node.start;
>  
>  		ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
>  		if (ret) {
> @@ -1450,8 +1454,8 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
>  	return;
>  
>  out_unpin_bo:
> -	if (!OVERLAY_NEEDS_PHYSICAL(dev_priv))
> -		i915_gem_object_ggtt_unpin(reg_bo);
> +	if (vma)
> +		i915_vma_unpin(vma);
>  out_free_bo:
>  	i915_gem_object_put(reg_bo);
>  out_free:
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index c8211913f2d6..32add39ee9dd 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -182,7 +182,7 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
>  {
>  	struct intel_ring *ring = req->ring;
>  	u32 scratch_addr =
> -	       	req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
> +		req->engine->scratch->node.start + 2 * CACHELINE_BYTES;
>  	int ret;
>  
>  	ret = intel_ring_begin(req, 6);
> @@ -219,7 +219,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req,
>  {
>  	struct intel_ring *ring = req->ring;
>  	u32 scratch_addr =
> -	       	req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
> +		req->engine->scratch->node.start + 2 * CACHELINE_BYTES;
>  	u32 flags = 0;
>  	int ret;
>  
> @@ -294,7 +294,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req,
>  {
>  	struct intel_ring *ring = req->ring;
>  	u32 scratch_addr =
> -	       	req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
> +		req->engine->scratch->node.start + 2 * CACHELINE_BYTES;
>  	u32 flags = 0;
>  	int ret;
>  
> @@ -379,7 +379,8 @@ static int
>  gen8_render_ring_flush(struct drm_i915_gem_request *req,
>  		       u32 invalidate_domains, u32 flush_domains)
>  {
> -	u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
> +	u32 scratch_addr =
> +		req->engine->scratch->node.start + 2 * CACHELINE_BYTES;
>  	u32 flags = 0;
>  	int ret;
>  
> @@ -540,7 +541,7 @@ static int init_ring_common(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_private *dev_priv = engine->i915;
>  	struct intel_ring *ring = engine->buffer;
> -	struct drm_i915_gem_object *obj = ring->obj;
> +	struct i915_vma *vma = ring->vma;
>  	int ret = 0;
>  
>  	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> @@ -580,7 +581,7 @@ static int init_ring_common(struct intel_engine_cs *engine)
>  	 * registers with the above sequence (the readback of the HEAD registers
>  	 * also enforces ordering), otherwise the hw might lose the new ring
>  	 * register values. */
> -	I915_WRITE_START(engine, i915_gem_obj_ggtt_offset(obj));
> +	I915_WRITE_START(engine, vma->node.start);
>  
>  	/* WaClearRingBufHeadRegAtInit:ctg,elk */
>  	if (I915_READ_HEAD(engine))
> @@ -595,16 +596,15 @@ static int init_ring_common(struct intel_engine_cs *engine)
>  
>  	/* If the head is still not zero, the ring is dead */
>  	if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 &&
> -		     I915_READ_START(engine) == i915_gem_obj_ggtt_offset(obj) &&
> +		     I915_READ_START(engine) == vma->node.start &&
>  		     (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) {
>  		DRM_ERROR("%s initialization failed "
> -			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
> +			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08x]\n",
>  			  engine->name,
>  			  I915_READ_CTL(engine),
>  			  I915_READ_CTL(engine) & RING_VALID,
>  			  I915_READ_HEAD(engine), I915_READ_TAIL(engine),
> -			  I915_READ_START(engine),
> -			  (unsigned long)i915_gem_obj_ggtt_offset(obj));
> +			  I915_READ_START(engine), (u32)vma->node.start);
>  		ret = -EIO;
>  		goto out;
>  	}
> @@ -624,20 +624,21 @@ out:
>  
>  void intel_fini_pipe_control(struct intel_engine_cs *engine)
>  {
> -	if (engine->scratch.obj == NULL)
> +	if (!engine->scratch)
>  		return;
>  
> -	i915_gem_object_ggtt_unpin(engine->scratch.obj);
> -	i915_gem_object_put(engine->scratch.obj);
> -	engine->scratch.obj = NULL;
> +	i915_vma_unpin(engine->scratch);
> +	i915_gem_object_put(engine->scratch->obj);
> +	engine->scratch = NULL;
>  }
>  
>  int intel_init_pipe_control(struct intel_engine_cs *engine, int size)
>  {
>  	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
>  	int ret;
>  
> -	WARN_ON(engine->scratch.obj);
> +	WARN_ON(engine->scratch);
>  
>  	obj = i915_gem_object_create_stolen(engine->i915->dev, size);
>  	if (obj == NULL)
> @@ -648,18 +649,19 @@ int intel_init_pipe_control(struct intel_engine_cs *engine, int size)
>  		goto err;
>  	}
>  
> -	ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH);
> -	if (ret)
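> +	/* PIN_HIGH: allocate from the top of the GGTT, away from the mappable aperture */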
> +	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH);
> +	if (IS_ERR(vma)) {
> +		ret = PTR_ERR(vma);
>  		goto err_unref;
> +	}
>  
> -	engine->scratch.obj = obj;
> -	engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
> -	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
> -			 engine->name, engine->scratch.gtt_offset);
> +	engine->scratch = vma;
> +	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08llx\n",
> +			 engine->name, (long long)vma->node.start);
>  	return 0;
>  
>  err_unref:
> -	i915_gem_object_put(engine->scratch.obj);
> +	i915_gem_object_put(obj);
>  err:
>  	return ret;
>  }
> @@ -1217,10 +1219,13 @@ static void render_ring_cleanup(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_private *dev_priv = engine->i915;
>  
> -	if (dev_priv->semaphore_obj) {
> -		i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
> -		i915_gem_object_put(dev_priv->semaphore_obj);
> -		dev_priv->semaphore_obj = NULL;
> +	if (dev_priv->semaphore_vma) {
> +		struct drm_i915_gem_object *obj = dev_priv->semaphore_vma->obj;
> +
> +		i915_vma_unpin(dev_priv->semaphore_vma);
> +		dev_priv->semaphore_vma = NULL;
> +
> +		i915_gem_object_put(obj);
>  	}
>  
>  	intel_fini_pipe_control(engine);
> @@ -1684,7 +1689,7 @@ i830_emit_bb_start(struct drm_i915_gem_request *req,
>  		   unsigned dispatch_flags)
>  {
>  	struct intel_ring *ring = req->ring;
> -	u32 cs_offset = req->engine->scratch.gtt_offset;
> +	u32 cs_offset = req->engine->scratch->node.start;
>  	int ret;
>  
>  	ret = intel_ring_begin(req, 6);
> @@ -1773,67 +1778,68 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine)
>  
>  static void cleanup_status_page(struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
>  
> -	obj = engine->status_page.obj;
> -	if (obj == NULL)
> +	vma = engine->status_page.vma;
> +	if (vma == NULL)
>  		return;
> +	engine->status_page.vma = NULL;
>  
> -	kunmap(sg_page(obj->pages->sgl));
> -	i915_gem_object_ggtt_unpin(obj);
> -	i915_gem_object_put(obj);
> -	engine->status_page.obj = NULL;
> +	kunmap(sg_page(vma->obj->pages->sgl));
> +	i915_vma_unpin(vma);
>  }
>  
>  static int init_status_page(struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_gem_object *obj = engine->status_page.obj;
> +	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
> +	unsigned flags;
> +	int ret;
>  
> -	if (obj == NULL) {
> -		unsigned flags;
> -		int ret;
> +	if (engine->status_page.vma)
> +		return 0;
>  
> -		obj = i915_gem_object_create(engine->i915->dev, 4096);
> -		if (IS_ERR(obj)) {
> -			DRM_ERROR("Failed to allocate status page\n");
> -			return PTR_ERR(obj);
> -		}
> +	obj = i915_gem_object_create(engine->i915->dev, 4096);
> +	if (IS_ERR(obj)) {
> +		DRM_ERROR("Failed to allocate status page\n");
> +		return PTR_ERR(obj);
> +	}
>  
> -		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> -		if (ret)
> -			goto err_unref;
> -
> -		flags = 0;
> -		if (!HAS_LLC(engine->i915))
> -			/* On g33, we cannot place HWS above 256MiB, so
> -			 * restrict its pinning to the low mappable arena.
> -			 * Though this restriction is not documented for
> -			 * gen4, gen5, or byt, they also behave similarly
> -			 * and hang if the HWS is placed at the top of the
> -			 * GTT. To generalise, it appears that all !llc
> -			 * platforms have issues with us placing the HWS
> -			 * above the mappable region (even though we never
> -			 * actualy map it).
> -			 */
> -			flags |= PIN_MAPPABLE;
> -		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags);
> -		if (ret) {
> -err_unref:
> -			i915_gem_object_put(obj);
> -			return ret;
> -		}
> +	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> +	if (ret)
> +		goto err_unref;
>  
> -		engine->status_page.obj = obj;
> +	flags = 0;
> +	if (!HAS_LLC(engine->i915))
> +		/* On g33, we cannot place HWS above 256MiB, so
> +		 * restrict its pinning to the low mappable arena.
> +		 * Though this restriction is not documented for
> +		 * gen4, gen5, or byt, they also behave similarly
> +		 * and hang if the HWS is placed at the top of the
> +		 * GTT. To generalise, it appears that all !llc
> +		 * platforms have issues with us placing the HWS
> +		 * above the mappable region (even though we never
> +		 * actually map it).
> +		 */
> +		flags |= PIN_MAPPABLE;
> +	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags);
> +	if (IS_ERR(vma)) {
> +		ret = PTR_ERR(vma);
> +		goto err_unref;
>  	}
>  
> -	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
> +	engine->status_page.vma = vma;
> +	engine->status_page.gfx_addr = vma->node.start;
>  	engine->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
> -	memset(engine->status_page.page_addr, 0, PAGE_SIZE);
>  
>  	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
>  			engine->name, engine->status_page.gfx_addr);
>  
>  	return 0;
> +
> +err_unref:
> +	i915_gem_object_put(obj);
> +	return ret;
>  }
>  
>  static int init_phys_status_page(struct intel_engine_cs *engine)
> @@ -1857,15 +1863,16 @@ int intel_ring_pin(struct intel_ring *ring)
>  {
>  	struct drm_i915_private *dev_priv = ring->engine->i915;
>  	struct drm_i915_gem_object *obj = ring->obj;
> +	struct i915_vma *vma;
>  	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
>  	unsigned flags = PIN_OFFSET_BIAS | 4096;
>  	void *addr;
>  	int ret;
>  
>  	if (HAS_LLC(dev_priv) && !obj->stolen) {
> -		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags);
> -		if (ret)
> -			return ret;
> +		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags);
> +		if (IS_ERR(vma))
> +			return PTR_ERR(vma);
>  
>  		ret = i915_gem_object_set_to_cpu_domain(obj, true);
>  		if (ret)
> @@ -1877,10 +1884,10 @@ int intel_ring_pin(struct intel_ring *ring)
>  			goto err_unpin;
>  		}
>  	} else {
> -		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
> +		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
>  					       flags | PIN_MAPPABLE);
> -		if (ret)
> -			return ret;
> +		if (IS_ERR(vma))
> +			return PTR_ERR(vma);
>  
>  		ret = i915_gem_object_set_to_gtt_domain(obj, true);
>  		if (ret)
> @@ -1889,7 +1896,7 @@ int intel_ring_pin(struct intel_ring *ring)
>  		/* Access through the GTT requires the device to be awake. */
>  		assert_rpm_wakelock_held(dev_priv);
>  
> -		addr = i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj));
> +		addr = i915_vma_pin_iomap(vma);
>  		if (IS_ERR(addr)) {
>  			ret = PTR_ERR(addr);
>  			goto err_unpin;
> @@ -1897,11 +1904,11 @@ int intel_ring_pin(struct intel_ring *ring)
>  	}
>  
>  	ring->vaddr = addr;
> -	ring->vma = i915_gem_obj_to_ggtt(obj);
> +	ring->vma = vma;
>  	return 0;
>  
>  err_unpin:
> -	i915_gem_object_ggtt_unpin(obj);
> +	i915_vma_unpin(vma);
>  	return ret;
>  }
>  
> @@ -1916,7 +1923,7 @@ void intel_ring_unpin(struct intel_ring *ring)
>  		i915_vma_unpin_iomap(ring->vma);
>  	ring->vaddr = NULL;
>  
> -	i915_gem_object_ggtt_unpin(ring->obj);
> +	i915_vma_unpin(ring->vma);
>  	ring->vma = NULL;
>  }
>  
> @@ -2007,10 +2014,16 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
>  		return 0;
>  
>  	if (ce->state) {
> -		ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0,
> +		struct i915_vma *vma;
> +
> +		vma = i915_gem_object_ggtt_pin(ce->state, NULL, 0,
>  					       ctx->ggtt_alignment, PIN_HIGH);
> -		if (ret)
> +		if (IS_ERR(vma)) {
> +			ret = PTR_ERR(vma);
>  			goto error;
> +		}
> +
> +		ce->vma = vma;
>  	}
>  
>  	/* The kernel context is only used as a placeholder for flushing the
> @@ -2041,8 +2052,8 @@ static void intel_ring_context_unpin(struct i915_gem_context *ctx,
>  	if (--ce->pin_count)
>  		return;
>  
> -	if (ce->state)
> -		i915_gem_object_ggtt_unpin(ce->state);
> +	if (ce->vma)
> +		i915_vma_unpin(ce->vma);
>  
>  	i915_gem_context_put(ctx);
>  }
> @@ -2335,8 +2346,8 @@ void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno)
>  		if (HAS_VEBOX(dev_priv))
>  			I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
>  	}
> -	if (dev_priv->semaphore_obj) {
> -		struct drm_i915_gem_object *obj = dev_priv->semaphore_obj;
> +	if (dev_priv->semaphore_vma) {
> +		struct drm_i915_gem_object *obj = dev_priv->semaphore_vma->obj;
>  		struct page *page = i915_gem_object_get_dirty_page(obj, 0);
>  		void *semaphores = kmap(page);
>  		memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
> @@ -2576,16 +2587,20 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  				DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
>  				i915.semaphores = 0;
>  			} else {
> +				struct i915_vma *vma;
> +
>  				i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> -				ret = i915_gem_object_ggtt_pin(obj, NULL,
> +				vma = i915_gem_object_ggtt_pin(obj, NULL,
>  							       0, 0,
>  							       PIN_HIGH);
> -				if (ret != 0) {
> +				if (IS_ERR(vma)) {
>  					i915_gem_object_put(obj);
>  					DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
>  					i915.semaphores = 0;
> -				} else
> -					dev_priv->semaphore_obj = obj;
> +					vma = NULL;
> +				}
> +
> +				dev_priv->semaphore_vma = vma;
>  			}
>  		}
>  
> @@ -2596,7 +2611,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  		engine->irq_disable = gen8_ring_disable_irq;
>  		engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
>  		if (i915.semaphores) {
> -			WARN_ON(!dev_priv->semaphore_obj);
>  			engine->semaphore.sync_to = gen8_ring_sync;
>  			engine->semaphore.signal = gen8_rcs_signal;
>  			GEN8_RING_SEMAPHORE_INIT(engine);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index d19fb8c24919..934d5722dc27 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -26,10 +26,10 @@
>   */
>  #define I915_RING_FREE_SPACE 64
>  
> -struct  intel_hw_status_page {
> +struct intel_hw_status_page {
>  	u32		*page_addr;
>  	unsigned int	gfx_addr;
> -	struct		drm_i915_gem_object *obj;
> +	struct		i915_vma *vma;
>  };
>  
>  #define I915_READ_TAIL(ring) I915_READ(RING_TAIL((ring)->mmio_base))
> @@ -57,16 +57,13 @@ struct  intel_hw_status_page {
>  #define GEN8_SEMAPHORE_OFFSET(__from, __to)			     \
>  	(((__from) * I915_NUM_ENGINES  + (__to)) * gen8_semaphore_seqno_size)
>  #define GEN8_SIGNAL_OFFSET(__ring, to)			     \
> -	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
> +	(dev_priv->semaphore_vma->node.start + \
>  	 GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
>  #define GEN8_WAIT_OFFSET(__ring, from)			     \
> -	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
> +	(dev_priv->semaphore_vma->node.start + \
>  	 GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
>  
>  #define GEN8_RING_SEMAPHORE_INIT(e) do { \
> -	if (!dev_priv->semaphore_obj) { \
> -		break; \
> -	} \
>  	(e)->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET((e), RCS); \
>  	(e)->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET((e), VCS); \
>  	(e)->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET((e), BCS); \
> @@ -97,8 +94,8 @@ struct intel_engine_hangcheck {
>  
>  struct intel_ring {
>  	struct drm_i915_gem_object *obj;
> -	void *vaddr;
>  	struct i915_vma *vma;
> +	void *vaddr;
>  
>  	struct intel_engine_cs *engine;
>  	struct list_head link;
> @@ -139,7 +136,7 @@ struct  i915_ctx_workarounds {
>  		u32 offset;
>  		u32 size;
>  	} indirect_ctx, per_ctx;
> -	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
>  };
>  
>  struct drm_i915_gem_request;
> @@ -325,10 +322,7 @@ struct intel_engine_cs {
>  
>  	struct intel_engine_hangcheck hangcheck;
>  
> -	struct {
> -		struct drm_i915_gem_object *obj;
> -		u32 gtt_offset;
> -	} scratch;
> +	struct i915_vma *scratch;
>  
>  	bool needs_cmd_parser;
>  
> diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
> index 324ccb06397d..99bdbb9e4037 100644
> --- a/drivers/gpu/drm/i915/intel_sprite.c
> +++ b/drivers/gpu/drm/i915/intel_sprite.c
> @@ -462,8 +462,8 @@ vlv_update_plane(struct drm_plane *dplane,
>  
>  	I915_WRITE(SPSIZE(pipe, plane), (crtc_h << 16) | crtc_w);
>  	I915_WRITE(SPCNTR(pipe, plane), sprctl);
> -	I915_WRITE(SPSURF(pipe, plane), i915_gem_obj_ggtt_offset(obj) +
> -		   sprsurf_offset);
> +	I915_WRITE(SPSURF(pipe, plane),
> +		   i915_gem_object_ggtt_offset(obj, NULL) + sprsurf_offset);
>  	POSTING_READ(SPSURF(pipe, plane));
>  }
>  
> @@ -602,7 +602,7 @@ ivb_update_plane(struct drm_plane *plane,
>  		I915_WRITE(SPRSCALE(pipe), sprscale);
>  	I915_WRITE(SPRCTL(pipe), sprctl);
>  	I915_WRITE(SPRSURF(pipe),
> -		   i915_gem_obj_ggtt_offset(obj) + sprsurf_offset);
> +		   i915_gem_object_ggtt_offset(obj, NULL) + sprsurf_offset);
>  	POSTING_READ(SPRSURF(pipe));
>  }
>  
> @@ -731,7 +731,7 @@ ilk_update_plane(struct drm_plane *plane,
>  	I915_WRITE(DVSSCALE(pipe), dvsscale);
>  	I915_WRITE(DVSCNTR(pipe), dvscntr);
>  	I915_WRITE(DVSSURF(pipe),
> -		   i915_gem_obj_ggtt_offset(obj) + dvssurf_offset);
> +		   i915_gem_object_ggtt_offset(obj, NULL) + dvssurf_offset);
>  	POSTING_READ(DVSSURF(pipe));
>  }
>  
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 29/38] drm/i915: Remove locking for get_tiling
  2016-06-08 10:02   ` Daniel Vetter
@ 2016-06-08 10:11     ` Chris Wilson
  2016-06-13 14:19       ` Daniel Vetter
  0 siblings, 1 reply; 58+ messages in thread
From: Chris Wilson @ 2016-06-08 10:11 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, Jun 08, 2016 at 12:02:01PM +0200, Daniel Vetter wrote:
> On Fri, Jun 03, 2016 at 05:55:44PM +0100, Chris Wilson wrote:
> > Since we are not concerned with userspace racing itself with set-tiling
> > (the order is indeterminant even if we take a lock), then we can safely
> > read back the single obj->tiling_mode and do the static lookup of
> > swizzle mode without having to take a lock.
> > 
> > get-tiling is reasonably frequent due to the back-channel passing around
> > of tiling parameters in DRI2/DRI3.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_gem_tiling.c | 8 ++------
> >  1 file changed, 2 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> > index 326de7eae101..d6acd0a27c06 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> > @@ -302,10 +302,8 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
> >  	if (!obj)
> >  		return -ENOENT;
> >  
> > -	mutex_lock(&dev->struct_mutex);
> > -
> >  	args->tiling_mode = obj->tiling_mode;
> 
> READ_ONCE here. With that Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

obj->tiling_mode is still a bitfield. Not yet convinced we should extract
it, but avoiding the lock for get_tiling is useful.
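
For reference, the constraint here: READ_ONCE() requires an addressable
lvalue, which a C bitfield is not. A minimal sketch with made-up layouts
(not the real struct drm_i915_gem_object):

#include <linux/compiler.h>

/* hypothetical layouts for illustration only */
struct obj_bits {
	unsigned int tiling_mode : 2;	/* READ_ONCE() refuses a bitfield */
	unsigned int dirty : 1;
};

struct obj_unbitfielded {
	unsigned int tiling_mode;	/* plain word: addressable */
	unsigned int dirty : 1;
};

static unsigned int get_tiling_unlocked(struct obj_unbitfielded *obj)
{
	/* a single aligned load the compiler may neither tear nor redo */
	return READ_ONCE(obj->tiling_mode);
}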
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 32/38] drm/i915: Stop the machine whilst capturing the GPU crash dump
  2016-06-08 10:06   ` Daniel Vetter
@ 2016-06-08 11:37     ` Chris Wilson
  0 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-08 11:37 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, Jun 08, 2016 at 12:06:05PM +0200, Daniel Vetter wrote:
> On Fri, Jun 03, 2016 at 05:55:47PM +0100, Chris Wilson wrote:
> > The error state is purposefully racy as we expect it to be called at any
> > time, and so we have avoided any locking whilst capturing the crash dump.
> > However, with multi-engine GPUs and multiple CPUs, those races can
> > manifest as OOPSes as we attempt to chase dangling pointers freed on
> > other CPUs. Under discussion are lots of ways to slow down normal
> > operation in order to protect the post-mortem error capture, but what if
> > we take the opposite approach and freeze the machine whilst the error
> > capture runs (note the GPU may still be running, but as long as we don't
> > process any of the results the driver's bookkeeping will be static).
> > 
> > Note that by itself, this is not a complete fix. It also depends on
> > the compiler barriers in list_add/list_del to prevent traversing the
> > lists into the void.
> > 
> > v2: Avoid drm_clflush_pages() inside stop_machine() as it may use
> > stop_machine() itself for its wbinvd fallback.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> rt folks will hate us for this I think. But yeah the only other options is
> mass-rcu-ifying everything, which is much more fragile. Ack on the general
> idea at least, need to look at what's all needed for list manipulation
> still.

General answer: if you have a problem with a GPU hang, talk to whoever
caused it and tell them to stop ;)

Last inspection of list.h suggests the helpers are safe for our usage:

static inline void __list_del(struct list_head * prev, struct list_head * next)
{
	next->prev = prev;
	WRITE_ONCE(prev->next, next);
}


static inline void __list_add(struct list_head *new,
                              struct list_head *prev,
                              struct list_head *next)
{
        next->prev = new;
        new->next = next;
        new->prev = prev;
        WRITE_ONCE(prev->next, new);
}

i.e. they have gained the compiler barriers to prevent us from seeing a
partial list manipulation (they are basically RCU-safe by default now).
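
i.e. a lockless walker relying on that publication order cannot see a
half-linked node. A minimal sketch (not the actual capture code;
note_request() is made up):

static void walk_requests(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_request *req;

	/* __list_add() makes a new entry visible with a single
	 * WRITE_ONCE() of prev->next only after the entry's own
	 * pointers are set, so we observe either the old or the new
	 * ->next, never uninitialised links.
	 */
	list_for_each_entry(req, &engine->request_list, list)
		note_request(req);	/* hypothetical consumer */
}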

I also do passes over the error capture trying to minimise our list
usage so that we only have to verify the request list (all GPU state
associated with the request should then be derivable from the request
itself). E.g. that saves having to iterate over the context lists
looking for the request->ctx!
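
The overall shape is then something like this minimal sketch
(capture_args and record_all_requests() are made up; stop_machine() is
the real API):

#include <linux/stop_machine.h>

struct capture_args {
	struct drm_i915_private *i915;
	struct drm_i915_error_state *error;
};

static int __capture(void *data)
{
	struct capture_args *args = data;

	/* every other CPU is spinning with interrupts off here, so the
	 * driver's bookkeeping cannot change beneath us */
	record_all_requests(args->i915, args->error);	/* hypothetical */
	return 0;
}

static void capture_error_state(struct drm_i915_private *i915,
				struct drm_i915_error_state *error)
{
	struct capture_args args = { .i915 = i915, .error = error };

	/* NULL cpumask: run on any one CPU, halt the rest */
	stop_machine(__capture, &args, NULL);
}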
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 10/38] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin()
  2016-06-08  9:43   ` Daniel Vetter
@ 2016-06-08 12:58     ` Chris Wilson
  0 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-06-08 12:58 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, Jun 08, 2016 at 11:43:57AM +0200, Daniel Vetter wrote:
> On Fri, Jun 03, 2016 at 05:55:25PM +0100, Chris Wilson wrote:
> > Since i915_gem_obj_ggtt_pin() is an idiom-breaking curry function for
> > i915_gem_object_ggtt_pin(), spare us the confusion and remove it.
> > Removing it now simplifies later patches to change the i915_vma_pin()
> > (and friends) interface.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Diff looks like accidentally squashed in speed-up to help gcc along with
> bitfields in vma. Needs to be unsquashed.

That change was made a few months ago, further back than even the
reflog's history reaches. I guess I got fed up with having a few patches
doing very small overlapping tasks of changing the function prototypes.
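
For reference, roughly the calling-convention difference being removed
(arguments illustrative; the vma-returning form is where this series
ends up):

static int pin_example(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;

	/* old curried wrapper, hiding the ggtt-view argument:
	 *	ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_HIGH);
	 */

	/* surviving entry point: explicit (NULL == normal) view, and
	 * it hands back the pinned vma for the caller to track */
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	i915_vma_unpin(vma);
	return 0;
}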
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 29/38] drm/i915: Remove locking for get_tiling
  2016-06-08 10:11     ` Chris Wilson
@ 2016-06-13 14:19       ` Daniel Vetter
  0 siblings, 0 replies; 58+ messages in thread
From: Daniel Vetter @ 2016-06-13 14:19 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx

On Wed, Jun 08, 2016 at 11:11:07AM +0100, Chris Wilson wrote:
> On Wed, Jun 08, 2016 at 12:02:01PM +0200, Daniel Vetter wrote:
> > On Fri, Jun 03, 2016 at 05:55:44PM +0100, Chris Wilson wrote:
> > > Since we are not concerned with userspace racing itself with set-tiling
> > > (the order is indeterminate even if we take a lock), we can safely
> > > read back the single obj->tiling_mode and do the static lookup of
> > > swizzle mode without having to take a lock.
> > > 
> > > get-tiling is reasonably frequent due to the back-channel passing around
> > > of tiling parameters in DRI2/DRI3.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > ---
> > >  drivers/gpu/drm/i915/i915_gem_tiling.c | 8 ++------
> > >  1 file changed, 2 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> > > index 326de7eae101..d6acd0a27c06 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> > > @@ -302,10 +302,8 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
> > >  	if (!obj)
> > >  		return -ENOENT;
> > >  
> > > -	mutex_lock(&dev->struct_mutex);
> > > -
> > >  	args->tiling_mode = obj->tiling_mode;
> > 
> > READ_ONCE here. With that Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> obj->tiling_mode is still a bitfield. Not yet convinced of extracting
> it, but avoiding the lock for get_tiling is useful.

With all the recent discussions over the past few years about gcc becoming
more and more creative in exploiting the undefined parts of C99, I'm just a
bit paranoid. Would be awesome if we could unbitfield this one beforehand ...
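
The failure mode being worried about, sketched with a hypothetical
layout: adjacent bitfields share one memory location, so per C11 an
unlocked read of tiling_mode races with any write to its neighbours,
and the compiler may implement such a write as a full-word
read-modify-write:

/* hypothetical layout, not the real object flags */
struct obj_bits {
	unsigned int tiling_mode : 2;
	unsigned int madv : 2;	/* shares the word with tiling_mode */
};

void set_madv(struct obj_bits *obj, unsigned int v)
{
	/* typically compiled as load-word, merge, store-word -- a
	 * store that rewrites tiling_mode's bits as a side effect */
	obj->madv = v;
}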
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

end of thread, other threads:[~2016-06-13 14:19 UTC | newest]

Thread overview: 58+ messages
2016-06-03 16:55 Tracking VMA Chris Wilson
2016-06-03 16:55 ` [PATCH 01/38] drm/i915: Combine loops within i915_gem_evict_something Chris Wilson
2016-06-03 16:55 ` [PATCH 02/38] drm/i915: Remove surplus drm_device parameter to i915_gem_evict_something() Chris Wilson
2016-06-03 16:55 ` [PATCH 03/38] drm/i915: Double check the active status on the batch pool Chris Wilson
2016-06-03 16:55 ` [PATCH 04/38] drm/i915: Remove request retirement before each batch Chris Wilson
2016-06-06 13:40   ` Mika Kuoppala
2016-06-03 16:55 ` [PATCH 05/38] drm/i915: Remove i915_gem_execbuffer_retire_commands() Chris Wilson
2016-06-06 14:26   ` Mika Kuoppala
2016-06-03 16:55 ` [PATCH 06/38] drm/i915: Pad GTT views of exec objects up to user specified size Chris Wilson
2016-06-08  9:41   ` Daniel Vetter
2016-06-08 10:08     ` Chris Wilson
2016-06-03 16:55 ` [PATCH 07/38] drm/i915: Split insertion/binding of an object into the VM Chris Wilson
2016-06-03 16:55 ` [PATCH 08/38] drm/i915: Record allocated vma size Chris Wilson
2016-06-03 16:55 ` [PATCH 09/38] drm/i915: Start passing around i915_vma from execbuffer Chris Wilson
2016-06-03 16:55 ` [PATCH 10/38] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin() Chris Wilson
2016-06-08  9:43   ` Daniel Vetter
2016-06-08 12:58     ` Chris Wilson
2016-06-03 16:55 ` [PATCH 11/38] drm/i915: Make fb_tracking.lock a spinlock Chris Wilson
2016-06-03 16:55 ` [PATCH 12/38] drm/i915: Use atomics to manipulate obj->frontbuffer_bits Chris Wilson
2016-06-03 16:55 ` [PATCH 13/38] drm/i915: Move obj->active:5 to obj->flags Chris Wilson
2016-06-08  9:53   ` Daniel Vetter
2016-06-03 16:55 ` [PATCH 14/38] drm/i915: Move i915_gem_object_wait_rendering() Chris Wilson
2016-06-03 16:55 ` [PATCH 15/38] drm/i915: Mark all current requests as complete before resetting them Chris Wilson
2016-06-03 16:55 ` [PATCH 16/38] drm/i915: Enable lockless lookup of request tracking via RCU Chris Wilson
2016-06-03 16:55 ` [PATCH 17/38] drm/i915: Introduce i915_gem_active_wait_unlocked() Chris Wilson
2016-06-03 16:55 ` [PATCH 18/38] drm/i915: Convert non-blocking waits for requests over to using RCU Chris Wilson
2016-06-03 16:55 ` [PATCH 19/38] drm/i915: Convert non-blocking userptr " Chris Wilson
2016-06-03 16:55 ` [PATCH 20/38] drm/i915/userptr: Remove superfluous interruptible=false on waiting Chris Wilson
2016-06-03 16:55 ` [PATCH 21/38] drm/i915: Avoid requiring struct_mutex during suspend Chris Wilson
2016-06-03 16:55 ` [PATCH 22/38] drm/gem/shrinker: Wait before acquiring struct_mutex under oom Chris Wilson
2016-06-08  9:57   ` Daniel Vetter
2016-06-08 10:04     ` Chris Wilson
2016-06-03 16:55 ` [PATCH 23/38] suspend Chris Wilson
2016-06-03 16:55 ` [PATCH 24/38] drm/i915: Do a nonblocking wait first in pread/pwrite Chris Wilson
2016-06-03 16:55 ` [PATCH 25/38] drm/i915: Remove (struct_mutex) locking for wait-ioctl Chris Wilson
2016-06-03 16:55 ` [PATCH 26/38] drm/i915: Remove (struct_mutex) locking for busy-ioctl Chris Wilson
2016-06-03 16:55 ` [PATCH 27/38] drm/i915: Reduce locking inside swfinish ioctl Chris Wilson
2016-06-08  9:59   ` Daniel Vetter
2016-06-08 10:03     ` Chris Wilson
2016-06-03 16:55 ` [PATCH 28/38] drm/i915: Remove pinned check from madvise ioctl Chris Wilson
2016-06-08 10:01   ` Daniel Vetter
2016-06-03 16:55 ` [PATCH 29/38] drm/i915: Remove locking for get_tiling Chris Wilson
2016-06-08 10:02   ` Daniel Vetter
2016-06-08 10:11     ` Chris Wilson
2016-06-13 14:19       ` Daniel Vetter
2016-06-03 16:55 ` [PATCH 30/38] drm/i915: Assert that the request hasn't been retired Chris Wilson
2016-06-03 16:55 ` [PATCH 31/38] drm/i915: Reduce amount of duplicate buffer information captured on error Chris Wilson
2016-06-03 16:55 ` [PATCH 32/38] drm/i915: Stop the machine whilst capturing the GPU crash dump Chris Wilson
2016-06-08 10:06   ` Daniel Vetter
2016-06-08 11:37     ` Chris Wilson
2016-06-03 16:55 ` [PATCH 33/38] drm/i915: Scan GGTT active list for context object Chris Wilson
2016-06-03 16:55 ` [PATCH 34/38] drm/i915: Move setting of request->batch into its single callsite Chris Wilson
2016-06-03 16:55 ` [PATCH 35/38] drm/i915: Mark unmappable GGTT entries as PIN_HIGH Chris Wilson
2016-06-03 16:55 ` [PATCH 36/38] drm/i915: Track pinned vma inside guc Chris Wilson
2016-06-03 16:55 ` [PATCH 37/38] drm/i915: Track pinned VMA Chris Wilson
2016-06-08 10:08   ` Daniel Vetter
2016-06-03 16:55 ` [PATCH 38/38] drm/i915/overlay: Use VMA as the primary tracker for images Chris Wilson
2016-06-06 10:42 ` ✗ Ro.CI.BAT: failure for series starting with [01/38] drm/i915: Combine loops within i915_gem_evict_something Patchwork
