* Getting to RCU and exporting fences
@ 2016-07-27 11:14 Chris Wilson
  2016-07-27 11:14 ` [PATCH 01/22] drm/i915: Combine loops within i915_gem_evict_something Chris Wilson
                   ` (23 more replies)
  0 siblings, 24 replies; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

We're starting to transition to VMA fixes now, and in the process take a
few steps towards enabling RCU fences.
-Chris

* [PATCH 01/22] drm/i915: Combine loops within i915_gem_evict_something
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-29  6:17   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 02/22] drm/i915: Remove surplus drm_device parameter to i915_gem_evict_something() Chris Wilson
                   ` (22 subsequent siblings)
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

A slight micro-optimisation: combine the loops so that gcc is able to
optimise the inner loops concisely. Since we are reviewing the loops, we
can update the comments to describe the current state of affairs, in
particular the distinction between evicting from the global GTT (which
may contain untracked items and transient global pins) and the
per-process GTT.
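
As an aside, the combined-loop idiom introduced here - walking a
NULL-terminated array of list phases - can be sketched as a standalone
toy program (the names below are stand-ins, not the i915 types):

#include <stdio.h>

int main(void)
{
	const char *inactive[] = { "bo-1", "bo-2", NULL };
	const char *active[] = { "bo-3", NULL };
	const char **phases[] = { inactive, active, NULL };
	const char ***phase;

	phase = phases;
	do {
		for (const char **bo = *phase; *bo; bo++)
			printf("consider %s for eviction\n", *bo);
	} while (*++phase);

	return 0;
}

Note how an entire phase can be skipped (as done below for
PIN_NONBLOCK) simply by writing NULL into the array.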

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_evict.c | 143 +++++++++++++++++-----------------
 1 file changed, 70 insertions(+), 73 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 3437ced76cb6..016be7316676 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -34,6 +34,19 @@
 #include "i915_trace.h"
 
 static bool
+gpu_is_idle(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+
+	for_each_engine(engine, dev_priv) {
+		if (!list_empty(&engine->request_list))
+			return false;
+	}
+
+	return true;
+}
+
+static bool
 mark_free(struct i915_vma *vma, struct list_head *unwind)
 {
 	if (vma->pin_count)
@@ -76,37 +89,31 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
 			 unsigned long start, unsigned long end,
 			 unsigned flags)
 {
-	struct list_head eviction_list, unwind_list;
-	struct i915_vma *vma;
-	int ret = 0;
-	int pass = 0;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct list_head eviction_list;
+	struct list_head *phases[] = {
+		&vm->inactive_list,
+		&vm->active_list,
+		NULL,
+	}, **phase;
+	struct i915_vma *vma, *next;
+	int ret;
 
 	trace_i915_gem_evict(dev, min_size, alignment, flags);
 
 	/*
 	 * The goal is to evict objects and amalgamate space in LRU order.
 	 * The oldest idle objects reside on the inactive list, which is in
-	 * retirement order. The next objects to retire are those on the (per
-	 * ring) active list that do not have an outstanding flush. Once the
-	 * hardware reports completion (the seqno is updated after the
-	 * batchbuffer has been finished) the clean buffer objects would
-	 * be retired to the inactive list. Any dirty objects would be added
-	 * to the tail of the flushing list. So after processing the clean
-	 * active objects we need to emit a MI_FLUSH to retire the flushing
-	 * list, hence the retirement order of the flushing list is in
-	 * advance of the dirty objects on the active lists.
+	 * retirement order. The next objects to retire are those in flight,
+	 * on the active list, again in retirement order.
 	 *
 	 * The retirement sequence is thus:
 	 *   1. Inactive objects (already retired)
-	 *   2. Clean active objects
-	 *   3. Flushing list
-	 *   4. Dirty active objects.
+	 *   2. Active objects (will stall on unbinding)
 	 *
 	 * On each list, the oldest objects lie at the HEAD with the freshest
 	 * object on the TAIL.
 	 */
-
-	INIT_LIST_HEAD(&unwind_list);
 	if (start != 0 || end != vm->total) {
 		drm_mm_init_scan_with_range(&vm->mm, min_size,
 					    alignment, cache_level,
@@ -114,79 +121,71 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
 	} else
 		drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level);
 
-search_again:
-	/* First see if there is a large enough contiguous idle region... */
-	list_for_each_entry(vma, &vm->inactive_list, vm_link) {
-		if (mark_free(vma, &unwind_list))
-			goto found;
-	}
-
 	if (flags & PIN_NONBLOCK)
-		goto none;
+		phases[1] = NULL;
 
-	/* Now merge in the soon-to-be-expired objects... */
-	list_for_each_entry(vma, &vm->active_list, vm_link) {
-		if (mark_free(vma, &unwind_list))
-			goto found;
-	}
+search_again:
+	INIT_LIST_HEAD(&eviction_list);
+	phase = phases;
+	do {
+		list_for_each_entry(vma, *phase, vm_link)
+			if (mark_free(vma, &eviction_list))
+				goto found;
+	} while (*++phase);
 
-none:
 	/* Nothing found, clean up and bail out! */
-	while (!list_empty(&unwind_list)) {
-		vma = list_first_entry(&unwind_list,
-				       struct i915_vma,
-				       exec_list);
+	list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
 		ret = drm_mm_scan_remove_block(&vma->node);
 		BUG_ON(ret);
 
-		list_del_init(&vma->exec_list);
+		INIT_LIST_HEAD(&vma->exec_list);
 	}
 
 	/* Can we unpin some objects such as idle hw contexts,
-	 * or pending flips?
+	 * or pending flips? But since only the GGTT has global entries
+	 * such as scanouts, ringbuffers and contexts, we can skip the
+	 * purge when inspecting per-process local address spaces.
 	 */
-	if (flags & PIN_NONBLOCK)
+	if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK)
 		return -ENOSPC;
 
-	/* Only idle the GPU and repeat the search once */
-	if (pass++ == 0) {
-		struct drm_i915_private *dev_priv = to_i915(dev);
-
-		if (i915_is_ggtt(vm)) {
-			ret = i915_gem_switch_to_kernel_context(dev_priv);
-			if (ret)
-				return ret;
-		}
-
-		ret = i915_gem_wait_for_idle(dev_priv);
-		if (ret)
-			return ret;
-
-		i915_gem_retire_requests(dev_priv);
-		goto search_again;
+	if (gpu_is_idle(dev_priv)) {
+		/* If we still have pending pageflip completions, drop
+		 * back to userspace to give our workqueues time to
+		 * acquire our locks and unpin the old scanouts.
+		 */
+		return intel_has_pending_fb_unpin(dev) ? -EAGAIN : -ENOSPC;
 	}
 
-	/* If we still have pending pageflip completions, drop
-	 * back to userspace to give our workqueues time to
-	 * acquire our locks and unpin the old scanouts.
+	/* Not everything in the GGTT is tracked via vma (otherwise we
+	 * could evict as required with minimal stalling) so we are forced
+	 * to idle the GPU and explicitly retire outstanding requests in
+	 * the hopes that we can then remove contexts and the like only
+	 * bound by their active reference.
 	 */
-	return intel_has_pending_fb_unpin(dev) ? -EAGAIN : -ENOSPC;
+	ret = i915_gem_switch_to_kernel_context(dev_priv);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_wait_for_idle(dev_priv);
+	if (ret)
+		return ret;
+
+	i915_gem_retire_requests(dev_priv);
+	goto search_again;
 
 found:
 	/* drm_mm doesn't allow any other operations while
-	 * scanning, therefore store to be evicted objects on a
-	 * temporary list. */
-	INIT_LIST_HEAD(&eviction_list);
-	while (!list_empty(&unwind_list)) {
-		vma = list_first_entry(&unwind_list,
-				       struct i915_vma,
-				       exec_list);
-		if (drm_mm_scan_remove_block(&vma->node)) {
+	 * scanning, therefore store to-be-evicted objects on a
+	 * temporary list and take a reference for all before
+	 * calling unbind (which may remove the active reference
+	 * of any of our objects, thus corrupting the list).
+	 */
+	list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
+		if (drm_mm_scan_remove_block(&vma->node))
 			vma->pin_count++;
-			list_move(&vma->exec_list, &eviction_list);
-			continue;
-		}
-		list_del_init(&vma->exec_list);
+		else
+			list_del_init(&vma->exec_list);
 	}
 
 	/* Unbinding will emit any required flushes */
@@ -200,7 +199,6 @@ found:
 		if (ret == 0)
 			ret = i915_vma_unbind(vma);
 	}
-
 	return ret;
 }
 
@@ -279,7 +277,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
 			return ret;
 
 		i915_gem_retire_requests(dev_priv);
-
 		WARN_ON(!list_empty(&vm->active_list));
 	}
 
-- 
2.8.1

* [PATCH 02/22] drm/i915: Remove surplus drm_device parameter to i915_gem_evict_something()
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
  2016-07-27 11:14 ` [PATCH 01/22] drm/i915: Combine loops within i915_gem_evict_something Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-28  8:07   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 03/22] drm/i915: Double check the active status on the batch pool Chris Wilson
                   ` (21 subsequent siblings)
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

Eviction is VM local, so we can ignore the significance of the
drm_device in the caller, and leave it to i915_gem_evict_something() to
manage itself.
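
As a side effect, the i915_gem_evict tracepoint is rebased onto the
address space; going by the TP_printk format in the diff below, a line
in the trace buffer would look roughly like this (illustrative values):

  i915_gem_evict: dev=0, vm=ffff88021e15c000, size=16384, align=0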

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h       |  3 +--
 drivers/gpu/drm/i915/i915_gem.c       |  2 +-
 drivers/gpu/drm/i915/i915_gem_evict.c |  9 ++++-----
 drivers/gpu/drm/i915/i915_gem_gtt.c   |  2 +-
 drivers/gpu/drm/i915/i915_trace.h     | 14 ++++++++------
 5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 66b98fa4715a..fbda38f25c6b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3398,8 +3398,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
 				       struct drm_file *file);
 
 /* i915_gem_evict.c */
-int __must_check i915_gem_evict_something(struct drm_device *dev,
-					  struct i915_address_space *vm,
+int __must_check i915_gem_evict_something(struct i915_address_space *vm,
 					  int min_size,
 					  unsigned alignment,
 					  unsigned cache_level,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e3278f4e1ad2..bf652dc88024 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3093,7 +3093,7 @@ search_free:
 							  search_flag,
 							  alloc_flag);
 		if (ret) {
-			ret = i915_gem_evict_something(dev, vm, size, alignment,
+			ret = i915_gem_evict_something(vm, size, alignment,
 						       obj->cache_level,
 						       start, end,
 						       flags);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 016be7316676..4bce72fa14c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -61,7 +61,6 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
 
 /**
  * i915_gem_evict_something - Evict vmas to make room for binding a new one
- * @dev: drm_device
  * @vm: address space to evict from
  * @min_size: size of the desired free space
  * @alignment: alignment constraint of the desired free space
@@ -84,12 +83,12 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
  * memory in e.g. the shrinker.
  */
 int
-i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
+i915_gem_evict_something(struct i915_address_space *vm,
 			 int min_size, unsigned alignment, unsigned cache_level,
 			 unsigned long start, unsigned long end,
 			 unsigned flags)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = to_i915(vm->dev);
 	struct list_head eviction_list;
 	struct list_head *phases[] = {
 		&vm->inactive_list,
@@ -99,7 +98,7 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
 	struct i915_vma *vma, *next;
 	int ret;
 
-	trace_i915_gem_evict(dev, min_size, alignment, flags);
+	trace_i915_gem_evict(vm, min_size, alignment, flags);
 
 	/*
 	 * The goal is to evict objects and amalgamate space in LRU order.
@@ -154,7 +153,7 @@ search_again:
 		 * back to userspace to give our workqueues time to
 		 * acquire our locks and unpin the old scanouts.
 		 */
-		return intel_has_pending_fb_unpin(dev) ? -EAGAIN : -ENOSPC;
+		return intel_has_pending_fb_unpin(vm->dev) ? -EAGAIN : -ENOSPC;
 	}
 
 	/* Not everything in the GGTT is tracked via vma (otherwise we
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 59ecaf2c8bf8..5869fa074009 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2012,7 +2012,7 @@ alloc:
 						  0, ggtt->base.total,
 						  DRM_MM_TOPDOWN);
 	if (ret == -ENOSPC && !retried) {
-		ret = i915_gem_evict_something(dev, &ggtt->base,
+		ret = i915_gem_evict_something(&ggtt->base,
 					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
 					       I915_CACHE_NONE,
 					       0, ggtt->base.total,
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 9e43c0aa6e3b..178798002a73 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -394,25 +394,27 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy,
 );
 
 TRACE_EVENT(i915_gem_evict,
-	    TP_PROTO(struct drm_device *dev, u32 size, u32 align, unsigned flags),
-	    TP_ARGS(dev, size, align, flags),
+	    TP_PROTO(struct i915_address_space *vm, u32 size, u32 align, unsigned int flags),
+	    TP_ARGS(vm, size, align, flags),
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
+			     __field(struct i915_address_space *, vm)
 			     __field(u32, size)
 			     __field(u32, align)
-			     __field(unsigned, flags)
+			     __field(unsigned int, flags)
 			    ),
 
 	    TP_fast_assign(
-			   __entry->dev = dev->primary->index;
+			   __entry->dev = vm->dev->primary->index;
+			   __entry->vm = vm;
 			   __entry->size = size;
 			   __entry->align = align;
 			   __entry->flags = flags;
 			  ),
 
-	    TP_printk("dev=%d, size=%d, align=%d %s",
-		      __entry->dev, __entry->size, __entry->align,
+	    TP_printk("dev=%d, vm=%p, size=%d, align=%d %s",
+		      __entry->dev, __entry->vm, __entry->size, __entry->align,
 		      __entry->flags & PIN_MAPPABLE ? ", mappable" : "")
 );
 
-- 
2.8.1

* [PATCH 03/22] drm/i915: Double check the active status on the batch pool
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
  2016-07-27 11:14 ` [PATCH 01/22] drm/i915: Combine loops within i915_gem_evict_something Chris Wilson
  2016-07-27 11:14 ` [PATCH 02/22] drm/i915: Remove surplus drm_device parameter to i915_gem_evict_something() Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-28  8:14   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 04/22] drm/i915: Remove request retirement before each batch Chris Wilson
                   ` (20 subsequent siblings)
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

We should not rely on obj->active being uptodate unless we manually
flush it. Instead, we can verify that the next available batch object is
idle by looking at its last active request (and checking it for
completion).
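
As an illustration of the idea, here is a toy model of the LRU scan
(stand-in types and a simplified seqno check, not the i915 request
tracking):

#include <stdbool.h>
#include <stdio.h>

struct entry { const char *name; unsigned int last_read; };

/* Wrap-safe "has the engine passed this seqno?" comparison */
static bool seqno_passed(unsigned int engine, unsigned int seqno)
{
	return (int)(engine - seqno) >= 0;
}

int main(void)
{
	/* Pool entries in strict LRU order of last use */
	struct entry pool[] = {
		{ "batch-A", 10 }, { "batch-B", 20 }, { "batch-C", 30 },
	};
	unsigned int engine_seqno = 25; /* hw progress at lookup time */
	unsigned int i;

	for (i = 0; i < sizeof(pool) / sizeof(pool[0]); i++) {
		if (!seqno_passed(engine_seqno, pool[i].last_read))
			break; /* strictly LRU: the rest are busy too */
		printf("%s is idle and reusable\n", pool[i].name);
	}
	return 0;
}

Since the batches are strictly LRU ordered, the first busy entry
terminates the search instead of every object being flagged
individually.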

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 15 ++++++++-------
 drivers/gpu/drm/i915/i915_gem_batch_pool.h |  7 +++++--
 drivers/gpu/drm/i915/intel_engine_cs.c     |  2 +-
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index 825981b5aa40..ed989596d9a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -41,15 +41,15 @@
 
 /**
  * i915_gem_batch_pool_init() - initialize a batch buffer pool
- * @dev: the drm device
+ * @engine: the associated request submission engine
  * @pool: the batch buffer pool
  */
-void i915_gem_batch_pool_init(struct drm_device *dev,
+void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
 			      struct i915_gem_batch_pool *pool)
 {
 	int n;
 
-	pool->dev = dev;
+	pool->engine = engine;
 
 	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
 		INIT_LIST_HEAD(&pool->cache_list[n]);
@@ -65,7 +65,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
 {
 	int n;
 
-	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
+	lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
 
 	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
 		struct drm_i915_gem_object *obj, *next;
@@ -101,7 +101,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 	struct list_head *list;
 	int n;
 
-	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
+	lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
 
 	/* Compute a power-of-two bucket, but throw everything greater than
 	 * 16KiB into the same bucket: i.e. the buckets hold objects of
@@ -114,7 +114,8 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 
 	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
 		/* The batches are strictly LRU ordered */
-		if (tmp->active)
+		if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
+					     &tmp->base.dev->struct_mutex))
 			break;
 
 		/* While we're looping, do some clean up */
@@ -133,7 +134,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 	if (obj == NULL) {
 		int ret;
 
-		obj = i915_gem_object_create(pool->dev, size);
+		obj = i915_gem_object_create(&pool->engine->i915->drm, size);
 		if (IS_ERR(obj))
 			return obj;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
index 848e90703eed..7fd4df0a29fe 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -27,13 +27,16 @@
 
 #include "i915_drv.h"
 
+struct drm_device;
+struct intel_engine_cs;
+
 struct i915_gem_batch_pool {
-	struct drm_device *dev;
+	struct intel_engine_cs *engine;
 	struct list_head cache_list[4];
 };
 
 /* i915_gem_batch_pool.c */
-void i915_gem_batch_pool_init(struct drm_device *dev,
+void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
 			      struct i915_gem_batch_pool *pool);
 void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
 struct drm_i915_gem_object*
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 821ea10c885c..62e4f6bc8cd5 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -184,7 +184,7 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
 	engine->fence_context = fence_context_alloc(1);
 
 	intel_engine_init_hangcheck(engine);
-	i915_gem_batch_pool_init(&engine->i915->drm, &engine->batch_pool);
+	i915_gem_batch_pool_init(engine, &engine->batch_pool);
 }
 
 /**
-- 
2.8.1

* [PATCH 04/22] drm/i915: Remove request retirement before each batch
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (2 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 03/22] drm/i915: Double check the active status on the batch pool Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-28  8:32   ` Joonas Lahtinen
  2016-07-28  9:54   ` Daniel Vetter
  2016-07-27 11:14 ` [PATCH 05/22] drm/i915: Remove i915_gem_execbuffer_retire_commands() Chris Wilson
                   ` (19 subsequent siblings)
  23 siblings, 2 replies; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

This reimplements the denial-of-service protection against igt from
commit 227f782e4667 ("drm/i915: Retire requests before creating a new
one") and transfers the stall from before each batch into get_pages().
The issue is that the stall increases latency between batches, which in
some cases (especially coupled with execlists) is detrimental to keeping
the GPU well fed. We have also observed that retiring requests can
itself free objects (and requests), and so makes a good first step when
shrinking.

v2: Recycle objects prior to i915_gem_object_get_pages()
v3: Remove the reference to the ring from i915_gem_retire_requests_ring()
as it operates on an intel_engine_cs.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            | 1 -
 drivers/gpu/drm/i915/i915_gem.c            | 7 +++++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 --
 drivers/gpu/drm/i915/i915_gem_request.c    | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fbda38f25c6b..2de3d16f7b80 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3169,7 +3169,6 @@ struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine);
 
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
-void i915_gem_retire_requests_ring(struct intel_engine_cs *engine);
 
 static inline u32 i915_reset_counter(struct i915_gpu_error *error)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bf652dc88024..68dbe4f7940c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2244,7 +2244,6 @@ int
 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	const struct drm_i915_gem_object_ops *ops = obj->ops;
 	int ret;
 
 	if (obj->pages)
@@ -2257,7 +2256,10 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 
 	BUG_ON(obj->pages_pin_count);
 
-	ret = ops->get_pages(obj);
+	/* Recycle as many active objects as possible first */
+	i915_gem_retire_requests(dev_priv);
+
+	ret = obj->ops->get_pages(obj);
 	if (ret)
 		return ret;
 
@@ -4437,6 +4439,7 @@ i915_gem_cleanup_engines(struct drm_device *dev)
 static void
 init_engine_lists(struct intel_engine_cs *engine)
 {
+	/* Early initialisation so that core GEM works during engine setup */
 	INIT_LIST_HEAD(&engine->request_list);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 5e3b5054f72d..0593ea3ba211 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -781,8 +781,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
 	bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
 	int retry;
 
-	i915_gem_retire_requests_ring(engine);
-
 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
 
 	INIT_LIST_HEAD(&ordered_vmas);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 07f08e546915..3395c955a532 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -717,7 +717,7 @@ complete:
 	return ret;
 }
 
-void i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
+static void engine_retire_requests(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request, *next;
 
@@ -741,7 +741,7 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
 	GEM_BUG_ON(!dev_priv->gt.awake);
 
 	for_each_engine(engine, dev_priv) {
-		i915_gem_retire_requests_ring(engine);
+		engine_retire_requests(engine);
 		if (list_empty(&engine->request_list))
 			dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
 	}
-- 
2.8.1

* [PATCH 05/22] drm/i915: Remove i915_gem_execbuffer_retire_commands()
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (3 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 04/22] drm/i915: Remove request retirement before each batch Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-28  8:46   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 06/22] drm/i915: Fix up vma alignment to be u64 Chris Wilson
                   ` (18 subsequent siblings)
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

Move the single line to the callsite as the name is now misleading, and
the purpose is solely to add the request to the execution queue.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0593ea3ba211..63984c4d8e5a 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1211,13 +1211,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 	}
 }
 
-static void
-i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
-{
-	/* Add a breadcrumb for the completion of the batch buffer */
-	__i915_add_request(params->request, params->batch_obj, true);
-}
-
 static int
 i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
 {
@@ -1692,7 +1685,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	ret = execbuf_submit(params, args, &eb->vmas);
 err_request:
-	i915_gem_execbuffer_retire_commands(params);
+	__i915_add_request(params->request, params->batch_obj, ret == 0);
 
 err_batch_unpin:
 	/*
-- 
2.8.1

* [PATCH 06/22] drm/i915: Fix up vma alignment to be u64
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (4 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 05/22] drm/i915: Remove i915_gem_execbuffer_retire_commands() Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-28  8:59   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 07/22] drm/i915: Pad GTT views of exec objects up to user specified size Chris Wilson
                   ` (17 subsequent siblings)
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

This is not the full fix, as we are required to percolate the u64 nature
down through the drm_mm stack, but it is needed now to prevent
explosions due to a mismatch between execbuf (eb_vma_misplaced) and vma
binding (i915_vma_misplaced) - and it reduces the risk of spurious
changes as we adjust the vma interface in the next patches.
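
A toy demonstration of the failure mode being guarded against: if one
misplacement check takes the alignment as u32 while the other takes it
as u64, any alignment above 4GiB silently truncates in the former and
the two checks disagree (illustrative functions, not the kernel ones):

#include <stdint.h>
#include <stdio.h>

static int misplaced32(uint64_t start, uint32_t alignment)
{
	/* a 64-bit argument is silently truncated at this boundary */
	return alignment && (start & (alignment - 1));
}

static int misplaced64(uint64_t start, uint64_t alignment)
{
	return alignment && (start & (alignment - 1));
}

int main(void)
{
	uint64_t start = 1ull << 32;     /* node placed at 4GiB */
	uint64_t alignment = 1ull << 33; /* caller wanted 8GiB alignment */

	printf("u32 check: %d\n", misplaced32(start, alignment)); /* 0 */
	printf("u64 check: %d\n", misplaced64(start, alignment)); /* 1 */
	return 0;
}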

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h       | 14 ++++++--------
 drivers/gpu/drm/i915/i915_gem.c       | 26 +++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_evict.c |  5 +++--
 3 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2de3d16f7b80..74a31358fd87 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3032,13 +3032,13 @@ void i915_gem_free_object(struct drm_gem_object *obj);
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    struct i915_address_space *vm,
-		    uint32_t alignment,
-		    uint64_t flags);
+		    u64 alignment,
+		    u64 flags);
 int __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
-			 uint32_t alignment,
-			 uint64_t flags);
+			 u64 alignment,
+			 u64 flags);
 
 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 		  u32 flags);
@@ -3398,11 +3398,9 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
-					  int min_size,
-					  unsigned alignment,
+					  u64 min_size, u64 alignment,
 					  unsigned cache_level,
-					  unsigned long start,
-					  unsigned long end,
+					  u64 start, u64 end,
 					  unsigned flags);
 int __must_check i915_gem_evict_for_vma(struct i915_vma *target);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 68dbe4f7940c..c4df44b47cea 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2969,8 +2969,8 @@ static struct i915_vma *
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 			   struct i915_address_space *vm,
 			   const struct i915_ggtt_view *ggtt_view,
-			   unsigned alignment,
-			   uint64_t flags)
+			   u64 alignment,
+			   u64 flags)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -3029,9 +3029,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 		alignment = flags & PIN_MAPPABLE ? fence_alignment :
 						unfenced_alignment;
 	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
-		DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
+		DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n",
 			  ggtt_view ? ggtt_view->type : 0,
-			  alignment);
+			  (long long)alignment);
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -3688,7 +3688,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 }
 
 static bool
-i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
+i915_vma_misplaced(struct i915_vma *vma, u64 alignment, u64 flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 
@@ -3737,8 +3737,8 @@ static int
 i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		       struct i915_address_space *vm,
 		       const struct i915_ggtt_view *ggtt_view,
-		       uint32_t alignment,
-		       uint64_t flags)
+		       u64 alignment,
+		       u64 flags)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct i915_vma *vma;
@@ -3767,12 +3767,12 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		if (i915_vma_misplaced(vma, alignment, flags)) {
 			WARN(vma->pin_count,
 			     "bo is already pinned in %s with incorrect alignment:"
-			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
+			     " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
 			     " obj->map_and_fenceable=%d\n",
 			     ggtt_view ? "ggtt" : "ppgtt",
 			     upper_32_bits(vma->node.start),
 			     lower_32_bits(vma->node.start),
-			     alignment,
+			     (long long)alignment,
 			     !!(flags & PIN_MAPPABLE),
 			     obj->map_and_fenceable);
 			ret = i915_vma_unbind(vma);
@@ -3808,8 +3808,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 int
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    struct i915_address_space *vm,
-		    uint32_t alignment,
-		    uint64_t flags)
+		    u64 alignment,
+		    u64 flags)
 {
 	return i915_gem_object_do_pin(obj, vm,
 				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
@@ -3819,8 +3819,8 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
 int
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
-			 uint32_t alignment,
-			 uint64_t flags)
+			 u64 alignment,
+			 u64 flags)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 4bce72fa14c4..ef12ecd2b182 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -84,8 +84,9 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
  */
 int
 i915_gem_evict_something(struct i915_address_space *vm,
-			 int min_size, unsigned alignment, unsigned cache_level,
-			 unsigned long start, unsigned long end,
+			 u64 min_size, u64 alignment,
+			 unsigned cache_level,
+			 u64 start, u64 end,
 			 unsigned flags)
 {
 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
-- 
2.8.1

* [PATCH 07/22] drm/i915: Pad GTT views of exec objects up to user specified size
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (5 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 06/22] drm/i915: Fix up vma alignment to be u64 Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-28  9:55   ` Daniel Vetter
  2016-07-29  7:59   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 08/22] drm/i915: Reduce WARN(i915_gem_valid_gtt_space) to a debug-only check Chris Wilson
                   ` (16 subsequent siblings)
  23 siblings, 2 replies; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

Our GPUs impose certain requirements upon buffers that depend upon how
exactly they are used. Typically this is expressed as requiring a larger
surface than would be naively computed by pitch * height.
Normally such requirements are hidden away in the userspace driver, but
when we accept pointers from strangers and later impose extra conditions
on them, the original client allocator has no idea about the
monstrosities in the GPU. We therefore require the userspace driver to
inform the kernel of how many padding pages are required beyond the
client allocation.

v2: Long time, no see
v3: Try an anonymous union for uapi struct compatibility
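
From the userspace side, the new flag would be used roughly as follows
(a sketch against the uapi change below; the surrounding execbuffer
setup and updated headers are assumed):

#include <stdint.h>
#include <string.h>
#include <drm/i915_drm.h>

/* Reserve a 2MiB GTT node for a smaller object so the GPU may safely
 * read beyond the end of the client allocation. */
static void init_padded_entry(struct drm_i915_gem_exec_object2 *entry,
			      uint32_t handle)
{
	memset(entry, 0, sizeof(*entry));
	entry->handle = handle;
	entry->flags = EXEC_OBJECT_PAD_TO_SIZE;
	entry->pad_to_size = 2 << 20; /* must be page-aligned */
}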

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  6 ++-
 drivers/gpu/drm/i915/i915_gem.c            | 79 ++++++++++++++----------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++-
 include/uapi/drm/i915_drm.h                |  8 ++-
 4 files changed, 62 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 74a31358fd87..1e1369319326 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3032,11 +3032,13 @@ void i915_gem_free_object(struct drm_gem_object *obj);
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    struct i915_address_space *vm,
+		    u64 size,
 		    u64 alignment,
 		    u64 flags);
 int __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
+			 u64 size,
 			 u64 alignment,
 			 u64 flags);
 
@@ -3313,8 +3315,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
-	return i915_gem_object_pin(obj, &ggtt->base,
-				   alignment, flags | PIN_GLOBAL);
+	return i915_gem_object_pin(obj, &ggtt->base, 0, alignment,
+				   flags | PIN_GLOBAL);
 }
 
 void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c4df44b47cea..2147225e7887 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1689,7 +1689,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	/* Now pin it into the GTT if needed */
-	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
+	ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
 	if (ret)
 		goto unlock;
 
@@ -2969,21 +2969,20 @@ static struct i915_vma *
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 			   struct i915_address_space *vm,
 			   const struct i915_ggtt_view *ggtt_view,
+			   u64 size,
 			   u64 alignment,
 			   u64 flags)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	u32 fence_alignment, unfenced_alignment;
-	u32 search_flag, alloc_flag;
 	u64 start, end;
-	u64 size, fence_size;
+	u32 search_flag, alloc_flag;
 	struct i915_vma *vma;
 	int ret;
 
 	if (i915_is_ggtt(vm)) {
-		u32 view_size;
+		u32 fence_size, fence_alignment, unfenced_alignment;
+		u64 view_size;
 
 		if (WARN_ON(!ggtt_view))
 			return ERR_PTR(-EINVAL);
@@ -3001,48 +3000,39 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 								view_size,
 								obj->tiling_mode,
 								false);
-		size = flags & PIN_MAPPABLE ? fence_size : view_size;
+		size = max(size, view_size);
+		if (flags & PIN_MAPPABLE)
+			size = max_t(u64, size, fence_size);
+
+		if (alignment == 0)
+			alignment = flags & PIN_MAPPABLE ? fence_alignment :
+				unfenced_alignment;
+		if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
+			DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n",
+				  ggtt_view ? ggtt_view->type : 0,
+				  (long long)alignment);
+			return ERR_PTR(-EINVAL);
+		}
 	} else {
-		fence_size = i915_gem_get_gtt_size(dev,
-						   obj->base.size,
-						   obj->tiling_mode);
-		fence_alignment = i915_gem_get_gtt_alignment(dev,
-							     obj->base.size,
-							     obj->tiling_mode,
-							     true);
-		unfenced_alignment =
-			i915_gem_get_gtt_alignment(dev,
-						   obj->base.size,
-						   obj->tiling_mode,
-						   false);
-		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
+		size = max_t(u64, size, obj->base.size);
+		alignment = 4096;
 	}
 
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
 	end = vm->total;
 	if (flags & PIN_MAPPABLE)
-		end = min_t(u64, end, ggtt->mappable_end);
+		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
 	if (flags & PIN_ZONE_4G)
 		end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
 
-	if (alignment == 0)
-		alignment = flags & PIN_MAPPABLE ? fence_alignment :
-						unfenced_alignment;
-	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
-		DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n",
-			  ggtt_view ? ggtt_view->type : 0,
-			  (long long)alignment);
-		return ERR_PTR(-EINVAL);
-	}
-
 	/* If binding the object/GGTT view requires more space than the entire
 	 * aperture has, reject it early before evicting everything in a vain
 	 * attempt to find space.
 	 */
 	if (size > end) {
-		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
+		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
 			  ggtt_view ? ggtt_view->type : 0,
-			  size,
+			  size, obj->base.size,
 			  flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
 		return ERR_PTR(-E2BIG);
@@ -3536,7 +3526,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	 * (e.g. libkms for the bootup splash), we have to ensure that we
 	 * always use map_and_fenceable for all scanout buffers.
 	 */
-	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
+	ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
 				       view->type == I915_GGTT_VIEW_NORMAL ?
 				       PIN_MAPPABLE : 0);
 	if (ret)
@@ -3688,12 +3678,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 }
 
 static bool
-i915_vma_misplaced(struct i915_vma *vma, u64 alignment, u64 flags)
+i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	if (alignment &&
-	    vma->node.start & (alignment - 1))
+	if (vma->node.size < size)
+		return true;
+
+	if (alignment && vma->node.start & (alignment - 1))
 		return true;
 
 	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
@@ -3737,6 +3729,7 @@ static int
 i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		       struct i915_address_space *vm,
 		       const struct i915_ggtt_view *ggtt_view,
+		       u64 size,
 		       u64 alignment,
 		       u64 flags)
 {
@@ -3764,7 +3757,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
 			return -EBUSY;
 
-		if (i915_vma_misplaced(vma, alignment, flags)) {
+		if (i915_vma_misplaced(vma, size, alignment, flags)) {
 			WARN(vma->pin_count,
 			     "bo is already pinned in %s with incorrect alignment:"
 			     " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
@@ -3785,8 +3778,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 
 	bound = vma ? vma->bound : 0;
 	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
-		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
-						 flags);
+		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
+						 size, alignment, flags);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
 	} else {
@@ -3808,17 +3801,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 int
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    struct i915_address_space *vm,
+		    u64 size,
 		    u64 alignment,
 		    u64 flags)
 {
 	return i915_gem_object_do_pin(obj, vm,
 				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
-				      alignment, flags);
+				      size, alignment, flags);
 }
 
 int
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
+			 u64 size,
 			 u64 alignment,
 			 u64 flags)
 {
@@ -3829,7 +3824,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	BUG_ON(!view);
 
 	return i915_gem_object_do_pin(obj, &ggtt->base, view,
-				      alignment, flags | PIN_GLOBAL);
+				      size, alignment, flags | PIN_GLOBAL);
 }
 
 void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 63984c4d8e5a..f40fd7f9e5fa 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -682,10 +682,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 			flags |= PIN_HIGH;
 	}
 
-	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
+	ret = i915_gem_object_pin(obj, vma->vm,
+				  entry->pad_to_size,
+				  entry->alignment,
+				  flags);
 	if ((ret == -ENOSPC  || ret == -E2BIG) &&
 	    only_mappable_for_reloc(entry->flags))
 		ret = i915_gem_object_pin(obj, vma->vm,
+					  entry->pad_to_size,
 					  entry->alignment,
 					  flags & ~PIN_MAPPABLE);
 	if (ret)
@@ -748,6 +752,9 @@ eb_vma_misplaced(struct i915_vma *vma)
 	    vma->node.start & (entry->alignment - 1))
 		return true;
 
+	if (vma->node.size < entry->pad_to_size)
+		return true;
+
 	if (entry->flags & EXEC_OBJECT_PINNED &&
 	    vma->node.start != entry->offset)
 		return true;
@@ -1091,6 +1098,13 @@ validate_exec_list(struct drm_device *dev,
 		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
 			return -EINVAL;
 
+		/* pad_to_size was once a reserved field, so sanitize it */
+		if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
+			if (offset_in_page(exec[i].pad_to_size))
+				return -EINVAL;
+		} else
+			exec[i].pad_to_size = 0;
+
 		/* First check for malicious input causing overflow in
 		 * the worst case where we need to allocate the entire
 		 * relocation tree as a single array.
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 33ce5ff9556a..0f292733cffc 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -727,11 +727,15 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_WRITE		 (1<<2)
 #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
 #define EXEC_OBJECT_PINNED		 (1<<4)
+#define EXEC_OBJECT_PAD_TO_SIZE		 (1<<5)
 /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
-#define __EXEC_OBJECT_UNKNOWN_FLAGS	(-(EXEC_OBJECT_PINNED<<1))
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
 	__u64 flags;
 
-	__u64 rsvd1;
+	union {
+		__u64 rsvd1;
+		__u64 pad_to_size;
+	};
 	__u64 rsvd2;
 };
 
-- 
2.8.1

* [PATCH 08/22] drm/i915: Reduce WARN(i915_gem_valid_gtt_space) to a debug-only check
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (6 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 07/22] drm/i915: Pad GTT views of exec objects up to user specified size Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-28  9:18   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 09/22] drm/i915: Split insertion/binding of an object into the VM Chris Wilson
                   ` (15 subsequent siblings)
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

i915_gem_valid_gtt_space() is used after inserting the VMA to double
check the list - the location should have been chosen to pass all the
restrictions.
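
Unlike WARN_ON(), GEM_BUG_ON() compiles away entirely on non-debug
builds, so the check is free in production. Conceptually (a simplified
sketch, not the verbatim i915 definition):

#ifdef CONFIG_DRM_I915_DEBUG_GEM
#define GEM_BUG_ON(expr) BUG_ON(expr)
#else
#define GEM_BUG_ON(expr) do { } while (0)
#endif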

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2147225e7887..f47a9e450239 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3095,10 +3095,7 @@ search_free:
 			goto err_vma;
 		}
 	}
-	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
-		ret = -EINVAL;
-		goto err_remove_node;
-	}
+	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
 
 	trace_i915_vma_bind(vma, flags);
 	ret = i915_vma_bind(vma, obj->cache_level, flags);
-- 
2.8.1

* [PATCH 09/22] drm/i915: Split insertion/binding of an object into the VM
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (7 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 08/22] drm/i915: Reduce WARN(i915_gem_valid_gtt_space) to a debug-only check Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-28  9:25   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 10/22] drm/i915: Record allocated vma size Chris Wilson
                   ` (14 subsequent siblings)
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

Split the insertion of the object into the address space's range manager
from the binding of that object into the GTT, to simplify the code flow
when pinning a VMA.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 35 +++++++++++++++--------------------
 1 file changed, 15 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f47a9e450239..1773b35703bc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2966,12 +2966,12 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
  * @flags: mask of PIN_* flags to use
  */
 static struct i915_vma *
-i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
-			   struct i915_address_space *vm,
-			   const struct i915_ggtt_view *ggtt_view,
-			   u64 size,
-			   u64 alignment,
-			   u64 flags)
+i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
+			       struct i915_address_space *vm,
+			       const struct i915_ggtt_view *ggtt_view,
+			       u64 size,
+			       u64 alignment,
+			       u64 flags)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -3097,19 +3097,12 @@ search_free:
 	}
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
 
-	trace_i915_vma_bind(vma, flags);
-	ret = i915_vma_bind(vma, obj->cache_level, flags);
-	if (ret)
-		goto err_remove_node;
-
 	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
 	list_move_tail(&vma->vm_link, &vm->inactive_list);
 	obj->bind_count++;
 
 	return vma;
 
-err_remove_node:
-	drm_mm_remove_node(&vma->node);
 err_vma:
 	vma = ERR_PTR(ret);
 err_unpin:
@@ -3773,24 +3766,26 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		}
 	}
 
-	bound = vma ? vma->bound : 0;
 	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
-		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
-						 size, alignment, flags);
+		vma = i915_gem_object_insert_into_vm(obj, vm, ggtt_view,
+						     size, alignment, flags);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
-	} else {
-		ret = i915_vma_bind(vma, obj->cache_level, flags);
-		if (ret)
-			return ret;
 	}
 
+	bound = vma->bound;
+	ret = i915_vma_bind(vma, obj->cache_level, flags);
+	if (ret)
+		return ret;
+
 	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
 	    (bound ^ vma->bound) & GLOBAL_BIND) {
 		__i915_vma_set_map_and_fenceable(vma);
 		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
 	}
 
+	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
+
 	vma->pin_count++;
 	return 0;
 }
-- 
2.8.1

* [PATCH 10/22] drm/i915: Record allocated vma size
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (8 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 09/22] drm/i915: Split insertion/binding of an object into the VM Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-29  6:53   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 11/22] drm/i915: Wrap vma->pin_count accessors with small inline helpers Chris Wilson
                   ` (13 subsequent siblings)
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

Tracking the size of the VMA as allocated allows us to dramatically
reduce the complexity of later functions (like inserting the VMA into
the drm_mm range manager).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h     |  10 ++--
 drivers/gpu/drm/i915/i915_gem.c     | 115 ++++++++++++++++--------------------
 drivers/gpu/drm/i915/i915_gem_gtt.c |  66 +++++++--------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   5 +-
 4 files changed, 77 insertions(+), 119 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1e1369319326..717834bc1ae6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3241,11 +3241,11 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 int i915_gem_open(struct drm_device *dev, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
-uint32_t
-i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode);
-uint32_t
-i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
-			    int tiling_mode, bool fenced);
+uint64_t
+i915_gem_get_gtt_size(struct drm_device *dev, uint64_t size, int tiling_mode);
+uint64_t
+i915_gem_get_gtt_alignment(struct drm_device *dev, uint64_t size,
+			   int tiling_mode, bool fenced);
 
 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 				    enum i915_cache_level cache_level);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1773b35703bc..358c0ca60530 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1844,11 +1844,13 @@ i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
 		i915_gem_release_mmap(obj);
 }
 
-uint32_t
-i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
+uint64_t
+i915_gem_get_gtt_size(struct drm_device *dev, uint64_t size, int tiling_mode)
 {
 	uint32_t gtt_size;
 
+	GEM_BUG_ON(size == 0);
+
 	if (INTEL_INFO(dev)->gen >= 4 ||
 	    tiling_mode == I915_TILING_NONE)
 		return size;
@@ -1875,10 +1877,12 @@ i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
  * Return the required GTT alignment for an object, taking into account
  * potential fence register mapping.
  */
-uint32_t
-i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
+uint64_t
+i915_gem_get_gtt_alignment(struct drm_device *dev, uint64_t size,
 			   int tiling_mode, bool fenced)
 {
+	GEM_BUG_ON(size == 0);
+
 	/*
 	 * Minimum alignment is 4k (GTT page size), but might be greater
 	 * if a fence register is needed for the object.
@@ -2975,51 +2979,35 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	u64 start, end;
-	u32 search_flag, alloc_flag;
 	struct i915_vma *vma;
+	u64 start, end;
+	u64 min_alignment;
 	int ret;
 
-	if (i915_is_ggtt(vm)) {
-		u32 fence_size, fence_alignment, unfenced_alignment;
-		u64 view_size;
-
-		if (WARN_ON(!ggtt_view))
-			return ERR_PTR(-EINVAL);
-
-		view_size = i915_ggtt_view_size(obj, ggtt_view);
-
-		fence_size = i915_gem_get_gtt_size(dev,
-						   view_size,
-						   obj->tiling_mode);
-		fence_alignment = i915_gem_get_gtt_alignment(dev,
-							     view_size,
-							     obj->tiling_mode,
-							     true);
-		unfenced_alignment = i915_gem_get_gtt_alignment(dev,
-								view_size,
-								obj->tiling_mode,
-								false);
-		size = max(size, view_size);
-		if (flags & PIN_MAPPABLE)
-			size = max_t(u64, size, fence_size);
-
-		if (alignment == 0)
-			alignment = flags & PIN_MAPPABLE ? fence_alignment :
-				unfenced_alignment;
-		if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
-			DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n",
-				  ggtt_view ? ggtt_view->type : 0,
-				  (long long)alignment);
-			return ERR_PTR(-EINVAL);
-		}
-	} else {
-		size = max_t(u64, size, obj->base.size);
-		alignment = 4096;
+	vma = ggtt_view ?
+		i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
+		i915_gem_obj_lookup_or_create_vma(obj, vm);
+	if (IS_ERR(vma))
+		return vma;
+
+	size = max(size, vma->size);
+	if (flags & PIN_MAPPABLE)
+		size = i915_gem_get_gtt_size(dev, size, obj->tiling_mode);
+
+	min_alignment =
+		i915_gem_get_gtt_alignment(dev, size, obj->tiling_mode,
+					   flags & PIN_MAPPABLE);
+	if (alignment == 0)
+		alignment = min_alignment;
+	if (alignment & (min_alignment - 1)) {
+		DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
+			  alignment, min_alignment);
+		return ERR_PTR(-EINVAL);
 	}
 
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
-	end = vm->total;
+
+	end = vma->vm->total;
 	if (flags & PIN_MAPPABLE)
 		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
 	if (flags & PIN_ZONE_4G)
@@ -3030,8 +3018,7 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 	 * attempt to find space.
 	 */
 	if (size > end) {
-		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
-			  ggtt_view ? ggtt_view->type : 0,
+		DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
 			  size, obj->base.size,
 			  flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
@@ -3044,31 +3031,27 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 
 	i915_gem_object_pin_pages(obj);
 
-	vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
-			  i915_gem_obj_lookup_or_create_vma(obj, vm);
-
-	if (IS_ERR(vma))
-		goto err_unpin;
-
 	if (flags & PIN_OFFSET_FIXED) {
 		uint64_t offset = flags & PIN_OFFSET_MASK;
-
-		if (offset & (alignment - 1) || offset + size > end) {
+		if (offset & (alignment - 1) || offset > end - size) {
 			ret = -EINVAL;
-			goto err_vma;
+			goto err_unpin;
 		}
+
 		vma->node.start = offset;
 		vma->node.size = size;
 		vma->node.color = obj->cache_level;
-		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
 		if (ret) {
 			ret = i915_gem_evict_for_vma(vma);
 			if (ret == 0)
-				ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+				ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
+			if (ret)
+				goto err_unpin;
 		}
-		if (ret)
-			goto err_vma;
 	} else {
+		u32 search_flag, alloc_flag;
+
 		if (flags & PIN_HIGH) {
 			search_flag = DRM_MM_SEARCH_BELOW;
 			alloc_flag = DRM_MM_CREATE_TOP;
@@ -3077,37 +3060,39 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 			alloc_flag = DRM_MM_CREATE_DEFAULT;
 		}
 
+		if (alignment <= 4096)
+			alignment = 0; /* for efficient drm_mm searching */
+
 search_free:
-		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+		ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
+							  &vma->node,
 							  size, alignment,
 							  obj->cache_level,
 							  start, end,
 							  search_flag,
 							  alloc_flag);
 		if (ret) {
-			ret = i915_gem_evict_something(vm, size, alignment,
+			ret = i915_gem_evict_something(vma->vm, size, alignment,
 						       obj->cache_level,
 						       start, end,
 						       flags);
 			if (ret == 0)
 				goto search_free;
 
-			goto err_vma;
+			goto err_unpin;
 		}
 	}
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
 
 	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
-	list_move_tail(&vma->vm_link, &vm->inactive_list);
+	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 	obj->bind_count++;
 
 	return vma;
 
-err_vma:
-	vma = ERR_PTR(ret);
 err_unpin:
 	i915_gem_object_unpin_pages(obj);
-	return vma;
+	return ERR_PTR(ret);
 }
 
 bool
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5869fa074009..ebf28bf8db00 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -184,7 +184,7 @@ static void ppgtt_unbind_vma(struct i915_vma *vma)
 {
 	vma->vm->clear_range(vma->vm,
 			     vma->node.start,
-			     vma->obj->base.size,
+			     vma->size,
 			     true);
 }
 
@@ -2697,28 +2697,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 
 static void ggtt_unbind_vma(struct i915_vma *vma)
 {
-	struct drm_device *dev = vma->vm->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_i915_gem_object *obj = vma->obj;
-	const uint64_t size = min_t(uint64_t,
-				    obj->base.size,
-				    vma->node.size);
+	struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
+	const u64 size = min(vma->size, vma->node.size);
 
-	if (vma->bound & GLOBAL_BIND) {
+	if (vma->bound & GLOBAL_BIND)
 		vma->vm->clear_range(vma->vm,
-				     vma->node.start,
-				     size,
+				     vma->node.start, size,
 				     true);
-	}
-
-	if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
-		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
 
+	if (vma->bound & LOCAL_BIND && appgtt)
 		appgtt->base.clear_range(&appgtt->base,
-					 vma->node.start,
-					 size,
+					 vma->node.start, size,
 					 true);
-	}
 }
 
 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
@@ -3367,14 +3357,14 @@ void i915_vma_close(struct i915_vma *vma)
 static struct i915_vma *
 __i915_gem_vma_create(struct drm_i915_gem_object *obj,
 		      struct i915_address_space *vm,
-		      const struct i915_ggtt_view *ggtt_view)
+		      const struct i915_ggtt_view *view)
 {
 	struct i915_vma *vma;
 	int i;
 
 	GEM_BUG_ON(vm->closed);
 
-	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
+	if (WARN_ON(i915_is_ggtt(vm) != !!view))
 		return ERR_PTR(-EINVAL);
 
 	vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
@@ -3388,11 +3378,20 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
 	list_add(&vma->vm_link, &vm->unbound_list);
 	vma->vm = vm;
 	vma->obj = obj;
+	vma->size = obj->base.size;
 	vma->is_ggtt = i915_is_ggtt(vm);
 
-	if (i915_is_ggtt(vm))
-		vma->ggtt_view = *ggtt_view;
-	else
+	if (i915_is_ggtt(vm)) {
+		vma->ggtt_view = *view;
+		if (view->type == I915_GGTT_VIEW_PARTIAL) {
+			vma->size = view->params.partial.size;
+			vma->size <<= PAGE_SHIFT;
+		} else if (view->type == I915_GGTT_VIEW_ROTATED) {
+			vma->size =
+				intel_rotation_info_size(&view->params.rotated);
+			vma->size <<= PAGE_SHIFT;
+		}
+	} else
 		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
 
 	list_add_tail(&vma->obj_link, &obj->vma_list);
@@ -3678,29 +3677,6 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 	return 0;
 }
 
-/**
- * i915_ggtt_view_size - Get the size of a GGTT view.
- * @obj: Object the view is of.
- * @view: The view in question.
- *
- * @return The size of the GGTT view in bytes.
- */
-size_t
-i915_ggtt_view_size(struct drm_i915_gem_object *obj,
-		    const struct i915_ggtt_view *view)
-{
-	if (view->type == I915_GGTT_VIEW_NORMAL) {
-		return obj->base.size;
-	} else if (view->type == I915_GGTT_VIEW_ROTATED) {
-		return intel_rotation_info_size(&view->params.rotated) << PAGE_SHIFT;
-	} else if (view->type == I915_GGTT_VIEW_PARTIAL) {
-		return view->params.partial.size << PAGE_SHIFT;
-	} else {
-		WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
-		return obj->base.size;
-	}
-}
-
 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 {
 	void __iomem *ptr;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index a79015bf7261..5c3a093b83c1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -180,6 +180,7 @@ struct i915_vma {
 	struct drm_i915_gem_object *obj;
 	struct i915_address_space *vm;
 	void __iomem *iomap;
+	u64 size;
 
 	unsigned int active;
 	struct i915_gem_active last_read[I915_NUM_ENGINES];
@@ -605,10 +606,6 @@ i915_ggtt_view_equal(const struct i915_ggtt_view *a,
 	return true;
 }
 
-size_t
-i915_ggtt_view_size(struct drm_i915_gem_object *obj,
-		    const struct i915_ggtt_view *view);
-
 /**
  * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture
  * @vma: VMA to iomap
-- 
2.8.1


* [PATCH 11/22] drm/i915: Wrap vma->pin_count accessors with small inline helpers
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (9 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 10/22] drm/i915: Record allocated vma size Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-29  6:59   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 12/22] drm/i915: Start passing around i915_vma from execbuffer Chris Wilson
                   ` (12 subsequent siblings)
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

In the next few patches, the VMA pinning API is overhauled; to reduce
the churn, we pull the update to the accessors out into this
preparatory patch.
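
As a rough illustration of the pattern (a minimal user-space sketch;
the struct and the assertion-based checks merely mimic the helpers
added to i915_gem_gtt.h below, they are not the driver code itself):

#include <assert.h>
#include <stdbool.h>

struct vma { unsigned int pin_count : 4; };

static inline int vma_pin_count(const struct vma *vma)
{
	return vma->pin_count;
}

static inline bool vma_is_pinned(const struct vma *vma)
{
	return vma_pin_count(vma);
}

static inline void __vma_pin(struct vma *vma)
{
	vma->pin_count++;
	assert(vma_is_pinned(vma)); /* catches the 4-bit counter wrapping */
}

static inline void __vma_unpin(struct vma *vma)
{
	assert(vma_is_pinned(vma)); /* catches an unbalanced unpin */
	vma->pin_count--;
}

int main(void)
{
	struct vma v = { 0 };

	__vma_pin(&v);
	assert(vma_is_pinned(&v) && vma_pin_count(&v) == 1);
	__vma_unpin(&v);
	assert(!vma_is_pinned(&v));
	return 0;
}

Centralising the reads and writes like this lets later patches change
the underlying storage (see patch 13) without touching the callers
again.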

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  2 +-
 drivers/gpu/drm/i915/i915_gem.c            | 27 +++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_evict.c      | 12 ++++++------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  2 +-
 drivers/gpu/drm/i915/i915_gem_fence.c      |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 10 +++++-----
 drivers/gpu/drm/i915/i915_gem_gtt.h        | 31 ++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_gpu_error.c      |  4 ++--
 8 files changed, 59 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 6285d50e6876..e6428feffc56 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -168,7 +168,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	if (obj->base.name)
 		seq_printf(m, " (name: %d)", obj->base.name);
 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
-		if (vma->pin_count > 0)
+		if (i915_vma_is_pinned(vma))
 			pin_count++;
 	}
 	seq_printf(m, " (pinned x %d)", pin_count);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 358c0ca60530..fa9cb3313147 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -153,10 +153,10 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	pinned = 0;
 	mutex_lock(&dev->struct_mutex);
 	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
-		if (vma->pin_count)
+		if (i915_vma_is_pinned(vma))
 			pinned += vma->node.size;
 	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
-		if (vma->pin_count)
+		if (i915_vma_is_pinned(vma))
 			pinned += vma->node.size;
 	mutex_unlock(&dev->struct_mutex);
 
@@ -2807,7 +2807,7 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
 
 static void __i915_vma_iounmap(struct i915_vma *vma)
 {
-	GEM_BUG_ON(vma->pin_count);
+	GEM_BUG_ON(i915_vma_is_pinned(vma));
 
 	if (vma->iomap == NULL)
 		return;
@@ -2834,7 +2834,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 * take a pin on the vma so that the second unbind is
 		 * aborted.
 		 */
-		vma->pin_count++;
+		__i915_vma_pin(vma);
 
 		for_each_active(active, idx) {
 			ret = i915_gem_active_retire(&vma->last_read[idx],
@@ -2843,14 +2843,14 @@ int i915_vma_unbind(struct i915_vma *vma)
 				break;
 		}
 
-		vma->pin_count--;
+		__i915_vma_unpin(vma);
 		if (ret)
 			return ret;
 
 		GEM_BUG_ON(i915_vma_is_active(vma));
 	}
 
-	if (vma->pin_count)
+	if (i915_vma_is_pinned(vma))
 		return -EBUSY;
 
 	if (!drm_mm_node_allocated(&vma->node))
@@ -3289,7 +3289,7 @@ restart:
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
 
-		if (vma->pin_count) {
+		if (i915_vma_is_pinned(vma)) {
 			DRM_DEBUG("can not change the cache level of pinned objects\n");
 			return -EBUSY;
 		}
@@ -3729,11 +3729,11 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 			  i915_gem_obj_to_vma(obj, vm);
 
 	if (vma) {
-		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
+		if (WARN_ON(i915_vma_pin_count(vma) == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
 			return -EBUSY;
 
 		if (i915_vma_misplaced(vma, size, alignment, flags)) {
-			WARN(vma->pin_count,
+			WARN(i915_vma_is_pinned(vma),
 			     "bo is already pinned in %s with incorrect alignment:"
 			     " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
 			     " obj->map_and_fenceable=%d\n",
@@ -3771,7 +3771,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 
 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
 
-	vma->pin_count++;
+	__i915_vma_pin(vma);
 	return 0;
 }
 
@@ -3810,10 +3810,11 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
 {
 	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
 
-	WARN_ON(vma->pin_count == 0);
+	GEM_BUG_ON(!vma);
+	WARN_ON(!i915_vma_is_pinned(vma));
 	WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
 
-	--vma->pin_count;
+	__i915_vma_unpin(vma);
 }
 
 int
@@ -4683,7 +4684,7 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
 	list_for_each_entry(vma, &obj->vma_list, obj_link)
-		if (vma->pin_count > 0)
+		if (i915_vma_is_pinned(vma))
 			return true;
 
 	return false;
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index ef12ecd2b182..7be425826539 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -49,7 +49,7 @@ gpu_is_idle(struct drm_i915_private *dev_priv)
 static bool
 mark_free(struct i915_vma *vma, struct list_head *unwind)
 {
-	if (vma->pin_count)
+	if (i915_vma_is_pinned(vma))
 		return false;
 
 	if (WARN_ON(!list_empty(&vma->exec_list)))
@@ -183,7 +183,7 @@ found:
 	 */
 	list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
 		if (drm_mm_scan_remove_block(&vma->node))
-			vma->pin_count++;
+			__i915_vma_pin(vma);
 		else
 			list_del_init(&vma->exec_list);
 	}
@@ -195,7 +195,7 @@ found:
 				       exec_list);
 
 		list_del_init(&vma->exec_list);
-		vma->pin_count--;
+		__i915_vma_unpin(vma);
 		if (ret == 0)
 			ret = i915_vma_unbind(vma);
 	}
@@ -220,8 +220,8 @@ i915_gem_evict_for_vma(struct i915_vma *target)
 
 		vma = container_of(node, typeof(*vma), node);
 
-		if (vma->pin_count) {
-			if (!vma->exec_entry || (vma->pin_count > 1))
+		if (i915_vma_is_pinned(vma)) {
+			if (!vma->exec_entry || i915_vma_pin_count(vma) > 1)
 				/* Object is pinned for some other use */
 				return -EBUSY;
 
@@ -281,7 +281,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
 	}
 
 	list_for_each_entry_safe(vma, next, &vm->inactive_list, vm_link)
-		if (vma->pin_count == 0)
+		if (!i915_vma_is_pinned(vma))
 			WARN_ON(i915_vma_unbind(vma));
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index f40fd7f9e5fa..9ea99e181997 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -261,7 +261,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
 		i915_gem_object_unpin_fence(obj);
 
 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
-		vma->pin_count--;
+		__i915_vma_unpin(vma);
 
 	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index dbaab9ce29c9..3b462da612ca 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -431,7 +431,7 @@ i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
 
 		WARN_ON(!ggtt_vma ||
 			dev_priv->fence_regs[obj->fence_reg].pin_count >
-			ggtt_vma->pin_count);
+			i915_vma_pin_count(ggtt_vma));
 		dev_priv->fence_regs[obj->fence_reg].pin_count++;
 		return true;
 	} else
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ebf28bf8db00..bab2bbdb1fb5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3327,7 +3327,7 @@ i915_vma_retire(struct i915_gem_active *active,
 		return;
 
 	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
-	if (unlikely(vma->closed && !vma->pin_count))
+	if (unlikely(vma->closed && !i915_vma_is_pinned(vma)))
 		WARN_ON(i915_vma_unbind(vma));
 }
 
@@ -3350,7 +3350,7 @@ void i915_vma_close(struct i915_vma *vma)
 	vma->closed = true;
 
 	list_del_init(&vma->obj_link);
-	if (!i915_vma_is_active(vma) && !vma->pin_count)
+	if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma))
 		WARN_ON(i915_vma_unbind(vma));
 }
 
@@ -3658,12 +3658,12 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 
 	if (vma->bound == 0 && vma->vm->allocate_va_range) {
 		/* XXX: i915_vma_pin() will fix this +- hack */
-		vma->pin_count++;
+		__i915_vma_pin(vma);
 		trace_i915_va_alloc(vma);
 		ret = vma->vm->allocate_va_range(vma->vm,
 						 vma->node.start,
 						 vma->node.size);
-		vma->pin_count--;
+		__i915_vma_unpin(vma);
 		if (ret)
 			return ret;
 	}
@@ -3699,6 +3699,6 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 		vma->iomap = ptr;
 	}
 
-	vma->pin_count++;
+	__i915_vma_pin(vma);
 	return ptr;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 5c3a093b83c1..a5bab5f061f1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -606,6 +606,34 @@ i915_ggtt_view_equal(const struct i915_ggtt_view *a,
 	return true;
 }
 
+static inline int i915_vma_pin_count(const struct i915_vma *vma)
+{
+	return vma->pin_count;
+}
+
+static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
+{
+	return i915_vma_pin_count(vma);
+}
+
+static inline void __i915_vma_pin(struct i915_vma *vma)
+{
+	vma->pin_count++;
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
+}
+
+static inline void __i915_vma_unpin(struct i915_vma *vma)
+{
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
+	vma->pin_count--;
+}
+
+static inline void i915_vma_unpin(struct i915_vma *vma)
+{
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+	__i915_vma_unpin(vma);
+}
+
 /**
  * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture
  * @vma: VMA to iomap
@@ -634,9 +662,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
 static inline void i915_vma_unpin_iomap(struct i915_vma *vma)
 {
 	lockdep_assert_held(&vma->vm->dev->struct_mutex);
-	GEM_BUG_ON(vma->pin_count == 0);
 	GEM_BUG_ON(vma->iomap == NULL);
-	vma->pin_count--;
+	i915_vma_unpin(vma);
 }
 
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index c19f72e1bcf7..d94eb907a23a 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -818,7 +818,7 @@ static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
 			break;
 
 		list_for_each_entry(vma, &obj->vma_list, obj_link)
-			if (vma->vm == vm && vma->pin_count > 0)
+			if (vma->vm == vm && i915_vma_is_pinned(vma))
 				capture_bo(err++, vma);
 	}
 
@@ -1230,7 +1230,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
 		list_for_each_entry(vma, &obj->vma_list, obj_link)
-			if (vma->vm == vm && vma->pin_count > 0)
+			if (vma->vm == vm && i915_vma_is_pinned(vma))
 				i++;
 	}
 	error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
-- 
2.8.1


* [PATCH 12/22] drm/i915: Start passing around i915_vma from execbuffer
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (10 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 11/22] drm/i915: Wrap vma->pin_count accessors with small inline helpers Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-29  8:23   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 13/22] drm/i915: Combine all i915_vma bitfields into a single set of flags Chris Wilson
                   ` (11 subsequent siblings)
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

During execbuffer we look up the i915_vma for each object in order to
reserve it in the VM. However, we then do a second lookup of the vma
in order to pin it, all because we lack the necessary interfaces to
operate on the i915_vma directly - so introduce i915_vma_pin()!
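
To make the saving concrete, a toy sketch of the two flows (the list
walk below stands in for i915_gem_obj_to_vma() walking obj->vma_list;
everything here is illustrative, not the driver code):

#include <assert.h>
#include <stddef.h>

struct vma { const void *vm; int pinned; };

static int lookups;

/* stand-in for walking obj->vma_list to find the vma for a vm */
static struct vma *lookup_vma(struct vma *vmas, size_t count,
			      const void *vm)
{
	size_t i;

	lookups++;
	for (i = 0; i < count; i++)
		if (vmas[i].vm == vm)
			return &vmas[i];
	return NULL;
}

int main(void)
{
	static int ggtt; /* token address space */
	struct vma vmas[1] = { { &ggtt, 0 } };
	struct vma *vma;

	/* old flow: reserve, then pin by (obj, vm) - two walks */
	vma = lookup_vma(vmas, 1, &ggtt);
	assert(vma);
	vma = lookup_vma(vmas, 1, &ggtt);
	vma->pinned++;
	assert(lookups == 2);

	/* new flow: keep the vma from the first walk, pin it directly */
	lookups = 0;
	vma = lookup_vma(vmas, 1, &ggtt);
	vma->pinned++;
	assert(lookups == 1);

	return 0;
}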

v2: Tidy parameter lists to remove one level of redirection in the hot
path.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  24 +----
 drivers/gpu/drm/i915/i915_gem.c            | 155 ++++++++++++-----------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 142 ++++++++++++--------------
 drivers/gpu/drm/i915/i915_gem_gtt.c        |   3 -
 drivers/gpu/drm/i915/i915_gem_gtt.h        |  14 +++
 5 files changed, 138 insertions(+), 200 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 717834bc1ae6..490e337b65d4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3018,23 +3018,6 @@ struct drm_i915_gem_object *i915_gem_object_create_from_data(
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
 void i915_gem_free_object(struct drm_gem_object *obj);
 
-/* Flags used by pin/bind&friends. */
-#define PIN_MAPPABLE	(1<<0)
-#define PIN_NONBLOCK	(1<<1)
-#define PIN_GLOBAL	(1<<2)
-#define PIN_OFFSET_BIAS	(1<<3)
-#define PIN_USER	(1<<4)
-#define PIN_UPDATE	(1<<5)
-#define PIN_ZONE_4G	(1<<6)
-#define PIN_HIGH	(1<<7)
-#define PIN_OFFSET_FIXED	(1<<8)
-#define PIN_OFFSET_MASK (~4095)
-int __must_check
-i915_gem_object_pin(struct drm_i915_gem_object *obj,
-		    struct i915_address_space *vm,
-		    u64 size,
-		    u64 alignment,
-		    u64 flags);
 int __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
@@ -3312,11 +3295,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
 		      uint32_t alignment,
 		      unsigned flags)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-
-	return i915_gem_object_pin(obj, &ggtt->base, 0, alignment,
-				   flags | PIN_GLOBAL);
+	return i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal,
+					0, alignment, flags);
 }
 
 void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fa9cb3313147..3b7b44e74301 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2969,26 +2969,18 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
  * @alignment: requested alignment
  * @flags: mask of PIN_* flags to use
  */
-static struct i915_vma *
-i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
-			       struct i915_address_space *vm,
-			       const struct i915_ggtt_view *ggtt_view,
-			       u64 size,
-			       u64 alignment,
-			       u64 flags)
+static int
+i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
+	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_vma *vma;
 	u64 start, end;
 	u64 min_alignment;
 	int ret;
 
-	vma = ggtt_view ?
-		i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
-		i915_gem_obj_lookup_or_create_vma(obj, vm);
-	if (IS_ERR(vma))
-		return vma;
+	GEM_BUG_ON(vma->bound);
+	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 
 	size = max(size, vma->size);
 	if (flags & PIN_MAPPABLE)
@@ -3002,7 +2994,7 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 	if (alignment & (min_alignment - 1)) {
 		DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
 			  alignment, min_alignment);
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
@@ -3022,17 +3014,17 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 			  size, obj->base.size,
 			  flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
-		return ERR_PTR(-E2BIG);
+		return -E2BIG;
 	}
 
 	ret = i915_gem_object_get_pages(obj);
 	if (ret)
-		return ERR_PTR(ret);
+		return ret;
 
 	i915_gem_object_pin_pages(obj);
 
 	if (flags & PIN_OFFSET_FIXED) {
-		uint64_t offset = flags & PIN_OFFSET_MASK;
+		u64 offset = flags & PIN_OFFSET_MASK;
 		if (offset & (alignment - 1) || offset > end - size) {
 			ret = -EINVAL;
 			goto err_unpin;
@@ -3088,11 +3080,11 @@ search_free:
 	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 	obj->bind_count++;
 
-	return vma;
+	return 0;
 
 err_unpin:
 	i915_gem_object_unpin_pages(obj);
-	return ERR_PTR(ret);
+	return ret;
 }
 
 bool
@@ -3657,6 +3649,9 @@ i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 
+	if (!drm_mm_node_allocated(&vma->node))
+		return false;
+
 	if (vma->node.size < size)
 		return true;
 
@@ -3700,91 +3695,42 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 	obj->map_and_fenceable = mappable && fenceable;
 }
 
-static int
-i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
-		       struct i915_address_space *vm,
-		       const struct i915_ggtt_view *ggtt_view,
-		       u64 size,
-		       u64 alignment,
-		       u64 flags)
+int
+i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	struct i915_vma *vma;
-	unsigned bound;
+	unsigned int bound = vma->bound;
 	int ret;
 
-	if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
-		return -ENODEV;
-
-	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
-		return -EINVAL;
-
-	if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
-		return -EINVAL;
-
-	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
-		return -EINVAL;
-
-	vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
-			  i915_gem_obj_to_vma(obj, vm);
+	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
+	GEM_BUG_ON((flags & PIN_GLOBAL) && !vma->is_ggtt);
 
-	if (vma) {
-		if (WARN_ON(i915_vma_pin_count(vma) == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
-			return -EBUSY;
-
-		if (i915_vma_misplaced(vma, size, alignment, flags)) {
-			WARN(i915_vma_is_pinned(vma),
-			     "bo is already pinned in %s with incorrect alignment:"
-			     " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
-			     " obj->map_and_fenceable=%d\n",
-			     ggtt_view ? "ggtt" : "ppgtt",
-			     upper_32_bits(vma->node.start),
-			     lower_32_bits(vma->node.start),
-			     (long long)alignment,
-			     !!(flags & PIN_MAPPABLE),
-			     obj->map_and_fenceable);
-			ret = i915_vma_unbind(vma);
-			if (ret)
-				return ret;
+	if (WARN_ON(i915_vma_pin_count(vma) == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
+		return -EBUSY;
 
-			vma = NULL;
-		}
-	}
+	/* Pin early to prevent the shrinker/eviction logic from destroying
+	 * our vma as we insert and bind.
+	 */
+	__i915_vma_pin(vma);
 
-	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
-		vma = i915_gem_object_insert_into_vm(obj, vm, ggtt_view,
-						     size, alignment, flags);
-		if (IS_ERR(vma))
-			return PTR_ERR(vma);
+	if (!bound) {
+		ret = i915_vma_insert(vma, size, alignment, flags);
+		if (ret)
+			goto err;
 	}
 
-	bound = vma->bound;
-	ret = i915_vma_bind(vma, obj->cache_level, flags);
+	ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
 	if (ret)
-		return ret;
+		goto err;
 
-	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
-	    (bound ^ vma->bound) & GLOBAL_BIND) {
+	if ((bound ^ vma->bound) & GLOBAL_BIND)
 		__i915_vma_set_map_and_fenceable(vma);
-		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
-	}
 
 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
-
-	__i915_vma_pin(vma);
 	return 0;
-}
 
-int
-i915_gem_object_pin(struct drm_i915_gem_object *obj,
-		    struct i915_address_space *vm,
-		    u64 size,
-		    u64 alignment,
-		    u64 flags)
-{
-	return i915_gem_object_do_pin(obj, vm,
-				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
-				      size, alignment, flags);
+err:
+	__i915_vma_unpin(vma);
+	return ret;
 }
 
 int
@@ -3794,14 +3740,35 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 u64 alignment,
 			 u64 flags)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct i915_vma *vma;
+	int ret;
 
 	BUG_ON(!view);
 
-	return i915_gem_object_do_pin(obj, &ggtt->base, view,
-				      size, alignment, flags | PIN_GLOBAL);
+	vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	if (i915_vma_misplaced(vma, size, alignment, flags)) {
+		if (flags & PIN_NONBLOCK &&
+		    (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
+			return -ENOSPC;
+
+		WARN(i915_vma_is_pinned(vma),
+		     "bo is already pinned in ggtt with incorrect alignment:"
+		     " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
+		     " obj->map_and_fenceable=%d\n",
+		     upper_32_bits(vma->node.start),
+		     lower_32_bits(vma->node.start),
+		     (long long)alignment,
+		     !!(flags & PIN_MAPPABLE),
+		     obj->map_and_fenceable);
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
+	}
+
+	return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
 }
 
 void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9ea99e181997..a0759fe613f8 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -45,11 +45,10 @@
 struct i915_execbuffer_params {
 	struct drm_device               *dev;
 	struct drm_file                 *file;
-	u32				 dispatch_flags;
-	u32				 args_batch_start_offset;
-	u32				 batch_obj_vm_offset;
+	struct i915_vma			*batch;
+	u32				dispatch_flags;
+	u32				args_batch_start_offset;
 	struct intel_engine_cs          *engine;
-	struct drm_i915_gem_object      *batch_obj;
 	struct i915_gem_context         *ctx;
 	struct drm_i915_gem_request     *request;
 };
@@ -102,6 +101,26 @@ eb_reset(struct eb_vmas *eb)
 		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
 }
 
+static struct i915_vma *
+eb_get_batch(struct eb_vmas *eb)
+{
+	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
+
+	/*
+	 * SNA is doing fancy tricks with compressing batch buffers, which leads
+	 * to negative relocation deltas. Usually that works out ok since the
+	 * relocate address is still positive, except when the batch is placed
+	 * very low in the GTT. Ensure this doesn't happen.
+	 *
+	 * Note that actual hangs have only been observed on gen7, but for
+	 * paranoia do it everywhere.
+	 */
+	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+
+	return vma;
+}
+
 static int
 eb_lookup_vmas(struct eb_vmas *eb,
 	       struct drm_i915_gem_exec_object2 *exec,
@@ -198,35 +217,6 @@ err:
 	return ret;
 }
 
-static inline struct i915_vma *
-eb_get_batch_vma(struct eb_vmas *eb)
-{
-	/* The batch is always the LAST item in the VMA list */
-	struct i915_vma *vma = list_last_entry(&eb->vmas, typeof(*vma), exec_list);
-
-	return vma;
-}
-
-static struct drm_i915_gem_object *
-eb_get_batch(struct eb_vmas *eb)
-{
-	struct i915_vma *vma = eb_get_batch_vma(eb);
-
-	/*
-	 * SNA is doing fancy tricks with compressing batch buffers, which leads
-	 * to negative relocation deltas. Usually that works out ok since the
-	 * relocate address is still positive, except when the batch is placed
-	 * very low in the GTT. Ensure this doesn't happen.
-	 *
-	 * Note that actual hangs have only been observed on gen7, but for
-	 * paranoia do it everywhere.
-	 */
-	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
-		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
-
-	return vma->obj;
-}
-
 static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
 {
 	if (eb->and < 0) {
@@ -682,16 +672,16 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 			flags |= PIN_HIGH;
 	}
 
-	ret = i915_gem_object_pin(obj, vma->vm,
-				  entry->pad_to_size,
-				  entry->alignment,
-				  flags);
-	if ((ret == -ENOSPC  || ret == -E2BIG) &&
+	ret = i915_vma_pin(vma,
+			   entry->pad_to_size,
+			   entry->alignment,
+			   flags);
+	if ((ret == -ENOSPC || ret == -E2BIG) &&
 	    only_mappable_for_reloc(entry->flags))
-		ret = i915_gem_object_pin(obj, vma->vm,
-					  entry->pad_to_size,
-					  entry->alignment,
-					  flags & ~PIN_MAPPABLE);
+		ret = i915_vma_pin(vma,
+				   entry->pad_to_size,
+				   entry->alignment,
+				   flags & ~PIN_MAPPABLE);
 	if (ret)
 		return ret;
 
@@ -1251,11 +1241,11 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
 	return 0;
 }
 
-static struct drm_i915_gem_object*
+static struct i915_vma*
 i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
 			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
-			  struct eb_vmas *eb,
 			  struct drm_i915_gem_object *batch_obj,
+			  struct eb_vmas *eb,
 			  u32 batch_start_offset,
 			  u32 batch_len,
 			  bool is_master)
@@ -1267,7 +1257,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
 	shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool,
 						   PAGE_ALIGN(batch_len));
 	if (IS_ERR(shadow_batch_obj))
-		return shadow_batch_obj;
+		return ERR_CAST(shadow_batch_obj);
 
 	ret = intel_engine_cmd_parser(engine,
 				      batch_obj,
@@ -1292,14 +1282,12 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
 	i915_gem_object_get(shadow_batch_obj);
 	list_add_tail(&vma->exec_list, &eb->vmas);
 
-	shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
-
-	return shadow_batch_obj;
+	return vma;
 
 err:
 	i915_gem_object_unpin_pages(shadow_batch_obj);
 	if (ret == -EACCES) /* unhandled chained batch */
-		return batch_obj;
+		return NULL;
 	else
 		return ERR_PTR(ret);
 }
@@ -1380,11 +1368,11 @@ execbuf_submit(struct i915_execbuffer_params *params,
 	}
 
 	exec_len   = args->batch_len;
-	exec_start = params->batch_obj_vm_offset +
+	exec_start = params->batch->node.start +
 		     params->args_batch_start_offset;
 
 	if (exec_len == 0)
-		exec_len = params->batch_obj->base.size;
+		exec_len = params->batch->size;
 
 	ret = params->engine->emit_bb_start(params->request,
 					    exec_start, exec_len,
@@ -1488,7 +1476,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	struct eb_vmas *eb;
-	struct drm_i915_gem_object *batch_obj;
 	struct drm_i915_gem_exec_object2 shadow_exec_entry;
 	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx;
@@ -1582,7 +1569,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		goto err;
 
 	/* take note of the batch buffer before we might reorder the lists */
-	batch_obj = eb_get_batch(eb);
+	params->batch = eb_get_batch(eb);
 
 	/* Move the objects en-masse into the GTT, evicting if necessary. */
 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
@@ -1606,7 +1593,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	}
 
 	/* Set the pending read domains for the batch buffer to COMMAND */
-	if (batch_obj->base.pending_write_domain) {
+	if (params->batch->obj->base.pending_write_domain) {
 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
 		ret = -EINVAL;
 		goto err;
@@ -1614,26 +1601,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	params->args_batch_start_offset = args->batch_start_offset;
 	if (intel_engine_needs_cmd_parser(engine) && args->batch_len) {
-		struct drm_i915_gem_object *parsed_batch_obj;
-
-		parsed_batch_obj = i915_gem_execbuffer_parse(engine,
-							     &shadow_exec_entry,
-							     eb,
-							     batch_obj,
-							     args->batch_start_offset,
-							     args->batch_len,
-							     drm_is_current_master(file));
-		if (IS_ERR(parsed_batch_obj)) {
-			ret = PTR_ERR(parsed_batch_obj);
+		struct i915_vma *vma;
+
+		vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry,
+						params->batch->obj,
+						eb,
+						args->batch_start_offset,
+						args->batch_len,
+						drm_is_current_master(file));
+		if (IS_ERR(vma)) {
+			ret = PTR_ERR(vma);
 			goto err;
 		}
 
-		/*
-		 * parsed_batch_obj == batch_obj means batch not fully parsed:
-		 * Accept, but don't promote to secure.
-		 */
-
-		if (parsed_batch_obj != batch_obj) {
+		if (vma) {
 			/*
 			 * Batch parsed and accepted:
 			 *
@@ -1645,16 +1626,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 			 */
 			dispatch_flags |= I915_DISPATCH_SECURE;
 			params->args_batch_start_offset = 0;
-			batch_obj = parsed_batch_obj;
+			params->batch = vma;
 		}
 	}
 
-	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
+	params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
 
 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but bdw mucks it up again. */
 	if (dispatch_flags & I915_DISPATCH_SECURE) {
+		struct drm_i915_gem_object *obj = params->batch->obj;
+
 		/*
 		 * So on first glance it looks freaky that we pin the batch here
 		 * outside of the reservation loop. But:
@@ -1665,13 +1648,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		 *   fitting due to fragmentation.
 		 * So this is actually safe.
 		 */
-		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
 		if (ret)
 			goto err;
 
-		params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj);
-	} else
-		params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
+		params->batch = i915_gem_obj_to_ggtt(obj);
+	}
 
 	/* Allocate a request for this batch buffer nice and early. */
 	params->request = i915_gem_request_alloc(engine, ctx);
@@ -1694,12 +1676,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	params->file                    = file;
 	params->engine                    = engine;
 	params->dispatch_flags          = dispatch_flags;
-	params->batch_obj               = batch_obj;
 	params->ctx                     = ctx;
 
 	ret = execbuf_submit(params, args, &eb->vmas);
 err_request:
-	__i915_add_request(params->request, params->batch_obj, ret == 0);
+	__i915_add_request(params->request, params->batch->obj, ret == 0);
 
 err_batch_unpin:
 	/*
@@ -1709,8 +1690,7 @@ err_batch_unpin:
 	 * active.
 	 */
 	if (dispatch_flags & I915_DISPATCH_SECURE)
-		i915_gem_object_ggtt_unpin(batch_obj);
-
+		i915_vma_unpin(params->batch);
 err:
 	/* the request owns the ref now */
 	i915_gem_context_put(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index bab2bbdb1fb5..d93c7e04b21e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3657,13 +3657,10 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 		return 0;
 
 	if (vma->bound == 0 && vma->vm->allocate_va_range) {
-		/* XXX: i915_vma_pin() will fix this +- hack */
-		__i915_vma_pin(vma);
 		trace_i915_va_alloc(vma);
 		ret = vma->vm->allocate_va_range(vma->vm,
 						 vma->node.start,
 						 vma->node.size);
-		__i915_vma_unpin(vma);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index a5bab5f061f1..9ed134576b2d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -606,6 +606,20 @@ i915_ggtt_view_equal(const struct i915_ggtt_view *a,
 	return true;
 }
 
+int __must_check
+i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
+/* Flags used by pin/bind&friends. */
+#define PIN_MAPPABLE		BIT(0)
+#define PIN_NONBLOCK		BIT(1)
+#define PIN_GLOBAL		BIT(2)
+#define PIN_OFFSET_BIAS		BIT(3)
+#define PIN_USER		BIT(4)
+#define PIN_UPDATE		BIT(5)
+#define PIN_ZONE_4G		BIT(6)
+#define PIN_HIGH		BIT(7)
+#define PIN_OFFSET_FIXED	BIT(8)
+#define PIN_OFFSET_MASK		(~4095)
+
 static inline int i915_vma_pin_count(const struct i915_vma *vma)
 {
 	return vma->pin_count;
-- 
2.8.1


* [PATCH 13/22] drm/i915: Combine all i915_vma bitfields into a single set of flags
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (11 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 12/22] drm/i915: Start passing around i915_vma from execbuffer Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-29  7:30   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 14/22] drm/i915: Make i915_vma_pin() small and inline Chris Wilson
                   ` (10 subsequent siblings)
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

In preparation for some magic to speed up i915_vma_pin(), which is
among the hottest of hot paths in execbuf, refactor all the bitfields
accessed by i915_vma_pin() into a single unified set of flags.
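
The resulting layout, sketched in stand-alone form (the bit positions
match the defines added to i915_gem_gtt.h below; the harness itself is
just an illustration):

#include <assert.h>
#include <stdbool.h>

#define BIT(n)			(1u << (n))

/* low nibble counts pins; BIT(4) is deliberately left free */
#define VMA_PIN_MASK		0xf
#define VMA_GLOBAL_BIND		BIT(5)
#define VMA_LOCAL_BIND		BIT(6)
#define VMA_GGTT		BIT(7)
#define VMA_CLOSED		BIT(8)

struct vma { unsigned int flags; };

static inline int vma_pin_count(const struct vma *vma)
{
	return vma->flags & VMA_PIN_MASK;
}

static inline bool vma_is_ggtt(const struct vma *vma)
{
	return vma->flags & VMA_GGTT;
}

int main(void)
{
	struct vma v = { .flags = VMA_GGTT | VMA_GLOBAL_BIND };

	v.flags++;	/* pin: the count lives in the low bits... */
	v.flags++;
	assert(vma_pin_count(&v) == 2);	/* ...the flag bits untouched */
	assert(vma_is_ggtt(&v));
	return 0;
}

Keeping everything in one word is what allows the single
increment-and-test fast path of the next patch.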

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  8 ++---
 drivers/gpu/drm/i915/i915_gem.c            | 40 ++++++++++++----------
 drivers/gpu/drm/i915/i915_gem_context.c    |  2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  5 +--
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 46 ++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_gtt.h        | 54 ++++++++++++++++++------------
 drivers/gpu/drm/i915/i915_gem_stolen.c     |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c      |  4 +--
 8 files changed, 88 insertions(+), 73 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e6428feffc56..fcfa9ca6b50a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -125,7 +125,7 @@ static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj)
 	struct i915_vma *vma;
 
 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
-		if (vma->is_ggtt && drm_mm_node_allocated(&vma->node))
+		if (i915_vma_is_ggtt(vma) && drm_mm_node_allocated(&vma->node))
 			size += vma->node.size;
 	}
 
@@ -181,9 +181,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 			continue;
 
 		seq_printf(m, " (%sgtt offset: %08llx, size: %08llx",
-			   vma->is_ggtt ? "g" : "pp",
+			   i915_vma_is_ggtt(vma) ? "g" : "pp",
 			   vma->node.start, vma->node.size);
-		if (vma->is_ggtt)
+		if (i915_vma_is_ggtt(vma))
 			seq_printf(m, ", type: %u", vma->ggtt_view.type);
 		seq_puts(m, ")");
 	}
@@ -356,7 +356,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
 
-		if (vma->is_ggtt) {
+		if (i915_vma_is_ggtt(vma)) {
 			stats->global += vma->node.size;
 		} else {
 			struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vma->vm);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3b7b44e74301..eeea7dab70f1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2859,7 +2859,8 @@ int i915_vma_unbind(struct i915_vma *vma)
 	GEM_BUG_ON(obj->bind_count == 0);
 	GEM_BUG_ON(!obj->pages);
 
-	if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
+	if (i915_vma_is_ggtt(vma) &&
+	    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
 		i915_gem_object_finish_gtt(obj);
 
 		/* release the fence reg _after_ flushing */
@@ -2874,12 +2875,12 @@ int i915_vma_unbind(struct i915_vma *vma)
 		trace_i915_vma_unbind(vma);
 		vma->vm->unbind_vma(vma);
 	}
-	vma->bound = 0;
+	vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
 
 	drm_mm_remove_node(&vma->node);
 	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
 
-	if (vma->is_ggtt) {
+	if (i915_vma_is_ggtt(vma)) {
 		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
 			obj->map_and_fenceable = false;
 		} else if (vma->ggtt_view.pages) {
@@ -2902,7 +2903,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 	i915_gem_object_unpin_pages(obj);
 
 destroy:
-	if (unlikely(vma->closed))
+	if (unlikely(i915_vma_is_closed(vma)))
 		i915_vma_destroy(vma);
 
 	return 0;
@@ -2979,7 +2980,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	u64 min_alignment;
 	int ret;
 
-	GEM_BUG_ON(vma->bound);
+	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 
 	size = max(size, vma->size);
@@ -3698,13 +3699,14 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 int
 i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	unsigned int bound = vma->bound;
+	unsigned int bound;
 	int ret;
 
 	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
-	GEM_BUG_ON((flags & PIN_GLOBAL) && !vma->is_ggtt);
+	GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
 
-	if (WARN_ON(i915_vma_pin_count(vma) == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
+	bound = vma->flags;
+	if (WARN_ON((bound & I915_VMA_PIN_MASK) == I915_VMA_PIN_MASK))
 		return -EBUSY;
 
 	/* Pin early to prevent the shrinker/eviction logic from destroying
@@ -3712,7 +3714,7 @@ i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	 */
 	__i915_vma_pin(vma);
 
-	if (!bound) {
+	if ((bound & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)) == 0) {
 		ret = i915_vma_insert(vma, size, alignment, flags);
 		if (ret)
 			goto err;
@@ -3722,7 +3724,7 @@ i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	if (ret)
 		goto err;
 
-	if ((bound ^ vma->bound) & GLOBAL_BIND)
+	if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
 		__i915_vma_set_map_and_fenceable(vma);
 
 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
@@ -4025,9 +4027,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	 * unbound now.
 	 */
 	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
-		GEM_BUG_ON(!vma->is_ggtt);
+		GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 		GEM_BUG_ON(i915_vma_is_active(vma));
-		vma->pin_count = 0;
+		vma->flags &= ~I915_VMA_PIN_MASK;
 		i915_vma_close(vma);
 	}
 	GEM_BUG_ON(obj->bind_count);
@@ -4087,7 +4089,8 @@ struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
 	GEM_BUG_ON(!view);
 
 	list_for_each_entry(vma, &obj->vma_list, obj_link)
-		if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
+		if (i915_vma_is_ggtt(vma) &&
+		    i915_ggtt_view_equal(&vma->ggtt_view, view))
 			return vma;
 	return NULL;
 }
@@ -4577,7 +4580,7 @@ u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
 	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
 
 	list_for_each_entry(vma, &o->vma_list, obj_link) {
-		if (vma->is_ggtt &&
+		if (i915_vma_is_ggtt(vma) &&
 		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
 			continue;
 		if (vma->vm == vm)
@@ -4595,7 +4598,8 @@ u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
 	struct i915_vma *vma;
 
 	list_for_each_entry(vma, &o->vma_list, obj_link)
-		if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
+		if (i915_vma_is_ggtt(vma) &&
+		    i915_ggtt_view_equal(&vma->ggtt_view, view))
 			return vma->node.start;
 
 	WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
@@ -4608,7 +4612,7 @@ bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
 	struct i915_vma *vma;
 
 	list_for_each_entry(vma, &o->vma_list, obj_link) {
-		if (vma->is_ggtt &&
+		if (i915_vma_is_ggtt(vma) &&
 		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
 			continue;
 		if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
@@ -4624,7 +4628,7 @@ bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
 	struct i915_vma *vma;
 
 	list_for_each_entry(vma, &o->vma_list, obj_link)
-		if (vma->is_ggtt &&
+		if (i915_vma_is_ggtt(vma) &&
 		    i915_ggtt_view_equal(&vma->ggtt_view, view) &&
 		    drm_mm_node_allocated(&vma->node))
 			return true;
@@ -4639,7 +4643,7 @@ unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
 	GEM_BUG_ON(list_empty(&o->vma_list));
 
 	list_for_each_entry(vma, &o->vma_list, obj_link) {
-		if (vma->is_ggtt &&
+		if (i915_vma_is_ggtt(vma) &&
 		    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
 			return vma->node.size;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index eff6d3953ecd..dc7c0ae73b62 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -219,7 +219,7 @@ static void i915_ppgtt_close(struct i915_address_space *vm)
 		struct i915_vma *vma, *vn;
 
 		list_for_each_entry_safe(vma, vn, *phase, vm_link)
-			if (!vma->closed)
+			if (!i915_vma_is_closed(vma))
 				i915_vma_close(vma);
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a0759fe613f8..7755cf339298 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -717,7 +717,7 @@ need_reloc_mappable(struct i915_vma *vma)
 	if (entry->relocation_count == 0)
 		return false;
 
-	if (!vma->is_ggtt)
+	if (!i915_vma_is_ggtt(vma))
 		return false;
 
 	/* See also use_cpu_reloc() */
@@ -736,7 +736,8 @@ eb_vma_misplaced(struct i915_vma *vma)
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !vma->is_ggtt);
+	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
+		!i915_vma_is_ggtt(vma));
 
 	if (entry->alignment &&
 	    vma->node.start & (entry->alignment - 1))
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d93c7e04b21e..e140e5f2c26c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2654,7 +2654,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
 	 * upgrade to both bound if we bind either to avoid double-binding.
 	 */
-	vma->bound |= GLOBAL_BIND | LOCAL_BIND;
+	vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
 
 	return 0;
 }
@@ -2676,14 +2676,14 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 		pte_flags |= PTE_READ_ONLY;
 
 
-	if (flags & GLOBAL_BIND) {
+	if (flags & I915_VMA_GLOBAL_BIND) {
 		vma->vm->insert_entries(vma->vm,
 					vma->ggtt_view.pages,
 					vma->node.start,
 					cache_level, pte_flags);
 	}
 
-	if (flags & LOCAL_BIND) {
+	if (flags & I915_VMA_LOCAL_BIND) {
 		struct i915_hw_ppgtt *appgtt =
 			to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
 		appgtt->base.insert_entries(&appgtt->base,
@@ -2700,12 +2700,12 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
 	struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
 	const u64 size = min(vma->size, vma->node.size);
 
-	if (vma->bound & GLOBAL_BIND)
+	if (vma->flags & I915_VMA_GLOBAL_BIND)
 		vma->vm->clear_range(vma->vm,
 				     vma->node.start, size,
 				     true);
 
-	if (vma->bound & LOCAL_BIND && appgtt)
+	if (vma->flags & I915_VMA_LOCAL_BIND && appgtt)
 		appgtt->base.clear_range(&appgtt->base,
 					 vma->node.start, size,
 					 true);
@@ -3327,7 +3327,7 @@ i915_vma_retire(struct i915_gem_active *active,
 		return;
 
 	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
-	if (unlikely(vma->closed && !i915_vma_is_pinned(vma)))
+	if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma)))
 		WARN_ON(i915_vma_unbind(vma));
 }
 
@@ -3335,10 +3335,10 @@ void i915_vma_destroy(struct i915_vma *vma)
 {
 	GEM_BUG_ON(vma->node.allocated);
 	GEM_BUG_ON(i915_vma_is_active(vma));
-	GEM_BUG_ON(!vma->closed);
+	GEM_BUG_ON(!i915_vma_is_closed(vma));
 
 	list_del(&vma->vm_link);
-	if (!vma->is_ggtt)
+	if (!i915_vma_is_ggtt(vma))
 		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
 
 	kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
@@ -3346,8 +3346,8 @@ void i915_vma_destroy(struct i915_vma *vma)
 
 void i915_vma_close(struct i915_vma *vma)
 {
-	GEM_BUG_ON(vma->closed);
-	vma->closed = true;
+	GEM_BUG_ON(i915_vma_is_closed(vma));
+	vma->flags |= I915_VMA_CLOSED;
 
 	list_del_init(&vma->obj_link);
 	if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma))
@@ -3379,9 +3379,9 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
 	vma->vm = vm;
 	vma->obj = obj;
 	vma->size = obj->base.size;
-	vma->is_ggtt = i915_is_ggtt(vm);
 
 	if (i915_is_ggtt(vm)) {
+		vma->flags |= I915_VMA_GGTT;
 		vma->ggtt_view = *view;
 		if (view->type == I915_GGTT_VIEW_PARTIAL) {
 			vma->size = view->params.partial.size;
@@ -3425,7 +3425,7 @@ i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
 	if (!vma)
 		vma = __i915_gem_vma_create(obj, &ggtt->base, view);
 
-	GEM_BUG_ON(vma->closed);
+	GEM_BUG_ON(i915_vma_is_closed(vma));
 	return vma;
 
 }
@@ -3636,27 +3636,28 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 		  u32 flags)
 {
-	int ret;
 	u32 bind_flags;
+	u32 vma_flags;
+	int ret;
 
 	if (WARN_ON(flags == 0))
 		return -EINVAL;
 
 	bind_flags = 0;
 	if (flags & PIN_GLOBAL)
-		bind_flags |= GLOBAL_BIND;
+		bind_flags |= I915_VMA_GLOBAL_BIND;
 	if (flags & PIN_USER)
-		bind_flags |= LOCAL_BIND;
+		bind_flags |= I915_VMA_LOCAL_BIND;
 
+	vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
 	if (flags & PIN_UPDATE)
-		bind_flags |= vma->bound;
+		bind_flags |= vma_flags;
 	else
-		bind_flags &= ~vma->bound;
-
+		bind_flags &= ~vma_flags;
 	if (bind_flags == 0)
 		return 0;
 
-	if (vma->bound == 0 && vma->vm->allocate_va_range) {
+	if (vma_flags == 0 && vma->vm->allocate_va_range) {
 		trace_i915_va_alloc(vma);
 		ret = vma->vm->allocate_va_range(vma->vm,
 						 vma->node.start,
@@ -3669,8 +3670,7 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 	if (ret)
 		return ret;
 
-	vma->bound |= bind_flags;
-
+	vma->flags |= bind_flags;
 	return 0;
 }
 
@@ -3682,8 +3682,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	if (WARN_ON(!vma->obj->map_and_fenceable))
 		return IO_ERR_PTR(-ENODEV);
 
-	GEM_BUG_ON(!vma->is_ggtt);
-	GEM_BUG_ON((vma->bound & GLOBAL_BIND) == 0);
+	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+	GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0);
 
 	ptr = vma->iomap;
 	if (ptr == NULL) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 9ed134576b2d..0308c7401405 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -182,15 +182,27 @@ struct i915_vma {
 	void __iomem *iomap;
 	u64 size;
 
-	unsigned int active;
-	struct i915_gem_active last_read[I915_NUM_ENGINES];
+	unsigned int flags;
+	/**
+	 * How many users have pinned this object in GTT space. The following
+	 * users can each hold at most one reference: pwrite/pread, execbuffer
+	 * (objects are not allowed multiple times for the same batchbuffer),
+	 * and the framebuffer code. When switching/pageflipping, the
+	 * framebuffer code has at most two buffers pinned per crtc.
+	 *
+	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
+	 * bits with absolutely no headroom. So use 4 bits. */
+#define I915_VMA_PIN_MASK 0xf
 
 	/** Flags and address space this VMA is bound to */
-#define GLOBAL_BIND	(1<<0)
-#define LOCAL_BIND	(1<<1)
-	unsigned int bound : 4;
-	bool is_ggtt : 1;
-	bool closed : 1;
+#define I915_VMA_GLOBAL_BIND	BIT(5)
+#define I915_VMA_LOCAL_BIND	BIT(6)
+
+#define I915_VMA_GGTT	BIT(7)
+#define I915_VMA_CLOSED BIT(8)
+
+	unsigned int active;
+	struct i915_gem_active last_read[I915_NUM_ENGINES];
 
 	/**
 	 * Support different GGTT views into the same object.
@@ -215,20 +227,18 @@ struct i915_vma {
 	struct hlist_node exec_node;
 	unsigned long exec_handle;
 	struct drm_i915_gem_exec_object2 *exec_entry;
-
-	/**
-	 * How many users have pinned this object in GTT space. The following
-	 * users can each hold at most one reference: pwrite/pread, execbuffer
-	 * (objects are not allowed multiple times for the same batchbuffer),
-	 * and the framebuffer code. When switching/pageflipping, the
-	 * framebuffer code has at most two buffers pinned per crtc.
-	 *
-	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
-	 * bits with absolutely no headroom. So use 4 bits. */
-	unsigned int pin_count:4;
-#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
 };
 
+static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
+{
+	return vma->flags & I915_VMA_GGTT;
+}
+
+static inline bool i915_vma_is_closed(const struct i915_vma *vma)
+{
+	return vma->flags & I915_VMA_CLOSED;
+}
+
 static inline bool i915_vma_is_active(const struct i915_vma *vma)
 {
 	return vma->active;
@@ -622,7 +632,7 @@ i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
 
 static inline int i915_vma_pin_count(const struct i915_vma *vma)
 {
-	return vma->pin_count;
+	return vma->flags & I915_VMA_PIN_MASK;
 }
 
 static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
@@ -632,14 +642,14 @@ static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
 
 static inline void __i915_vma_pin(struct i915_vma *vma)
 {
-	vma->pin_count++;
+	vma->flags++;
 	GEM_BUG_ON(!i915_vma_is_pinned(vma));
 }
 
 static inline void __i915_vma_unpin(struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_pinned(vma));
-	vma->pin_count--;
+	vma->flags--;
 }
 
 static inline void i915_vma_unpin(struct i915_vma *vma)
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index bc91ffe614e2..13279610eeec 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -705,7 +705,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
 		goto err;
 	}
 
-	vma->bound |= GLOBAL_BIND;
+	vma->flags |= I915_VMA_GLOBAL_BIND;
 	__i915_vma_set_map_and_fenceable(vma);
 	list_move_tail(&vma->vm_link, &ggtt->base.inactive_list);
 	obj->bind_count++;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index d94eb907a23a..cc28ad429dd8 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -669,14 +669,14 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 	if (i915_is_ggtt(vm))
 		vma = i915_gem_obj_to_ggtt(src);
 	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
-		   vma && (vma->bound & GLOBAL_BIND) &&
+		   vma && (vma->flags & I915_VMA_GLOBAL_BIND) &&
 		   reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
 
 	/* Cannot access stolen address directly, try to use the aperture */
 	if (src->stolen) {
 		use_ggtt = true;
 
-		if (!(vma && vma->bound & GLOBAL_BIND))
+		if (!(vma && vma->flags & I915_VMA_GLOBAL_BIND))
 			goto unwind;
 
 		reloc_offset = i915_gem_obj_ggtt_offset(src);
-- 
2.8.1


* [PATCH 14/22] drm/i915: Make i915_vma_pin() small and inline
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (12 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 13/22] drm/i915: Combine all i915_vma bitfields into a single set of flags Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-28 11:06   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 15/22] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin() Chris Wilson
                   ` (9 subsequent siblings)
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

Not only is i915_vma_pin() called for every single object on every single
execbuf, it is also usually a simple increment, as the VMA is already
bound for execution by the GPU. Rearrange the tests for unbound and
pin_count overflow so that the increment-and-test is cheap and compact
enough to inline the operation into execbuf. The trick is to reserve a
bit for overflow inside the flags so that it can be checked at the same
time as the binding bits.
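
For illustration, here is a minimal userspace sketch of the trick. The
bit layout is an assumption simplified from the I915_VMA_* values below
(pin count in bits 0-3, overflow at bit 4, bind bits at bits 5-6) and
"want" carries only bind bits; this is not the driver code itself:

  #include <stdio.h>

  #define PIN_MASK	0xfu		/* pin count lives here */
  #define PIN_OVERFLOW	(1u << 4)
  #define BIND_GLOBAL	(1u << 5)
  #define BIND_LOCAL	(1u << 6)
  #define BIND_MASK	(BIND_GLOBAL | BIND_LOCAL | PIN_OVERFLOW)

  static int vma_pin_fast(unsigned int *flags, unsigned int want)
  {
  	/* One increment plus one masked XOR: zero iff the vma is
  	 * already bound exactly as requested and the bumped pin
  	 * count did not spill into the overflow bit.
  	 */
  	if (((++*flags ^ want) & BIND_MASK) == 0)
  		return 0;	/* fast path: pin taken */
  	return -1;		/* slow path inherits the raised count */
  }

  int main(void)
  {
  	unsigned int flags = BIND_GLOBAL;	/* bound, unpinned */

  	printf("%d\n", vma_pin_fast(&flags, BIND_GLOBAL));	/* 0 */
  	printf("%d\n", vma_pin_fast(&flags, BIND_LOCAL));	/* -1 */
  	return 0;
  }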

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c     | 18 +++++--------
 drivers/gpu/drm/i915/i915_gem_gtt.h | 53 ++++++++++++++++++++++++++-----------
 2 files changed, 44 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index eeea7dab70f1..a666f72121eb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3696,23 +3696,19 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 	obj->map_and_fenceable = mappable && fenceable;
 }
 
-int
-i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+int __i915_vma_do_pin(struct i915_vma *vma,
+		      u64 size, u64 alignment, u64 flags)
 {
-	unsigned int bound;
+	unsigned int bound = vma->flags;
 	int ret;
 
 	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
 	GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
 
-	bound = vma->flags;
-	if (WARN_ON((bound & I915_VMA_PIN_MASK) == I915_VMA_PIN_MASK))
-		return -EBUSY;
-
-	/* Pin early to prevent the shrinker/eviction logic from destroying
-	 * our vma as we insert and bind.
-	 */
-	__i915_vma_pin(vma);
+	if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
+		ret = -EBUSY;
+		goto err;
+	}
 
 	if ((bound & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)) == 0) {
 		ret = i915_vma_insert(vma, size, alignment, flags);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 0308c7401405..a7e05b4d7d2b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -193,13 +193,15 @@ struct i915_vma {
 	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
 	 * bits with absolutely no headroom. So use 4 bits. */
 #define I915_VMA_PIN_MASK 0xf
+#define I915_VMA_PIN_OVERFLOW	BIT(5)
 
 	/** Flags and address space this VMA is bound to */
-#define I915_VMA_GLOBAL_BIND	BIT(5)
-#define I915_VMA_LOCAL_BIND	BIT(6)
+#define I915_VMA_GLOBAL_BIND	BIT(6)
+#define I915_VMA_LOCAL_BIND	BIT(7)
+#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
 
-#define I915_VMA_GGTT	BIT(7)
-#define I915_VMA_CLOSED BIT(8)
+#define I915_VMA_GGTT	BIT(8)
+#define I915_VMA_CLOSED BIT(9)
 
 	unsigned int active;
 	struct i915_gem_active last_read[I915_NUM_ENGINES];
@@ -616,20 +618,39 @@ i915_ggtt_view_equal(const struct i915_ggtt_view *a,
 	return true;
 }
 
-int __must_check
-i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
 /* Flags used by pin/bind&friends. */
-#define PIN_MAPPABLE		BIT(0)
-#define PIN_NONBLOCK		BIT(1)
-#define PIN_GLOBAL		BIT(2)
-#define PIN_OFFSET_BIAS		BIT(3)
-#define PIN_USER		BIT(4)
-#define PIN_UPDATE		BIT(5)
-#define PIN_ZONE_4G		BIT(6)
-#define PIN_HIGH		BIT(7)
-#define PIN_OFFSET_FIXED	BIT(8)
+#define PIN_NONBLOCK		BIT(0)
+#define PIN_MAPPABLE		BIT(1)
+#define PIN_ZONE_4G		BIT(2)
+
+#define PIN_MBZ			BIT(5) /* I915_VMA_PIN_OVERFLOW */
+#define PIN_GLOBAL		BIT(6) /* I915_VMA_GLOBAL_BIND */
+#define PIN_USER		BIT(7) /* I915_VMA_LOCAL_BIND */
+#define PIN_UPDATE		BIT(8)
+
+#define PIN_HIGH		BIT(9)
+#define PIN_OFFSET_BIAS		BIT(10)
+#define PIN_OFFSET_FIXED	BIT(11)
 #define PIN_OFFSET_MASK		(~4095)
 
+int __i915_vma_do_pin(struct i915_vma *vma,
+		      u64 size, u64 alignment, u64 flags);
+static inline int __must_check
+i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+{
+	BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW);
+	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
+	BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
+
+	/* Pin early to prevent the shrinker/eviction logic from destroying
+	 * our vma as we insert and bind.
+	 */
+	if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0))
+		return 0;
+
+	return __i915_vma_do_pin(vma, size, alignment, flags);
+}
+
 static inline int i915_vma_pin_count(const struct i915_vma *vma)
 {
 	return vma->flags & I915_VMA_PIN_MASK;
@@ -643,7 +664,7 @@ static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
 static inline void __i915_vma_pin(struct i915_vma *vma)
 {
 	vma->flags++;
-	GEM_BUG_ON(!i915_vma_is_pinned(vma));
+	GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW);
 }
 
 static inline void __i915_vma_unpin(struct i915_vma *vma)
-- 
2.8.1


* [PATCH 15/22] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin()
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (13 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 14/22] drm/i915: Make i915_vma_pin() small and inline Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-28 10:38   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 16/22] drm/i915: Make fb_tracking.lock a spinlock Chris Wilson
                   ` (8 subsequent siblings)
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

Since i915_gem_obj_ggtt_pin() is an idiom-breaking curry function for
i915_gem_object_ggtt_pin(), spare us the confusion and remove it.
Removing it now simplifies the later patches that change the
i915_vma_pin() (and friends) interface.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h              |  9 ---------
 drivers/gpu/drm/i915/i915_gem.c              | 18 +++++++-----------
 drivers/gpu/drm/i915/i915_gem_context.c      |  5 ++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c |  2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c   |  4 ++--
 drivers/gpu/drm/i915/intel_guc_loader.c      |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c             |  8 +++++---
 drivers/gpu/drm/i915/intel_overlay.c         |  3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.c      | 15 ++++++++-------
 10 files changed, 29 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 490e337b65d4..3a68e604ad10 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3290,15 +3290,6 @@ static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
 unsigned long
 i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj);
 
-static inline int __must_check
-i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
-		      uint32_t alignment,
-		      unsigned flags)
-{
-	return i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal,
-					0, alignment, flags);
-}
-
 void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
 				     const struct i915_ggtt_view *view);
 static inline void
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a666f72121eb..1fb958dcc749 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -652,7 +652,7 @@ i915_gem_gtt_pread(struct drm_device *dev,
 	uint64_t offset;
 	int ret;
 
-	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
+	ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
 	if (ret) {
 		ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
 		if (ret)
@@ -946,7 +946,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 	if (obj->tiling_mode != I915_TILING_NONE)
 		return -EFAULT;
 
-	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
+	ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+				       PIN_MAPPABLE | PIN_NONBLOCK);
 	if (ret) {
 		ret = insert_mappable_node(i915, &node, PAGE_SIZE);
 		if (ret)
@@ -3710,7 +3711,7 @@ int __i915_vma_do_pin(struct i915_vma *vma,
 		goto err;
 	}
 
-	if ((bound & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)) == 0) {
+	if ((bound & I915_VMA_BIND_MASK) == 0) {
 		ret = i915_vma_insert(vma, size, alignment, flags);
 		if (ret)
 			goto err;
@@ -3741,7 +3742,8 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int ret;
 
-	BUG_ON(!view);
+	if (!view)
+		view = &i915_ggtt_view_normal;
 
 	vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
 	if (IS_ERR(vma))
@@ -3773,13 +3775,7 @@ void
 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
 				const struct i915_ggtt_view *view)
 {
-	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
-
-	GEM_BUG_ON(!vma);
-	WARN_ON(i915_vma_is_pinned(vma));
-	WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
-
-	__i915_vma_unpin(vma);
+	i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
 }
 
 int
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index dc7c0ae73b62..bb72af5320b0 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -763,9 +763,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 		return 0;
 
 	/* Trying to pin first makes error handling easier. */
-	ret = i915_gem_obj_ggtt_pin(to->engine[RCS].state,
-				    to->ggtt_alignment,
-				    0);
+	ret = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0,
+				       to->ggtt_alignment, 0);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7755cf339298..0b5b0020916b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1269,7 +1269,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
 	if (ret)
 		goto err;
 
-	ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0);
+	ret = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
 	if (ret)
 		goto err;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 90236672ac1e..57fd767a2d79 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -191,7 +191,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
 	if (IS_ERR(so.obj))
 		return PTR_ERR(so.obj);
 
-	ret = i915_gem_obj_ggtt_pin(so.obj, 4096, 0);
+	ret = i915_gem_object_ggtt_pin(so.obj, NULL, 0, 0, 0);
 	if (ret)
 		goto err_obj;
 
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 32d0e1890950..354d11dc3338 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -635,8 +635,8 @@ gem_allocate_guc_obj(struct drm_i915_private *dev_priv, u32 size)
 		return NULL;
 	}
 
-	if (i915_gem_obj_ggtt_pin(obj, PAGE_SIZE,
-			PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) {
+	if (i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
+				     PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) {
 		i915_gem_object_put(obj);
 		return NULL;
 	}
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index b883efd35e3f..3763e30cc165 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -323,7 +323,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
 		return ret;
 	}
 
-	ret = i915_gem_obj_ggtt_pin(guc_fw->guc_fw_obj, 0, 0);
+	ret = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0);
 	if (ret) {
 		DRM_DEBUG_DRIVER("pin failed %d\n", ret);
 		return ret;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cad64a772bdf..767b0828d77b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -799,8 +799,9 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
 	if (ce->pin_count++)
 		return 0;
 
-	ret = i915_gem_obj_ggtt_pin(ce->state, GEN8_LR_CONTEXT_ALIGN,
-				    PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
+	ret = i915_gem_object_ggtt_pin(ce->state, NULL,
+				       0, GEN8_LR_CONTEXT_ALIGN,
+				       PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
 	if (ret)
 		goto err;
 
@@ -1205,7 +1206,8 @@ static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
 		return ret;
 	}
 
-	ret = i915_gem_obj_ggtt_pin(engine->wa_ctx.obj, PAGE_SIZE, 0);
+	ret = i915_gem_object_ggtt_pin(engine->wa_ctx.obj, NULL,
+				       0, PAGE_SIZE, 0);
 	if (ret) {
 		DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
 				 ret);
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 2c598d63c794..217fefc49bf9 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -1401,7 +1401,8 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
 		}
 		overlay->flip_addr = reg_bo->phys_handle->busaddr;
 	} else {
-		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE);
+		ret = i915_gem_object_ggtt_pin(reg_bo, NULL,
+					       0, PAGE_SIZE, PIN_MAPPABLE);
 		if (ret) {
 			DRM_ERROR("failed to pin overlay register bo\n");
 			goto out_free_bo;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d0d9c0d82089..54ec2faa0bf2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -639,7 +639,7 @@ int intel_init_pipe_control(struct intel_engine_cs *engine, int size)
 		goto err;
 	}
 
-	ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_HIGH);
+	ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH);
 	if (ret)
 		goto err_unref;
 
@@ -1896,7 +1896,7 @@ static int init_status_page(struct intel_engine_cs *engine)
 			 * actually map it).
 			 */
 			flags |= PIN_MAPPABLE;
-		ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
+		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags);
 		if (ret) {
 err_unref:
 			i915_gem_object_put(obj);
@@ -1943,7 +1943,7 @@ int intel_ring_pin(struct intel_ring *ring)
 	int ret;
 
 	if (HAS_LLC(dev_priv) && !obj->stolen) {
-		ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, flags);
+		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags);
 		if (ret)
 			return ret;
 
@@ -1957,8 +1957,8 @@ int intel_ring_pin(struct intel_ring *ring)
 			goto err_unpin;
 		}
 	} else {
-		ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE,
-					    flags | PIN_MAPPABLE);
+		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
+					       flags | PIN_MAPPABLE);
 		if (ret)
 			return ret;
 
@@ -2090,7 +2090,8 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
 		return 0;
 
 	if (ce->state) {
-		ret = i915_gem_obj_ggtt_pin(ce->state, ctx->ggtt_alignment, 0);
+		ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0,
+					       ctx->ggtt_alignment, 0);
 		if (ret)
 			goto error;
 	}
@@ -2649,7 +2650,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
 			i915.semaphores = 0;
 		} else {
 			i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
-			ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK);
+			ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
 			if (ret != 0) {
 				i915_gem_object_put(obj);
 				DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
-- 
2.8.1


* [PATCH 16/22] drm/i915: Make fb_tracking.lock a spinlock
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (14 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 15/22] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin() Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-28 10:02   ` Daniel Vetter
  2016-07-28 10:19   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 17/22] drm/i915: Use atomics to manipulate obj->frontbuffer_bits Chris Wilson
                   ` (7 subsequent siblings)
  23 siblings, 2 replies; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

We need only a very lightweight mechanism here, as the locking is used
solely to coordinate a bitfield.

v2: Move the cheap unlikely tests into the caller
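
The v2 split looks roughly like this (a hedged sketch with invented
names, not the driver code; a pthread spinlock stands in for
spinlock_t):

  #include <pthread.h>

  struct fb_tracking {
  	pthread_spinlock_t lock;	/* stand-in for spinlock_t */
  	unsigned int busy_bits;
  };

  struct fb_obj {
  	unsigned int frontbuffer_bits;
  	struct fb_tracking *tracking;
  };

  /* Out of line: the critical section is a couple of word-sized
   * stores, far too short to justify sleeping on a mutex.
   */
  static void __fb_obj_flush(struct fb_obj *obj)
  {
  	pthread_spin_lock(&obj->tracking->lock);
  	obj->tracking->busy_bits &= ~obj->frontbuffer_bits;
  	pthread_spin_unlock(&obj->tracking->lock);
  }

  /* v2: the cheap, unlikely test stays in the inline caller, so the
   * common "not a frontbuffer" case pays for neither the call nor
   * the lock.
   */
  static inline void fb_obj_flush(struct fb_obj *obj)
  {
  	if (!obj->frontbuffer_bits)
  		return;
  	__fb_obj_flush(obj);
  }

  int main(void)
  {
  	struct fb_tracking t = { .busy_bits = 0x3 };
  	struct fb_obj o = { .frontbuffer_bits = 0x1, .tracking = &t };

  	pthread_spin_init(&t.lock, PTHREAD_PROCESS_PRIVATE);
  	fb_obj_flush(&o);	/* clears bit 0: busy_bits becomes 0x2 */
  	pthread_spin_destroy(&t.lock);
  	return 0;
  }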

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h          |  2 +-
 drivers/gpu/drm/i915/i915_gem.c          |  2 +-
 drivers/gpu/drm/i915/intel_drv.h         | 29 ++++++++++++++---
 drivers/gpu/drm/i915/intel_frontbuffer.c | 54 ++++++++++++++------------------
 4 files changed, 51 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3a68e604ad10..a24d31e3e014 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1669,7 +1669,7 @@ struct intel_pipe_crc {
 };
 
 struct i915_frontbuffer_tracking {
-	struct mutex lock;
+	spinlock_t lock;
 
 	/*
 	 * Tracking bits for delayed frontbuffer flushing due to gpu activity or
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1fb958dcc749..7db0808f6961 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4447,7 +4447,7 @@ i915_gem_load_init(struct drm_device *dev)
 
 	dev_priv->mm.interruptible = true;
 
-	mutex_init(&dev_priv->fb_tracking.lock);
+	spin_lock_init(&dev_priv->fb_tracking.lock);
 }
 
 void i915_gem_load_cleanup(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index e74d851868c5..01056ce8d461 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1135,8 +1135,6 @@ void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state);
 uint32_t ddi_signal_levels(struct intel_dp *intel_dp);
 
 /* intel_frontbuffer.c */
-void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
-			     enum fb_op_origin origin);
 void intel_frontbuffer_flip_prepare(struct drm_device *dev,
 				    unsigned frontbuffer_bits);
 void intel_frontbuffer_flip_complete(struct drm_device *dev,
@@ -1147,8 +1145,31 @@ unsigned int intel_fb_align_height(struct drm_device *dev,
 				   unsigned int height,
 				   uint32_t pixel_format,
 				   uint64_t fb_format_modifier);
-void intel_fb_obj_flush(struct drm_i915_gem_object *obj, bool retire,
-			enum fb_op_origin origin);
+
+void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
+			       enum fb_op_origin origin);
+static inline void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
+					   enum fb_op_origin origin)
+{
+	if (!obj->frontbuffer_bits)
+		return;
+
+	__intel_fb_obj_invalidate(obj, origin);
+}
+
+void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
+			  bool retire,
+			  enum fb_op_origin origin);
+static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
+				      bool retire,
+				      enum fb_op_origin origin)
+{
+	if (!obj->frontbuffer_bits)
+		return;
+
+	__intel_fb_obj_flush(obj, retire, origin);
+}
+
 u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv,
 			      uint64_t fb_modifier, uint32_t pixel_format);
 
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
index ac85357010b4..a38ccfe4894a 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
@@ -76,24 +76,19 @@
  * until the rendering completes or a flip on this frontbuffer plane is
  * scheduled.
  */
-void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
-			     enum fb_op_origin origin)
+void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
+			       enum fb_op_origin origin)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-	if (!obj->frontbuffer_bits)
-		return;
-
 	if (origin == ORIGIN_CS) {
-		mutex_lock(&dev_priv->fb_tracking.lock);
-		dev_priv->fb_tracking.busy_bits
-			|= obj->frontbuffer_bits;
-		dev_priv->fb_tracking.flip_bits
-			&= ~obj->frontbuffer_bits;
-		mutex_unlock(&dev_priv->fb_tracking.lock);
+		spin_lock(&dev_priv->fb_tracking.lock);
+		dev_priv->fb_tracking.busy_bits |= obj->frontbuffer_bits;
+		dev_priv->fb_tracking.flip_bits &= ~obj->frontbuffer_bits;
+		spin_unlock(&dev_priv->fb_tracking.lock);
 	}
 
 	intel_psr_invalidate(dev, obj->frontbuffer_bits);
@@ -120,11 +115,11 @@ static void intel_frontbuffer_flush(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
 	/* Delay flushing when rings are still busy.*/
-	mutex_lock(&dev_priv->fb_tracking.lock);
+	spin_lock(&dev_priv->fb_tracking.lock);
 	frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits;
-	mutex_unlock(&dev_priv->fb_tracking.lock);
+	spin_unlock(&dev_priv->fb_tracking.lock);
 
-	if (!frontbuffer_bits)
+	if (frontbuffer_bits == 0)
 		return;
 
 	intel_edp_drrs_flush(dev, frontbuffer_bits);
@@ -142,8 +137,9 @@ static void intel_frontbuffer_flush(struct drm_device *dev,
  * completed and frontbuffer caching can be started again. If @retire is true
  * then any delayed flushes will be unblocked.
  */
-void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
-			bool retire, enum fb_op_origin origin)
+void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
+			  bool retire,
+			  enum fb_op_origin origin)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -151,21 +147,18 @@ void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-	if (!obj->frontbuffer_bits)
-		return;
-
 	frontbuffer_bits = obj->frontbuffer_bits;
 
 	if (retire) {
-		mutex_lock(&dev_priv->fb_tracking.lock);
+		spin_lock(&dev_priv->fb_tracking.lock);
 		/* Filter out new bits since rendering started. */
 		frontbuffer_bits &= dev_priv->fb_tracking.busy_bits;
-
 		dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
-		mutex_unlock(&dev_priv->fb_tracking.lock);
+		spin_unlock(&dev_priv->fb_tracking.lock);
 	}
 
-	intel_frontbuffer_flush(dev, frontbuffer_bits, origin);
+	if (frontbuffer_bits)
+		intel_frontbuffer_flush(dev, frontbuffer_bits, origin);
 }
 
 /**
@@ -185,11 +178,11 @@ void intel_frontbuffer_flip_prepare(struct drm_device *dev,
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
-	mutex_lock(&dev_priv->fb_tracking.lock);
+	spin_lock(&dev_priv->fb_tracking.lock);
 	dev_priv->fb_tracking.flip_bits |= frontbuffer_bits;
 	/* Remove stale busy bits due to the old buffer. */
 	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
-	mutex_unlock(&dev_priv->fb_tracking.lock);
+	spin_unlock(&dev_priv->fb_tracking.lock);
 
 	intel_psr_single_frame_update(dev, frontbuffer_bits);
 }
@@ -209,13 +202,14 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev,
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
-	mutex_lock(&dev_priv->fb_tracking.lock);
+	spin_lock(&dev_priv->fb_tracking.lock);
 	/* Mask any cancelled flips. */
 	frontbuffer_bits &= dev_priv->fb_tracking.flip_bits;
 	dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits;
-	mutex_unlock(&dev_priv->fb_tracking.lock);
+	spin_unlock(&dev_priv->fb_tracking.lock);
 
-	intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
+	if (frontbuffer_bits)
+		intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
 }
 
 /**
@@ -234,10 +228,10 @@ void intel_frontbuffer_flip(struct drm_device *dev,
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
-	mutex_lock(&dev_priv->fb_tracking.lock);
+	spin_lock(&dev_priv->fb_tracking.lock);
 	/* Remove stale busy bits due to the old buffer. */
 	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
-	mutex_unlock(&dev_priv->fb_tracking.lock);
+	spin_unlock(&dev_priv->fb_tracking.lock);
 
 	intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
 }
-- 
2.8.1


* [PATCH 17/22] drm/i915: Use atomics to manipulate obj->frontbuffer_bits
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (15 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 16/22] drm/i915: Make fb_tracking.lock a spinlock Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-28  9:49   ` Joonas Lahtinen
  2016-07-28 10:05   ` Daniel Vetter
  2016-07-27 11:14 ` [PATCH 18/22] drm/i915: Use dev_priv consistently through the intel_frontbuffer interface Chris Wilson
                   ` (6 subsequent siblings)
  23 siblings, 2 replies; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

The individual bits inside obj->frontbuffer_bits are protected by each
plane->mutex, but the whole bitfield may be accessed by multiple KMS
operations simultaneously, so the read-modify-writes need to be atomic.
However, updating this single field does not need to be done under the
struct_mutex, which is one more step towards removing it as the de facto
BKL.
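
In C11 userspace terms the pattern is roughly the following (a sketch
with assumed names; atomic_fetch_and/atomic_fetch_or play the role of
the kernel's atomic_andnot/atomic_or):

  #include <stdatomic.h>
  #include <stdio.h>

  /* Each bit is owned by exactly one plane (plane->mutex in the
   * driver), so two writers never race on the same bit; but because
   * the bits share one word, the read-modify-write of the word
   * itself must be atomic.
   */
  static atomic_uint frontbuffer_bits;

  static void track_fb(unsigned int old_bits, unsigned int new_bits)
  {
  	if (old_bits)
  		atomic_fetch_and(&frontbuffer_bits, ~old_bits); /* andnot */
  	if (new_bits)
  		atomic_fetch_or(&frontbuffer_bits, new_bits);
  }

  int main(void)
  {
  	track_fb(0, 1u << 0);	/* plane A claims bit 0 */
  	track_fb(0, 1u << 4);	/* plane B claims bit 4 concurrently */
  	track_fb(1u << 0, 0);	/* plane A drops bit 0 */
  	printf("%#x\n", atomic_load(&frontbuffer_bits));	/* 0x10 */
  	return 0;
  }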

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  6 ++++--
 drivers/gpu/drm/i915/i915_drv.h          |  4 +---
 drivers/gpu/drm/i915/i915_gem.c          | 18 +++++++++++-------
 drivers/gpu/drm/i915/intel_display.c     | 18 ++++++------------
 drivers/gpu/drm/i915/intel_drv.h         | 20 ++++++++++++++------
 drivers/gpu/drm/i915/intel_frontbuffer.c | 23 +++++++++--------------
 6 files changed, 45 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index fcfa9ca6b50a..10a346237795 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -138,6 +138,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct intel_engine_cs *engine;
 	struct i915_vma *vma;
+	unsigned int frontbuffer_bits;
 	int pin_count = 0;
 	enum intel_engine_id id;
 
@@ -204,8 +205,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	if (engine)
 		seq_printf(m, " (%s)", engine->name);
 
-	if (obj->frontbuffer_bits)
-		seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
+	frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
+	if (frontbuffer_bits)
+		seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits);
 }
 
 static int i915_gem_object_list_info(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a24d31e3e014..b6b9a1f78238 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2127,8 +2127,6 @@ struct drm_i915_gem_object_ops {
  */
 #define INTEL_MAX_SPRITE_BITS_PER_PIPE 5
 #define INTEL_FRONTBUFFER_BITS_PER_PIPE 8
-#define INTEL_FRONTBUFFER_BITS \
-	(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES)
 #define INTEL_FRONTBUFFER_PRIMARY(pipe) \
 	(1 << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
 #define INTEL_FRONTBUFFER_CURSOR(pipe) \
@@ -2216,7 +2214,7 @@ struct drm_i915_gem_object {
 	unsigned int cache_level:3;
 	unsigned int cache_dirty:1;
 
-	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
+	atomic_t frontbuffer_bits;
 
 	unsigned int has_wc_mmap;
 	/** Count of VMA actually bound by this object */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7db0808f6961..bc5bc5ccdde0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4031,7 +4031,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->stolen)
 		i915_gem_object_unpin_pages(obj);
 
-	WARN_ON(obj->frontbuffer_bits);
+	WARN_ON(atomic_read(&obj->frontbuffer_bits));
 
 	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
@@ -4549,16 +4549,20 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 		       struct drm_i915_gem_object *new,
 		       unsigned frontbuffer_bits)
 {
+	/* Control of individual bits within the bitfield is guarded by
+	 * the owning plane->mutex, i.e. we can never see concurrent
+	 * manipulation of individual bits. But since the bitfield as a whole
+	 * is updated using RMW, we need to use atomics in order to update
+	 * the bits.
+	 */
 	if (old) {
-		WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
-		WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
-		old->frontbuffer_bits &= ~frontbuffer_bits;
+		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
+		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
 	}
 
 	if (new) {
-		WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
-		WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
-		new->frontbuffer_bits |= frontbuffer_bits;
+		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
+		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index ed2069c56036..1c70f68328b4 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2600,7 +2600,8 @@ valid_fb:
 	primary->fb = primary->state->fb = fb;
 	primary->crtc = primary->state->crtc = &intel_crtc->base;
 	intel_crtc->base.state->plane_mask |= (1 << drm_plane_index(primary));
-	obj->frontbuffer_bits |= to_intel_plane(primary)->frontbuffer_bit;
+	atomic_or(to_intel_plane(primary)->frontbuffer_bit,
+		  &obj->frontbuffer_bits);
 }
 
 static void i9xx_update_primary_plane(struct drm_plane *primary,
@@ -13807,19 +13808,12 @@ static void intel_atomic_track_fbs(struct drm_atomic_state *state)
 {
 	struct drm_plane_state *old_plane_state;
 	struct drm_plane *plane;
-	struct drm_i915_gem_object *obj, *old_obj;
-	struct intel_plane *intel_plane;
 	int i;
 
-	mutex_lock(&state->dev->struct_mutex);
-	for_each_plane_in_state(state, plane, old_plane_state, i) {
-		obj = intel_fb_obj(plane->state->fb);
-		old_obj = intel_fb_obj(old_plane_state->fb);
-		intel_plane = to_intel_plane(plane);
-
-		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
-	}
-	mutex_unlock(&state->dev->struct_mutex);
+	for_each_plane_in_state(state, plane, old_plane_state, i)
+		i915_gem_track_fb(intel_fb_obj(old_plane_state->fb),
+				  intel_fb_obj(plane->state->fb),
+				  to_intel_plane(plane)->frontbuffer_bit);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 01056ce8d461..5294039cf238 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1147,27 +1147,35 @@ unsigned int intel_fb_align_height(struct drm_device *dev,
 				   uint64_t fb_format_modifier);
 
 void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
-			       enum fb_op_origin origin);
+			       enum fb_op_origin origin,
+			       unsigned int frontbuffer_bits);
 static inline void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
 					   enum fb_op_origin origin)
 {
-	if (!obj->frontbuffer_bits)
+	unsigned int frontbuffer_bits;
+
+	frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
+	if (!frontbuffer_bits)
 		return;
 
-	__intel_fb_obj_invalidate(obj, origin);
+	__intel_fb_obj_invalidate(obj, origin, frontbuffer_bits);
 }
 
 void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
 			  bool retire,
-			  enum fb_op_origin origin);
+			  enum fb_op_origin origin,
+			  unsigned int frontbuffer_bits);
 static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
 				      bool retire,
 				      enum fb_op_origin origin)
 {
-	if (!obj->frontbuffer_bits)
+	unsigned int frontbuffer_bits;
+
+	frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
+	if (!frontbuffer_bits)
 		return;
 
-	__intel_fb_obj_flush(obj, retire, origin);
+	__intel_fb_obj_flush(obj, retire, origin, frontbuffer_bits);
 }
 
 u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
index a38ccfe4894a..636324da21c2 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
@@ -77,23 +77,22 @@
  * scheduled.
  */
 void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
-			       enum fb_op_origin origin)
+			       enum fb_op_origin origin,
+			       unsigned int frontbuffer_bits)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
-	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
-
 	if (origin == ORIGIN_CS) {
 		spin_lock(&dev_priv->fb_tracking.lock);
-		dev_priv->fb_tracking.busy_bits |= obj->frontbuffer_bits;
-		dev_priv->fb_tracking.flip_bits &= ~obj->frontbuffer_bits;
+		dev_priv->fb_tracking.busy_bits |= frontbuffer_bits;
+		dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits;
 		spin_unlock(&dev_priv->fb_tracking.lock);
 	}
 
-	intel_psr_invalidate(dev, obj->frontbuffer_bits);
-	intel_edp_drrs_invalidate(dev, obj->frontbuffer_bits);
-	intel_fbc_invalidate(dev_priv, obj->frontbuffer_bits, origin);
+	intel_psr_invalidate(dev, frontbuffer_bits);
+	intel_edp_drrs_invalidate(dev, frontbuffer_bits);
+	intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin);
 }
 
 /**
@@ -139,15 +138,11 @@ static void intel_frontbuffer_flush(struct drm_device *dev,
  */
 void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
 			  bool retire,
-			  enum fb_op_origin origin)
+			  enum fb_op_origin origin,
+			  unsigned int frontbuffer_bits)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	unsigned frontbuffer_bits;
-
-	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
-
-	frontbuffer_bits = obj->frontbuffer_bits;
 
 	if (retire) {
 		spin_lock(&dev_priv->fb_tracking.lock);
-- 
2.8.1


* [PATCH 18/22] drm/i915: Use dev_priv consistently through the intel_frontbuffer interface
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (16 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 17/22] drm/i915: Use atomics to manipulate obj->frontbuffer_bits Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-28  9:36   ` Joonas Lahtinen
  2016-07-28 10:06   ` Daniel Vetter
  2016-07-27 11:14 ` [PATCH 19/22] drm/i915: Move obj->active:5 to obj->flags Chris Wilson
                   ` (5 subsequent siblings)
  23 siblings, 2 replies; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

Rather than a mishmash of struct drm_device *dev and struct
drm_i915_private *dev_priv being used freely within a function, be
consistent and pass along only dev_priv.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_display.c     | 10 ++++----
 drivers/gpu/drm/i915/intel_dp.c          | 14 +++++-------
 drivers/gpu/drm/i915/intel_drv.h         | 21 +++++++++--------
 drivers/gpu/drm/i915/intel_frontbuffer.c | 39 ++++++++++++--------------------
 drivers/gpu/drm/i915/intel_overlay.c     |  3 +--
 drivers/gpu/drm/i915/intel_psr.c         | 26 +++++++++------------
 6 files changed, 49 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 1c70f68328b4..9f15ced6fc7c 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4565,12 +4565,11 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state)
 	struct drm_atomic_state *old_state = old_crtc_state->base.state;
 	struct intel_crtc_state *pipe_config =
 		to_intel_crtc_state(crtc->base.state);
-	struct drm_device *dev = crtc->base.dev;
 	struct drm_plane *primary = crtc->base.primary;
 	struct drm_plane_state *old_pri_state =
 		drm_atomic_get_existing_plane_state(old_state, primary);
 
-	intel_frontbuffer_flip(dev, pipe_config->fb_bits);
+	intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits);
 
 	crtc->wm.cxsr_allowed = true;
 
@@ -4693,7 +4692,7 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc, unsigned plane_mask
 	 * to compute the mask of flip planes precisely. For the time being
 	 * consider this a flip to a NULL plane.
 	 */
-	intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe));
+	intel_frontbuffer_flip(to_i915(dev), INTEL_FRONTBUFFER_ALL_MASK(pipe));
 }
 
 static void ironlake_crtc_enable(struct drm_crtc *crtc)
@@ -10959,7 +10958,8 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 
 	i915_gem_request_put(work->flip_queued_req);
 
-	intel_frontbuffer_flip_complete(dev, to_intel_plane(primary)->frontbuffer_bit);
+	intel_frontbuffer_flip_complete(to_i915(dev),
+					to_intel_plane(primary)->frontbuffer_bit);
 	intel_fbc_post_update(crtc);
 	drm_framebuffer_unreference(work->old_fb);
 
@@ -11734,7 +11734,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 			  to_intel_plane(primary)->frontbuffer_bit);
 	mutex_unlock(&dev->struct_mutex);
 
-	intel_frontbuffer_flip_prepare(dev,
+	intel_frontbuffer_flip_prepare(to_i915(dev),
 				       to_intel_plane(primary)->frontbuffer_bit);
 
 	trace_i915_flip_request(intel_crtc->plane, obj);
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 21b04c3eda41..2fd90d153fe7 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -5186,7 +5186,7 @@ unlock:
 
 /**
  * intel_edp_drrs_invalidate - Disable Idleness DRRS
- * @dev: DRM device
+ * @dev_priv: i915 device
  * @frontbuffer_bits: frontbuffer plane tracking bits
  *
  * This function gets called every time rendering on the given planes starts.
@@ -5194,10 +5194,9 @@ unlock:
  *
  * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits.
  */
-void intel_edp_drrs_invalidate(struct drm_device *dev,
-		unsigned frontbuffer_bits)
+void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv,
+			       unsigned frontbuffer_bits)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_crtc *crtc;
 	enum pipe pipe;
 
@@ -5229,7 +5228,7 @@ void intel_edp_drrs_invalidate(struct drm_device *dev,
 
 /**
  * intel_edp_drrs_flush - Restart Idleness DRRS
- * @dev: DRM device
+ * @dev_priv: i915 device
  * @frontbuffer_bits: frontbuffer plane tracking bits
  *
  * This function gets called every time rendering on the given planes has
@@ -5239,10 +5238,9 @@ void intel_edp_drrs_invalidate(struct drm_device *dev,
  *
  * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits.
  */
-void intel_edp_drrs_flush(struct drm_device *dev,
-		unsigned frontbuffer_bits)
+void intel_edp_drrs_flush(struct drm_i915_private *dev_priv,
+			  unsigned frontbuffer_bits)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_crtc *crtc;
 	enum pipe pipe;
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 5294039cf238..6f447d485db1 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1135,11 +1135,11 @@ void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state);
 uint32_t ddi_signal_levels(struct intel_dp *intel_dp);
 
 /* intel_frontbuffer.c */
-void intel_frontbuffer_flip_prepare(struct drm_device *dev,
+void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv,
 				    unsigned frontbuffer_bits);
-void intel_frontbuffer_flip_complete(struct drm_device *dev,
+void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv,
 				     unsigned frontbuffer_bits);
-void intel_frontbuffer_flip(struct drm_device *dev,
+void intel_frontbuffer_flip(struct drm_i915_private *dev_priv,
 			    unsigned frontbuffer_bits);
 unsigned int intel_fb_align_height(struct drm_device *dev,
 				   unsigned int height,
@@ -1413,11 +1413,12 @@ uint32_t intel_dp_pack_aux(const uint8_t *src, int src_bytes);
 void intel_plane_destroy(struct drm_plane *plane);
 void intel_edp_drrs_enable(struct intel_dp *intel_dp);
 void intel_edp_drrs_disable(struct intel_dp *intel_dp);
-void intel_edp_drrs_invalidate(struct drm_device *dev,
-		unsigned frontbuffer_bits);
-void intel_edp_drrs_flush(struct drm_device *dev, unsigned frontbuffer_bits);
+void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv,
+			       unsigned frontbuffer_bits);
+void intel_edp_drrs_flush(struct drm_i915_private *dev_priv,
+			  unsigned frontbuffer_bits);
 bool intel_digital_port_connected(struct drm_i915_private *dev_priv,
-					 struct intel_digital_port *port);
+				  struct intel_digital_port *port);
 
 void
 intel_dp_program_link_training_pattern(struct intel_dp *intel_dp,
@@ -1590,13 +1591,13 @@ static inline void intel_backlight_device_unregister(struct intel_connector *con
 /* intel_psr.c */
 void intel_psr_enable(struct intel_dp *intel_dp);
 void intel_psr_disable(struct intel_dp *intel_dp);
-void intel_psr_invalidate(struct drm_device *dev,
+void intel_psr_invalidate(struct drm_i915_private *dev_priv,
 			  unsigned frontbuffer_bits);
-void intel_psr_flush(struct drm_device *dev,
+void intel_psr_flush(struct drm_i915_private *dev_priv,
 		     unsigned frontbuffer_bits,
 		     enum fb_op_origin origin);
 void intel_psr_init(struct drm_device *dev);
-void intel_psr_single_frame_update(struct drm_device *dev,
+void intel_psr_single_frame_update(struct drm_i915_private *dev_priv,
 				   unsigned frontbuffer_bits);
 
 /* intel_runtime_pm.c */
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
index 636324da21c2..42f718bb584c 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
@@ -80,8 +80,7 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
 			       enum fb_op_origin origin,
 			       unsigned int frontbuffer_bits)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 
 	if (origin == ORIGIN_CS) {
 		spin_lock(&dev_priv->fb_tracking.lock);
@@ -90,8 +89,8 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
 		spin_unlock(&dev_priv->fb_tracking.lock);
 	}
 
-	intel_psr_invalidate(dev, frontbuffer_bits);
-	intel_edp_drrs_invalidate(dev, frontbuffer_bits);
+	intel_psr_invalidate(dev_priv, frontbuffer_bits);
+	intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits);
 	intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin);
 }
 
@@ -107,12 +106,10 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
  *
  * Can be called without any locks held.
  */
-static void intel_frontbuffer_flush(struct drm_device *dev,
+static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv,
 				    unsigned frontbuffer_bits,
 				    enum fb_op_origin origin)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
 	/* Delay flushing when rings are still busy.*/
 	spin_lock(&dev_priv->fb_tracking.lock);
 	frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits;
@@ -121,8 +118,8 @@ static void intel_frontbuffer_flush(struct drm_device *dev,
 	if (frontbuffer_bits == 0)
 		return;
 
-	intel_edp_drrs_flush(dev, frontbuffer_bits);
-	intel_psr_flush(dev, frontbuffer_bits, origin);
+	intel_edp_drrs_flush(dev_priv, frontbuffer_bits);
+	intel_psr_flush(dev_priv, frontbuffer_bits, origin);
 	intel_fbc_flush(dev_priv, frontbuffer_bits, origin);
 }
 
@@ -141,8 +138,7 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
 			  enum fb_op_origin origin,
 			  unsigned int frontbuffer_bits)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 
 	if (retire) {
 		spin_lock(&dev_priv->fb_tracking.lock);
@@ -153,7 +149,7 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
 	}
 
 	if (frontbuffer_bits)
-		intel_frontbuffer_flush(dev, frontbuffer_bits, origin);
+		intel_frontbuffer_flush(dev_priv, frontbuffer_bits, origin);
 }
 
 /**
@@ -168,18 +164,16 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
  *
  * Can be called without any locks held.
  */
-void intel_frontbuffer_flip_prepare(struct drm_device *dev,
+void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv,
 				    unsigned frontbuffer_bits)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
 	spin_lock(&dev_priv->fb_tracking.lock);
 	dev_priv->fb_tracking.flip_bits |= frontbuffer_bits;
 	/* Remove stale busy bits due to the old buffer. */
 	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
 	spin_unlock(&dev_priv->fb_tracking.lock);
 
-	intel_psr_single_frame_update(dev, frontbuffer_bits);
+	intel_psr_single_frame_update(dev_priv, frontbuffer_bits);
 }
 
 /**
@@ -192,11 +186,9 @@ void intel_frontbuffer_flip_prepare(struct drm_device *dev,
  *
  * Can be called without any locks held.
  */
-void intel_frontbuffer_flip_complete(struct drm_device *dev,
+void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv,
 				     unsigned frontbuffer_bits)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
 	spin_lock(&dev_priv->fb_tracking.lock);
 	/* Mask any cancelled flips. */
 	frontbuffer_bits &= dev_priv->fb_tracking.flip_bits;
@@ -204,7 +196,8 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev,
 	spin_unlock(&dev_priv->fb_tracking.lock);
 
 	if (frontbuffer_bits)
-		intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
+		intel_frontbuffer_flush(dev_priv,
+					frontbuffer_bits, ORIGIN_FLIP);
 }
 
 /**
@@ -218,15 +211,13 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev,
  *
  * Can be called without any locks held.
  */
-void intel_frontbuffer_flip(struct drm_device *dev,
+void intel_frontbuffer_flip(struct drm_i915_private *dev_priv,
 			    unsigned frontbuffer_bits)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
 	spin_lock(&dev_priv->fb_tracking.lock);
 	/* Remove stale busy bits due to the old buffer. */
 	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
 	spin_unlock(&dev_priv->fb_tracking.lock);
 
-	intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
+	intel_frontbuffer_flush(dev_priv, frontbuffer_bits, ORIGIN_FLIP);
 }
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 217fefc49bf9..ad08df49ed48 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -839,8 +839,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	overlay->old_vid_bo = overlay->vid_bo;
 	overlay->vid_bo = new_bo;
 
-	intel_frontbuffer_flip(&dev_priv->drm,
-			       INTEL_FRONTBUFFER_OVERLAY(pipe));
+	intel_frontbuffer_flip(dev_priv, INTEL_FRONTBUFFER_OVERLAY(pipe));
 
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
index 68bd0bb34817..adf2ce0f38c0 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/intel_psr.c
@@ -628,9 +628,8 @@ unlock:
 	mutex_unlock(&dev_priv->psr.lock);
 }
 
-static void intel_psr_exit(struct drm_device *dev)
+static void intel_psr_exit(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_dp *intel_dp = dev_priv->psr.enabled;
 	struct drm_crtc *crtc = dp_to_dig_port(intel_dp)->base.base.crtc;
 	enum pipe pipe = to_intel_crtc(crtc)->pipe;
@@ -639,7 +638,7 @@ static void intel_psr_exit(struct drm_device *dev)
 	if (!dev_priv->psr.active)
 		return;
 
-	if (HAS_DDI(dev)) {
+	if (HAS_DDI(dev_priv)) {
 		val = I915_READ(EDP_PSR_CTL);
 
 		WARN_ON(!(val & EDP_PSR_ENABLE));
@@ -674,7 +673,7 @@ static void intel_psr_exit(struct drm_device *dev)
 
 /**
  * intel_psr_single_frame_update - Single Frame Update
- * @dev: DRM device
+ * @dev_priv: i915 device
  * @frontbuffer_bits: frontbuffer plane tracking bits
  *
  * Some platforms support a single frame update feature that is used to
@@ -682,10 +681,9 @@ static void intel_psr_exit(struct drm_device *dev)
  * So far it is only implemented for Valleyview and Cherryview because
  * hardware requires this to be done before a page flip.
  */
-void intel_psr_single_frame_update(struct drm_device *dev,
+void intel_psr_single_frame_update(struct drm_i915_private *dev_priv,
 				   unsigned frontbuffer_bits)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_crtc *crtc;
 	enum pipe pipe;
 	u32 val;
@@ -694,7 +692,7 @@ void intel_psr_single_frame_update(struct drm_device *dev,
 	 * Single frame update is already supported on BDW+ but it requires
 	 * many W/A and it isn't really needed.
 	 */
-	if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev))
+	if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv))
 		return;
 
 	mutex_lock(&dev_priv->psr.lock);
@@ -720,7 +718,7 @@ void intel_psr_single_frame_update(struct drm_device *dev,
 
 /**
  * intel_psr_invalidate - Invalidate PSR
- * @dev: DRM device
+ * @dev_priv: i915 device
  * @frontbuffer_bits: frontbuffer plane tracking bits
  *
  * Since the hardware frontbuffer tracking has gaps we need to integrate
@@ -730,10 +728,9 @@ void intel_psr_single_frame_update(struct drm_device *dev,
  *
  * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits.
  */
-void intel_psr_invalidate(struct drm_device *dev,
+void intel_psr_invalidate(struct drm_i915_private *dev_priv,
 			  unsigned frontbuffer_bits)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_crtc *crtc;
 	enum pipe pipe;
 
@@ -750,14 +747,14 @@ void intel_psr_invalidate(struct drm_device *dev,
 	dev_priv->psr.busy_frontbuffer_bits |= frontbuffer_bits;
 
 	if (frontbuffer_bits)
-		intel_psr_exit(dev);
+		intel_psr_exit(dev_priv);
 
 	mutex_unlock(&dev_priv->psr.lock);
 }
 
 /**
  * intel_psr_flush - Flush PSR
- * @dev: DRM device
+ * @dev_priv: i915 device
  * @frontbuffer_bits: frontbuffer plane tracking bits
  * @origin: which operation caused the flush
  *
@@ -768,10 +765,9 @@ void intel_psr_invalidate(struct drm_device *dev,
  *
  * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits.
  */
-void intel_psr_flush(struct drm_device *dev,
+void intel_psr_flush(struct drm_i915_private *dev_priv,
 		     unsigned frontbuffer_bits, enum fb_op_origin origin)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_crtc *crtc;
 	enum pipe pipe;
 
@@ -789,7 +785,7 @@ void intel_psr_flush(struct drm_device *dev,
 
 	/* By definition flush = invalidate + flush */
 	if (frontbuffer_bits)
-		intel_psr_exit(dev);
+		intel_psr_exit(dev_priv);
 
 	if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits)
 		if (!work_busy(&dev_priv->psr.work.work))
-- 
2.8.1


* [PATCH 19/22] drm/i915: Move obj->active:5 to obj->flags
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (17 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 18/22] drm/i915: Use dev_priv consistently through the intel_frontbuffer interface Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-29  7:40   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 20/22] drm/i915: Move i915_gem_object_wait_rendering() Chris Wilson
                   ` (4 subsequent siblings)
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

We are motivated to avoid using a bitfield for obj->active for a couple
of reasons. Firstly, we wish to document our lockless read of obj->active
using READ_ONCE inside i915_gem_busy_ioctl(), and that requires an
integral type (i.e. not a bitfield). Secondly, gcc produces abysmal code
when presented with a bitfield, which shows up high on the profiles of
request tracking (mainly due to excess memory traffic as it converts the
bitfield to a register and back, generating frequent address generation
interlocks (AGIs) in the process).
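
A simplified sketch of the replacement layout (assumed names; the
volatile cast stands in for READ_ONCE):

  #include <stdio.h>

  #define NUM_ENGINES	5
  #define ACTIVE_SHIFT	0
  #define ACTIVE_MASK	((1ul << NUM_ENGINES) - 1)

  struct obj {
  	unsigned long flags;	/* active bits live in the low bits */
  };

  /* Lockless snapshot of all engine-active bits: a single plain word
   * load, which gcc cannot mangle the way it does a :5 bitfield.
   */
  static unsigned long obj_active(const struct obj *obj)
  {
  	return (*(const volatile unsigned long *)&obj->flags
  		>> ACTIVE_SHIFT) & ACTIVE_MASK;
  }

  static void obj_set_active(struct obj *obj, int engine)
  {
  	obj->flags |= 1ul << (engine + ACTIVE_SHIFT);
  }

  static void obj_unset_active(struct obj *obj, int engine)
  {
  	obj->flags &= ~(1ul << (engine + ACTIVE_SHIFT));
  }

  int main(void)
  {
  	struct obj o = { 0 };

  	obj_set_active(&o, 2);
  	printf("%#lx\n", obj_active(&o));	/* 0x4 */
  	obj_unset_active(&o, 2);
  	printf("%#lx\n", obj_active(&o));	/* 0 */
  	return 0;
  }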

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  2 +-
 drivers/gpu/drm/i915/i915_drv.h            | 31 +++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem.c            | 16 +++++++--------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +++++-----
 drivers/gpu/drm/i915/i915_gem_shrinker.c   |  5 +++--
 drivers/gpu/drm/i915/i915_gem_userptr.c    |  2 +-
 6 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 10a346237795..920a2de95cd5 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -91,7 +91,7 @@ static int i915_capabilities(struct seq_file *m, void *data)
 
 static char get_active_flag(struct drm_i915_gem_object *obj)
 {
-	return obj->active ? '*' : ' ';
+	return i915_gem_object_is_active(obj) ? '*' : ' ';
 }
 
 static char get_pin_flag(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b6b9a1f78238..309000af003d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2155,12 +2155,16 @@ struct drm_i915_gem_object {
 
 	struct list_head batch_pool_link;
 
+	unsigned long flags;
 	/**
 	 * This is set if the object is on the active lists (has pending
 	 * rendering and so a non-zero seqno), and is not set if it is on the
 	 * inactive (ready to be unbound) list.
 	 */
-	unsigned int active:I915_NUM_ENGINES;
+#define I915_BO_ACTIVE_SHIFT 0
+#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1)
+#define __I915_BO_ACTIVE(bo) \
+	((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
 
 	/**
 	 * This is set if the object has been written to since last bound
@@ -2325,6 +2329,31 @@ i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
 	return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
 }
 
+static inline unsigned long
+i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
+{
+	return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK;
+}
+
+static inline void
+i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
+{
+	obj->flags |= 1 << (engine + I915_BO_ACTIVE_SHIFT);
+}
+
+static inline void
+i915_gem_object_unset_active(struct drm_i915_gem_object *obj, int engine)
+{
+	obj->flags &= ~(1 << (engine + I915_BO_ACTIVE_SHIFT));
+}
+
+static inline bool
+i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
+				  int engine)
+{
+	return obj->flags & (1 << (engine + I915_BO_ACTIVE_SHIFT));
+}
+
 /*
  * Optimised SGL iterator for GEM objects
  */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bc5bc5ccdde0..ca9741525bf4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1354,7 +1354,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 
 	if (!readonly) {
 		active = obj->last_read;
-		active_mask = obj->active;
+		active_mask = i915_gem_object_is_active(obj);
 	} else {
 		active_mask = 1;
 		active = &obj->last_write;
@@ -1398,7 +1398,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 	BUG_ON(!dev_priv->mm.interruptible);
 
-	active_mask = obj->active;
+	active_mask = i915_gem_object_is_active(obj);
 	if (!active_mask)
 		return 0;
 
@@ -2362,10 +2362,10 @@ i915_gem_object_retire__read(struct i915_gem_active *active,
 	struct drm_i915_gem_object *obj =
 		container_of(active, struct drm_i915_gem_object, last_read[idx]);
 
-	GEM_BUG_ON((obj->active & (1 << idx)) == 0);
+	GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx));
 
-	obj->active &= ~(1 << idx);
-	if (obj->active)
+	i915_gem_object_unset_active(obj, idx);
+	if (i915_gem_object_is_active(obj))
 		return;
 
 	/* Bump our place on the bound list to keep it roughly in LRU order
@@ -2669,7 +2669,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		return -ENOENT;
 	}
 
-	if (!obj->active)
+	if (!i915_gem_object_is_active(obj))
 		goto out;
 
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -2757,7 +2757,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-	active_mask = obj->active;
+	active_mask = i915_gem_object_is_active(obj);
 	if (!active_mask)
 		return 0;
 
@@ -3801,7 +3801,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	 * become non-busy without any further actions.
 	 */
 	args->busy = 0;
-	if (obj->active) {
+	if (i915_gem_object_is_active(obj)) {
 		struct drm_i915_gem_request *req;
 		int i;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0b5b0020916b..0d28703d991a 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -433,7 +433,7 @@ relocate_entry_clflush(struct drm_i915_gem_object *obj,
 
 static bool object_is_idle(struct drm_i915_gem_object *obj)
 {
-	unsigned long active = obj->active;
+	unsigned long active = i915_gem_object_is_active(obj);
 	int idx;
 
 	for_each_active(active, idx) {
@@ -993,7 +993,7 @@ static int
 i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 				struct list_head *vmas)
 {
-	const unsigned other_rings = ~intel_engine_flag(req->engine);
+	const unsigned int other_rings = (~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK) << I915_BO_ACTIVE_SHIFT;
 	struct i915_vma *vma;
 	uint32_t flush_domains = 0;
 	bool flush_chipset = false;
@@ -1002,7 +1002,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
-		if (obj->active & other_rings) {
+		if (obj->flags & other_rings) {
 			ret = i915_gem_object_sync(obj, req);
 			if (ret)
 				return ret;
@@ -1164,9 +1164,9 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (obj->active == 0)
+	if (!i915_gem_object_is_active(obj))
 		i915_gem_object_get(obj);
-	obj->active |= 1 << idx;
+	i915_gem_object_set_active(obj, idx);
 	i915_gem_active_set(&obj->last_read[idx], req);
 
 	if (flags & EXEC_OBJECT_WRITE) {
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 673999acc1f9..33f8dcb9b8c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -168,7 +168,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 			    !is_vmalloc_addr(obj->mapping))
 				continue;
 
-			if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active)
+			if ((flags & I915_SHRINK_ACTIVE) == 0 &&
+			    i915_gem_object_is_active(obj))
 				continue;
 
 			if (!can_release_pages(obj))
@@ -253,7 +254,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 			count += obj->base.size >> PAGE_SHIFT;
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		if (!obj->active && can_release_pages(obj))
+		if (!i915_gem_object_is_active(obj) && can_release_pages(obj))
 			count += obj->base.size >> PAGE_SHIFT;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 651a84ba840c..53f64fcc89ef 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -67,7 +67,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj)
 	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
 	int i, n;
 
-	if (!obj->active)
+	if (!i915_gem_object_is_active(obj))
 		return;
 
 	n = 0;
-- 
2.8.1


* [PATCH 20/22] drm/i915: Move i915_gem_object_wait_rendering()
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (18 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 19/22] drm/i915: Move obj->active:5 to obj->flags Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-28  9:37   ` Joonas Lahtinen
  2016-07-27 11:14 ` [PATCH 21/22] drm/i915: Enable lockless lookup of request tracking via RCU Chris Wilson
                   ` (3 subsequent siblings)
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx

Just move it earlier so that we can use the companion nonblocking
version in a couple more callsites without having to add a forward
declaration.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 202 ++++++++++++++++++++--------------------
 1 file changed, 101 insertions(+), 101 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ca9741525bf4..54d8a3863d11 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -301,6 +301,107 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	return ret;
 }
 
+/**
+ * Ensures that all rendering to the object has completed and the object is
+ * safe to unbind from the GTT or access from the CPU.
+ * @obj: i915 gem object
+ * @readonly: waiting for read access or write
+ */
+int
+i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
+			       bool readonly)
+{
+	struct reservation_object *resv;
+	struct i915_gem_active *active;
+	unsigned long active_mask;
+	int idx;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	if (!readonly) {
+		active = obj->last_read;
+		active_mask = i915_gem_object_is_active(obj);
+	} else {
+		active_mask = 1;
+		active = &obj->last_write;
+	}
+
+	for_each_active(active_mask, idx) {
+		int ret;
+
+		ret = i915_gem_active_wait(&active[idx],
+					   &obj->base.dev->struct_mutex);
+		if (ret)
+			return ret;
+	}
+
+	resv = i915_gem_object_get_dmabuf_resv(obj);
+	if (resv) {
+		long err;
+
+		err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
+							  MAX_SCHEDULE_TIMEOUT);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+/* A nonblocking variant of the above wait. This is a highly dangerous routine
+ * as the object state may change during this call.
+ */
+static __must_check int
+i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
+					    struct intel_rps_client *rps,
+					    bool readonly)
+{
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
+	struct i915_gem_active *active;
+	unsigned long active_mask;
+	int ret, i, n = 0;
+
+	lockdep_assert_held(&dev->struct_mutex);
+	GEM_BUG_ON(!to_i915(dev)->mm.interruptible);
+
+	active_mask = i915_gem_object_is_active(obj);
+	if (!active_mask)
+		return 0;
+
+	if (!readonly) {
+		active = obj->last_read;
+	} else {
+		active_mask = 1;
+		active = &obj->last_write;
+	}
+
+	for_each_active(active_mask, i) {
+		struct drm_i915_gem_request *req;
+
+		req = i915_gem_active_get(&active[i],
+					  &obj->base.dev->struct_mutex);
+		if (req)
+			requests[n++] = req;
+	}
+
+	mutex_unlock(&dev->struct_mutex);
+	ret = 0;
+	for (i = 0; ret == 0 && i < n; i++)
+		ret = i915_wait_request(requests[i], true, NULL, rps);
+	mutex_lock(&dev->struct_mutex);
+
+	for (i = 0; i < n; i++)
+		i915_gem_request_put(requests[i]);
+
+	return ret;
+}
+
+static struct intel_rps_client *to_rps_client(struct drm_file *file)
+{
+	struct drm_i915_file_private *fpriv = file->driver_priv;
+
+	return &fpriv->rps;
+}
+
 int
 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 			    int align)
@@ -1335,107 +1436,6 @@ put_rpm:
 	return ret;
 }
 
-/**
- * Ensures that all rendering to the object has completed and the object is
- * safe to unbind from the GTT or access from the CPU.
- * @obj: i915 gem object
- * @readonly: waiting for read access or write
- */
-int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
-			       bool readonly)
-{
-	struct reservation_object *resv;
-	struct i915_gem_active *active;
-	unsigned long active_mask;
-	int idx, ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	if (!readonly) {
-		active = obj->last_read;
-		active_mask = i915_gem_object_is_active(obj);
-	} else {
-		active_mask = 1;
-		active = &obj->last_write;
-	}
-
-	for_each_active(active_mask, idx) {
-		ret = i915_gem_active_wait(&active[idx],
-					   &obj->base.dev->struct_mutex);
-		if (ret)
-			return ret;
-	}
-
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (resv) {
-		long err;
-
-		err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
-							  MAX_SCHEDULE_TIMEOUT);
-		if (err < 0)
-			return err;
-	}
-
-	return 0;
-}
-
-/* A nonblocking variant of the above wait. This is a highly dangerous routine
- * as the object state may change during this call.
- */
-static __must_check int
-i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
-					    struct intel_rps_client *rps,
-					    bool readonly)
-{
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
-	struct i915_gem_active *active;
-	unsigned long active_mask;
-	int ret, i, n = 0;
-
-	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(!dev_priv->mm.interruptible);
-
-	active_mask = i915_gem_object_is_active(obj);
-	if (!active_mask)
-		return 0;
-
-	if (!readonly) {
-		active = obj->last_read;
-	} else {
-		active_mask = 1;
-		active = &obj->last_write;
-	}
-
-	for_each_active(active_mask, i) {
-		struct drm_i915_gem_request *req;
-
-		req = i915_gem_active_get(&active[i],
-					  &obj->base.dev->struct_mutex);
-		if (req)
-			requests[n++] = req;
-	}
-
-	mutex_unlock(&dev->struct_mutex);
-	ret = 0;
-	for (i = 0; ret == 0 && i < n; i++)
-		ret = i915_wait_request(requests[i], true, NULL, rps);
-	mutex_lock(&dev->struct_mutex);
-
-	for (i = 0; i < n; i++)
-		i915_gem_request_put(requests[i]);
-
-	return ret;
-}
-
-static struct intel_rps_client *to_rps_client(struct drm_file *file)
-{
-	struct drm_i915_file_private *fpriv = file->driver_priv;
-	return &fpriv->rps;
-}
-
 static enum fb_op_origin
 write_origin(struct drm_i915_gem_object *obj, unsigned domain)
 {
-- 
2.8.1


* [PATCH 21/22] drm/i915: Enable lockless lookup of request tracking via RCU
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (19 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 20/22] drm/i915: Move i915_gem_object_wait_rendering() Chris Wilson
@ 2016-07-27 11:14 ` Chris Wilson
  2016-07-28 10:23   ` Daniel Vetter
  2016-07-27 11:15 ` [PATCH 22/22] drm/i915: Export our request as a dma-buf fence on the reservation object Chris Wilson
                   ` (2 subsequent siblings)
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:14 UTC (permalink / raw)
  To: intel-gfx; +Cc: Goel, Akash, Josh Triplett

If we enable RCU for the requests (providing a grace period where we can
inspect a "dead" request before it is freed), we can allow callers to
carefully perform lockless lookup of an active request.

However, by enabling deferred freeing of requests, we can potentially
hog a lot of memory when dealing with tens of thousands of requests per
second - with a quick insertion of a synchronize_rcu() inside our
shrinker callback, that issue disappears.

v2: Currently, it is our responsibility to handle reclaim i.e. to avoid
hogging memory with the delayed slab frees. At the moment, we wait for a
grace period in the shrinker, and block for all RCU callbacks on oom.
Suggested alternatives focus on flushing our RCU callback when we have a
certain number of outstanding request frees, and blocking on that flush
after a second high watermark. (So rather than wait for the system to
run out of memory, we stop issuing requests - both are nondeterministic.)

Paul E. McKenney wrote:

Another approach is synchronize_rcu() after some largish number of
requests.  The advantage of this approach is that it throttles the
production of callbacks at the source.  The corresponding disadvantage
is that it slows things up.

Another approach is to use call_rcu(), but if the previous call_rcu()
is still in flight, block waiting for it.  Yet another approach is
the get_state_synchronize_rcu() / cond_synchronize_rcu() pair.  The
idea is to do something like this:

        cond_synchronize_rcu(cookie);
        cookie = get_state_synchronize_rcu();

You would of course do an initial get_state_synchronize_rcu() to
get things going.  This would not block unless there was less than
one grace period's worth of time between invocations.  But this
assumes a busy system, where there is almost always a grace period
in flight.  But you can make that happen as follows:

        cond_synchronize_rcu(cookie);
        cookie = get_state_synchronize_rcu();
        call_rcu(&my_rcu_head, noop_function);

Note that you need additional code to make sure that the old callback
has completed before doing a new one.  Setting and clearing a flag
with appropriate memory ordering control suffices (e.g,. smp_load_acquire()
and smp_store_release()).
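
A rough sketch of that last scheme (hypothetical names, untested) could
be:

	#include <linux/atomic.h>	/* smp_load_acquire/smp_store_release */
	#include <linux/rcupdate.h>	/* call_rcu, cond_synchronize_rcu */

	static struct rcu_head my_rcu_head;
	static int callback_done = 1;
	static unsigned long gp_cookie;

	static void noop_function(struct rcu_head *head)
	{
		/* Publish completion so my_rcu_head may be reused. */
		smp_store_release(&callback_done, 1);
	}

	static void throttle_request_frees(void)
	{
		/* Only blocks if a full grace period has not already
		 * elapsed since the previous invocation.
		 */
		cond_synchronize_rcu(gp_cookie);
		gp_cookie = get_state_synchronize_rcu();

		/* Keep a grace period in flight, reusing the rcu_head
		 * only once the previous callback has completed.
		 */
		if (smp_load_acquire(&callback_done)) {
			callback_done = 0;
			call_rcu(&my_rcu_head, noop_function);
		}
	}

For now we stick with waiting for the grace period in the shrinker, and
blocking on the outstanding callbacks on oom, as described above.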

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: "Goel, Akash" <akash.goel@intel.com>
Cc: Josh Triplett <josh@joshtriplett.org>
---
 drivers/gpu/drm/i915/i915_gem.c          |   7 +-
 drivers/gpu/drm/i915/i915_gem_request.c  |   2 +-
 drivers/gpu/drm/i915/i915_gem_request.h  | 114 +++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  15 ++--
 4 files changed, 126 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 54d8a3863d11..0c546f8099d9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4421,7 +4421,9 @@ i915_gem_load_init(struct drm_device *dev)
 	dev_priv->requests =
 		kmem_cache_create("i915_gem_request",
 				  sizeof(struct drm_i915_gem_request), 0,
-				  SLAB_HWCACHE_ALIGN,
+				  SLAB_HWCACHE_ALIGN |
+				  SLAB_RECLAIM_ACCOUNT |
+				  SLAB_DESTROY_BY_RCU,
 				  NULL);
 
 	INIT_LIST_HEAD(&dev_priv->context_list);
@@ -4457,6 +4459,9 @@ void i915_gem_load_cleanup(struct drm_device *dev)
 	kmem_cache_destroy(dev_priv->requests);
 	kmem_cache_destroy(dev_priv->vmas);
 	kmem_cache_destroy(dev_priv->objects);
+
+	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
+	rcu_barrier();
 }
 
 int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 3395c955a532..bcc1369c0693 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -190,7 +190,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 		prefetchw(next);
 
 		INIT_LIST_HEAD(&active->link);
-		active->__request = NULL;
+		RCU_INIT_POINTER(active->__request, NULL);
 
 		active->retire(active, request);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 2eec0cac1e9f..bb03f4440b0f 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -183,6 +183,12 @@ i915_gem_request_get(struct drm_i915_gem_request *req)
 	return to_request(fence_get(&req->fence));
 }
 
+static inline struct drm_i915_gem_request *
+i915_gem_request_get_rcu(struct drm_i915_gem_request *req)
+{
+	return to_request(fence_get_rcu(&req->fence));
+}
+
 static inline void
 i915_gem_request_put(struct drm_i915_gem_request *req)
 {
@@ -286,7 +292,7 @@ typedef void (*i915_gem_retire_fn)(struct i915_gem_active *,
 				   struct drm_i915_gem_request *);
 
 struct i915_gem_active {
-	struct drm_i915_gem_request *__request;
+	struct drm_i915_gem_request __rcu *__request;
 	struct list_head link;
 	i915_gem_retire_fn retire;
 };
@@ -323,13 +329,19 @@ i915_gem_active_set(struct i915_gem_active *active,
 		    struct drm_i915_gem_request *request)
 {
 	list_move(&active->link, &request->active_list);
-	active->__request = request;
+	rcu_assign_pointer(active->__request, request);
 }
 
 static inline struct drm_i915_gem_request *
 __i915_gem_active_peek(const struct i915_gem_active *active)
 {
-	return active->__request;
+	/* Inside the error capture (running with the driver in an unknown
+	 * state), we want to bend the rules slightly (a lot).
+	 *
+	 * Work is in progress to make it safer, in the meantime this keeps
+	 * the known issue from spamming the logs.
+	 */
+	return rcu_dereference_protected(active->__request, 1);
 }
 
 /**
@@ -345,7 +357,29 @@ i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
 {
 	struct drm_i915_gem_request *request;
 
-	request = active->__request;
+	request = rcu_dereference_protected(active->__request,
+					    lockdep_is_held(mutex));
+	if (!request || i915_gem_request_completed(request))
+		return NULL;
+
+	return request;
+}
+
+/**
+ * i915_gem_active_peek_rcu - report the active request being monitored
+ * @active - the active tracker
+ *
+ * i915_gem_active_peek_rcu() returns the current request being tracked if
+ * still active, or NULL. It does not obtain a reference on the request
+ * for the caller, and inspection of the request is only valid under
+ * the RCU lock.
+ */
+static inline struct drm_i915_gem_request *
+i915_gem_active_peek_rcu(const struct i915_gem_active *active)
+{
+	struct drm_i915_gem_request *request;
+
+	request = rcu_dereference(active->__request);
 	if (!request || i915_gem_request_completed(request))
 		return NULL;
 
@@ -366,6 +400,72 @@ i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex)
 }
 
 /**
+ * i915_gem_active_get_rcu - return a reference to the active request
+ * @active - the active tracker
+ *
+ * i915_gem_active_get_rcu() returns a reference to the active request, or NULL
+ * if the active tracker is idle. The caller must hold the RCU read lock.
+ */
+static inline struct drm_i915_gem_request *
+i915_gem_active_get_rcu(const struct i915_gem_active *active)
+{
+	/* Performing a lockless retrieval of the active request is super
+	 * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing
+	 * slab of request objects will not be freed whilst we hold the
+	 * RCU read lock. It does not guarantee that the request itself
+	 * will not be freed and then *reused*. Viz,
+	 *
+	 * Thread A			Thread B
+	 *
+	 * req = active.request
+	 *				retire(req) -> free(req);
+	 *				(req is now first on the slab freelist)
+	 *				active.request = NULL
+	 *
+	 *				req = new submission on a new object
+	 * ref(req)
+	 *
+	 * To prevent the request from being reused whilst the caller
+	 * uses it, we take a reference like normal. Whilst acquiring
+	 * the reference we check that it is not in a destroyed state
+	 * (refcnt == 0). That prevents the request being reallocated
+	 * whilst the caller holds on to it. To check that the request
+	 * was not reallocated as we acquired the reference we have to
+	 * check that our request remains the active request across
+	 * the lookup, in the same manner as a seqlock. The visibility
+	 * of the pointer versus the reference counting is controlled
+	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
+	 *
+	 * In the middle of all that, we inspect whether the request is
+	 * complete. Retiring is lazy so the request may be completed long
+	 * before the active tracker is updated. Querying whether the
+	 * request is complete is far cheaper (as it involves no locked
+	 * instructions setting cachelines to exclusive) than acquiring
+	 * the reference, so we do it first. The RCU read lock ensures the
+	 * pointer dereference is valid, but does not ensure that the
+	 * seqno nor HWS is the right one! However, if the request was
+	 * reallocated, that means the active tracker's request was complete.
+	 * If the new request is also complete, then both are and we can
+	 * just report the active tracker is idle. If the new request is
+	 * incomplete, then we acquire a reference on it and check that
+	 * it remained the active request.
+	 */
+	do {
+		struct drm_i915_gem_request *request;
+
+		request = rcu_dereference(active->__request);
+		if (!request || i915_gem_request_completed(request))
+			return NULL;
+
+		request = i915_gem_request_get_rcu(request);
+		if (!request || request == rcu_dereference(active->__request))
+			return request;
+
+		i915_gem_request_put(request);
+	} while (1);
+}
+
+/**
  * __i915_gem_active_is_busy - report whether the active tracker is assigned
  * @active - the active tracker
  *
@@ -433,7 +533,8 @@ i915_gem_active_retire(struct i915_gem_active *active,
 	struct drm_i915_gem_request *request;
 	int ret;
 
-	request = active->__request;
+	request = rcu_dereference_protected(active->__request,
+					    lockdep_is_held(mutex));
 	if (!request)
 		return 0;
 
@@ -442,7 +543,8 @@ i915_gem_active_retire(struct i915_gem_active *active,
 		return ret;
 
 	list_del_init(&active->link);
-	active->__request = NULL;
+	RCU_INIT_POINTER(active->__request, NULL);
+
 	active->retire(active, request);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 33f8dcb9b8c4..a1a805fcdffa 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -191,6 +191,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 		intel_runtime_pm_put(dev_priv);
 
 	i915_gem_retire_requests(dev_priv);
+	/* expedite the RCU grace period to free some request slabs */
+	synchronize_rcu_expedited();
 
 	return count;
 }
@@ -211,10 +213,15 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
  */
 unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 {
-	return i915_gem_shrink(dev_priv, -1UL,
-			       I915_SHRINK_BOUND |
-			       I915_SHRINK_UNBOUND |
-			       I915_SHRINK_ACTIVE);
+	unsigned long freed;
+
+	freed = i915_gem_shrink(dev_priv, -1UL,
+				I915_SHRINK_BOUND |
+				I915_SHRINK_UNBOUND |
+				I915_SHRINK_ACTIVE);
+	rcu_barrier(); /* wait until our RCU delayed slab frees are completed */
+
+	return freed;
 }
 
 static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
-- 
2.8.1


* [PATCH 22/22] drm/i915: Export our request as a dma-buf fence on the reservation object
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (20 preceding siblings ...)
  2016-07-27 11:14 ` [PATCH 21/22] drm/i915: Enable lockless lookup of request tracking via RCU Chris Wilson
@ 2016-07-27 11:15 ` Chris Wilson
  2016-07-28 10:32   ` Daniel Vetter
  2016-07-27 11:23 ` ✗ Ro.CI.BAT: failure for series starting with [01/22] drm/i915: Combine loops within i915_gem_evict_something Patchwork
  2016-07-29 10:20 ` ✗ Ro.CI.BAT: failure for series starting with [01/22] drm/i915: Combine loops within i915_gem_evict_something (rev2) Patchwork
  23 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-27 11:15 UTC (permalink / raw)
  To: intel-gfx

If the GEM objects used for rendering in this request have been
exported via dma-buf to a third party, hook ourselves into the dma-buf
reservation object so that the third party can serialise with our
rendering via the dma-buf fences.
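
For context, an importer on the other end of the dma-buf can then
serialise against our rendering along these lines (a sketch only, not
taken from any particular driver):

	#include <linux/dma-buf.h>
	#include <linux/reservation.h>
	#include <linux/sched.h>	/* MAX_SCHEDULE_TIMEOUT */

	static long wait_for_exporter_rendering(struct dma_buf *dma_buf)
	{
		/* Block (interruptibly) until all shared (read) and
		 * exclusive (write) fences on the reservation object
		 * have signaled.
		 */
		return reservation_object_wait_timeout_rcu(dma_buf->resv,
							   true, true,
							   MAX_SCHEDULE_TIMEOUT);
	}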

Testcase: igt/prime_busy
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_dmabuf.c     | 56 ++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 33 ++++++++++++++++--
 2 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 3a00ab3ad06e..bab71ba9c25a 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -23,9 +23,13 @@
  * Authors:
  *	Dave Airlie <airlied@redhat.com>
  */
+
+#include <linux/dma-buf.h>
+#include <linux/reservation.h>
+
 #include <drm/drmP.h>
+
 #include "i915_drv.h"
-#include <linux/dma-buf.h>
 
 static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
 {
@@ -218,25 +222,71 @@ static const struct dma_buf_ops i915_dmabuf_ops =  {
 	.end_cpu_access = i915_gem_end_cpu_access,
 };
 
+static void export_fences(struct drm_i915_gem_object *obj,
+			  struct dma_buf *dma_buf)
+{
+	struct reservation_object *resv = dma_buf->resv;
+	struct drm_i915_gem_request *req;
+	unsigned long active;
+	int idx;
+
+	active = __I915_BO_ACTIVE(obj);
+	if (!active)
+		return;
+
+	/* Mark the object for future fences before racily adding old fences */
+	obj->base.dma_buf = dma_buf;
+
+	mutex_lock(&resv->lock.base);
+
+	for_each_active(active, idx) {
+		rcu_read_lock();
+		req = i915_gem_active_get_rcu(&obj->last_read[idx]);
+		rcu_read_unlock();
+		if (!req)
+			continue;
+
+		if (reservation_object_reserve_shared(resv) == 0)
+			reservation_object_add_shared_fence(resv, &req->fence);
+
+		i915_gem_request_put(req);
+	}
+
+	rcu_read_lock();
+	req = i915_gem_active_get_rcu(&obj->last_write);
+	rcu_read_unlock();
+	if (req) {
+		reservation_object_add_excl_fence(resv, &req->fence);
+		i915_gem_request_put(req);
+	}
+
+	mutex_unlock(&resv->lock.base);
+}
+
 struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 				      struct drm_gem_object *gem_obj, int flags)
 {
 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
 	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+	struct dma_buf *dma_buf;
 
 	exp_info.ops = &i915_dmabuf_ops;
 	exp_info.size = gem_obj->size;
 	exp_info.flags = flags;
 	exp_info.priv = gem_obj;
 
-
 	if (obj->ops->dmabuf_export) {
 		int ret = obj->ops->dmabuf_export(obj);
 		if (ret)
 			return ERR_PTR(ret);
 	}
 
-	return dma_buf_export(&exp_info);
+	dma_buf = dma_buf_export(&exp_info);
+	if (IS_ERR(dma_buf))
+		return dma_buf;
+
+	export_fences(obj, dma_buf);
+	return dma_buf;
 }
 
 static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0d28703d991a..e2aba40bf328 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -26,13 +26,17 @@
  *
  */
 
+#include <linux/dma_remapping.h>
+#include <linux/reservation.h>
+#include <linux/uaccess.h>
+
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
+#include "i915_gem_dmabuf.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
-#include <linux/dma_remapping.h>
-#include <linux/uaccess.h>
 
 #define  __EXEC_OBJECT_HAS_PIN		(1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE	(1<<30)
@@ -1193,7 +1197,29 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 	list_move_tail(&vma->vm_link, &vma->vm->active_list);
 }
 
-static void
+static void eb_export_fence(struct drm_i915_gem_object *obj,
+			    struct drm_i915_gem_request *req,
+			    unsigned int flags)
+{
+	struct reservation_object *resv;
+
+	resv = i915_gem_object_get_dmabuf_resv(obj);
+	if (!resv)
+		return;
+
+	/* Ignore errors from failing to allocate the new fence, we can't
+	 * handle an error right now. Worst case should be missed
+	 * synchronisation leading to rendering corruption.
+	 */
+	mutex_lock(&resv->lock.base);
+	if (flags & EXEC_OBJECT_WRITE)
+		reservation_object_add_excl_fence(resv, &req->fence);
+	else if (reservation_object_reserve_shared(resv) == 0)
+		reservation_object_add_shared_fence(resv, &req->fence);
+	mutex_unlock(&resv->lock.base);
+}
+
+void
 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 				   struct drm_i915_gem_request *req)
 {
@@ -1212,6 +1238,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 		obj->base.read_domains = obj->base.pending_read_domains;
 
 		i915_vma_move_to_active(vma, req, vma->exec_entry->flags);
+		eb_export_fence(obj, req, vma->exec_entry->flags);
 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
 	}
 }
-- 
2.8.1


* ✗ Ro.CI.BAT: failure for series starting with [01/22] drm/i915: Combine loops within i915_gem_evict_something
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (21 preceding siblings ...)
  2016-07-27 11:15 ` [PATCH 22/22] drm/i915: Export our request as a dma-buf fence on the reservation object Chris Wilson
@ 2016-07-27 11:23 ` Patchwork
  2016-07-29 10:20 ` ✗ Ro.CI.BAT: failure for series starting with [01/22] drm/i915: Combine loops within i915_gem_evict_something (rev2) Patchwork
  23 siblings, 0 replies; 95+ messages in thread
From: Patchwork @ 2016-07-27 11:23 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/22] drm/i915: Combine loops within i915_gem_evict_something
URL   : https://patchwork.freedesktop.org/series/10315/
State : failure

== Summary ==

Applying: drm/i915: Combine loops within i915_gem_evict_something
Using index info to reconstruct a base tree...
M	drivers/gpu/drm/i915/i915_gem_evict.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/i915_gem_evict.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/i915_gem_evict.c
error: Failed to merge in the changes.
Patch failed at 0001 drm/i915: Combine loops within i915_gem_evict_something
The copy of the patch that failed is found in: .git/rebase-apply/patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


* Re: [PATCH 02/22] drm/i915: Remove surplus drm_device parameter to i915_gem_evict_something()
  2016-07-27 11:14 ` [PATCH 02/22] drm/i915: Remove surplus drm_device parameter to i915_gem_evict_something() Chris Wilson
@ 2016-07-28  8:07   ` Joonas Lahtinen
  0 siblings, 0 replies; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28  8:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> Eviction is VM local, so we can ignore the significance of the
> drm_device in the caller, and leave it to i915_gem_evict_something() to
> manager itself.

s/manager/manage/?

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h       |  3 +--
>  drivers/gpu/drm/i915/i915_gem.c       |  2 +-
>  drivers/gpu/drm/i915/i915_gem_evict.c |  9 ++++-----
>  drivers/gpu/drm/i915/i915_gem_gtt.c   |  2 +-
>  drivers/gpu/drm/i915/i915_trace.h     | 14 ++++++++------
>  5 files changed, 15 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 66b98fa4715a..fbda38f25c6b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3398,8 +3398,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
>  				       struct drm_file *file);
>  
>  /* i915_gem_evict.c */
> -int __must_check i915_gem_evict_something(struct drm_device *dev,
> -					  struct i915_address_space *vm,
> +int __must_check i915_gem_evict_something(struct i915_address_space *vm,
>  					  int min_size,
>  					  unsigned alignment,
>  					  unsigned cache_level,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e3278f4e1ad2..bf652dc88024 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3093,7 +3093,7 @@ search_free:
>  							  search_flag,
>  							  alloc_flag);
>  		if (ret) {
> -			ret = i915_gem_evict_something(dev, vm, size, alignment,
> +			ret = i915_gem_evict_something(vm, size, alignment,
>  						       obj->cache_level,
>  						       start, end,
>  						       flags);
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 016be7316676..4bce72fa14c4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -61,7 +61,6 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
>  
>  /**
>   * i915_gem_evict_something - Evict vmas to make room for binding a new one
> - * @dev: drm_device
>   * @vm: address space to evict from
>   * @min_size: size of the desired free space
>   * @alignment: alignment constraint of the desired free space
> @@ -84,12 +83,12 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
>   * memory in e.g. the shrinker.
>   */
>  int
> -i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
> +i915_gem_evict_something(struct i915_address_space *vm,
>  			 int min_size, unsigned alignment, unsigned cache_level,
>  			 unsigned long start, unsigned long end,
>  			 unsigned flags)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct drm_i915_private *dev_priv = to_i915(vm->dev);
>  	struct list_head eviction_list;
>  	struct list_head *phases[] = {
>  		&vm->inactive_list,
> @@ -99,7 +98,7 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
>  	struct i915_vma *vma, *next;
>  	int ret;
>  
> -	trace_i915_gem_evict(dev, min_size, alignment, flags);
> +	trace_i915_gem_evict(vm, min_size, alignment, flags);
>  
>  	/*
>  	 * The goal is to evict objects and amalgamate space in LRU order.
> @@ -154,7 +153,7 @@ search_again:
>  		 * back to userspace to give our workqueues time to
>  		 * acquire our locks and unpin the old scanouts.
>  		 */
> -		return intel_has_pending_fb_unpin(dev) ? -EAGAIN : -ENOSPC;
> +		return intel_has_pending_fb_unpin(vm->dev) ? -EAGAIN : -ENOSPC;
>  	}
>  
>  	/* Not everything in the GGTT is tracked via vma (otherwise we
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 59ecaf2c8bf8..5869fa074009 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2012,7 +2012,7 @@ alloc:
>  						  0, ggtt->base.total,
>  						  DRM_MM_TOPDOWN);
>  	if (ret == -ENOSPC && !retried) {
> -		ret = i915_gem_evict_something(dev, &ggtt->base,
> +		ret = i915_gem_evict_something(&ggtt->base,
>  					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
>  					       I915_CACHE_NONE,
>  					       0, ggtt->base.total,
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 9e43c0aa6e3b..178798002a73 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -394,25 +394,27 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy,
>  );
>  
>  TRACE_EVENT(i915_gem_evict,
> -	    TP_PROTO(struct drm_device *dev, u32 size, u32 align, unsigned flags),
> -	    TP_ARGS(dev, size, align, flags),
> +	    TP_PROTO(struct i915_address_space *vm, u32 size, u32 align, unsigned int flags),
> +	    TP_ARGS(vm, size, align, flags),
>  
>  	    TP_STRUCT__entry(
>  			     __field(u32, dev)
> +			     __field(struct i915_address_space *, vm)
>  			     __field(u32, size)
>  			     __field(u32, align)
> -			     __field(unsigned, flags)
> +			     __field(unsigned int, flags)
>  			    ),
>  
>  	    TP_fast_assign(
> -			   __entry->dev = dev->primary->index;
> +			   __entry->dev = vm->dev->primary->index;
> +			   __entry->vm = vm;
>  			   __entry->size = size;
>  			   __entry->align = align;
>  			   __entry->flags = flags;
>  			  ),
>  
> -	    TP_printk("dev=%d, size=%d, align=%d %s",
> -		      __entry->dev, __entry->size, __entry->align,
> +	    TP_printk("dev=%d, vm=%p, size=%d, align=%d %s",
> +		      __entry->dev, __entry->vm, __entry->size, __entry->align,
>  		      __entry->flags & PIN_MAPPABLE ? ", mappable" : "")
>  );
>  
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 03/22] drm/i915: Double check the active status on the batch pool
  2016-07-27 11:14 ` [PATCH 03/22] drm/i915: Double check the active status on the batch pool Chris Wilson
@ 2016-07-28  8:14   ` Joonas Lahtinen
  0 siblings, 0 replies; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28  8:14 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> @@ -27,13 +27,16 @@
>  
>  #include "i915_drv.h"
>  
> +struct drm_device;

Why do you add this when you remove the sole usage in this same patch?

With that removed,

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 04/22] drm/i915: Remove request retirement before each batch
  2016-07-27 11:14 ` [PATCH 04/22] drm/i915: Remove request retirement before each batch Chris Wilson
@ 2016-07-28  8:32   ` Joonas Lahtinen
  2016-07-28  9:32     ` Chris Wilson
  2016-07-28  9:54   ` Daniel Vetter
  1 sibling, 1 reply; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28  8:32 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> This reimplements the denial-of-service protection against igt from
> commit 227f782e4667 ("drm/i915: Retire requests before creating a new
> one") and transfers the stall from before each batch into get_pages().
> The issue is that the stall is increasing latency between batches which
> is detrimental in some cases (especially coupled with execlists) to
> keeping the GPU well fed. Also we have made the observation that retiring
> requests can of itself free objects (and requests) and therefore makes
> a good first step when shrinking.
> 
> v2: Recycle objects prior to i915_gem_object_get_pages()
> v3: Remove the reference to the ring from i915_gem_requests_ring() as it
> operates on an intel_engine_cs.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Was this tested for performance regressions?

Codewise,

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 05/22] drm/i915: Remove i915_gem_execbuffer_retire_commands()
  2016-07-27 11:14 ` [PATCH 05/22] drm/i915: Remove i915_gem_execbuffer_retire_commands() Chris Wilson
@ 2016-07-28  8:46   ` Joonas Lahtinen
  2016-07-28  8:55     ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28  8:46 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> Move the single line to the callsite as the name is now misleading, and
> the purpose is solely to add the request to the execution queue.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +--------
>  1 file changed, 1 insertion(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0593ea3ba211..63984c4d8e5a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1211,13 +1211,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>  	}
>  }
>  
> -static void
> -i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
> -{
> -	/* Add a breadcrumb for the completion of the batch buffer */
> -	__i915_add_request(params->request, params->batch_obj, true);
> -}
> -
>  static int
>  i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
>  {
> @@ -1692,7 +1685,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  
>  	ret = execbuf_submit(params, args, &eb->vmas);
>  err_request:
> -	i915_gem_execbuffer_retire_commands(params);
> +	__i915_add_request(params->request, params->batch_obj, ret == 0);

This adds a new behavior of not flushing if execbuf fails to submit;
I guess it is intentional? Do mention it in the commit message.

Regards, Joonas

>  
>  err_batch_unpin:
>  	/*
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 05/22] drm/i915: Remove i915_gem_execbuffer_retire_commands()
  2016-07-28  8:46   ` Joonas Lahtinen
@ 2016-07-28  8:55     ` Chris Wilson
  2016-07-28  9:54       ` Joonas Lahtinen
  0 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-28  8:55 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Thu, Jul 28, 2016 at 11:46:30AM +0300, Joonas Lahtinen wrote:
> On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> > Move the single line to the callsite as the name is now misleading, and
> > the purpose is solely to add the request to the execution queue.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +--------
> >  1 file changed, 1 insertion(+), 8 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > index 0593ea3ba211..63984c4d8e5a 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > @@ -1211,13 +1211,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
> >  	}
> >  }
> >  
> > -static void
> > -i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
> > -{
> > -	/* Add a breadcrumb for the completion of the batch buffer */
> > -	__i915_add_request(params->request, params->batch_obj, true);
> > -}
> > -
> >  static int
> >  i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
> >  {
> > @@ -1692,7 +1685,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> >  
> >  	ret = execbuf_submit(params, args, &eb->vmas);
> >  err_request:
> > -	i915_gem_execbuffer_retire_commands(params);
> > +	__i915_add_request(params->request, params->batch_obj, ret == 0);
> 
> This adds a new behavior of no flushing if execbuf fails to submit, I
> guess it is intentional? Do mention in the commit message.

Yes, if we fail to actually emit the execbuf, we don't need to emit
the flush afterwards as we don't perform any rendering. Later on we will
be in a position to detect a request that does nothing and is never
observed, and to unwind it.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 06/22] drm/i915: Fix up vma alignment to be u64
  2016-07-27 11:14 ` [PATCH 06/22] drm/i915: Fix up vma alignment to be u64 Chris Wilson
@ 2016-07-28  8:59   ` Joonas Lahtinen
  0 siblings, 0 replies; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28  8:59 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> @@ -3029,9 +3029,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  		alignment = flags & PIN_MAPPABLE ? fence_alignment :
>  						unfenced_alignment;
>  	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
> -		DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
> +		DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n",
>  			  ggtt_view ? ggtt_view->type : 0,
> -			  alignment);
> +			  (long long)alignment);

From printk-formats:

u64			%llu or %llx

Should be no need to cast.
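
i.e. just

	DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n",
		  ggtt_view ? ggtt_view->type : 0, alignment);

should work without the (long long) cast.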

>  		return ERR_PTR(-EINVAL);
>  	}
>  
> @@ -3688,7 +3688,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>  }
>  
>  static bool
> -i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
> +i915_vma_misplaced(struct i915_vma *vma, u64 alignment, u64 flags)
>  {
>  	struct drm_i915_gem_object *obj = vma->obj;
>  
> @@ -3737,8 +3737,8 @@ static int
>  i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  		       struct i915_address_space *vm,
>  		       const struct i915_ggtt_view *ggtt_view,
> -		       uint32_t alignment,
> -		       uint64_t flags)
> +		       u64 alignment,
> +		       u64 flags)
>  {
>  	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>  	struct i915_vma *vma;
> @@ -3767,12 +3767,12 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  		if (i915_vma_misplaced(vma, alignment, flags)) {
>  			WARN(vma->pin_count,
>  			     "bo is already pinned in %s with incorrect alignment:"
> -			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
> +			     " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
>  			     " obj->map_and_fenceable=%d\n",
>  			     ggtt_view ? "ggtt" : "ppgtt",
>  			     upper_32_bits(vma->node.start),
>  			     lower_32_bits(vma->node.start),
> -			     alignment,
> +			     (long long)alignment,

Ditto here, or do you get warnings from GCC?

Otherwise seems fine, just converting the types to be wider.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 08/22] drm/i915: Reduce WARN(i915_gem_valid_gtt_space) to a debug-only check
  2016-07-27 11:14 ` [PATCH 08/22] drm/i915: Reduce WARN(i915_gem_valid_gtt_space) to a debug-only check Chris Wilson
@ 2016-07-28  9:18   ` Joonas Lahtinen
  0 siblings, 0 replies; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28  9:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> i915_gem_valid_gtt_space() is used after inserting the VMA to double
> check the list - the location should have been chosen to pass all the
> restrictions.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gem.c | 5 +----
>  1 file changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 2147225e7887..f47a9e450239 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3095,10 +3095,7 @@ search_free:
>  			goto err_vma;
>  		}
>  	}
> -	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
> -		ret = -EINVAL;
> -		goto err_remove_node;
> -	}
> +	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
>  
>  	trace_i915_vma_bind(vma, flags);
>  	ret = i915_vma_bind(vma, obj->cache_level, flags);
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 09/22] drm/i915: Split insertion/binding of an object into the VM
  2016-07-27 11:14 ` [PATCH 09/22] drm/i915: Split insertion/binding of an object into the VM Chris Wilson
@ 2016-07-28  9:25   ` Joonas Lahtinen
  2016-07-28  9:34     ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28  9:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> Split the insertion into the address space's range manager and binding
> of that object into the GTT to simplify the code flow when pinning a
> VMA.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 35 +++++++++++++++--------------------
>  1 file changed, 15 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f47a9e450239..1773b35703bc 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2966,12 +2966,12 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
>   * @flags: mask of PIN_* flags to use
>   */
>  static struct i915_vma *
> -i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> -			   struct i915_address_space *vm,
> -			   const struct i915_ggtt_view *ggtt_view,
> -			   u64 size,
> -			   u64 alignment,
> -			   u64 flags)
> +i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
> +			       struct i915_address_space *vm,
> +			       const struct i915_ggtt_view *ggtt_view,
> +			       u64 size,
> +			       u64 alignment,
> +			       u64 flags)

Could be just object_insert_into_vm() ? Or will you expose it later.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 04/22] drm/i915: Remove request retirement before each batch
  2016-07-28  8:32   ` Joonas Lahtinen
@ 2016-07-28  9:32     ` Chris Wilson
  2016-07-28  9:53       ` Joonas Lahtinen
  0 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-28  9:32 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Thu, Jul 28, 2016 at 11:32:47AM +0300, Joonas Lahtinen wrote:
> On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> > This reimplements the denial-of-service protection against igt from
> > commit 227f782e4667 ("drm/i915: Retire requests before creating a new
> > one") and transfers the stall from before each batch into get_pages().
> > The issue is that the stall is increasing latency between batches which
> > is detrimental in some cases (especially coupled with execlists) to
> > keeping the GPU well fed. Also we have made the observation that retiring
> > requests can of itself free objects (and requests) and therefore makes
> > a good first step when shrinking.
> > 
> > v2: Recycle objects prior to i915_gem_object_get_pages()
> > v3: Remove the reference to the ring from i915_gem_requests_ring() as it
> > operates on an intel_engine_cs.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Was this tested for performance regressions?

Yes. It fixed the latency issue from 227f782e4667, but introduced an
issue with page allocation for context/object creation, which was
papered over in v2. Since then requests (this series+) have become both
lazier and more economical, changing the latency characteristics for
execbuf, which somewhat mitigates the issue found in v1. Note that once
we implement a separate mm lock, the freedom to do a full retirement
before get_pages() will be lost.

This series is intending (including the execbuf reworking) to fix the
2x-10x performance regression (platform dependent) we have in
microbenchmarks (which corresponds to about 20% at the GL level in driver
stress tests). However, execlists still remains ~8% slower than legacy
submission (at the GL level).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 09/22] drm/i915: Split insertion/binding of an object into the VM
  2016-07-28  9:25   ` Joonas Lahtinen
@ 2016-07-28  9:34     ` Chris Wilson
  0 siblings, 0 replies; 95+ messages in thread
From: Chris Wilson @ 2016-07-28  9:34 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Thu, Jul 28, 2016 at 12:25:06PM +0300, Joonas Lahtinen wrote:
> On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> > Split the insertion into the address space's range manager and binding
> > of that object into the GTT to simplify the code flow when pinning a
> > VMA.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c | 35 +++++++++++++++--------------------
> >  1 file changed, 15 insertions(+), 20 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index f47a9e450239..1773b35703bc 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -2966,12 +2966,12 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
> >   * @flags: mask of PIN_* flags to use
> >   */
> >  static struct i915_vma *
> > -i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> > -			   struct i915_address_space *vm,
> > -			   const struct i915_ggtt_view *ggtt_view,
> > -			   u64 size,
> > -			   u64 alignment,
> > -			   u64 flags)
> > +i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
> > +			       struct i915_address_space *vm,
> > +			       const struct i915_ggtt_view *ggtt_view,
> > +			       u64 size,
> > +			       u64 alignment,
> > +			       u64 flags)
> 
> Could be just object_insert_into_vm() ? Or will you expose it later.

It becomes i915_vma_insert() very shortly. Tentative plans to move it
out of i915_gem.c
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 18/22] drm/i915: Use dev_priv consistently through the intel_frontbuffer interface
  2016-07-27 11:14 ` [PATCH 18/22] drm/i915: Use dev_priv consistently through the intel_frontbuffer interface Chris Wilson
@ 2016-07-28  9:36   ` Joonas Lahtinen
  2016-07-28 10:06   ` Daniel Vetter
  1 sibling, 0 replies; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28  9:36 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> Rather than a mishmash of struct drm_device *dev and struct
> drm_i915_private *dev_priv being used freely within a function, be
> consistent and only pass along dev_priv.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Mechanical,

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/intel_display.c     | 10 ++++----
>  drivers/gpu/drm/i915/intel_dp.c          | 14 +++++-------
>  drivers/gpu/drm/i915/intel_drv.h         | 21 +++++++++--------
>  drivers/gpu/drm/i915/intel_frontbuffer.c | 39 ++++++++++++--------------------
>  drivers/gpu/drm/i915/intel_overlay.c     |  3 +--
>  drivers/gpu/drm/i915/intel_psr.c         | 26 +++++++++------------
>  6 files changed, 49 insertions(+), 64 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 1c70f68328b4..9f15ced6fc7c 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -4565,12 +4565,11 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state)
>  	struct drm_atomic_state *old_state = old_crtc_state->base.state;
>  	struct intel_crtc_state *pipe_config =
>  		to_intel_crtc_state(crtc->base.state);
> -	struct drm_device *dev = crtc->base.dev;
>  	struct drm_plane *primary = crtc->base.primary;
>  	struct drm_plane_state *old_pri_state =
>  		drm_atomic_get_existing_plane_state(old_state, primary);
>  
> -	intel_frontbuffer_flip(dev, pipe_config->fb_bits);
> +	intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits);
>  
>  	crtc->wm.cxsr_allowed = true;
>  
> @@ -4693,7 +4692,7 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc, unsigned plane_mask
>  	 * to compute the mask of flip planes precisely. For the time being
>  	 * consider this a flip to a NULL plane.
>  	 */
> -	intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe));
> +	intel_frontbuffer_flip(to_i915(dev), INTEL_FRONTBUFFER_ALL_MASK(pipe));
>  }
>  
>  static void ironlake_crtc_enable(struct drm_crtc *crtc)
> @@ -10959,7 +10958,8 @@ static void intel_unpin_work_fn(struct work_struct *__work)
>  
>  	i915_gem_request_put(work->flip_queued_req);
>  
> -	intel_frontbuffer_flip_complete(dev, to_intel_plane(primary)->frontbuffer_bit);
> +	intel_frontbuffer_flip_complete(to_i915(dev),
> +					to_intel_plane(primary)->frontbuffer_bit);
>  	intel_fbc_post_update(crtc);
>  	drm_framebuffer_unreference(work->old_fb);
>  
> @@ -11734,7 +11734,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  			  to_intel_plane(primary)->frontbuffer_bit);
>  	mutex_unlock(&dev->struct_mutex);
>  
> -	intel_frontbuffer_flip_prepare(dev,
> +	intel_frontbuffer_flip_prepare(to_i915(dev),
>  				       to_intel_plane(primary)->frontbuffer_bit);
>  
>  	trace_i915_flip_request(intel_crtc->plane, obj);
> diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
> index 21b04c3eda41..2fd90d153fe7 100644
> --- a/drivers/gpu/drm/i915/intel_dp.c
> +++ b/drivers/gpu/drm/i915/intel_dp.c
> @@ -5186,7 +5186,7 @@ unlock:
>  
>  /**
>   * intel_edp_drrs_invalidate - Disable Idleness DRRS
> - * @dev: DRM device
> + * @dev_priv: i915 device
>   * @frontbuffer_bits: frontbuffer plane tracking bits
>   *
>   * This function gets called everytime rendering on the given planes start.
> @@ -5194,10 +5194,9 @@ unlock:
>   *
>   * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits.
>   */
> -void intel_edp_drrs_invalidate(struct drm_device *dev,
> -		unsigned frontbuffer_bits)
> +void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv,
> +			       unsigned frontbuffer_bits)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_crtc *crtc;
>  	enum pipe pipe;
>  
> @@ -5229,7 +5228,7 @@ void intel_edp_drrs_invalidate(struct drm_device *dev,
>  
>  /**
>   * intel_edp_drrs_flush - Restart Idleness DRRS
> - * @dev: DRM device
> + * @dev_priv: i915 device
>   * @frontbuffer_bits: frontbuffer plane tracking bits
>   *
>   * This function gets called every time rendering on the given planes has
> @@ -5239,10 +5238,9 @@ void intel_edp_drrs_invalidate(struct drm_device *dev,
>   *
>   * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits.
>   */
> -void intel_edp_drrs_flush(struct drm_device *dev,
> -		unsigned frontbuffer_bits)
> +void intel_edp_drrs_flush(struct drm_i915_private *dev_priv,
> +			  unsigned frontbuffer_bits)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_crtc *crtc;
>  	enum pipe pipe;
>  
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 5294039cf238..6f447d485db1 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1135,11 +1135,11 @@ void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state);
>  uint32_t ddi_signal_levels(struct intel_dp *intel_dp);
>  
>  /* intel_frontbuffer.c */
> -void intel_frontbuffer_flip_prepare(struct drm_device *dev,
> +void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv,
>  				    unsigned frontbuffer_bits);
> -void intel_frontbuffer_flip_complete(struct drm_device *dev,
> +void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv,
>  				     unsigned frontbuffer_bits);
> -void intel_frontbuffer_flip(struct drm_device *dev,
> +void intel_frontbuffer_flip(struct drm_i915_private *dev_priv,
>  			    unsigned frontbuffer_bits);
>  unsigned int intel_fb_align_height(struct drm_device *dev,
>  				   unsigned int height,
> @@ -1413,11 +1413,12 @@ uint32_t intel_dp_pack_aux(const uint8_t *src, int src_bytes);
>  void intel_plane_destroy(struct drm_plane *plane);
>  void intel_edp_drrs_enable(struct intel_dp *intel_dp);
>  void intel_edp_drrs_disable(struct intel_dp *intel_dp);
> -void intel_edp_drrs_invalidate(struct drm_device *dev,
> -		unsigned frontbuffer_bits);
> -void intel_edp_drrs_flush(struct drm_device *dev, unsigned frontbuffer_bits);
> +void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv,
> +			       unsigned frontbuffer_bits);
> +void intel_edp_drrs_flush(struct drm_i915_private *dev_priv,
> +			  unsigned frontbuffer_bits);
>  bool intel_digital_port_connected(struct drm_i915_private *dev_priv,
> -					 struct intel_digital_port *port);
> +				  struct intel_digital_port *port);
>  
>  void
>  intel_dp_program_link_training_pattern(struct intel_dp *intel_dp,
> @@ -1590,13 +1591,13 @@ static inline void intel_backlight_device_unregister(struct intel_connector *con
>  /* intel_psr.c */
>  void intel_psr_enable(struct intel_dp *intel_dp);
>  void intel_psr_disable(struct intel_dp *intel_dp);
> -void intel_psr_invalidate(struct drm_device *dev,
> +void intel_psr_invalidate(struct drm_i915_private *dev_priv,
>  			  unsigned frontbuffer_bits);
> -void intel_psr_flush(struct drm_device *dev,
> +void intel_psr_flush(struct drm_i915_private *dev_priv,
>  		     unsigned frontbuffer_bits,
>  		     enum fb_op_origin origin);
>  void intel_psr_init(struct drm_device *dev);
> -void intel_psr_single_frame_update(struct drm_device *dev,
> +void intel_psr_single_frame_update(struct drm_i915_private *dev_priv,
>  				   unsigned frontbuffer_bits);
>  
>  /* intel_runtime_pm.c */
> diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
> index 636324da21c2..42f718bb584c 100644
> --- a/drivers/gpu/drm/i915/intel_frontbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
> @@ -80,8 +80,7 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
>  			       enum fb_op_origin origin,
>  			       unsigned int frontbuffer_bits)
>  {
> -	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>  
>  	if (origin == ORIGIN_CS) {
>  		spin_lock(&dev_priv->fb_tracking.lock);
> @@ -90,8 +89,8 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
>  		spin_unlock(&dev_priv->fb_tracking.lock);
>  	}
>  
> -	intel_psr_invalidate(dev, frontbuffer_bits);
> -	intel_edp_drrs_invalidate(dev, frontbuffer_bits);
> +	intel_psr_invalidate(dev_priv, frontbuffer_bits);
> +	intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits);
>  	intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin);
>  }
>  
> @@ -107,12 +106,10 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
>   *
>   * Can be called without any locks held.
>   */
> -static void intel_frontbuffer_flush(struct drm_device *dev,
> +static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv,
>  				    unsigned frontbuffer_bits,
>  				    enum fb_op_origin origin)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -
>  	/* Delay flushing when rings are still busy.*/
>  	spin_lock(&dev_priv->fb_tracking.lock);
>  	frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits;
> @@ -121,8 +118,8 @@ static void intel_frontbuffer_flush(struct drm_device *dev,
>  	if (frontbuffer_bits == 0)
>  		return;
>  
> -	intel_edp_drrs_flush(dev, frontbuffer_bits);
> -	intel_psr_flush(dev, frontbuffer_bits, origin);
> +	intel_edp_drrs_flush(dev_priv, frontbuffer_bits);
> +	intel_psr_flush(dev_priv, frontbuffer_bits, origin);
>  	intel_fbc_flush(dev_priv, frontbuffer_bits, origin);
>  }
>  
> @@ -141,8 +138,7 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
>  			  enum fb_op_origin origin,
>  			  unsigned int frontbuffer_bits)
>  {
> -	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>  
>  	if (retire) {
>  		spin_lock(&dev_priv->fb_tracking.lock);
> @@ -153,7 +149,7 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
>  	}
>  
>  	if (frontbuffer_bits)
> -		intel_frontbuffer_flush(dev, frontbuffer_bits, origin);
> +		intel_frontbuffer_flush(dev_priv, frontbuffer_bits, origin);
>  }
>  
>  /**
> @@ -168,18 +164,16 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
>   *
>   * Can be called without any locks held.
>   */
> -void intel_frontbuffer_flip_prepare(struct drm_device *dev,
> +void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv,
>  				    unsigned frontbuffer_bits)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -
>  	spin_lock(&dev_priv->fb_tracking.lock);
>  	dev_priv->fb_tracking.flip_bits |= frontbuffer_bits;
>  	/* Remove stale busy bits due to the old buffer. */
>  	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
>  	spin_unlock(&dev_priv->fb_tracking.lock);
>  
> -	intel_psr_single_frame_update(dev, frontbuffer_bits);
> +	intel_psr_single_frame_update(dev_priv, frontbuffer_bits);
>  }
>  
>  /**
> @@ -192,11 +186,9 @@ void intel_frontbuffer_flip_prepare(struct drm_device *dev,
>   *
>   * Can be called without any locks held.
>   */
> -void intel_frontbuffer_flip_complete(struct drm_device *dev,
> +void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv,
>  				     unsigned frontbuffer_bits)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -
>  	spin_lock(&dev_priv->fb_tracking.lock);
>  	/* Mask any cancelled flips. */
>  	frontbuffer_bits &= dev_priv->fb_tracking.flip_bits;
> @@ -204,7 +196,8 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev,
>  	spin_unlock(&dev_priv->fb_tracking.lock);
>  
>  	if (frontbuffer_bits)
> -		intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
> +		intel_frontbuffer_flush(dev_priv,
> +					frontbuffer_bits, ORIGIN_FLIP);
>  }
>  
>  /**
> @@ -218,15 +211,13 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev,
>   *
>   * Can be called without any locks held.
>   */
> -void intel_frontbuffer_flip(struct drm_device *dev,
> +void intel_frontbuffer_flip(struct drm_i915_private *dev_priv,
>  			    unsigned frontbuffer_bits)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -
>  	spin_lock(&dev_priv->fb_tracking.lock);
>  	/* Remove stale busy bits due to the old buffer. */
>  	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
>  	spin_unlock(&dev_priv->fb_tracking.lock);
>  
> -	intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
> +	intel_frontbuffer_flush(dev_priv, frontbuffer_bits, ORIGIN_FLIP);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index 217fefc49bf9..ad08df49ed48 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -839,8 +839,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
>  	overlay->old_vid_bo = overlay->vid_bo;
>  	overlay->vid_bo = new_bo;
>  
> -	intel_frontbuffer_flip(&dev_priv->drm,
> -			       INTEL_FRONTBUFFER_OVERLAY(pipe));
> +	intel_frontbuffer_flip(dev_priv, INTEL_FRONTBUFFER_OVERLAY(pipe));
>  
>  	return 0;
>  
> diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
> index 68bd0bb34817..adf2ce0f38c0 100644
> --- a/drivers/gpu/drm/i915/intel_psr.c
> +++ b/drivers/gpu/drm/i915/intel_psr.c
> @@ -628,9 +628,8 @@ unlock:
>  	mutex_unlock(&dev_priv->psr.lock);
>  }
>  
> -static void intel_psr_exit(struct drm_device *dev)
> +static void intel_psr_exit(struct drm_i915_private *dev_priv)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct intel_dp *intel_dp = dev_priv->psr.enabled;
>  	struct drm_crtc *crtc = dp_to_dig_port(intel_dp)->base.base.crtc;
>  	enum pipe pipe = to_intel_crtc(crtc)->pipe;
> @@ -639,7 +638,7 @@ static void intel_psr_exit(struct drm_device *dev)
>  	if (!dev_priv->psr.active)
>  		return;
>  
> -	if (HAS_DDI(dev)) {
> +	if (HAS_DDI(dev_priv)) {
>  		val = I915_READ(EDP_PSR_CTL);
>  
>  		WARN_ON(!(val & EDP_PSR_ENABLE));
> @@ -674,7 +673,7 @@ static void intel_psr_exit(struct drm_device *dev)
>  
>  /**
>   * intel_psr_single_frame_update - Single Frame Update
> - * @dev: DRM device
> + * @dev_priv: i915 device
>   * @frontbuffer_bits: frontbuffer plane tracking bits
>   *
>   * Some platforms support a single frame update feature that is used to
> @@ -682,10 +681,9 @@ static void intel_psr_exit(struct drm_device *dev)
>   * So far it is only implemented for Valleyview and Cherryview because
>   * hardware requires this to be done before a page flip.
>   */
> -void intel_psr_single_frame_update(struct drm_device *dev,
> +void intel_psr_single_frame_update(struct drm_i915_private *dev_priv,
>  				   unsigned frontbuffer_bits)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_crtc *crtc;
>  	enum pipe pipe;
>  	u32 val;
> @@ -694,7 +692,7 @@ void intel_psr_single_frame_update(struct drm_device *dev,
>  	 * Single frame update is already supported on BDW+ but it requires
>  	 * many W/A and it isn't really needed.
>  	 */
> -	if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev))
> +	if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv))
>  		return;
>  
>  	mutex_lock(&dev_priv->psr.lock);
> @@ -720,7 +718,7 @@ void intel_psr_single_frame_update(struct drm_device *dev,
>  
>  /**
>   * intel_psr_invalidate - Invalidade PSR
> - * @dev: DRM device
> + * @dev_priv: i915 device
>   * @frontbuffer_bits: frontbuffer plane tracking bits
>   *
>   * Since the hardware frontbuffer tracking has gaps we need to integrate
> @@ -730,10 +728,9 @@ void intel_psr_single_frame_update(struct drm_device *dev,
>   *
>   * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits."
>   */
> -void intel_psr_invalidate(struct drm_device *dev,
> +void intel_psr_invalidate(struct drm_i915_private *dev_priv,
>  			  unsigned frontbuffer_bits)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_crtc *crtc;
>  	enum pipe pipe;
>  
> @@ -750,14 +747,14 @@ void intel_psr_invalidate(struct drm_device *dev,
>  	dev_priv->psr.busy_frontbuffer_bits |= frontbuffer_bits;
>  
>  	if (frontbuffer_bits)
> -		intel_psr_exit(dev);
> +		intel_psr_exit(dev_priv);
>  
>  	mutex_unlock(&dev_priv->psr.lock);
>  }
>  
>  /**
>   * intel_psr_flush - Flush PSR
> - * @dev: DRM device
> + * @dev_priv: i915 device
>   * @frontbuffer_bits: frontbuffer plane tracking bits
>   * @origin: which operation caused the flush
>   *
> @@ -768,10 +765,9 @@ void intel_psr_invalidate(struct drm_device *dev,
>   *
>   * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits.
>   */
> -void intel_psr_flush(struct drm_device *dev,
> +void intel_psr_flush(struct drm_i915_private *dev_priv,
>  		     unsigned frontbuffer_bits, enum fb_op_origin origin)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_crtc *crtc;
>  	enum pipe pipe;
>  
> @@ -789,7 +785,7 @@ void intel_psr_flush(struct drm_device *dev,
>  
>  	/* By definition flush = invalidate + flush */
>  	if (frontbuffer_bits)
> -		intel_psr_exit(dev);
> +		intel_psr_exit(dev_priv);
>  
>  	if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits)
>  		if (!work_busy(&dev_priv->psr.work.work))
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 20/22] drm/i915: Move i915_gem_object_wait_rendering()
  2016-07-27 11:14 ` [PATCH 20/22] drm/i915: Move i915_gem_object_wait_rendering() Chris Wilson
@ 2016-07-28  9:37   ` Joonas Lahtinen
  0 siblings, 0 replies; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28  9:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> Just move it earlier so that we can use the companion nonblocking
> version in a couple more callsites without having to add a forward
> declaration.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>


Separate code motion patch, wow.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gem.c | 202 ++++++++++++++++++++--------------------
>  1 file changed, 101 insertions(+), 101 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index ca9741525bf4..54d8a3863d11 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -301,6 +301,107 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
>  	return ret;
>  }
>  
> +/**
> + * Ensures that all rendering to the object has completed and the object is
> + * safe to unbind from the GTT or access from the CPU.
> + */
> +int
> +i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
> +			       bool readonly)
> +{
> +	struct reservation_object *resv;
> +	struct i915_gem_active *active;
> +	unsigned long active_mask;
> +	int idx;
> +
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +
> +	if (!readonly) {
> +		active = obj->last_read;
> +		active_mask = i915_gem_object_is_active(obj);
> +	} else {
> +		active_mask = 1;
> +		active = &obj->last_write;
> +	}
> +
> +	for_each_active(active_mask, idx) {
> +		int ret;
> +
> +		ret = i915_gem_active_wait(&active[idx],
> +					   &obj->base.dev->struct_mutex);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	resv = i915_gem_object_get_dmabuf_resv(obj);
> +	if (resv) {
> +		long err;
> +
> +		err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
> +							  MAX_SCHEDULE_TIMEOUT);
> +		if (err < 0)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
> +/* A nonblocking variant of the above wait. This is a highly dangerous routine
> + * as the object state may change during this call.
> + */
> +static __must_check int
> +i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
> +					    struct intel_rps_client *rps,
> +					    bool readonly)
> +{
> +	struct drm_device *dev = obj->base.dev;
> +	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
> +	struct i915_gem_active *active;
> +	unsigned long active_mask;
> +	int ret, i, n = 0;
> +
> +	lockdep_assert_held(&dev->struct_mutex);
> +	GEM_BUG_ON(!to_i915(dev)->mm.interruptible);
> +
> +	active_mask = i915_gem_object_is_active(obj);
> +	if (!active_mask)
> +		return 0;
> +
> +	if (!readonly) {
> +		active = obj->last_read;
> +	} else {
> +		active_mask = 1;
> +		active = &obj->last_write;
> +	}
> +
> +	for_each_active(active_mask, i) {
> +		struct drm_i915_gem_request *req;
> +
> +		req = i915_gem_active_get(&active[i],
> +					  &obj->base.dev->struct_mutex);
> +		if (req)
> +			requests[n++] = req;
> +	}
> +
> +	mutex_unlock(&dev->struct_mutex);
> +	ret = 0;
> +	for (i = 0; ret == 0 && i < n; i++)
> +		ret = i915_wait_request(requests[i], true, NULL, rps);
> +	mutex_lock(&dev->struct_mutex);
> +
> +	for (i = 0; i < n; i++)
> +		i915_gem_request_put(requests[i]);
> +
> +	return ret;
> +}
> +
> +static struct intel_rps_client *to_rps_client(struct drm_file *file)
> +{
> +	struct drm_i915_file_private *fpriv = file->driver_priv;
> +
> +	return &fpriv->rps;
> +}
> +
>  int
>  i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>  			    int align)
> @@ -1335,107 +1436,6 @@ put_rpm:
>  	return ret;
>  }
>  
> -/**
> - * Ensures that all rendering to the object has completed and the object is
> - * safe to unbind from the GTT or access from the CPU.
> - * @obj: i915 gem object
> - * @readonly: waiting for read access or write
> - */
> -int
> -i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
> -			       bool readonly)
> -{
> -	struct reservation_object *resv;
> -	struct i915_gem_active *active;
> -	unsigned long active_mask;
> -	int idx, ret;
> -
> -	lockdep_assert_held(&obj->base.dev->struct_mutex);
> -
> -	if (!readonly) {
> -		active = obj->last_read;
> -		active_mask = i915_gem_object_is_active(obj);
> -	} else {
> -		active_mask = 1;
> -		active = &obj->last_write;
> -	}
> -
> -	for_each_active(active_mask, idx) {
> -		ret = i915_gem_active_wait(&active[idx],
> -					   &obj->base.dev->struct_mutex);
> -		if (ret)
> -			return ret;
> -	}
> -
> -	resv = i915_gem_object_get_dmabuf_resv(obj);
> -	if (resv) {
> -		long err;
> -
> -		err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
> -							  MAX_SCHEDULE_TIMEOUT);
> -		if (err < 0)
> -			return err;
> -	}
> -
> -	return 0;
> -}
> -
> -/* A nonblocking variant of the above wait. This is a highly dangerous routine
> - * as the object state may change during this call.
> - */
> -static __must_check int
> -i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
> -					    struct intel_rps_client *rps,
> -					    bool readonly)
> -{
> -	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
> -	struct i915_gem_active *active;
> -	unsigned long active_mask;
> -	int ret, i, n = 0;
> -
> -	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
> -	BUG_ON(!dev_priv->mm.interruptible);
> -
> -	active_mask = i915_gem_object_is_active(obj);
> -	if (!active_mask)
> -		return 0;
> -
> -	if (!readonly) {
> -		active = obj->last_read;
> -	} else {
> -		active_mask = 1;
> -		active = &obj->last_write;
> -	}
> -
> -	for_each_active(active_mask, i) {
> -		struct drm_i915_gem_request *req;
> -
> -		req = i915_gem_active_get(&active[i],
> -					  &obj->base.dev->struct_mutex);
> -		if (req)
> -			requests[n++] = req;
> -	}
> -
> -	mutex_unlock(&dev->struct_mutex);
> -	ret = 0;
> -	for (i = 0; ret == 0 && i < n; i++)
> -		ret = i915_wait_request(requests[i], true, NULL, rps);
> -	mutex_lock(&dev->struct_mutex);
> -
> -	for (i = 0; i < n; i++)
> -		i915_gem_request_put(requests[i]);
> -
> -	return ret;
> -}
> -
> -static struct intel_rps_client *to_rps_client(struct drm_file *file)
> -{
> -	struct drm_i915_file_private *fpriv = file->driver_priv;
> -	return &fpriv->rps;
> -}
> -
>  static enum fb_op_origin
>  write_origin(struct drm_i915_gem_object *obj, unsigned domain)
>  {
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 17/22] drm/i915: Use atomics to manipulate obj->frontbuffer_bits
  2016-07-27 11:14 ` [PATCH 17/22] drm/i915: Use atomics to manipulate obj->frontbuffer_bits Chris Wilson
@ 2016-07-28  9:49   ` Joonas Lahtinen
  2016-07-28 10:10     ` Chris Wilson
  2016-07-28 10:05   ` Daniel Vetter
  1 sibling, 1 reply; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28  9:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Daniel Vetter

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
>  static int i915_gem_object_list_info(struct seq_file *m, void *data)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a24d31e3e014..b6b9a1f78238 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2127,8 +2127,6 @@ struct drm_i915_gem_object_ops {
>   */
>  #define INTEL_MAX_SPRITE_BITS_PER_PIPE 5
>  #define INTEL_FRONTBUFFER_BITS_PER_PIPE 8
> -#define INTEL_FRONTBUFFER_BITS \
> -	(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES)

Should we have a BUILD_BUG_ON to make sure the bits still fit?
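
Something like this minimal sketch, say in i915_gem_track_fb() (the 32
assumes frontbuffer_bits stays an atomic_t):

	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > 32);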

>  #define INTEL_FRONTBUFFER_PRIMARY(pipe) \
>  	(1 << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
>  #define INTEL_FRONTBUFFER_CURSOR(pipe) \
> @@ -2216,7 +2214,7 @@ struct drm_i915_gem_object {
>  	unsigned int cache_level:3;
>  	unsigned int cache_dirty:1;
>  
> -	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
> +	atomic_t frontbuffer_bits;
>  
>  	unsigned int has_wc_mmap;
>  	/** Count of VMA actually bound by this object */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7db0808f6961..bc5bc5ccdde0 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4031,7 +4031,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	if (obj->stolen)
>  		i915_gem_object_unpin_pages(obj);
>  
> -	WARN_ON(obj->frontbuffer_bits);
> +	WARN_ON(atomic_read(&obj->frontbuffer_bits));
>  
>  	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
>  	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
> @@ -4549,16 +4549,20 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
>  		       struct drm_i915_gem_object *new,
>  		       unsigned frontbuffer_bits)
>  {
> +	/* Control of individual bits within the bitfield are guarded by

'bitfield' refers to a specific C construct, so it is not the
appropriate term here now that the bitfield itself is removed. In this
commit it is readable, but in future I think it will just be confusing.
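
For reference, the kind of update i915_gem_track_fb() now has to guard,
as a sketch (assuming the kernel's atomic_or/atomic_andnot helpers are
what the full patch uses):

	if (old)
		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	if (new)
		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);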

>  static void i9xx_update_primary_plane(struct drm_plane *primary,
> @@ -13807,19 +13808,12 @@ static void intel_atomic_track_fbs(struct drm_atomic_state *state)
>  {
>  	struct drm_plane_state *old_plane_state;
>  	struct drm_plane *plane;
> -	struct drm_i915_gem_object *obj, *old_obj;
> -	struct intel_plane *intel_plane;
>  	int i;
>  
> -	mutex_lock(&state->dev->struct_mutex);
> -	for_each_plane_in_state(state, plane, old_plane_state, i) {
> -		obj = intel_fb_obj(plane->state->fb);
> -		old_obj = intel_fb_obj(old_plane_state->fb);
> -		intel_plane = to_intel_plane(plane);
> -
> -		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
> -	}
> -	mutex_unlock(&state->dev->struct_mutex);
> +	for_each_plane_in_state(state, plane, old_plane_state, i)
> +		i915_gem_track_fb(intel_fb_obj(old_plane_state->fb),
> +				  intel_fb_obj(plane->state->fb),
> +				  to_intel_plane(plane)->frontbuffer_bit);
>  }
>  

These unrelated changes should not be squashed, methinks. I expect
less intermingling in the future.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 04/22] drm/i915: Remove request retirement before each batch
  2016-07-28  9:32     ` Chris Wilson
@ 2016-07-28  9:53       ` Joonas Lahtinen
  0 siblings, 0 replies; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28  9:53 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On to, 2016-07-28 at 10:32 +0100, Chris Wilson wrote:
> On Thu, Jul 28, 2016 at 11:32:47AM +0300, Joonas Lahtinen wrote:
> > 
> > On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> > > 
> > > This reimplements the denial-of-service protection against igt from
> > > commit 227f782e4667 ("drm/i915: Retire requests before creating a new
> > > one") and transfers the stall from before each batch into get_pages().
> > > The issue is that the stall is increasing latency between batches which
> > > is detrimental in some cases (especially coupled with execlists) to
> > > keeping the GPU well fed. Also we have made the observation that retiring
> > > requests can of itself free objects (and requests) and therefore makes
> > > a good first step when shrinking.
> > > 
> > > v2: Recycle objects prior to i915_gem_object_get_pages()
> > > v3: Remove the reference to the ring from i915_gem_retire_requests_ring() as it
> > > operates on an intel_engine_cs.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Was this tested for performance regressions?
> Yes. It fixed the latency issue from 227f782e4667, introduced an issue
> with page allocation for context/object creation which was papered over
> in v2. Since then requests (this series+) have become both more lazy and
> more economical, changing the latency characteristics for execbuf, which
> mitigates somewhat the issue found in v1. Thankfully, once we implement
> a separate mm lock, the freedom to do a full retirement before
> get_pages() is lost.
> 
> This series is intending (including the execbuf reworking) to fix the
> 2x-10x performance regression (platform dependent) we have in
> microbenchmarks (which corresponds to about 20% at the GL level in driver
> stress tests). However, execlists still remains ~8% slower than legacy
> submission (at the GL level).

Good, good; maybe put some of this in the commit message too.

Regards, Joonas

> -Chris
> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 04/22] drm/i915: Remove request retirement before each batch
  2016-07-27 11:14 ` [PATCH 04/22] drm/i915: Remove request retirement before each batch Chris Wilson
  2016-07-28  8:32   ` Joonas Lahtinen
@ 2016-07-28  9:54   ` Daniel Vetter
  2016-07-28 10:26     ` Chris Wilson
  1 sibling, 1 reply; 95+ messages in thread
From: Daniel Vetter @ 2016-07-28  9:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Jul 27, 2016 at 12:14:42PM +0100, Chris Wilson wrote:
> This reimplements the denial-of-service protection against igt from
> commit 227f782e4667 ("drm/i915: Retire requests before creating a new
> one") and transfers the stall from before each batch into get_pages().
> The issue is that the stall is increasing latency between batches which
> is detrimental in some cases (especially coupled with execlists) to
> keeping the GPU well fed. Also we have made the observation that retiring
> requests can of itself free objects (and requests) and therefore makes
> a good first step when shrinking.
> 
> v2: Recycle objects prior to i915_gem_object_get_pages()
> v3: Remove the reference to the ring from i915_gem_retire_requests_ring() as it
> operates on an intel_engine_cs.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            | 1 -
>  drivers/gpu/drm/i915/i915_gem.c            | 7 +++++--
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 --
>  drivers/gpu/drm/i915/i915_gem_request.c    | 4 ++--
>  4 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index fbda38f25c6b..2de3d16f7b80 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3169,7 +3169,6 @@ struct drm_i915_gem_request *
>  i915_gem_find_active_request(struct intel_engine_cs *engine);
>  
>  void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
> -void i915_gem_retire_requests_ring(struct intel_engine_cs *engine);
>  
>  static inline u32 i915_reset_counter(struct i915_gpu_error *error)
>  {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index bf652dc88024..68dbe4f7940c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2244,7 +2244,6 @@ int
>  i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>  {
>  	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
> -	const struct drm_i915_gem_object_ops *ops = obj->ops;
>  	int ret;
>  
>  	if (obj->pages)
> @@ -2257,7 +2256,10 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>  
>  	BUG_ON(obj->pages_pin_count);
>  
> -	ret = ops->get_pages(obj);
> +	/* Recycle as many active objects as possible first */
> +	i915_gem_retire_requests(dev_priv);
> +
> +	ret = obj->ops->get_pages(obj);

Why exactly do we need this?
- shmem objs already call shrink_all if they can't get at the memory
- everyone else doesn't care.

Even if we need this in some cases, it looks funny, since it splits the
memory cleanup between the caller and callee of get_pages.
-Daniel
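
For reference, the shmem fallback looks roughly like this today (a
paraphrased sketch, not the exact tree):

	page = shmem_read_mapping_page_gfp(mapping, i, gfp);
	if (IS_ERR(page)) {
		/* shrink and retry before giving up */
		i915_gem_shrink_all(dev_priv);
		page = shmem_read_mapping_page(mapping, i);
	}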

>  	if (ret)
>  		return ret;
>  
> @@ -4437,6 +4439,7 @@ i915_gem_cleanup_engines(struct drm_device *dev)
>  static void
>  init_engine_lists(struct intel_engine_cs *engine)
>  {
> +	/* Early initialisation so that core GEM works during engine setup */
>  	INIT_LIST_HEAD(&engine->request_list);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 5e3b5054f72d..0593ea3ba211 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -781,8 +781,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
>  	bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
>  	int retry;
>  
> -	i915_gem_retire_requests_ring(engine);
> -
>  	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
>  
>  	INIT_LIST_HEAD(&ordered_vmas);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 07f08e546915..3395c955a532 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -717,7 +717,7 @@ complete:
>  	return ret;
>  }
>  
> -void i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
> +static void engine_retire_requests(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_gem_request *request, *next;
>  
> @@ -741,7 +741,7 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
>  	GEM_BUG_ON(!dev_priv->gt.awake);
>  
>  	for_each_engine(engine, dev_priv) {
> -		i915_gem_retire_requests_ring(engine);
> +		engine_retire_requests(engine);
>  		if (list_empty(&engine->request_list))
>  			dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
>  	}
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/22] drm/i915: Remove i915_gem_execbuffer_retire_commands()
  2016-07-28  8:55     ` Chris Wilson
@ 2016-07-28  9:54       ` Joonas Lahtinen
  0 siblings, 0 replies; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28  9:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On to, 2016-07-28 at 09:55 +0100, Chris Wilson wrote:
> On Thu, Jul 28, 2016 at 11:46:30AM +0300, Joonas Lahtinen wrote:
> > 
> > On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> > > 
> > > Move the single line to the callsite as the name is now misleading, and
> > > the purpose is solely to add the request to the execution queue.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > ---
> > >  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +--------
> > >  1 file changed, 1 insertion(+), 8 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > > index 0593ea3ba211..63984c4d8e5a 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > > @@ -1211,13 +1211,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
> > >  	}
> > >  }
> > >  
> > > -static void
> > > -i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
> > > -{
> > > -	/* Add a breadcrumb for the completion of the batch buffer */
> > > -	__i915_add_request(params->request, params->batch_obj, true);
> > > -}
> > > -
> > >  static int
> > >  i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
> > >  {
> > > @@ -1692,7 +1685,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> > >  
> > >  	ret = execbuf_submit(params, args, &eb->vmas);
> > >  err_request:
> > > -	i915_gem_execbuffer_retire_commands(params);
> > > +	__i915_add_request(params->request, params->batch_obj, ret == 0);
> > This adds new behavior: no flush is emitted if execbuf fails to submit.
> > I guess it is intentional? Do mention it in the commit message.
> Yes, if we fail to actually emit the execbuf, we don't need to emit
> the flush afterwards as we don't perform any rendering. Later on we will
> be in a position to detect a request that does nothing and is not
> observed, and to unwind it.

This belongs in the commit message too; then all good.

Regards, Joonas

> -Chris
> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 07/22] drm/i915: Pad GTT views of exec objects up to user specified size
  2016-07-27 11:14 ` [PATCH 07/22] drm/i915: Pad GTT views of exec objects up to user specified size Chris Wilson
@ 2016-07-28  9:55   ` Daniel Vetter
  2016-07-28 10:33     ` Chris Wilson
  2016-07-29  7:59   ` Joonas Lahtinen
  1 sibling, 1 reply; 95+ messages in thread
From: Daniel Vetter @ 2016-07-28  9:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Jul 27, 2016 at 12:14:45PM +0100, Chris Wilson wrote:
> Our GPUs impose certain requirements upon buffers that depend upon how
> exactly they are used. Typically this is expressed as a requirement for
> a larger surface than would be naively computed by pitch * height.
> Normally such requirements are hidden away in the userspace driver, but
> when we accept pointers from strangers and later impose extra conditions
> on them, the original client allocator has no idea about the
> monstrosities in the GPU and we require the userspace driver to inform
> the kernel how many padding pages are required beyond the client
> allocation.
> 
> v2: Long time, no see
> v3: Try an anonymous union for uapi struct compatibility
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

This is missing the testcase line. Also, a short link to the
libva/opencl/whatever patches would be good too.
-Daniel
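
For reference, the intended userspace usage is along these lines (a
sketch using the uapi names from the hunks below; handle and sizes
illustrative):

	struct drm_i915_gem_exec_object2 obj = {
		.handle = bo_handle,
		.flags = EXEC_OBJECT_PAD_TO_SIZE,
		.pad_to_size = user_size + gpu_padding, /* page-aligned */
	};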

> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  6 ++-
>  drivers/gpu/drm/i915/i915_gem.c            | 79 ++++++++++++++----------------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++-
>  include/uapi/drm/i915_drm.h                |  8 ++-
>  4 files changed, 62 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 74a31358fd87..1e1369319326 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3032,11 +3032,13 @@ void i915_gem_free_object(struct drm_gem_object *obj);
>  int __must_check
>  i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  		    struct i915_address_space *vm,
> +		    u64 size,
>  		    u64 alignment,
>  		    u64 flags);
>  int __must_check
>  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  			 const struct i915_ggtt_view *view,
> +			 u64 size,
>  			 u64 alignment,
>  			 u64 flags);
>  
> @@ -3313,8 +3315,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
>  	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>  	struct i915_ggtt *ggtt = &dev_priv->ggtt;
>  
> -	return i915_gem_object_pin(obj, &ggtt->base,
> -				   alignment, flags | PIN_GLOBAL);
> +	return i915_gem_object_pin(obj, &ggtt->base, 0, alignment,
> +				   flags | PIN_GLOBAL);
>  }
>  
>  void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index c4df44b47cea..2147225e7887 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1689,7 +1689,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	}
>  
>  	/* Now pin it into the GTT if needed */
> -	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
> +	ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
>  	if (ret)
>  		goto unlock;
>  
> @@ -2969,21 +2969,20 @@ static struct i915_vma *
>  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  			   struct i915_address_space *vm,
>  			   const struct i915_ggtt_view *ggtt_view,
> +			   u64 size,
>  			   u64 alignment,
>  			   u64 flags)
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	struct drm_i915_private *dev_priv = to_i915(dev);
> -	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> -	u32 fence_alignment, unfenced_alignment;
> -	u32 search_flag, alloc_flag;
>  	u64 start, end;
> -	u64 size, fence_size;
> +	u32 search_flag, alloc_flag;
>  	struct i915_vma *vma;
>  	int ret;
>  
>  	if (i915_is_ggtt(vm)) {
> -		u32 view_size;
> +		u32 fence_size, fence_alignment, unfenced_alignment;
> +		u64 view_size;
>  
>  		if (WARN_ON(!ggtt_view))
>  			return ERR_PTR(-EINVAL);
> @@ -3001,48 +3000,39 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  								view_size,
>  								obj->tiling_mode,
>  								false);
> -		size = flags & PIN_MAPPABLE ? fence_size : view_size;
> +		size = max(size, view_size);
> +		if (flags & PIN_MAPPABLE)
> +			size = max_t(u64, size, fence_size);
> +
> +		if (alignment == 0)
> +			alignment = flags & PIN_MAPPABLE ? fence_alignment :
> +				unfenced_alignment;
> +		if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
> +			DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n",
> +				  ggtt_view ? ggtt_view->type : 0,
> +				  (long long)alignment);
> +			return ERR_PTR(-EINVAL);
> +		}
>  	} else {
> -		fence_size = i915_gem_get_gtt_size(dev,
> -						   obj->base.size,
> -						   obj->tiling_mode);
> -		fence_alignment = i915_gem_get_gtt_alignment(dev,
> -							     obj->base.size,
> -							     obj->tiling_mode,
> -							     true);
> -		unfenced_alignment =
> -			i915_gem_get_gtt_alignment(dev,
> -						   obj->base.size,
> -						   obj->tiling_mode,
> -						   false);
> -		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
> +		size = max_t(u64, size, obj->base.size);
> +		alignment = 4096;
>  	}
>  
>  	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
>  	end = vm->total;
>  	if (flags & PIN_MAPPABLE)
> -		end = min_t(u64, end, ggtt->mappable_end);
> +		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
>  	if (flags & PIN_ZONE_4G)
>  		end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
>  
> -	if (alignment == 0)
> -		alignment = flags & PIN_MAPPABLE ? fence_alignment :
> -						unfenced_alignment;
> -	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
> -		DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n",
> -			  ggtt_view ? ggtt_view->type : 0,
> -			  (long long)alignment);
> -		return ERR_PTR(-EINVAL);
> -	}
> -
>  	/* If binding the object/GGTT view requires more space than the entire
>  	 * aperture has, reject it early before evicting everything in a vain
>  	 * attempt to find space.
>  	 */
>  	if (size > end) {
> -		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
> +		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
>  			  ggtt_view ? ggtt_view->type : 0,
> -			  size,
> +			  size, obj->base.size,
>  			  flags & PIN_MAPPABLE ? "mappable" : "total",
>  			  end);
>  		return ERR_PTR(-E2BIG);
> @@ -3536,7 +3526,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	 * (e.g. libkms for the bootup splash), we have to ensure that we
>  	 * always use map_and_fenceable for all scanout buffers.
>  	 */
> -	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
> +	ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
>  				       view->type == I915_GGTT_VIEW_NORMAL ?
>  				       PIN_MAPPABLE : 0);
>  	if (ret)
> @@ -3688,12 +3678,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>  }
>  
>  static bool
> -i915_vma_misplaced(struct i915_vma *vma, u64 alignment, u64 flags)
> +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>  {
>  	struct drm_i915_gem_object *obj = vma->obj;
>  
> -	if (alignment &&
> -	    vma->node.start & (alignment - 1))
> +	if (vma->node.size < size)
> +		return true;
> +
> +	if (alignment && vma->node.start & (alignment - 1))
>  		return true;
>  
>  	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
> @@ -3737,6 +3729,7 @@ static int
>  i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  		       struct i915_address_space *vm,
>  		       const struct i915_ggtt_view *ggtt_view,
> +		       u64 size,
>  		       u64 alignment,
>  		       u64 flags)
>  {
> @@ -3764,7 +3757,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
>  			return -EBUSY;
>  
> -		if (i915_vma_misplaced(vma, alignment, flags)) {
> +		if (i915_vma_misplaced(vma, size, alignment, flags)) {
>  			WARN(vma->pin_count,
>  			     "bo is already pinned in %s with incorrect alignment:"
>  			     " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
> @@ -3785,8 +3778,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  
>  	bound = vma ? vma->bound : 0;
>  	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
> -		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
> -						 flags);
> +		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
> +						 size, alignment, flags);
>  		if (IS_ERR(vma))
>  			return PTR_ERR(vma);
>  	} else {
> @@ -3808,17 +3801,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  int
>  i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  		    struct i915_address_space *vm,
> +		    u64 size,
>  		    u64 alignment,
>  		    u64 flags)
>  {
>  	return i915_gem_object_do_pin(obj, vm,
>  				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
> -				      alignment, flags);
> +				      size, alignment, flags);
>  }
>  
>  int
>  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  			 const struct i915_ggtt_view *view,
> +			 u64 size,
>  			 u64 alignment,
>  			 u64 flags)
>  {
> @@ -3829,7 +3824,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  	BUG_ON(!view);
>  
>  	return i915_gem_object_do_pin(obj, &ggtt->base, view,
> -				      alignment, flags | PIN_GLOBAL);
> +				      size, alignment, flags | PIN_GLOBAL);
>  }
>  
>  void
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 63984c4d8e5a..f40fd7f9e5fa 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -682,10 +682,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  			flags |= PIN_HIGH;
>  	}
>  
> -	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
> +	ret = i915_gem_object_pin(obj, vma->vm,
> +				  entry->pad_to_size,
> +				  entry->alignment,
> +				  flags);
>  	if ((ret == -ENOSPC  || ret == -E2BIG) &&
>  	    only_mappable_for_reloc(entry->flags))
>  		ret = i915_gem_object_pin(obj, vma->vm,
> +					  entry->pad_to_size,
>  					  entry->alignment,
>  					  flags & ~PIN_MAPPABLE);
>  	if (ret)
> @@ -748,6 +752,9 @@ eb_vma_misplaced(struct i915_vma *vma)
>  	    vma->node.start & (entry->alignment - 1))
>  		return true;
>  
> +	if (vma->node.size < entry->pad_to_size)
> +		return true;
> +
>  	if (entry->flags & EXEC_OBJECT_PINNED &&
>  	    vma->node.start != entry->offset)
>  		return true;
> @@ -1091,6 +1098,13 @@ validate_exec_list(struct drm_device *dev,
>  		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
>  			return -EINVAL;
>  
> +		/* pad_to_size was once a reserved field, so sanitize it */
> +		if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
> +			if (offset_in_page(exec[i].pad_to_size))
> +				return -EINVAL;
> +		} else
> +			exec[i].pad_to_size = 0;
> +
>  		/* First check for malicious input causing overflow in
>  		 * the worst case where we need to allocate the entire
>  		 * relocation tree as a single array.
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 33ce5ff9556a..0f292733cffc 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -727,11 +727,15 @@ struct drm_i915_gem_exec_object2 {
>  #define EXEC_OBJECT_WRITE		 (1<<2)
>  #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
>  #define EXEC_OBJECT_PINNED		 (1<<4)
> +#define EXEC_OBJECT_PAD_TO_SIZE		 (1<<5)
>  /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS	(-(EXEC_OBJECT_PINNED<<1))
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
>  	__u64 flags;
>  
> -	__u64 rsvd1;
> +	union {
> +		__u64 rsvd1;
> +		__u64 pad_to_size;
> +	};
>  	__u64 rsvd2;
>  };
>  
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 16/22] drm/i915: Make fb_tracking.lock a spinlock
  2016-07-27 11:14 ` [PATCH 16/22] drm/i915: Make fb_tracking.lock a spinlock Chris Wilson
@ 2016-07-28 10:02   ` Daniel Vetter
  2016-07-28 10:08     ` Daniel Vetter
  2016-07-29  8:25     ` Chris Wilson
  2016-07-28 10:19   ` Joonas Lahtinen
  1 sibling, 2 replies; 95+ messages in thread
From: Daniel Vetter @ 2016-07-28 10:02 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Jul 27, 2016 at 12:14:54PM +0100, Chris Wilson wrote:
> We only need a very lightweight mechanism here as the locking is only
> used for co-ordinating a bitfield.
> 
> v2: Move the cheap unlikely tests into the caller
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

I think the code shuffling in here badly breaks the kerneldoc. The best
fix would be to extract a small header for frontbuffer tracking and pull
that into the kernel doc. Much less preferred is explicitly pulling in
the kerneldoc function-by-function (but that tends to be rather fragile
when someone adds something new).
-Daniel
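
E.g., assuming we do extract an intel_frontbuffer.h, something along
these lines in the gpu DocBook (directives illustrative, following the
existing gpu.tmpl style):

	!Pdrivers/gpu/drm/i915/intel_frontbuffer.h frontbuffer tracking
	!Idrivers/gpu/drm/i915/intel_frontbuffer.h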

> ---
>  drivers/gpu/drm/i915/i915_drv.h          |  2 +-
>  drivers/gpu/drm/i915/i915_gem.c          |  2 +-
>  drivers/gpu/drm/i915/intel_drv.h         | 29 ++++++++++++++---
>  drivers/gpu/drm/i915/intel_frontbuffer.c | 54 ++++++++++++++------------------
>  4 files changed, 51 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3a68e604ad10..a24d31e3e014 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1669,7 +1669,7 @@ struct intel_pipe_crc {
>  };
>  
>  struct i915_frontbuffer_tracking {
> -	struct mutex lock;
> +	spinlock_t lock;
>  
>  	/*
>  	 * Tracking bits for delayed frontbuffer flushing du to gpu activity or
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1fb958dcc749..7db0808f6961 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4447,7 +4447,7 @@ i915_gem_load_init(struct drm_device *dev)
>  
>  	dev_priv->mm.interruptible = true;
>  
> -	mutex_init(&dev_priv->fb_tracking.lock);
> +	spin_lock_init(&dev_priv->fb_tracking.lock);
>  }
>  
>  void i915_gem_load_cleanup(struct drm_device *dev)
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index e74d851868c5..01056ce8d461 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1135,8 +1135,6 @@ void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state);
>  uint32_t ddi_signal_levels(struct intel_dp *intel_dp);
>  
>  /* intel_frontbuffer.c */
> -void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
> -			     enum fb_op_origin origin);
>  void intel_frontbuffer_flip_prepare(struct drm_device *dev,
>  				    unsigned frontbuffer_bits);
>  void intel_frontbuffer_flip_complete(struct drm_device *dev,
> @@ -1147,8 +1145,31 @@ unsigned int intel_fb_align_height(struct drm_device *dev,
>  				   unsigned int height,
>  				   uint32_t pixel_format,
>  				   uint64_t fb_format_modifier);
> -void intel_fb_obj_flush(struct drm_i915_gem_object *obj, bool retire,
> -			enum fb_op_origin origin);
> +
> +void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
> +			       enum fb_op_origin origin);
> +static inline void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
> +					   enum fb_op_origin origin)
> +{
> +	if (!obj->frontbuffer_bits)
> +		return;
> +
> +	__intel_fb_obj_invalidate(obj, origin);
> +}
> +
> +void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
> +			  bool retire,
> +			  enum fb_op_origin origin);
> +static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
> +				      bool retire,
> +				      enum fb_op_origin origin)
> +{
> +	if (!obj->frontbuffer_bits)
> +		return;
> +
> +	__intel_fb_obj_flush(obj, retire, origin);
> +}
> +
>  u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv,
>  			      uint64_t fb_modifier, uint32_t pixel_format);
>  
> diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
> index ac85357010b4..a38ccfe4894a 100644
> --- a/drivers/gpu/drm/i915/intel_frontbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
> @@ -76,24 +76,19 @@
>   * until the rendering completes or a flip on this frontbuffer plane is
>   * scheduled.
>   */
> -void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
> -			     enum fb_op_origin origin)
> +void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
> +			       enum fb_op_origin origin)
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  
>  	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
>  
> -	if (!obj->frontbuffer_bits)
> -		return;
> -
>  	if (origin == ORIGIN_CS) {
> -		mutex_lock(&dev_priv->fb_tracking.lock);
> -		dev_priv->fb_tracking.busy_bits
> -			|= obj->frontbuffer_bits;
> -		dev_priv->fb_tracking.flip_bits
> -			&= ~obj->frontbuffer_bits;
> -		mutex_unlock(&dev_priv->fb_tracking.lock);
> +		spin_lock(&dev_priv->fb_tracking.lock);
> +		dev_priv->fb_tracking.busy_bits |= obj->frontbuffer_bits;
> +		dev_priv->fb_tracking.flip_bits &= ~obj->frontbuffer_bits;
> +		spin_unlock(&dev_priv->fb_tracking.lock);
>  	}
>  
>  	intel_psr_invalidate(dev, obj->frontbuffer_bits);
> @@ -120,11 +115,11 @@ static void intel_frontbuffer_flush(struct drm_device *dev,
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  
>  	/* Delay flushing when rings are still busy. */
> -	mutex_lock(&dev_priv->fb_tracking.lock);
> +	spin_lock(&dev_priv->fb_tracking.lock);
>  	frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits;
> -	mutex_unlock(&dev_priv->fb_tracking.lock);
> +	spin_unlock(&dev_priv->fb_tracking.lock);
>  
> -	if (!frontbuffer_bits)
> +	if (frontbuffer_bits == 0)
>  		return;
>  
>  	intel_edp_drrs_flush(dev, frontbuffer_bits);
> @@ -142,8 +137,9 @@ static void intel_frontbuffer_flush(struct drm_device *dev,
>   * completed and frontbuffer caching can be started again. If @retire is true
>   * then any delayed flushes will be unblocked.
>   */
> -void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
> -			bool retire, enum fb_op_origin origin)
> +void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
> +			  bool retire,
> +			  enum fb_op_origin origin)
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	struct drm_i915_private *dev_priv = to_i915(dev);
> @@ -151,21 +147,18 @@ void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
>  
>  	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
>  
> -	if (!obj->frontbuffer_bits)
> -		return;
> -
>  	frontbuffer_bits = obj->frontbuffer_bits;
>  
>  	if (retire) {
> -		mutex_lock(&dev_priv->fb_tracking.lock);
> +		spin_lock(&dev_priv->fb_tracking.lock);
>  		/* Filter out new bits since rendering started. */
>  		frontbuffer_bits &= dev_priv->fb_tracking.busy_bits;
> -
>  		dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
> -		mutex_unlock(&dev_priv->fb_tracking.lock);
> +		spin_unlock(&dev_priv->fb_tracking.lock);
>  	}
>  
> -	intel_frontbuffer_flush(dev, frontbuffer_bits, origin);
> +	if (frontbuffer_bits)
> +		intel_frontbuffer_flush(dev, frontbuffer_bits, origin);
>  }
>  
>  /**
> @@ -185,11 +178,11 @@ void intel_frontbuffer_flip_prepare(struct drm_device *dev,
>  {
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  
> -	mutex_lock(&dev_priv->fb_tracking.lock);
> +	spin_lock(&dev_priv->fb_tracking.lock);
>  	dev_priv->fb_tracking.flip_bits |= frontbuffer_bits;
>  	/* Remove stale busy bits due to the old buffer. */
>  	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
> -	mutex_unlock(&dev_priv->fb_tracking.lock);
> +	spin_unlock(&dev_priv->fb_tracking.lock);
>  
>  	intel_psr_single_frame_update(dev, frontbuffer_bits);
>  }
> @@ -209,13 +202,14 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev,
>  {
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  
> -	mutex_lock(&dev_priv->fb_tracking.lock);
> +	spin_lock(&dev_priv->fb_tracking.lock);
>  	/* Mask any cancelled flips. */
>  	frontbuffer_bits &= dev_priv->fb_tracking.flip_bits;
>  	dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits;
> -	mutex_unlock(&dev_priv->fb_tracking.lock);
> +	spin_unlock(&dev_priv->fb_tracking.lock);
>  
> -	intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
> +	if (frontbuffer_bits)
> +		intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
>  }
>  
>  /**
> @@ -234,10 +228,10 @@ void intel_frontbuffer_flip(struct drm_device *dev,
>  {
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  
> -	mutex_lock(&dev_priv->fb_tracking.lock);
> +	spin_lock(&dev_priv->fb_tracking.lock);
>  	/* Remove stale busy bits due to the old buffer. */
>  	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
> -	mutex_unlock(&dev_priv->fb_tracking.lock);
> +	spin_unlock(&dev_priv->fb_tracking.lock);
>  
>  	intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
>  }
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 17/22] drm/i915: Use atomics to manipulate obj->frontbuffer_bits
  2016-07-27 11:14 ` [PATCH 17/22] drm/i915: Use atomics to manipulate obj->frontbuffer_bits Chris Wilson
  2016-07-28  9:49   ` Joonas Lahtinen
@ 2016-07-28 10:05   ` Daniel Vetter
  1 sibling, 0 replies; 95+ messages in thread
From: Daniel Vetter @ 2016-07-28 10:05 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx

On Wed, Jul 27, 2016 at 12:14:55PM +0100, Chris Wilson wrote:
> The individual bits inside obj->frontbuffer_bits are protected by each
> plane->mutex, but the whole bitfield may be accessed by multiple KMS
> operations simultaneously and so the RMW needs to be under atomics.
> However, for updating the single field we do not need to mandate that it
> be under the struct_mutex, one more step towards its removal as the de
> facto BKL.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
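
For anyone skimming the diff below: the pattern being introduced is the
stock atomic RMW one. Roughly, with stand-in names (this is a sketch,
not the driver code itself):

	#include <linux/atomic.h>

	static atomic_t obj_bits;	/* stands in for obj->frontbuffer_bits */

	static void track_fb(unsigned int old_bit, unsigned int new_bit)
	{
		/* Each bit is owned by a single plane->mutex, but the
		 * word as a whole is shared, so the read-modify-write
		 * must be one indivisible step.
		 */
		atomic_andnot(old_bit, &obj_bits);
		atomic_or(new_bit, &obj_bits);
	}

	static unsigned int snapshot_fb_bits(void)
	{
		/* Readers take a one-off snapshot of the whole mask. */
		return atomic_read(&obj_bits);
	}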

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c      |  6 ++++--
>  drivers/gpu/drm/i915/i915_drv.h          |  4 +---
>  drivers/gpu/drm/i915/i915_gem.c          | 18 +++++++++++-------
>  drivers/gpu/drm/i915/intel_display.c     | 18 ++++++------------
>  drivers/gpu/drm/i915/intel_drv.h         | 20 ++++++++++++++------
>  drivers/gpu/drm/i915/intel_frontbuffer.c | 23 +++++++++--------------
>  6 files changed, 45 insertions(+), 44 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index fcfa9ca6b50a..10a346237795 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -138,6 +138,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>  	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>  	struct intel_engine_cs *engine;
>  	struct i915_vma *vma;
> +	unsigned int frontbuffer_bits;
>  	int pin_count = 0;
>  	enum intel_engine_id id;
>  
> @@ -204,8 +205,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>  	if (engine)
>  		seq_printf(m, " (%s)", engine->name);
>  
> -	if (obj->frontbuffer_bits)
> -		seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
> +	frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
> +	if (frontbuffer_bits)
> +		seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits);
>  }
>  
>  static int i915_gem_object_list_info(struct seq_file *m, void *data)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a24d31e3e014..b6b9a1f78238 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2127,8 +2127,6 @@ struct drm_i915_gem_object_ops {
>   */
>  #define INTEL_MAX_SPRITE_BITS_PER_PIPE 5
>  #define INTEL_FRONTBUFFER_BITS_PER_PIPE 8
> -#define INTEL_FRONTBUFFER_BITS \
> -	(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES)
>  #define INTEL_FRONTBUFFER_PRIMARY(pipe) \
>  	(1 << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
>  #define INTEL_FRONTBUFFER_CURSOR(pipe) \
> @@ -2216,7 +2214,7 @@ struct drm_i915_gem_object {
>  	unsigned int cache_level:3;
>  	unsigned int cache_dirty:1;
>  
> -	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
> +	atomic_t frontbuffer_bits;
>  
>  	unsigned int has_wc_mmap;
>  	/** Count of VMA actually bound by this object */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7db0808f6961..bc5bc5ccdde0 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4031,7 +4031,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	if (obj->stolen)
>  		i915_gem_object_unpin_pages(obj);
>  
> -	WARN_ON(obj->frontbuffer_bits);
> +	WARN_ON(atomic_read(&obj->frontbuffer_bits));
>  
>  	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
>  	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
> @@ -4549,16 +4549,20 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
>  		       struct drm_i915_gem_object *new,
>  		       unsigned frontbuffer_bits)
>  {
> +	/* Control of individual bits within the bitfield is guarded by
> +	 * the owning plane->mutex, i.e. we can never see concurrent
> +	 * manipulation of individual bits. But since the bitfield as a whole
> +	 * is updated using RMW, we need to use atomics in order to update
> +	 * the bits.
> +	 */
>  	if (old) {
> -		WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
> -		WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
> -		old->frontbuffer_bits &= ~frontbuffer_bits;
> +		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
> +		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
>  	}
>  
>  	if (new) {
> -		WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
> -		WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
> -		new->frontbuffer_bits |= frontbuffer_bits;
> +		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
> +		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
>  	}
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index ed2069c56036..1c70f68328b4 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -2600,7 +2600,8 @@ valid_fb:
>  	primary->fb = primary->state->fb = fb;
>  	primary->crtc = primary->state->crtc = &intel_crtc->base;
>  	intel_crtc->base.state->plane_mask |= (1 << drm_plane_index(primary));
> -	obj->frontbuffer_bits |= to_intel_plane(primary)->frontbuffer_bit;
> +	atomic_or(to_intel_plane(primary)->frontbuffer_bit,
> +		  &obj->frontbuffer_bits);
>  }
>  
>  static void i9xx_update_primary_plane(struct drm_plane *primary,
> @@ -13807,19 +13808,12 @@ static void intel_atomic_track_fbs(struct drm_atomic_state *state)
>  {
>  	struct drm_plane_state *old_plane_state;
>  	struct drm_plane *plane;
> -	struct drm_i915_gem_object *obj, *old_obj;
> -	struct intel_plane *intel_plane;
>  	int i;
>  
> -	mutex_lock(&state->dev->struct_mutex);
> -	for_each_plane_in_state(state, plane, old_plane_state, i) {
> -		obj = intel_fb_obj(plane->state->fb);
> -		old_obj = intel_fb_obj(old_plane_state->fb);
> -		intel_plane = to_intel_plane(plane);
> -
> -		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
> -	}
> -	mutex_unlock(&state->dev->struct_mutex);
> +	for_each_plane_in_state(state, plane, old_plane_state, i)
> +		i915_gem_track_fb(intel_fb_obj(old_plane_state->fb),
> +				  intel_fb_obj(plane->state->fb),
> +				  to_intel_plane(plane)->frontbuffer_bit);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 01056ce8d461..5294039cf238 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1147,27 +1147,35 @@ unsigned int intel_fb_align_height(struct drm_device *dev,
>  				   uint64_t fb_format_modifier);
>  
>  void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
> -			       enum fb_op_origin origin);
> +			       enum fb_op_origin origin,
> +			       unsigned int frontbuffer_bits);
>  static inline void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
>  					   enum fb_op_origin origin)
>  {
> -	if (!obj->frontbuffer_bits)
> +	unsigned int frontbuffer_bits;
> +
> +	frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
> +	if (!frontbuffer_bits)
>  		return;
>  
> -	__intel_fb_obj_invalidate(obj, origin);
> +	__intel_fb_obj_invalidate(obj, origin, frontbuffer_bits);
>  }
>  
>  void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
>  			  bool retire,
> -			  enum fb_op_origin origin);
> +			  enum fb_op_origin origin,
> +			  unsigned int frontbuffer_bits);
>  static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
>  				      bool retire,
>  				      enum fb_op_origin origin)
>  {
> -	if (!obj->frontbuffer_bits)
> +	unsigned int frontbuffer_bits;
> +
> +	frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
> +	if (!frontbuffer_bits)
>  		return;
>  
> -	__intel_fb_obj_flush(obj, retire, origin);
> +	__intel_fb_obj_flush(obj, retire, origin, frontbuffer_bits);
>  }
>  
>  u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv,
> diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
> index a38ccfe4894a..636324da21c2 100644
> --- a/drivers/gpu/drm/i915/intel_frontbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
> @@ -77,23 +77,22 @@
>   * scheduled.
>   */
>  void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
> -			       enum fb_op_origin origin)
> +			       enum fb_op_origin origin,
> +			       unsigned int frontbuffer_bits)
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  
> -	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
> -
>  	if (origin == ORIGIN_CS) {
>  		spin_lock(&dev_priv->fb_tracking.lock);
> -		dev_priv->fb_tracking.busy_bits |= obj->frontbuffer_bits;
> -		dev_priv->fb_tracking.flip_bits &= ~obj->frontbuffer_bits;
> +		dev_priv->fb_tracking.busy_bits |= frontbuffer_bits;
> +		dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits;
>  		spin_unlock(&dev_priv->fb_tracking.lock);
>  	}
>  
> -	intel_psr_invalidate(dev, obj->frontbuffer_bits);
> -	intel_edp_drrs_invalidate(dev, obj->frontbuffer_bits);
> -	intel_fbc_invalidate(dev_priv, obj->frontbuffer_bits, origin);
> +	intel_psr_invalidate(dev, frontbuffer_bits);
> +	intel_edp_drrs_invalidate(dev, frontbuffer_bits);
> +	intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin);
>  }
>  
>  /**
> @@ -139,15 +138,11 @@ static void intel_frontbuffer_flush(struct drm_device *dev,
>   */
>  void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
>  			  bool retire,
> -			  enum fb_op_origin origin)
> +			  enum fb_op_origin origin,
> +			  unsigned int frontbuffer_bits)
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	struct drm_i915_private *dev_priv = to_i915(dev);
> -	unsigned frontbuffer_bits;
> -
> -	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
> -
> -	frontbuffer_bits = obj->frontbuffer_bits;
>  
>  	if (retire) {
>  		spin_lock(&dev_priv->fb_tracking.lock);
> -- 
> 2.8.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 18/22] drm/i915: Use dev_priv consistently through the intel_frontbuffer interface
  2016-07-27 11:14 ` [PATCH 18/22] drm/i915: Use dev_priv consistently through the intel_frontbuffer interface Chris Wilson
  2016-07-28  9:36   ` Joonas Lahtinen
@ 2016-07-28 10:06   ` Daniel Vetter
  1 sibling, 0 replies; 95+ messages in thread
From: Daniel Vetter @ 2016-07-28 10:06 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Jul 27, 2016 at 12:14:56PM +0100, Chris Wilson wrote:
> Rather than a mishmash of struct drm_device *dev and struct
> drm_i915_private *dev_priv being used freely within a function, be
> consistent and only pass along dev_priv.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Yeah, with the embedding there's really no reason any more not to use
dev_priv everywhere for internal functions.
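
(For context: the embedding means the conversion is free pointer
arithmetic. Roughly, as a sketch with unrelated fields elided:

	struct drm_i915_private {
		struct drm_device drm;	/* embedded, not pointed-to */
		/* ... */
	};

	static inline struct drm_i915_private *to_i915(struct drm_device *dev)
	{
		return container_of(dev, struct drm_i915_private, drm);
	}

so passing dev_priv along and deriving &dev_priv->drm on demand costs
nothing.)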

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> ---
>  drivers/gpu/drm/i915/intel_display.c     | 10 ++++----
>  drivers/gpu/drm/i915/intel_dp.c          | 14 +++++-------
>  drivers/gpu/drm/i915/intel_drv.h         | 21 +++++++++--------
>  drivers/gpu/drm/i915/intel_frontbuffer.c | 39 ++++++++++++--------------------
>  drivers/gpu/drm/i915/intel_overlay.c     |  3 +--
>  drivers/gpu/drm/i915/intel_psr.c         | 26 +++++++++------------
>  6 files changed, 49 insertions(+), 64 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 1c70f68328b4..9f15ced6fc7c 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -4565,12 +4565,11 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state)
>  	struct drm_atomic_state *old_state = old_crtc_state->base.state;
>  	struct intel_crtc_state *pipe_config =
>  		to_intel_crtc_state(crtc->base.state);
> -	struct drm_device *dev = crtc->base.dev;
>  	struct drm_plane *primary = crtc->base.primary;
>  	struct drm_plane_state *old_pri_state =
>  		drm_atomic_get_existing_plane_state(old_state, primary);
>  
> -	intel_frontbuffer_flip(dev, pipe_config->fb_bits);
> +	intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits);
>  
>  	crtc->wm.cxsr_allowed = true;
>  
> @@ -4693,7 +4692,7 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc, unsigned plane_mask
>  	 * to compute the mask of flip planes precisely. For the time being
>  	 * consider this a flip to a NULL plane.
>  	 */
> -	intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe));
> +	intel_frontbuffer_flip(to_i915(dev), INTEL_FRONTBUFFER_ALL_MASK(pipe));
>  }
>  
>  static void ironlake_crtc_enable(struct drm_crtc *crtc)
> @@ -10959,7 +10958,8 @@ static void intel_unpin_work_fn(struct work_struct *__work)
>  
>  	i915_gem_request_put(work->flip_queued_req);
>  
> -	intel_frontbuffer_flip_complete(dev, to_intel_plane(primary)->frontbuffer_bit);
> +	intel_frontbuffer_flip_complete(to_i915(dev),
> +					to_intel_plane(primary)->frontbuffer_bit);
>  	intel_fbc_post_update(crtc);
>  	drm_framebuffer_unreference(work->old_fb);
>  
> @@ -11734,7 +11734,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  			  to_intel_plane(primary)->frontbuffer_bit);
>  	mutex_unlock(&dev->struct_mutex);
>  
> -	intel_frontbuffer_flip_prepare(dev,
> +	intel_frontbuffer_flip_prepare(to_i915(dev),
>  				       to_intel_plane(primary)->frontbuffer_bit);
>  
>  	trace_i915_flip_request(intel_crtc->plane, obj);
> diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
> index 21b04c3eda41..2fd90d153fe7 100644
> --- a/drivers/gpu/drm/i915/intel_dp.c
> +++ b/drivers/gpu/drm/i915/intel_dp.c
> @@ -5186,7 +5186,7 @@ unlock:
>  
>  /**
>   * intel_edp_drrs_invalidate - Disable Idleness DRRS
> - * @dev: DRM device
> + * @dev_priv: i915 device
>   * @frontbuffer_bits: frontbuffer plane tracking bits
>   *
>   * This function gets called every time rendering on the given planes starts.
> @@ -5194,10 +5194,9 @@ unlock:
>   *
>   * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits.
>   */
> -void intel_edp_drrs_invalidate(struct drm_device *dev,
> -		unsigned frontbuffer_bits)
> +void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv,
> +			       unsigned frontbuffer_bits)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_crtc *crtc;
>  	enum pipe pipe;
>  
> @@ -5229,7 +5228,7 @@ void intel_edp_drrs_invalidate(struct drm_device *dev,
>  
>  /**
>   * intel_edp_drrs_flush - Restart Idleness DRRS
> - * @dev: DRM device
> + * @dev_priv: i915 device
>   * @frontbuffer_bits: frontbuffer plane tracking bits
>   *
>   * This function gets called every time rendering on the given planes has
> @@ -5239,10 +5238,9 @@ void intel_edp_drrs_invalidate(struct drm_device *dev,
>   *
>   * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits.
>   */
> -void intel_edp_drrs_flush(struct drm_device *dev,
> -		unsigned frontbuffer_bits)
> +void intel_edp_drrs_flush(struct drm_i915_private *dev_priv,
> +			  unsigned frontbuffer_bits)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_crtc *crtc;
>  	enum pipe pipe;
>  
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 5294039cf238..6f447d485db1 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1135,11 +1135,11 @@ void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state);
>  uint32_t ddi_signal_levels(struct intel_dp *intel_dp);
>  
>  /* intel_frontbuffer.c */
> -void intel_frontbuffer_flip_prepare(struct drm_device *dev,
> +void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv,
>  				    unsigned frontbuffer_bits);
> -void intel_frontbuffer_flip_complete(struct drm_device *dev,
> +void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv,
>  				     unsigned frontbuffer_bits);
> -void intel_frontbuffer_flip(struct drm_device *dev,
> +void intel_frontbuffer_flip(struct drm_i915_private *dev_priv,
>  			    unsigned frontbuffer_bits);
>  unsigned int intel_fb_align_height(struct drm_device *dev,
>  				   unsigned int height,
> @@ -1413,11 +1413,12 @@ uint32_t intel_dp_pack_aux(const uint8_t *src, int src_bytes);
>  void intel_plane_destroy(struct drm_plane *plane);
>  void intel_edp_drrs_enable(struct intel_dp *intel_dp);
>  void intel_edp_drrs_disable(struct intel_dp *intel_dp);
> -void intel_edp_drrs_invalidate(struct drm_device *dev,
> -		unsigned frontbuffer_bits);
> -void intel_edp_drrs_flush(struct drm_device *dev, unsigned frontbuffer_bits);
> +void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv,
> +			       unsigned frontbuffer_bits);
> +void intel_edp_drrs_flush(struct drm_i915_private *dev_priv,
> +			  unsigned frontbuffer_bits);
>  bool intel_digital_port_connected(struct drm_i915_private *dev_priv,
> -					 struct intel_digital_port *port);
> +				  struct intel_digital_port *port);
>  
>  void
>  intel_dp_program_link_training_pattern(struct intel_dp *intel_dp,
> @@ -1590,13 +1591,13 @@ static inline void intel_backlight_device_unregister(struct intel_connector *con
>  /* intel_psr.c */
>  void intel_psr_enable(struct intel_dp *intel_dp);
>  void intel_psr_disable(struct intel_dp *intel_dp);
> -void intel_psr_invalidate(struct drm_device *dev,
> +void intel_psr_invalidate(struct drm_i915_private *dev_priv,
>  			  unsigned frontbuffer_bits);
> -void intel_psr_flush(struct drm_device *dev,
> +void intel_psr_flush(struct drm_i915_private *dev_priv,
>  		     unsigned frontbuffer_bits,
>  		     enum fb_op_origin origin);
>  void intel_psr_init(struct drm_device *dev);
> -void intel_psr_single_frame_update(struct drm_device *dev,
> +void intel_psr_single_frame_update(struct drm_i915_private *dev_priv,
>  				   unsigned frontbuffer_bits);
>  
>  /* intel_runtime_pm.c */
> diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
> index 636324da21c2..42f718bb584c 100644
> --- a/drivers/gpu/drm/i915/intel_frontbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
> @@ -80,8 +80,7 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
>  			       enum fb_op_origin origin,
>  			       unsigned int frontbuffer_bits)
>  {
> -	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>  
>  	if (origin == ORIGIN_CS) {
>  		spin_lock(&dev_priv->fb_tracking.lock);
> @@ -90,8 +89,8 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
>  		spin_unlock(&dev_priv->fb_tracking.lock);
>  	}
>  
> -	intel_psr_invalidate(dev, frontbuffer_bits);
> -	intel_edp_drrs_invalidate(dev, frontbuffer_bits);
> +	intel_psr_invalidate(dev_priv, frontbuffer_bits);
> +	intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits);
>  	intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin);
>  }
>  
> @@ -107,12 +106,10 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
>   *
>   * Can be called without any locks held.
>   */
> -static void intel_frontbuffer_flush(struct drm_device *dev,
> +static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv,
>  				    unsigned frontbuffer_bits,
>  				    enum fb_op_origin origin)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -
>  	/* Delay flushing when rings are still busy. */
>  	spin_lock(&dev_priv->fb_tracking.lock);
>  	frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits;
> @@ -121,8 +118,8 @@ static void intel_frontbuffer_flush(struct drm_device *dev,
>  	if (frontbuffer_bits == 0)
>  		return;
>  
> -	intel_edp_drrs_flush(dev, frontbuffer_bits);
> -	intel_psr_flush(dev, frontbuffer_bits, origin);
> +	intel_edp_drrs_flush(dev_priv, frontbuffer_bits);
> +	intel_psr_flush(dev_priv, frontbuffer_bits, origin);
>  	intel_fbc_flush(dev_priv, frontbuffer_bits, origin);
>  }
>  
> @@ -141,8 +138,7 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
>  			  enum fb_op_origin origin,
>  			  unsigned int frontbuffer_bits)
>  {
> -	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>  
>  	if (retire) {
>  		spin_lock(&dev_priv->fb_tracking.lock);
> @@ -153,7 +149,7 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
>  	}
>  
>  	if (frontbuffer_bits)
> -		intel_frontbuffer_flush(dev, frontbuffer_bits, origin);
> +		intel_frontbuffer_flush(dev_priv, frontbuffer_bits, origin);
>  }
>  
>  /**
> @@ -168,18 +164,16 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
>   *
>   * Can be called without any locks held.
>   */
> -void intel_frontbuffer_flip_prepare(struct drm_device *dev,
> +void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv,
>  				    unsigned frontbuffer_bits)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -
>  	spin_lock(&dev_priv->fb_tracking.lock);
>  	dev_priv->fb_tracking.flip_bits |= frontbuffer_bits;
>  	/* Remove stale busy bits due to the old buffer. */
>  	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
>  	spin_unlock(&dev_priv->fb_tracking.lock);
>  
> -	intel_psr_single_frame_update(dev, frontbuffer_bits);
> +	intel_psr_single_frame_update(dev_priv, frontbuffer_bits);
>  }
>  
>  /**
> @@ -192,11 +186,9 @@ void intel_frontbuffer_flip_prepare(struct drm_device *dev,
>   *
>   * Can be called without any locks held.
>   */
> -void intel_frontbuffer_flip_complete(struct drm_device *dev,
> +void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv,
>  				     unsigned frontbuffer_bits)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -
>  	spin_lock(&dev_priv->fb_tracking.lock);
>  	/* Mask any cancelled flips. */
>  	frontbuffer_bits &= dev_priv->fb_tracking.flip_bits;
> @@ -204,7 +196,8 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev,
>  	spin_unlock(&dev_priv->fb_tracking.lock);
>  
>  	if (frontbuffer_bits)
> -		intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
> +		intel_frontbuffer_flush(dev_priv,
> +					frontbuffer_bits, ORIGIN_FLIP);
>  }
>  
>  /**
> @@ -218,15 +211,13 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev,
>   *
>   * Can be called without any locks held.
>   */
> -void intel_frontbuffer_flip(struct drm_device *dev,
> +void intel_frontbuffer_flip(struct drm_i915_private *dev_priv,
>  			    unsigned frontbuffer_bits)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -
>  	spin_lock(&dev_priv->fb_tracking.lock);
>  	/* Remove stale busy bits due to the old buffer. */
>  	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
>  	spin_unlock(&dev_priv->fb_tracking.lock);
>  
> -	intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
> +	intel_frontbuffer_flush(dev_priv, frontbuffer_bits, ORIGIN_FLIP);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index 217fefc49bf9..ad08df49ed48 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -839,8 +839,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
>  	overlay->old_vid_bo = overlay->vid_bo;
>  	overlay->vid_bo = new_bo;
>  
> -	intel_frontbuffer_flip(&dev_priv->drm,
> -			       INTEL_FRONTBUFFER_OVERLAY(pipe));
> +	intel_frontbuffer_flip(dev_priv, INTEL_FRONTBUFFER_OVERLAY(pipe));
>  
>  	return 0;
>  
> diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
> index 68bd0bb34817..adf2ce0f38c0 100644
> --- a/drivers/gpu/drm/i915/intel_psr.c
> +++ b/drivers/gpu/drm/i915/intel_psr.c
> @@ -628,9 +628,8 @@ unlock:
>  	mutex_unlock(&dev_priv->psr.lock);
>  }
>  
> -static void intel_psr_exit(struct drm_device *dev)
> +static void intel_psr_exit(struct drm_i915_private *dev_priv)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct intel_dp *intel_dp = dev_priv->psr.enabled;
>  	struct drm_crtc *crtc = dp_to_dig_port(intel_dp)->base.base.crtc;
>  	enum pipe pipe = to_intel_crtc(crtc)->pipe;
> @@ -639,7 +638,7 @@ static void intel_psr_exit(struct drm_device *dev)
>  	if (!dev_priv->psr.active)
>  		return;
>  
> -	if (HAS_DDI(dev)) {
> +	if (HAS_DDI(dev_priv)) {
>  		val = I915_READ(EDP_PSR_CTL);
>  
>  		WARN_ON(!(val & EDP_PSR_ENABLE));
> @@ -674,7 +673,7 @@ static void intel_psr_exit(struct drm_device *dev)
>  
>  /**
>   * intel_psr_single_frame_update - Single Frame Update
> - * @dev: DRM device
> + * @dev_priv: i915 device
>   * @frontbuffer_bits: frontbuffer plane tracking bits
>   *
>   * Some platforms support a single frame update feature that is used to
> @@ -682,10 +681,9 @@ static void intel_psr_exit(struct drm_device *dev)
>   * So far it is only implemented for Valleyview and Cherryview because
>   * hardware requires this to be done before a page flip.
>   */
> -void intel_psr_single_frame_update(struct drm_device *dev,
> +void intel_psr_single_frame_update(struct drm_i915_private *dev_priv,
>  				   unsigned frontbuffer_bits)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_crtc *crtc;
>  	enum pipe pipe;
>  	u32 val;
> @@ -694,7 +692,7 @@ void intel_psr_single_frame_update(struct drm_device *dev,
>  	 * Single frame update is already supported on BDW+ but it requires
>  	 * many W/A and it isn't really needed.
>  	 */
> -	if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev))
> +	if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv))
>  		return;
>  
>  	mutex_lock(&dev_priv->psr.lock);
> @@ -720,7 +718,7 @@ void intel_psr_single_frame_update(struct drm_device *dev,
>  
>  /**
>   * intel_psr_invalidate - Invalidate PSR
> - * @dev: DRM device
> + * @dev_priv: i915 device
>   * @frontbuffer_bits: frontbuffer plane tracking bits
>   *
>   * Since the hardware frontbuffer tracking has gaps we need to integrate
> @@ -730,10 +728,9 @@ void intel_psr_single_frame_update(struct drm_device *dev,
>   *
>   * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits.
>   */
> -void intel_psr_invalidate(struct drm_device *dev,
> +void intel_psr_invalidate(struct drm_i915_private *dev_priv,
>  			  unsigned frontbuffer_bits)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_crtc *crtc;
>  	enum pipe pipe;
>  
> @@ -750,14 +747,14 @@ void intel_psr_invalidate(struct drm_device *dev,
>  	dev_priv->psr.busy_frontbuffer_bits |= frontbuffer_bits;
>  
>  	if (frontbuffer_bits)
> -		intel_psr_exit(dev);
> +		intel_psr_exit(dev_priv);
>  
>  	mutex_unlock(&dev_priv->psr.lock);
>  }
>  
>  /**
>   * intel_psr_flush - Flush PSR
> - * @dev: DRM device
> + * @dev_priv: i915 device
>   * @frontbuffer_bits: frontbuffer plane tracking bits
>   * @origin: which operation caused the flush
>   *
> @@ -768,10 +765,9 @@ void intel_psr_invalidate(struct drm_device *dev,
>   *
>   * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits.
>   */
> -void intel_psr_flush(struct drm_device *dev,
> +void intel_psr_flush(struct drm_i915_private *dev_priv,
>  		     unsigned frontbuffer_bits, enum fb_op_origin origin)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_crtc *crtc;
>  	enum pipe pipe;
>  
> @@ -789,7 +785,7 @@ void intel_psr_flush(struct drm_device *dev,
>  
>  	/* By definition flush = invalidate + flush */
>  	if (frontbuffer_bits)
> -		intel_psr_exit(dev);
> +		intel_psr_exit(dev_priv);
>  
>  	if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits)
>  		if (!work_busy(&dev_priv->psr.work.work))
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 16/22] drm/i915: Make fb_tracking.lock a spinlock
  2016-07-28 10:02   ` Daniel Vetter
@ 2016-07-28 10:08     ` Daniel Vetter
  2016-07-29  8:25     ` Chris Wilson
  1 sibling, 0 replies; 95+ messages in thread
From: Daniel Vetter @ 2016-07-28 10:08 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Thu, Jul 28, 2016 at 12:02:01PM +0200, Daniel Vetter wrote:
> On Wed, Jul 27, 2016 at 12:14:54PM +0100, Chris Wilson wrote:
> > We only need a very lightweight mechanism here as the locking is only
> > used for co-ordinating a bitfield.
> > 
> > v2: Move the cheap unlikely tests into the caller
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> I think the code shuffling in here badly breaks the kerneldoc. Best fix
> would be to extract a small header for frontbuffer tracking and pull that
> into the kernel doc. Much less preferred is to explicitly pull in the
> kerneldoc function-by-function (but that tends to be rather fragile when
> someone adds something new).
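
Concretely, the extracted header could then be pulled into the docs
with something like this (a sketch; the exact paths and DOC: section
name are assumptions):

	.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c
	   :doc: frontbuffer tracking

	.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.h
	   :internal: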

fwiw the change itself looks correct and makes sense, so with
intel_frontbuffer.h extracted and the kerneldoc include directives in
gpu/drm-i915.rst fixed up this is:

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> -Daniel
> 
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h          |  2 +-
> >  drivers/gpu/drm/i915/i915_gem.c          |  2 +-
> >  drivers/gpu/drm/i915/intel_drv.h         | 29 ++++++++++++++---
> >  drivers/gpu/drm/i915/intel_frontbuffer.c | 54 ++++++++++++++------------------
> >  4 files changed, 51 insertions(+), 36 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 3a68e604ad10..a24d31e3e014 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1669,7 +1669,7 @@ struct intel_pipe_crc {
> >  };
> >  
> >  struct i915_frontbuffer_tracking {
> > -	struct mutex lock;
> > +	spinlock_t lock;
> >  
> >  	/*
> >  	 * Tracking bits for delayed frontbuffer flushing due to gpu activity or
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 1fb958dcc749..7db0808f6961 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -4447,7 +4447,7 @@ i915_gem_load_init(struct drm_device *dev)
> >  
> >  	dev_priv->mm.interruptible = true;
> >  
> > -	mutex_init(&dev_priv->fb_tracking.lock);
> > +	spin_lock_init(&dev_priv->fb_tracking.lock);
> >  }
> >  
> >  void i915_gem_load_cleanup(struct drm_device *dev)
> > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> > index e74d851868c5..01056ce8d461 100644
> > --- a/drivers/gpu/drm/i915/intel_drv.h
> > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > @@ -1135,8 +1135,6 @@ void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state);
> >  uint32_t ddi_signal_levels(struct intel_dp *intel_dp);
> >  
> >  /* intel_frontbuffer.c */
> > -void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
> > -			     enum fb_op_origin origin);
> >  void intel_frontbuffer_flip_prepare(struct drm_device *dev,
> >  				    unsigned frontbuffer_bits);
> >  void intel_frontbuffer_flip_complete(struct drm_device *dev,
> > @@ -1147,8 +1145,31 @@ unsigned int intel_fb_align_height(struct drm_device *dev,
> >  				   unsigned int height,
> >  				   uint32_t pixel_format,
> >  				   uint64_t fb_format_modifier);
> > -void intel_fb_obj_flush(struct drm_i915_gem_object *obj, bool retire,
> > -			enum fb_op_origin origin);
> > +
> > +void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
> > +			       enum fb_op_origin origin);
> > +static inline void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
> > +					   enum fb_op_origin origin)
> > +{
> > +	if (!obj->frontbuffer_bits)
> > +		return;
> > +
> > +	__intel_fb_obj_invalidate(obj, origin);
> > +}
> > +
> > +void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
> > +			  bool retire,
> > +			  enum fb_op_origin origin);
> > +static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
> > +				      bool retire,
> > +				      enum fb_op_origin origin)
> > +{
> > +	if (!obj->frontbuffer_bits)
> > +		return;
> > +
> > +	__intel_fb_obj_flush(obj, retire, origin);
> > +}
> > +
> >  u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv,
> >  			      uint64_t fb_modifier, uint32_t pixel_format);
> >  
> > diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
> > index ac85357010b4..a38ccfe4894a 100644
> > --- a/drivers/gpu/drm/i915/intel_frontbuffer.c
> > +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
> > @@ -76,24 +76,19 @@
> >   * until the rendering completes or a flip on this frontbuffer plane is
> >   * scheduled.
> >   */
> > -void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
> > -			     enum fb_op_origin origin)
> > +void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
> > +			       enum fb_op_origin origin)
> >  {
> >  	struct drm_device *dev = obj->base.dev;
> >  	struct drm_i915_private *dev_priv = to_i915(dev);
> >  
> >  	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
> >  
> > -	if (!obj->frontbuffer_bits)
> > -		return;
> > -
> >  	if (origin == ORIGIN_CS) {
> > -		mutex_lock(&dev_priv->fb_tracking.lock);
> > -		dev_priv->fb_tracking.busy_bits
> > -			|= obj->frontbuffer_bits;
> > -		dev_priv->fb_tracking.flip_bits
> > -			&= ~obj->frontbuffer_bits;
> > -		mutex_unlock(&dev_priv->fb_tracking.lock);
> > +		spin_lock(&dev_priv->fb_tracking.lock);
> > +		dev_priv->fb_tracking.busy_bits |= obj->frontbuffer_bits;
> > +		dev_priv->fb_tracking.flip_bits &= ~obj->frontbuffer_bits;
> > +		spin_unlock(&dev_priv->fb_tracking.lock);
> >  	}
> >  
> >  	intel_psr_invalidate(dev, obj->frontbuffer_bits);
> > @@ -120,11 +115,11 @@ static void intel_frontbuffer_flush(struct drm_device *dev,
> >  	struct drm_i915_private *dev_priv = to_i915(dev);
> >  
> >  	/* Delay flushing when rings are still busy. */
> > -	mutex_lock(&dev_priv->fb_tracking.lock);
> > +	spin_lock(&dev_priv->fb_tracking.lock);
> >  	frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits;
> > -	mutex_unlock(&dev_priv->fb_tracking.lock);
> > +	spin_unlock(&dev_priv->fb_tracking.lock);
> >  
> > -	if (!frontbuffer_bits)
> > +	if (frontbuffer_bits == 0)
> >  		return;
> >  
> >  	intel_edp_drrs_flush(dev, frontbuffer_bits);
> > @@ -142,8 +137,9 @@ static void intel_frontbuffer_flush(struct drm_device *dev,
> >   * completed and frontbuffer caching can be started again. If @retire is true
> >   * then any delayed flushes will be unblocked.
> >   */
> > -void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
> > -			bool retire, enum fb_op_origin origin)
> > +void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
> > +			  bool retire,
> > +			  enum fb_op_origin origin)
> >  {
> >  	struct drm_device *dev = obj->base.dev;
> >  	struct drm_i915_private *dev_priv = to_i915(dev);
> > @@ -151,21 +147,18 @@ void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
> >  
> >  	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
> >  
> > -	if (!obj->frontbuffer_bits)
> > -		return;
> > -
> >  	frontbuffer_bits = obj->frontbuffer_bits;
> >  
> >  	if (retire) {
> > -		mutex_lock(&dev_priv->fb_tracking.lock);
> > +		spin_lock(&dev_priv->fb_tracking.lock);
> >  		/* Filter out new bits since rendering started. */
> >  		frontbuffer_bits &= dev_priv->fb_tracking.busy_bits;
> > -
> >  		dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
> > -		mutex_unlock(&dev_priv->fb_tracking.lock);
> > +		spin_unlock(&dev_priv->fb_tracking.lock);
> >  	}
> >  
> > -	intel_frontbuffer_flush(dev, frontbuffer_bits, origin);
> > +	if (frontbuffer_bits)
> > +		intel_frontbuffer_flush(dev, frontbuffer_bits, origin);
> >  }
> >  
> >  /**
> > @@ -185,11 +178,11 @@ void intel_frontbuffer_flip_prepare(struct drm_device *dev,
> >  {
> >  	struct drm_i915_private *dev_priv = to_i915(dev);
> >  
> > -	mutex_lock(&dev_priv->fb_tracking.lock);
> > +	spin_lock(&dev_priv->fb_tracking.lock);
> >  	dev_priv->fb_tracking.flip_bits |= frontbuffer_bits;
> >  	/* Remove stale busy bits due to the old buffer. */
> >  	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
> > -	mutex_unlock(&dev_priv->fb_tracking.lock);
> > +	spin_unlock(&dev_priv->fb_tracking.lock);
> >  
> >  	intel_psr_single_frame_update(dev, frontbuffer_bits);
> >  }
> > @@ -209,13 +202,14 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev,
> >  {
> >  	struct drm_i915_private *dev_priv = to_i915(dev);
> >  
> > -	mutex_lock(&dev_priv->fb_tracking.lock);
> > +	spin_lock(&dev_priv->fb_tracking.lock);
> >  	/* Mask any cancelled flips. */
> >  	frontbuffer_bits &= dev_priv->fb_tracking.flip_bits;
> >  	dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits;
> > -	mutex_unlock(&dev_priv->fb_tracking.lock);
> > +	spin_unlock(&dev_priv->fb_tracking.lock);
> >  
> > -	intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
> > +	if (frontbuffer_bits)
> > +		intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
> >  }
> >  
> >  /**
> > @@ -234,10 +228,10 @@ void intel_frontbuffer_flip(struct drm_device *dev,
> >  {
> >  	struct drm_i915_private *dev_priv = to_i915(dev);
> >  
> > -	mutex_lock(&dev_priv->fb_tracking.lock);
> > +	spin_lock(&dev_priv->fb_tracking.lock);
> >  	/* Remove stale busy bits due to the old buffer. */
> >  	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
> > -	mutex_unlock(&dev_priv->fb_tracking.lock);
> > +	spin_unlock(&dev_priv->fb_tracking.lock);
> >  
> >  	intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
> >  }
> > -- 
> > 2.8.1
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 17/22] drm/i915: Use atomics to manipulate obj->frontbuffer_bits
  2016-07-28  9:49   ` Joonas Lahtinen
@ 2016-07-28 10:10     ` Chris Wilson
  2016-07-28 10:51       ` Joonas Lahtinen
  0 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-28 10:10 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: Daniel Vetter, intel-gfx

On Thu, Jul 28, 2016 at 12:49:31PM +0300, Joonas Lahtinen wrote:
> On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> >  static int i915_gem_object_list_info(struct seq_file *m, void *data)
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index a24d31e3e014..b6b9a1f78238 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -2127,8 +2127,6 @@ struct drm_i915_gem_object_ops {
> >   */
> >  #define INTEL_MAX_SPRITE_BITS_PER_PIPE 5
> >  #define INTEL_FRONTBUFFER_BITS_PER_PIPE 8
> > -#define INTEL_FRONTBUFFER_BITS \
> > -	(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES)
> 
> Should we have a BUILD_BUG_ON to make sure the bits still fit?
> 
> > @@ -4549,16 +4549,20 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
> >  		       struct drm_i915_gem_object *new,
> >  		       unsigned frontbuffer_bits)
> >  {
> > +	/* Control of individual bits within the bitfield is guarded by
> 
> 'bitfield' refers to a specific C construct, so it is not the
> appropriate term here now that the bitfield is gone. In this commit it
> is readable, but in the future I think it would just be confusing.

When I wrote the comment it was still a bitfield! s/bitfield/mask/

BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
	     sizeof(atomic_t) * BITS_PER_BYTE);
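
Something like this, say (a sketch; the exact placement is a guess):

	void i915_gem_track_fb(struct drm_i915_gem_object *old,
			       struct drm_i915_gem_object *new,
			       unsigned frontbuffer_bits)
	{
		/* All pipes' tracking bits must fit in one atomic_t. */
		BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
			     sizeof(atomic_t) * BITS_PER_BYTE);

		/* ... atomic_andnot()/atomic_or() updates as in the patch ... */
	}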
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 16/22] drm/i915: Make fb_tracking.lock a spinlock
  2016-07-27 11:14 ` [PATCH 16/22] drm/i915: Make fb_tracking.lock a spinlock Chris Wilson
  2016-07-28 10:02   ` Daniel Vetter
@ 2016-07-28 10:19   ` Joonas Lahtinen
  1 sibling, 0 replies; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28 10:19 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> 
>  	/* Delay flushing when rings are still busy. */
> -	mutex_lock(&dev_priv->fb_tracking.lock);
> +	spin_lock(&dev_priv->fb_tracking.lock);
>  	frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits;
> -	mutex_unlock(&dev_priv->fb_tracking.lock);
> +	spin_unlock(&dev_priv->fb_tracking.lock);
>  
> -	if (!frontbuffer_bits)
> +	if (frontbuffer_bits == 0)

Ugh, there are two styles within the same patch already; maybe go with !
here too. With that unified:

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 21/22] drm/i915: Enable lockless lookup of request tracking via RCU
  2016-07-27 11:14 ` [PATCH 21/22] drm/i915: Enable lockless lookup of request tracking via RCU Chris Wilson
@ 2016-07-28 10:23   ` Daniel Vetter
  2016-07-28 20:49     ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Daniel Vetter @ 2016-07-28 10:23 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Goel, Akash, Josh Triplett

On Wed, Jul 27, 2016 at 12:14:59PM +0100, Chris Wilson wrote:
> If we enable RCU for the requests (providing a grace period where we can
> inspect a "dead" request before it is freed), we can allow callers to
> carefully perform lockless lookup of an active request.
> 
> However, by enabling deferred freeing of requests, we can potentially
> hog a lot of memory when dealing with tens of thousands of requests per
> second - with a quick insertion of a synchronize_rcu() inside our
> shrinker callback, that issue disappears.
> 
> v2: Currently, it is our responsibility to handle reclaim i.e. to avoid
> hogging memory with the delayed slab frees. At the moment, we wait for a
> grace period in the shrinker, and block for all RCU callbacks on oom.
> Suggested alternatives focus on flushing our RCU callback when we have a
> certain number of outstanding request frees, and blocking on that flush
> after a second high watermark. (So rather than wait for the system to
> run out of memory, we stop issuing requests - both are nondeterministic.)
> 
> Paul E. McKenney wrote:
> 
> Another approach is synchronize_rcu() after some largish number of
> requests.  The advantage of this approach is that it throttles the
> production of callbacks at the source.  The corresponding disadvantage
> is that it slows things up.
> 
> Another approach is to use call_rcu(), but if the previous call_rcu()
> is still in flight, block waiting for it.  Yet another approach is
> the get_state_synchronize_rcu() / cond_synchronize_rcu() pair.  The
> idea is to do something like this:
> 
>         cond_synchronize_rcu(cookie);
>         cookie = get_state_synchronize_rcu();
> 
> You would of course do an initial get_state_synchronize_rcu() to
> get things going.  This would not block unless there was less than
> one grace period's worth of time between invocations.  But this
> assumes a busy system, where there is almost always a grace period
> in flight.  But you can make that happen as follows:
> 
>         cond_synchronize_rcu(cookie);
>         cookie = get_state_synchronize_rcu();
>         call_rcu(&my_rcu_head, noop_function);
> 
> Note that you need additional code to make sure that the old callback
> has completed before doing a new one.  Setting and clearing a flag
> with appropriate memory ordering control suffices (e.g,. smp_load_acquire()
> and smp_store_release()).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: "Goel, Akash" <akash.goel@intel.com>
> Cc: Josh Triplett <josh@joshtriplett.org>
> ---
>  drivers/gpu/drm/i915/i915_gem.c          |   7 +-
>  drivers/gpu/drm/i915/i915_gem_request.c  |   2 +-
>  drivers/gpu/drm/i915/i915_gem_request.h  | 114 +++++++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_gem_shrinker.c |  15 ++--
>  4 files changed, 126 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 54d8a3863d11..0c546f8099d9 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4421,7 +4421,9 @@ i915_gem_load_init(struct drm_device *dev)
>  	dev_priv->requests =
>  		kmem_cache_create("i915_gem_request",
>  				  sizeof(struct drm_i915_gem_request), 0,
> -				  SLAB_HWCACHE_ALIGN,
> +				  SLAB_HWCACHE_ALIGN |
> +				  SLAB_RECLAIM_ACCOUNT |
> +				  SLAB_DESTROY_BY_RCU,
>  				  NULL);
>  
>  	INIT_LIST_HEAD(&dev_priv->context_list);
> @@ -4457,6 +4459,9 @@ void i915_gem_load_cleanup(struct drm_device *dev)
>  	kmem_cache_destroy(dev_priv->requests);
>  	kmem_cache_destroy(dev_priv->vmas);
>  	kmem_cache_destroy(dev_priv->objects);
> +
> +	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
> +	rcu_barrier();
>  }
>  
>  int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 3395c955a532..bcc1369c0693 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -190,7 +190,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>  		prefetchw(next);
>  
>  		INIT_LIST_HEAD(&active->link);
> -		active->__request = NULL;
> +		RCU_INIT_POINTER(active->__request, NULL);
>  
>  		active->retire(active, request);
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 2eec0cac1e9f..bb03f4440b0f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -183,6 +183,12 @@ i915_gem_request_get(struct drm_i915_gem_request *req)
>  	return to_request(fence_get(&req->fence));
>  }
>  
> +static inline struct drm_i915_gem_request *
> +i915_gem_request_get_rcu(struct drm_i915_gem_request *req)
> +{
> +	return to_request(fence_get_rcu(&req->fence));
> +}
> +
>  static inline void
>  i915_gem_request_put(struct drm_i915_gem_request *req)
>  {
> @@ -286,7 +292,7 @@ typedef void (*i915_gem_retire_fn)(struct i915_gem_active *,
>  				   struct drm_i915_gem_request *);
>  
>  struct i915_gem_active {
> -	struct drm_i915_gem_request *__request;
> +	struct drm_i915_gem_request __rcu *__request;
>  	struct list_head link;
>  	i915_gem_retire_fn retire;
>  };
> @@ -323,13 +329,19 @@ i915_gem_active_set(struct i915_gem_active *active,
>  		    struct drm_i915_gem_request *request)
>  {
>  	list_move(&active->link, &request->active_list);
> -	active->__request = request;
> +	rcu_assign_pointer(active->__request, request);
>  }
>  
>  static inline struct drm_i915_gem_request *
>  __i915_gem_active_peek(const struct i915_gem_active *active)
>  {
> -	return active->__request;
> +	/* Inside the error capture (running with the driver in an unknown
> +	 * state), we want to bend the rules slightly (a lot).
> +	 *
> +	 * Work is in progress to make it safer, in the meantime this keeps
> +	 * the known issue from spamming the logs.
> +	 */
> +	return rcu_dereference_protected(active->__request, 1);
>  }
>  
>  /**
> @@ -345,7 +357,29 @@ i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
>  {
>  	struct drm_i915_gem_request *request;
>  
> -	request = active->__request;
> +	request = rcu_dereference_protected(active->__request,
> +					    lockdep_is_held(mutex));
> +	if (!request || i915_gem_request_completed(request))
> +		return NULL;
> +
> +	return request;
> +}
> +
> +/**
> + * i915_gem_active_peek_rcu - report the active request being monitored
> + * @active - the active tracker
> + *
> + * i915_gem_active_peek_rcu() returns the current request being tracked if
> + * still active, or NULL. It does not obtain a reference on the request
> + * for the caller, and inspection of the request is only valid under
> + * the RCU lock.
> + */
> +static inline struct drm_i915_gem_request *
> +i915_gem_active_peek_rcu(const struct i915_gem_active *active)
> +{
> +	struct drm_i915_gem_request *request;
> +
> +	request = rcu_dereference(active->__request);
>  	if (!request || i915_gem_request_completed(request))
>  		return NULL;
>  
> @@ -366,6 +400,72 @@ i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex)
>  }
>  
>  /**
> + * i915_gem_active_get_rcu - return a reference to the active request
> + * @active - the active tracker
> + *
> + * i915_gem_active_get() returns a reference to the active request, or NULL
> + * if the active tracker is idle. The caller must hold the RCU read lock.
> + */
> +static inline struct drm_i915_gem_request *
> +i915_gem_active_get_rcu(const struct i915_gem_active *active)
> +{
> +	/* Performing a lockless retrieval of the active request is super
> +	 * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing
> +	 * slab of request objects will not be freed whilst we hold the
> +	 * RCU read lock. It does not guarantee that the request itself
> +	 * will not be freed and then *reused*. Viz,
> +	 *
> +	 * Thread A			Thread B
> +	 *
> +	 * req = active.request
> +	 *				retire(req) -> free(req);
> +	 *				(req is now first on the slab freelist)
> +	 *				active.request = NULL
> +	 *
> +	 *				req = new submission on a new object
> +	 * ref(req)
> +	 *
> +	 * To prevent the request from being reused whilst the caller
> +	 * uses it, we take a reference like normal. Whilst acquiring
> +	 * the reference we check that it is not in a destroyed state
> +	 * (refcnt == 0). That prevents the request being reallocated
> +	 * whilst the caller holds on to it. To check that the request
> +	 * was not reallocated as we acquired the reference we have to
> +	 * check that our request remains the active request across
> +	 * the lookup, in the same manner as a seqlock. The visibility
> +	 * of the pointer versus the reference counting is controlled
> +	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
> +	 *
> +	 * In the middle of all that, we inspect whether the request is
> +	 * complete. Retiring is lazy so the request may be completed long
> +	 * before the active tracker is updated. Querying whether the
> +	 * request is complete is far cheaper (as it involves no locked
> +	 * instructions setting cachelines to exclusive) than acquiring
> +	 * the reference, so we do it first. The RCU read lock ensures the
> +	 * pointer dereference is valid, but does not ensure that the
> +	 * seqno nor HWS is the right one! However, if the request was
> +	 * reallocated, that means the active tracker's request was complete.
> +	 * If the new request is also complete, then both are and we can
> +	 * just report the active tracker is idle. If the new request is
> +	 * incomplete, then we acquire a reference on it and check that
> +	 * it remained the active request.
> +	 */
> +	do {
> +		struct drm_i915_gem_request *request;
> +
> +		request = rcu_dereference(active->__request);
> +		if (!request || i915_gem_request_completed(request))
> +			return NULL;
> +
> +		request = i915_gem_request_get_rcu(request);

I think we have a race here still: The issue is that the
kref_get_unless_zero is an unordered atomic, and the rcu_dereference is
only an smp_read_barrier_depends, which doesn't prevent the fetch from
happening before the atomic_add_unless.

Well until I opened memory-barriers.txt and learned that atomic_add_unless
is a full smp_mb() on both sides on success. That's a bit too tricky for
my taste; what about the following comment:

		/* When request_get_rcu succeeds the underlying
		 * atomic_add_unless has a full smp_mb() on both sides.
		 * This ensures that the rcu_dereference() below can't be
		 * reordered before the refcounting increase has
		 * happened, which prevents the request from being reused.
		 */
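
That comment would sit between the i915_gem_request_get_rcu() call and
the re-check of active->__request in the loop quoted above, i.e.
(sketch only, same names as the patch):

	request = i915_gem_request_get_rcu(request);

	/* see the barrier comment above */
	if (!request || request == rcu_dereference(active->__request))
		return request;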

I couldn't poke any other holes into this, and we're reusing the fence rcu
functions where appropriate. With the comment:

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> +		if (!request || request == rcu_dereference(active->__request))
> +			return request;
> +
> +		i915_gem_request_put(request);
> +	} while (1);
> +}
> +
> +/**
>   * __i915_gem_active_is_busy - report whether the active tracker is assigned
>   * @active - the active tracker
>   *
> @@ -433,7 +533,8 @@ i915_gem_active_retire(struct i915_gem_active *active,
>  	struct drm_i915_gem_request *request;
>  	int ret;
>  
> -	request = active->__request;
> +	request = rcu_dereference_protected(active->__request,
> +					    lockdep_is_held(mutex));
>  	if (!request)
>  		return 0;
>  
> @@ -442,7 +543,8 @@ i915_gem_active_retire(struct i915_gem_active *active,
>  		return ret;
>  
>  	list_del_init(&active->link);
> -	active->__request = NULL;
> +	RCU_INIT_POINTER(active->__request, NULL);
> +
>  	active->retire(active, request);
>  
>  	return 0;
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 33f8dcb9b8c4..a1a805fcdffa 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -191,6 +191,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>  		intel_runtime_pm_put(dev_priv);
>  
>  	i915_gem_retire_requests(dev_priv);
> +	/* expedite the RCU grace period to free some request slabs */
> +	synchronize_rcu_expedited();
>  
>  	return count;
>  }
> @@ -211,10 +213,15 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>   */
>  unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
>  {
> -	return i915_gem_shrink(dev_priv, -1UL,
> -			       I915_SHRINK_BOUND |
> -			       I915_SHRINK_UNBOUND |
> -			       I915_SHRINK_ACTIVE);
> +	unsigned long freed;
> +
> +	freed = i915_gem_shrink(dev_priv, -1UL,
> +				I915_SHRINK_BOUND |
> +				I915_SHRINK_UNBOUND |
> +				I915_SHRINK_ACTIVE);
> +	rcu_barrier(); /* wait until our RCU delayed slab frees are completed */
> +
> +	return freed;
>  }
>  
>  static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 04/22] drm/i915: Remove request retirement before each batch
  2016-07-28  9:54   ` Daniel Vetter
@ 2016-07-28 10:26     ` Chris Wilson
  2016-07-28 11:52       ` Daniel Vetter
  0 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-28 10:26 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Thu, Jul 28, 2016 at 11:54:07AM +0200, Daniel Vetter wrote:
> On Wed, Jul 27, 2016 at 12:14:42PM +0100, Chris Wilson wrote:
> > This reimplements the denial-of-service protection against igt from
> > commit 227f782e4667 ("drm/i915: Retire requests before creating a new
> > one") and transfers the stall from before each batch into get_pages().
> > The issue is that the stall is increasing latency between batches which
> > is detrimental in some cases (especially coupled with execlists) to
> > keeping the GPU well fed. Also we have made the observation that retiring
> > requests can of itself free objects (and requests) and therefore makes
> > a good first step when shrinking.
> > 
> > v2: Recycle objects prior to i915_gem_object_get_pages()
> > v3: Remove the reference to the ring from i915_gem_requests_ring() as it
> > operates on an intel_engine_cs.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h            | 1 -
> >  drivers/gpu/drm/i915/i915_gem.c            | 7 +++++--
> >  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 --
> >  drivers/gpu/drm/i915/i915_gem_request.c    | 4 ++--
> >  4 files changed, 7 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index fbda38f25c6b..2de3d16f7b80 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -3169,7 +3169,6 @@ struct drm_i915_gem_request *
> >  i915_gem_find_active_request(struct intel_engine_cs *engine);
> >  
> >  void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
> > -void i915_gem_retire_requests_ring(struct intel_engine_cs *engine);
> >  
> >  static inline u32 i915_reset_counter(struct i915_gpu_error *error)
> >  {
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index bf652dc88024..68dbe4f7940c 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -2244,7 +2244,6 @@ int
> >  i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> >  {
> >  	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
> > -	const struct drm_i915_gem_object_ops *ops = obj->ops;
> >  	int ret;
> >  
> >  	if (obj->pages)
> > @@ -2257,7 +2256,10 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> >  
> >  	BUG_ON(obj->pages_pin_count);
> >  
> > -	ret = ops->get_pages(obj);
> > +	/* Recycle as many active objects as possible first */
> > +	i915_gem_retire_requests(dev_priv);
> > +
> > +	ret = obj->ops->get_pages(obj);
> 
> Why exactly do we need this?
> - shmem objs already call shrink_all if they can't get at the memory
> - everyone else doesn't care.

Because that is very expensive and we have very poor utilisation of
caches. On average, the affected benchmarks are about 100x slower
without it and demonstrate large variation.

Everyone else isn't allocating or has their own defense.

Otoh, the more aggressive shrinking is quite recent, more recent than
this patch. But I stand by the measurements as they were made that this
is the point at which utilisation mattered, if only to worry about it
later when I need to remove the call.

> Even if we need this in some case it looks funny, since it splits the
> memory cleanup between caller and callee of get_pages.

?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 22/22] drm/i915: Export our request as a dma-buf fence on the reservation object
  2016-07-27 11:15 ` [PATCH 22/22] drm/i915: Export our request as a dma-buf fence on the reservation object Chris Wilson
@ 2016-07-28 10:32   ` Daniel Vetter
  2016-07-28 10:40     ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Daniel Vetter @ 2016-07-28 10:32 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Jul 27, 2016 at 12:15:00PM +0100, Chris Wilson wrote:
> If the GEM objects being rendered within this request have been
> exported via dma-buf to a third party, hook ourselves into the dma-buf
> reservation object so that the third party can serialise with our
> rendering via the dma-buf fences.
> 
> Testcase: igt/prime_busy
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Style nit: I prefer ww_mutex_lock(&resv->lock, NULL); over
mutex_lock(&resv->lock.base). The former makes it clear it's a ww mutex,
but we don't bother with the multi-lock dance. The latter needles around
in implementation details, which it really shouldn't. Please change.
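
I.e. in export_fences()/eb_export_fence() roughly (sketch):

	ww_mutex_lock(&resv->lock, NULL);
	/* ... add the shared/exclusive fences exactly as in the patch ... */
	ww_mutex_unlock(&resv->lock);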

The other wonky bit is that changing reservations on multiple objects
without the full ww mutex dance is deadlock-risky. But only when you both
add and wait/stall on fences.

Right now we only either wait (in the modeset/flip code) or only add (in
execbuf, after this patch) and hence there's no risk. I also think that
with the usual use-case of rendering on one gpu and displaying on the
other that's done in current PRIME (instead of e.g. rendering on one, then
compositing on 2nd, and displaying somewhere else) there's also no
immediate need to add that. At least before we have fixed up our locking
scheme ;-)

With the ww_mutex change: Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> ---
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c     | 56 ++++++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 33 ++++++++++++++++--
>  2 files changed, 83 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index 3a00ab3ad06e..bab71ba9c25a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -23,9 +23,13 @@
>   * Authors:
>   *	Dave Airlie <airlied@redhat.com>
>   */
> +
> +#include <linux/dma-buf.h>
> +#include <linux/reservation.h>
> +
>  #include <drm/drmP.h>
> +
>  #include "i915_drv.h"
> -#include <linux/dma-buf.h>
>  
>  static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
>  {
> @@ -218,25 +222,71 @@ static const struct dma_buf_ops i915_dmabuf_ops =  {
>  	.end_cpu_access = i915_gem_end_cpu_access,
>  };
>  
> +static void export_fences(struct drm_i915_gem_object *obj,
> +			  struct dma_buf *dma_buf)
> +{
> +	struct reservation_object *resv = dma_buf->resv;
> +	struct drm_i915_gem_request *req;
> +	unsigned long active;
> +	int idx;
> +
> +	active = __I915_BO_ACTIVE(obj);
> +	if (!active)
> +		return;
> +
> +	/* Mark the object for future fences before racily adding old fences */
> +	obj->base.dma_buf = dma_buf;
> +
> +	mutex_lock(&resv->lock.base);
> +
> +	for_each_active(active, idx) {
> +		rcu_read_lock();
> +		req = i915_gem_active_get_rcu(&obj->last_read[idx]);
> +		rcu_read_unlock();
> +		if (!req)
> +			continue;
> +
> +		if (reservation_object_reserve_shared(resv) == 0)
> +			reservation_object_add_shared_fence(resv, &req->fence);
> +
> +		i915_gem_request_put(req);
> +	}
> +
> +	rcu_read_lock();
> +	req = i915_gem_active_get_rcu(&obj->last_write);
> +	rcu_read_unlock();
> +	if (req) {
> +		reservation_object_add_excl_fence(resv, &req->fence);
> +		i915_gem_request_put(req);
> +	}
> +
> +	mutex_unlock(&resv->lock.base);
> +}
> +
>  struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
>  				      struct drm_gem_object *gem_obj, int flags)
>  {
>  	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
>  	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
> +	struct dma_buf *dma_buf;
>  
>  	exp_info.ops = &i915_dmabuf_ops;
>  	exp_info.size = gem_obj->size;
>  	exp_info.flags = flags;
>  	exp_info.priv = gem_obj;
>  
> -
>  	if (obj->ops->dmabuf_export) {
>  		int ret = obj->ops->dmabuf_export(obj);
>  		if (ret)
>  			return ERR_PTR(ret);
>  	}
>  
> -	return dma_buf_export(&exp_info);
> +	dma_buf = dma_buf_export(&exp_info);
> +	if (IS_ERR(dma_buf))
> +		return dma_buf;
> +
> +	export_fences(obj, dma_buf);
> +	return dma_buf;
>  }
>  
>  static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0d28703d991a..e2aba40bf328 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -26,13 +26,17 @@
>   *
>   */
>  
> +#include <linux/dma_remapping.h>
> +#include <linux/reservation.h>
> +#include <linux/uaccess.h>
> +
>  #include <drm/drmP.h>
>  #include <drm/i915_drm.h>
> +
>  #include "i915_drv.h"
> +#include "i915_gem_dmabuf.h"
>  #include "i915_trace.h"
>  #include "intel_drv.h"
> -#include <linux/dma_remapping.h>
> -#include <linux/uaccess.h>
>  
>  #define  __EXEC_OBJECT_HAS_PIN		(1<<31)
>  #define  __EXEC_OBJECT_HAS_FENCE	(1<<30)
> @@ -1193,7 +1197,29 @@ void i915_vma_move_to_active(struct i915_vma *vma,
>  	list_move_tail(&vma->vm_link, &vma->vm->active_list);
>  }
>  
> -static void
> +static void eb_export_fence(struct drm_i915_gem_object *obj,
> +			    struct drm_i915_gem_request *req,
> +			    unsigned int flags)
> +{
> +	struct reservation_object *resv;
> +
> +	resv = i915_gem_object_get_dmabuf_resv(obj);
> +	if (!resv)
> +		return;
> +
> +	/* Ignore errors from failing to allocate the new fence, we can't
> +	 * handle an error right now. Worst case should be missed
> +	 * synchronisation leading to rendering corruption.
> +	 */
> +	mutex_lock(&resv->lock.base);
> +	if (flags & EXEC_OBJECT_WRITE)
> +		reservation_object_add_excl_fence(resv, &req->fence);
> +	else if (reservation_object_reserve_shared(resv) == 0)
> +		reservation_object_add_shared_fence(resv, &req->fence);
> +	mutex_unlock(&resv->lock.base);
> +}
> +
> +void
>  i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>  				   struct drm_i915_gem_request *req)
>  {
> @@ -1212,6 +1238,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>  		obj->base.read_domains = obj->base.pending_read_domains;
>  
>  		i915_vma_move_to_active(vma, req, vma->exec_entry->flags);
> +		eb_export_fence(obj, req, vma->exec_entry->flags);
>  		trace_i915_gem_object_change_domain(obj, old_read, old_write);
>  	}
>  }
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 07/22] drm/i915: Pad GTT views of exec objects up to user specified size
  2016-07-28  9:55   ` Daniel Vetter
@ 2016-07-28 10:33     ` Chris Wilson
  0 siblings, 0 replies; 95+ messages in thread
From: Chris Wilson @ 2016-07-28 10:33 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Thu, Jul 28, 2016 at 11:55:21AM +0200, Daniel Vetter wrote:
> On Wed, Jul 27, 2016 at 12:14:45PM +0100, Chris Wilson wrote:
> > Our GPUs impose certain requirements upon buffers that depend upon how
> > exactly they are used. Typically this is expressed as that they require
> > a larger surface than would be naively computed by pitch * height.
> > Normally such requirements are hidden away in the userspace driver, but
> > when we accept pointers from strangers and later impose extra conditions
> > on them, the original client allocator has no idea about the
> > monstrosities in the GPU and we require the userspace driver to inform
> > the kernel how many padding pages are required beyond the client
> > allocation.
> > 
> > v2: Long time, no see
> > v3: Try an anonymous union for uapi struct compatibility
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> This is missing the testcase line. Also a short link to the
> libva/opencl/whatever patches would be good too.

Yes, I am waiting on kernel support before merging to the ddx.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 15/22] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin()
  2016-07-27 11:14 ` [PATCH 15/22] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin() Chris Wilson
@ 2016-07-28 10:38   ` Joonas Lahtinen
  2016-07-28 11:36     ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28 10:38 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> Since i915_gem_obj_ggtt_pin() is an idiom breaking curry function for
> i915_gem_object_ggtt_pin(), spare us the confustion and remove it.

confustion should be combustion or confusion

> @@ -3741,7 +3742,8 @@ i915_gem_object_ggtt_pin(struct
> drm_i915_gem_object *obj,
>         struct i915_vma *vma;
>         int ret;
>  
> -       BUG_ON(!view);
> +       if (!view)
> +               view = &i915_ggtt_view_normal;
> 

Convert other calling sites to take advantage of this if, and also fix
the error handling; "i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL"
is used in i915_gem_object_pin and i915_gem_obj_lookup_or_create_vma

The wrappers were introduced to reduce churn, which we're currently not
having any shortage of.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 22/22] drm/i915: Export our request as a dma-buf fence on the reservation object
  2016-07-28 10:32   ` Daniel Vetter
@ 2016-07-28 10:40     ` Chris Wilson
  2016-07-28 11:59       ` Daniel Vetter
  0 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-28 10:40 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Thu, Jul 28, 2016 at 12:32:42PM +0200, Daniel Vetter wrote:
> On Wed, Jul 27, 2016 at 12:15:00PM +0100, Chris Wilson wrote:
> > If the GEM objects being rendered within this request have been
> > exported via dma-buf to a third party, hook ourselves into the dma-buf
> > reservation object so that the third party can serialise with our
> > rendering via the dma-buf fences.
> > 
> > Testcase: igt/prime_busy
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Style nit: I prefer ww_mutex_lock(&resv->lock, NULL); over
> mutex_lock(&resv->lock.base). The former makes it clear it's a ww mutex,
> but we don't bother with the multi-lock dance. The latter needles around
> in implementation details, which it really shouldn't. Please change.

Passing NULL as ww_acquire_ctx is illegal.

> The other wonky bit is that changing reservations on multiple objects
> without the full ww mutex dance is deadlock-risky. But only when you both
> add and wait/stall on fences.

Note that it is only so when trying to lock multiple objects simultaneously,
which we do not require.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 17/22] drm/i915: Use atomics to manipulate obj->frontbuffer_bits
  2016-07-28 10:10     ` Chris Wilson
@ 2016-07-28 10:51       ` Joonas Lahtinen
  0 siblings, 0 replies; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28 10:51 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx

On to, 2016-07-28 at 11:10 +0100, Chris Wilson wrote:
> On Thu, Jul 28, 2016 at 12:49:31PM +0300, Joonas Lahtinen wrote:
> > 
> > On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> > > 
> > >  static int i915_gem_object_list_info(struct seq_file *m, void *data)
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > index a24d31e3e014..b6b9a1f78238 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -2127,8 +2127,6 @@ struct drm_i915_gem_object_ops {
> > >   */
> > >  #define INTEL_MAX_SPRITE_BITS_PER_PIPE 5
> > >  #define INTEL_FRONTBUFFER_BITS_PER_PIPE 8
> > > -#define INTEL_FRONTBUFFER_BITS \
> > > -	(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES)
> > Should we have a BUILD_BUG_ON to make sure we have a fit?
> > 
> > > 
> > > @@ -4549,16 +4549,20 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
> > >  		       struct drm_i915_gem_object *new,
> > >  		       unsigned frontbuffer_bits)
> > >  {
> > > +	/* Control of individual bits within the bitfield are guarded by
> > 'bitfield' refers to specific C construct, so not the appropriate term
> > here now that it is removed. In this commit it is readable, but for
> > future I think just confusing.
> When I wrote the comment it was still a bitfield! s/bitfield/mask/
> 
> BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > sizeof(atomic_t) * BITS_PER_BYTE);

Maybe split the line still, but yep.
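
Something like:

	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
		     sizeof(atomic_t) * BITS_PER_BYTE);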

> -Chris
> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 14/22] drm/i915: Make i915_vma_pin() small and inline
  2016-07-27 11:14 ` [PATCH 14/22] drm/i915: Make i915_vma_pin() small and inline Chris Wilson
@ 2016-07-28 11:06   ` Joonas Lahtinen
  0 siblings, 0 replies; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28 11:06 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> +int __i915_vma_do_pin(struct i915_vma *vma,
> +		      u64 size, u64 alignment, u64 flags);
> +static inline int __must_check
> +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> +{
> +	BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW);
> +	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
> +	BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
> +
> +	/* Pin early to prevent the shrinker/eviction logic from destroying
> +	 * our vma as we insert and bind.
> +	 */
> +	if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0))
> +		return 0;

GCC can't optimize a separate increment and flags equality test? The
expression tries to be a tad too smart (but nothing compared to the
semaphore funcs).
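
Spelled out, I believe that's equivalent to (sketch):

	vma->flags++;
	if (likely((vma->flags & I915_VMA_BIND_MASK) ==
		   (flags & I915_VMA_BIND_MASK)))
		return 0;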

Apart from that, looks good,

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 15/22] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin()
  2016-07-28 10:38   ` Joonas Lahtinen
@ 2016-07-28 11:36     ` Chris Wilson
  2016-07-28 11:53       ` Joonas Lahtinen
  0 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-28 11:36 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Thu, Jul 28, 2016 at 01:38:14PM +0300, Joonas Lahtinen wrote:
> On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> > Since i915_gem_obj_ggtt_pin() is an idiom breaking curry function for
> > i915_gem_object_ggtt_pin(), spare us the confustion and remove it.
> 
> confustion should be combustion or confusion
> 
> > @@ -3741,7 +3742,8 @@ i915_gem_object_ggtt_pin(struct
> > drm_i915_gem_object *obj,
> >         struct i915_vma *vma;
> >         int ret;
> >  
> > -       BUG_ON(!view);
> > +       if (!view)
> > +               view = &i915_ggtt_view_normal;
> > 
> 
> Convert other calling sites to take advantage of this if,

Patches have already been sent to do so, and will be along later as part
of the VMA handling outside of execbuf.

> and also fix the error handling;

Hmm, what have I missed?

> "i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL"
> is used in i915_gem_object_pin and i915_gem_obj_lookup_or_create_vma

They are queued for removal.
 
> The wrappers were introduced to reduce churn, which we're currently not
> having any shortage of.

I'm not a big fan of temporary API that sticks around and only makes our
mistakes more glaring. :)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 04/22] drm/i915: Remove request retirement before each batch
  2016-07-28 10:26     ` Chris Wilson
@ 2016-07-28 11:52       ` Daniel Vetter
  2016-07-28 12:24         ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Daniel Vetter @ 2016-07-28 11:52 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx

On Thu, Jul 28, 2016 at 11:26:48AM +0100, Chris Wilson wrote:
> On Thu, Jul 28, 2016 at 11:54:07AM +0200, Daniel Vetter wrote:
> > On Wed, Jul 27, 2016 at 12:14:42PM +0100, Chris Wilson wrote:
> > > This reimplements the denial-of-service protection against igt from
> > > commit 227f782e4667 ("drm/i915: Retire requests before creating a new
> > > one") and transfers the stall from before each batch into get_pages().
> > > The issue is that the stall is increasing latency between batches which
> > > is detrimental in some cases (especially coupled with execlists) to
> > > keeping the GPU well fed. Also we have made the observation that retiring
> > > requests can of itself free objects (and requests) and therefore makes
> > > a good first step when shrinking.
> > > 
> > > v2: Recycle objects prior to i915_gem_object_get_pages()
> > > v3: Remove the reference to the ring from i915_gem_requests_ring() as it
> > > operates on an intel_engine_cs.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > ---
> > >  drivers/gpu/drm/i915/i915_drv.h            | 1 -
> > >  drivers/gpu/drm/i915/i915_gem.c            | 7 +++++--
> > >  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 --
> > >  drivers/gpu/drm/i915/i915_gem_request.c    | 4 ++--
> > >  4 files changed, 7 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > index fbda38f25c6b..2de3d16f7b80 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -3169,7 +3169,6 @@ struct drm_i915_gem_request *
> > >  i915_gem_find_active_request(struct intel_engine_cs *engine);
> > >  
> > >  void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
> > > -void i915_gem_retire_requests_ring(struct intel_engine_cs *engine);
> > >  
> > >  static inline u32 i915_reset_counter(struct i915_gpu_error *error)
> > >  {
> > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > > index bf652dc88024..68dbe4f7940c 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > > @@ -2244,7 +2244,6 @@ int
> > >  i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> > >  {
> > >  	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
> > > -	const struct drm_i915_gem_object_ops *ops = obj->ops;
> > >  	int ret;
> > >  
> > >  	if (obj->pages)
> > > @@ -2257,7 +2256,10 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> > >  
> > >  	BUG_ON(obj->pages_pin_count);
> > >  
> > > -	ret = ops->get_pages(obj);
> > > +	/* Recycle as many active objects as possible first */
> > > +	i915_gem_retire_requests(dev_priv);
> > > +
> > > +	ret = obj->ops->get_pages(obj);
> > 
> > Why exactly do we need this?
> > - shmem objs already call shrink_all if they can't get at the memory
> > - everyone else doesn't care.
> 
> Because that is very expensive and we have very poor utilisation of
> caches. On average, the affected benchmarks are about 100x slower
> without it and demonstrate large variation.
> 
> Everyone else isn't allocating or has their own defense.
> 
> Otoh, the more aggressive shrinking is quite recent, more recent than
> this patch. But I stand by the measurements as they were made that this
> is the point at which utilisation mattered, if only to worry about it
> later when I need to remove the call.

Please add those numbers to the commit message, I think without them this
particular change isn't well-justified enough.

> > Even if we need this in some case it looks funny, since it splits the
> > memory cleanup between caller and callee of get_pages.

At least for shmem we now have the retire_requests() outside of the
get_pages vfunc, but the full shrink fallback inside. For OCD reasons I
think it'd look better to have both inside the callback. Assuming that we
need the retire_requests for shmem objects, and not really for any of the
others.
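
I.e. roughly this, assuming the shmem backend is still
i915_gem_object_get_pages_gtt() (sketch only):

	static int
	i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
	{
		/* recycle active objects before hitting the allocator */
		i915_gem_retire_requests(to_i915(obj->base.dev));

		/* ... existing shmem allocation, including the
		 * i915_gem_shrink_all() fallback ...
		 */
	}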

It just felt like the placement is somewhat ad-hoc.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 15/22] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin()
  2016-07-28 11:36     ` Chris Wilson
@ 2016-07-28 11:53       ` Joonas Lahtinen
  2016-07-28 16:12         ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-28 11:53 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On to, 2016-07-28 at 12:36 +0100, Chris Wilson wrote:
> On Thu, Jul 28, 2016 at 01:38:14PM +0300, Joonas Lahtinen wrote:
> > 
> > On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> > > 
> > > Since i915_gem_obj_ggtt_pin() is an idiom breaking curry function for
> > > i915_gem_object_ggtt_pin(), spare us the confustion and remove it.
> > confustion should be combustion or confusion
> > 
> > > 
> > > @@ -3741,7 +3742,8 @@ i915_gem_object_ggtt_pin(struct
> > > drm_i915_gem_object *obj,
> > >         struct i915_vma *vma;
> > >         int ret;
> > >  
> > > -       BUG_ON(!view);
> > > +       if (!view)
> > > +               view = &i915_ggtt_view_normal;
> > > 
> > Convert other calling sites to take advantage of this if,
> Patches have already been sent to do so, and will be along later as part
> of the VMA handling outside of execbuf.
> 
> > 
> > and also fix the error handling;
> Hmm, what have I missed?
> 
> > 
> > "i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL"
> > is used in i915_gem_object_pin and i915_gem_obj_lookup_or_create_vma
> They are queued for removal.
> 

Those two calling sites will rely on the BUG_ON(!view) and pass NULL to
trigger it. So the BUG_ON should be kept at an upper level temporarily to
help with bisecting in the future.

Regards, Joonas

-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 22/22] drm/i915: Export our request as a dma-buf fence on the reservation object
  2016-07-28 10:40     ` Chris Wilson
@ 2016-07-28 11:59       ` Daniel Vetter
  2016-07-28 12:17         ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Daniel Vetter @ 2016-07-28 11:59 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx

On Thu, Jul 28, 2016 at 11:40:29AM +0100, Chris Wilson wrote:
> On Thu, Jul 28, 2016 at 12:32:42PM +0200, Daniel Vetter wrote:
> > On Wed, Jul 27, 2016 at 12:15:00PM +0100, Chris Wilson wrote:
> > > If the GEM objects being rendered within this request have been
> > > exported via dma-buf to a third party, hook ourselves into the dma-buf
> > > reservation object so that the third party can serialise with our
> > > rendering via the dma-buf fences.
> > > 
> > > Testcase: igt/prime_busy
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > 
> > Style nit: I prefer ww_mutex_lock(&resv->lock, NULL); over
> > mutex_lock(&resv->lock.base). The former makes it clear it's a ww mutex,
> > but we don't bother with the multi-lock dance. The latter needles around
> > in implementation details, which it really shouldn't. Please change.
> 
> Passing NULL as ww_acquire_ctx is illegal.

Hm, where exactly do you see that? kerneldoc for ww_mutex_lock clearly
says that it can be NULL, and the static inline has a check for it and
calls mutex_lock in the else path. Which means it /should/ boil down to
the exact same code after gcc has pondered it enough.

> > The other wonky bit is that changing reservations on multiple objects
> > without the full ww mutex dance is deadlock-risky. But only when you both
> > add and wait/stall on fences.
> 
> Note that it is only so when trying to lock multiple objects simultaneously,
> which we do not require.

With deadlocks I didn't mean locking deadlocks, but loops in the fences.
Sure we eventually recover when you have a good gpu driver with hang
check, but other drivers might be less fortunate. And if you lock objects
individually and update their fences individually it's fairly simple to
race command submission (on different drivers) against each other with
objects listed in reverse and end up with an ABBA fence dependency. The
fix for that is to (sketched below):
1. grab all the reservation locks
2. assemble the full list of fences you need to stall on
3. update the reservation with the new fence for the current rendering job
4. only then release all locks
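
In pseudo-code that dance is roughly (sketch; for_each_obj() and the
-EDEADLK backoff handling are elided/hypothetical):

	struct ww_acquire_ctx ctx;

	ww_acquire_init(&ctx, &reservation_ww_class);
	/* 1: lock every reservation, backing off and retrying on -EDEADLK */
	for_each_obj(obj)
		ww_mutex_lock(&obj->resv->lock, &ctx);
	ww_acquire_done(&ctx);
	/* 2: collect all the fences we must stall on */
	/* 3: install the new fence on each reservation */
	for_each_obj(obj)
		reservation_object_add_shared_fence(obj->resv, fence);
	/* 4: only now drop the locks */
	for_each_obj(obj)
		ww_mutex_unlock(&obj->resv->lock);
	ww_acquire_fini(&ctx);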

Without that drama ensues ;-) And the current patch is still pretty far
away from the above sequence (but it does roughly match to our overall
execbuf logic, with s/lock/pin/).
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 22/22] drm/i915: Export our request as a dma-buf fence on the reservation object
  2016-07-28 11:59       ` Daniel Vetter
@ 2016-07-28 12:17         ` Chris Wilson
  2016-07-28 12:28           ` Daniel Vetter
  0 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-28 12:17 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Thu, Jul 28, 2016 at 01:59:45PM +0200, Daniel Vetter wrote:
> On Thu, Jul 28, 2016 at 11:40:29AM +0100, Chris Wilson wrote:
> > On Thu, Jul 28, 2016 at 12:32:42PM +0200, Daniel Vetter wrote:
> > > On Wed, Jul 27, 2016 at 12:15:00PM +0100, Chris Wilson wrote:
> > > > If the GEM objects being rendered within this request have been
> > > > exported via dma-buf to a third party, hook ourselves into the dma-buf
> > > > reservation object so that the third party can serialise with our
> > > > rendering via the dma-buf fences.
> > > > 
> > > > Testcase: igt/prime_busy
> > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > 
> > > Style nit: I prefer ww_mutex_lock(&resv->lock, NULL); over
> > > mutex_lock(&resv->lock.base). The former makes it clear it's a ww mutex,
> > > but we don't bother with the multi-lock dance. The latter needles around
> > > in implementation details, which it really shouldn't. Please change.
> > 
> > Passing NULL as ww_acquire_ctx is illegal.
> 
> Hm, where exactly do you see that? kerneldoc for ww_mutex_lock clearly
> says that it can be NULL, and the static inline has a check for it and
> calls mutex_lock in the else path. Which means it /should/ boil down to
> the exact same code after gcc has pondered it enough.

But then explodes. Look at the lockdep. Clearly the kerneldoc is wrong.
Good job I was reading the code :-p
 
> > > The other wonky bit is that changing reservations on multiple objects
> > > without the full ww mutex dance is deadlock-risky. But only when you both
> > > add and wait/stall on fences.
> > 
> > Note that it is only so when trying to lock multiple objects simultaneously,
> > which we do not require.
> 
> With deadlocks I didn't mean locking deadlocks, but loops in the fences.
> Sure we eventually recover when you have a good gpu driver with hang
> check, but other drivers might be less fortunate. And if you lock objects
> individually and update their fences individually it's fairly simple to
> race command submission (on different drivers) against each other with
> objects listed in reverse and end up with an ABBA fence dependency.

The dependency graph has to be acyclic, or else it is unresolvable. To
enforce that here just requires the fences to be collected before the
request is added to the exposed dma-bufs. You don't need to lock all the
incoming fences at once just to add them to the list.

This patch addresses exposing our request on the dma-buf after all the
fences have been added to the collection.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 04/22] drm/i915: Remove request retirement before each batch
  2016-07-28 11:52       ` Daniel Vetter
@ 2016-07-28 12:24         ` Chris Wilson
  2016-07-28 14:21           ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-28 12:24 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Thu, Jul 28, 2016 at 01:52:49PM +0200, Daniel Vetter wrote:
> On Thu, Jul 28, 2016 at 11:26:48AM +0100, Chris Wilson wrote:
> > On Thu, Jul 28, 2016 at 11:54:07AM +0200, Daniel Vetter wrote:
> > > On Wed, Jul 27, 2016 at 12:14:42PM +0100, Chris Wilson wrote:
> > > > This reimplements the denial-of-service protection against igt from
> > > > commit 227f782e4667 ("drm/i915: Retire requests before creating a new
> > > > one") and transfers the stall from before each batch into get_pages().
> > > > The issue is that the stall is increasing latency between batches which
> > > > is detrimental in some cases (especially coupled with execlists) to
> > > > keeping the GPU well fed. Also we have made the observation that retiring
> > > > requests can of itself free objects (and requests) and therefore makes
> > > > a good first step when shrinking.
> > > > 
> > > > v2: Recycle objects prior to i915_gem_object_get_pages()
> > > > v3: Remove the reference to the ring from i915_gem_requests_ring() as it
> > > > operates on an intel_engine_cs.
> > > > 
> > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > ---
> > > >  drivers/gpu/drm/i915/i915_drv.h            | 1 -
> > > >  drivers/gpu/drm/i915/i915_gem.c            | 7 +++++--
> > > >  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 --
> > > >  drivers/gpu/drm/i915/i915_gem_request.c    | 4 ++--
> > > >  4 files changed, 7 insertions(+), 7 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > > index fbda38f25c6b..2de3d16f7b80 100644
> > > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > > @@ -3169,7 +3169,6 @@ struct drm_i915_gem_request *
> > > >  i915_gem_find_active_request(struct intel_engine_cs *engine);
> > > >  
> > > >  void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
> > > > -void i915_gem_retire_requests_ring(struct intel_engine_cs *engine);
> > > >  
> > > >  static inline u32 i915_reset_counter(struct i915_gpu_error *error)
> > > >  {
> > > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > > > index bf652dc88024..68dbe4f7940c 100644
> > > > --- a/drivers/gpu/drm/i915/i915_gem.c
> > > > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > > > @@ -2244,7 +2244,6 @@ int
> > > >  i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> > > >  {
> > > >  	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
> > > > -	const struct drm_i915_gem_object_ops *ops = obj->ops;
> > > >  	int ret;
> > > >  
> > > >  	if (obj->pages)
> > > > @@ -2257,7 +2256,10 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> > > >  
> > > >  	BUG_ON(obj->pages_pin_count);
> > > >  
> > > > -	ret = ops->get_pages(obj);
> > > > +	/* Recycle as many active objects as possible first */
> > > > +	i915_gem_retire_requests(dev_priv);
> > > > +
> > > > +	ret = obj->ops->get_pages(obj);
> > > 
> > > Why exactly do we need this?
> > > - shmem objs already call shrink_all if they can't get at the memory
> > > - everyone else doesn't care.
> > 
> > Because that is very expensive and we have very poor utilisation of
> > caches. On average, the affected benchmarks are about 100x slower
> > without it and demonstrate large variation.
> > 
> > Everyone else isn't allocating or has their own defense.
> > 
> > Otoh, the more aggressive shrinking is quite recent, more recent than
> > this patch. But I stand by the measurements as they were made that this
> > is the point at which utilisation mattered, if only to worry about it
> > later when I need to remove the call.
> 
> Please add those numbers to the commit message, I think without them this
> particular change isn't well-justified enough.

The oom defense itself?

> > > Even if we need this in some case it looks funny, since it splits the
> > > memory cleanup between caller and callee of get_pages.
> 
> At least for shmem we now have the retire_requests() outside of the
> get_pages vfunc, but the full shrink fallback inside. For OCD reasons I
> htink it'd look better to have both inside the callback. Assuming that we
> need the retire_requests for shmem objects, and not really for any of the
> others.

No, that is much worse from the design standpoint imo. It is the common
point where we know we are under memory pressure (because we are paging
in) and we know that the request list is a good source of reusable
pages.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 22/22] drm/i915: Export our request as a dma-buf fence on the reservation object
  2016-07-28 12:17         ` Chris Wilson
@ 2016-07-28 12:28           ` Daniel Vetter
  2016-07-28 12:45             ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Daniel Vetter @ 2016-07-28 12:28 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx

On Thu, Jul 28, 2016 at 01:17:39PM +0100, Chris Wilson wrote:
> On Thu, Jul 28, 2016 at 01:59:45PM +0200, Daniel Vetter wrote:
> > On Thu, Jul 28, 2016 at 11:40:29AM +0100, Chris Wilson wrote:
> > > On Thu, Jul 28, 2016 at 12:32:42PM +0200, Daniel Vetter wrote:
> > > > On Wed, Jul 27, 2016 at 12:15:00PM +0100, Chris Wilson wrote:
> > > > > If the GEM objects being rendered within this request have been
> > > > > exported via dma-buf to a third party, hook ourselves into the dma-buf
> > > > > reservation object so that the third party can serialise with our
> > > > > rendering via the dma-buf fences.
> > > > > 
> > > > > Testcase: igt/prime_busy
> > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > 
> > > > Style nit: I prefer ww_mutex_lock(&resv->lock, NULL); over
> > > > mutex_lock(&resv->lock.base). The former makes it clear it's a ww mutex,
> > > > but we don't bother with the multi-lock dance. The latter needles around
> > > > in implementation details, which it really shouldn't. Please change.
> > > 
> > > Passing NULL as ww_acquire_ctx is illegal.
> > 
> > Hm, where exactly do you see that? kerneldoc for ww_mutex_lock clearly
> > says that it can be NULL, and the static inline has a check for it and
> > calls mutex_lock in the else path. Which means it /should/ boil down to
> > the exact same code after gcc has pondered it enough.
> 
> But then explodes. Look at the lockdep. Clearly the kerneldoc is wrong.
> Good job I was reading the code :-p

Hm, that'd be a bug in the ww_mutex_lock, and we make plenty use of a NULL
ctx in drm_modeset_lock.c. How exactly does this blow up? Can you attach
the splat please?

> > > > The other wonky bit is that changing reservations on multiple objects
> > > > without the full ww mutex dance is deadlock-risky. But only when you both
> > > > add and wait/stall on fences.
> > > 
> > > Note that it is only so when trying to lock multiple objects simultaneously,
> > > which we do not require.
> > 
> > With deadlocks I didn't mean locking deadlocks, but loops in the fences.
> > Sure we eventually recover when you have a good gpu driver with hang
> > check, but other drivers might be less fortunate. And if you lock objects
> > individually and update their fences individually it's fairly simple to
> > race command submission (on different drivers) against each other with
> > objects listed in reverse and end up with an ABBA fence dependency.
> 
> The dependency graph has to be acyclic, or else it is unresolvable. To
> enforce that here just requires the fences to be collected before the
> request is added to the exposed dma-bufs. You don't need to lock all the
> incoming fences at once just to add them to the list.
> 
> This patch addresses exposing our request on the dma-buf after all the
> fences have been added to the collection.

Hm right ... now I wonder how I came up with that notion again.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 22/22] drm/i915: Export our request as a dma-buf fence on the reservation object
  2016-07-28 12:28           ` Daniel Vetter
@ 2016-07-28 12:45             ` Chris Wilson
  2016-07-28 20:14               ` Daniel Vetter
  0 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-28 12:45 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Thu, Jul 28, 2016 at 02:28:10PM +0200, Daniel Vetter wrote:
> On Thu, Jul 28, 2016 at 01:17:39PM +0100, Chris Wilson wrote:
> > On Thu, Jul 28, 2016 at 01:59:45PM +0200, Daniel Vetter wrote:
> > > On Thu, Jul 28, 2016 at 11:40:29AM +0100, Chris Wilson wrote:
> > > > On Thu, Jul 28, 2016 at 12:32:42PM +0200, Daniel Vetter wrote:
> > > > > On Wed, Jul 27, 2016 at 12:15:00PM +0100, Chris Wilson wrote:
> > > > > > If the GEM objects being rendered within this request have been
> > > > > > exported via dma-buf to a third party, hook ourselves into the dma-buf
> > > > > > reservation object so that the third party can serialise with our
> > > > > > rendering via the dma-buf fences.
> > > > > > 
> > > > > > Testcase: igt/prime_busy
> > > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > > 
> > > > > Style nit: I prefer ww_mutex_lock(&resv->lock, NULL); over
> > > > > mutex_lock(&resv->lock.base). The former makes it clear it's a ww mutex,
> > > > > but we don't bother with the multi-lock dance. The latter needles around
> > > > > in implementation details, which it really shouldn't. Please change.
> > > > 
> > > > Passing NULL as ww_acquire_ctx is illegal.
> > > 
> > > Hm, where exactly do you see that? kerneldoc for ww_mutex_lock clearly
> > > says that it can be NULL, and the static inline has a check for it and
> > > calls mutex_lock in the else path. Which means it /should/ boil down to
> > > the exact same code after gcc has pondered it enough.
> > 
> > But then explodes. Look at the lockdep. Clearly the kerneldoc is wrong.
> > Good job I was reading the code :-p
> 
> Hm, that'd be a bug in the ww_mutex_lock, and we make plenty use of a NULL
> ctx in drm_modeset_lock.c. How exactly does this blow up? Can you attach
> the splat please?

&ctx->dep_map with a NULL ctx is != 0, which blows up when it gets
dereferenced inside lockdep, __lockdep_acquire():
        if (nest_lock && !__lock_is_held(nest_lock))
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 04/22] drm/i915: Remove request retirement before each batch
  2016-07-28 12:24         ` Chris Wilson
@ 2016-07-28 14:21           ` Chris Wilson
  0 siblings, 0 replies; 95+ messages in thread
From: Chris Wilson @ 2016-07-28 14:21 UTC (permalink / raw)
  To: Daniel Vetter, intel-gfx

On Thu, Jul 28, 2016 at 01:24:47PM +0100, Chris Wilson wrote:
> On Thu, Jul 28, 2016 at 01:52:49PM +0200, Daniel Vetter wrote:
> > Please add those numbers to the commit message, I think without them this
> > particular change isn't well-justified enough.
> 
> The oom defense itself?

In order to exhibit the bad behaviour that prompted v2, I have to
disable the request_alloc() retirement. That is after

commit 9b5f4e5ed6fd58390ecad3772b80936357f1aba6
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Jul 20 09:21:09 2016 +0100

    drm/i915: Retire oldest completed request before allocating next

this no longer appears to be required. And since this is only
considering requests, the situation where retiring all rings is better
than retiring the active ring is slim. I will have to see if I can
generate a hog on one engine that causes memory pressure to adversely
affect other clients (beyond reasonable expectations!).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 15/22] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin()
  2016-07-28 11:53       ` Joonas Lahtinen
@ 2016-07-28 16:12         ` Chris Wilson
  2016-07-29  9:10           ` Joonas Lahtinen
  0 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-28 16:12 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Thu, Jul 28, 2016 at 02:53:03PM +0300, Joonas Lahtinen wrote:
> On to, 2016-07-28 at 12:36 +0100, Chris Wilson wrote:
> > On Thu, Jul 28, 2016 at 01:38:14PM +0300, Joonas Lahtinen wrote:
> > > 
> > > On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> > > > 
> > > > Since i915_gem_obj_ggtt_pin() is an idiom breaking curry function for
> > > > i915_gem_object_ggtt_pin(), spare us the confustion and remove it.
> > > confustion should be combustion or confusion
> > > 
> > > > 
> > > > @@ -3741,7 +3742,8 @@ i915_gem_object_ggtt_pin(struct
> > > > drm_i915_gem_object *obj,
> > > >         struct i915_vma *vma;
> > > >         int ret;
> > > >  
> > > > -       BUG_ON(!view);
> > > > +       if (!view)
> > > > +               view = &i915_ggtt_view_normal;
> > > > 
> > > Convert other calling sites to take advantage of this if,
> > Patches have already been sent to do so, and will be along later as part
> > of the VMA handling outside of execbuf.
> > 
> > > 
> > > and also fix the error handling;
> > Hmm, what have I missed?
> > 
> > > 
> > > "i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL"
> > > is used in i915_gem_object_pin and i915_gem_obj_lookup_or_create_vma
> > They are queued for removal.
> > 
> 
> Those two calling sites will rely on the BUG_ON(!view) and pass NULL to
> trigger it. So the BUG_ON should be kept at an upper level temporarily to
> help with bisecting in the future.

I'm still lost.

i915_gem_obj_lookup_or_create_ggtt_vma(), view is always !NULL (single
caller of i915_gem_object_ggtt_pin).
i915_gem_obj_lookup_or_create_vma(), vm could be NULL (though neither of
the two callers passes NULL), but there isn't a single upper layer to
insert a BUG.

i915_gem_object_pin() doesn't exist, i915_gem_object_ggtt_pin() is the
only remaining similar function. (The other pin is i915_vma_pin.) vm
there is implied to be &dev_priv->ggtt, and the view is either supplied
by the caller or set to the default normal view.

Do you want i915_gem_obj_lookup_or_create_ggtt_vma() to inherit the
BUG_ON(!view) removed from i915_gem_object_ggtt_pin() by this patch?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 22/22] drm/i915: Export our request as a dma-buf fence on the reservation object
  2016-07-28 12:45             ` Chris Wilson
@ 2016-07-28 20:14               ` Daniel Vetter
  2016-07-28 21:08                 ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Daniel Vetter @ 2016-07-28 20:14 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx

On Thu, Jul 28, 2016 at 01:45:44PM +0100, Chris Wilson wrote:
> On Thu, Jul 28, 2016 at 02:28:10PM +0200, Daniel Vetter wrote:
> > On Thu, Jul 28, 2016 at 01:17:39PM +0100, Chris Wilson wrote:
> > > On Thu, Jul 28, 2016 at 01:59:45PM +0200, Daniel Vetter wrote:
> > > > On Thu, Jul 28, 2016 at 11:40:29AM +0100, Chris Wilson wrote:
> > > > > On Thu, Jul 28, 2016 at 12:32:42PM +0200, Daniel Vetter wrote:
> > > > > > On Wed, Jul 27, 2016 at 12:15:00PM +0100, Chris Wilson wrote:
> > > > > > > If the GEM objects being rendered within this request have been
> > > > > > > exported via dma-buf to a third party, hook ourselves into the dma-buf
> > > > > > > reservation object so that the third party can serialise with our
> > > > > > > rendering via the dma-buf fences.
> > > > > > > 
> > > > > > > Testcase: igt/prime_busy
> > > > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > > > 
> > > > > > Style nit: I prefer ww_mutex_lock(&resv->lock, NULL); over
> > > > > > mutex_lock(&resv->lock.base). The former makes it clear it's a ww mutex,
> > > > > > but we don't bother with the multi-lock dance. The latter needles around
> > > > > > in implementation details, which it really shouldn't. Please change.
> > > > > 
> > > > > Passing NULL as ww_acquire_ctx is illegal.
> > > > 
> > > > Hm, where exactly do you see that? kerneldoc for ww_mutex_lock clearly
> > > > says that it can be NULL, and the static inline has a check for it and
> > > > calls mutex_lock in the else path. Which means it /should/ boil down to
> > > > the exact same code after gcc has pondered it enough.
> > > 
> > > But then explodes. Look at the lockdep. Clearly the kerneldoc is wrong.
> > > Good job I was reading the code :-p
> > 
> > Hm, that'd be a bug in the ww_mutex_lock, and we make plenty use of a NULL
> > ctx in drm_modeset_lock.c. How exactly does this blow up? Can you attach
> > the splat please?
> 
> &ctx->dep_map with a NULL ctx is != 0, which blows up when it gets
> dereferenced inside lockdep, __lockdep_acquire():
>         if (nest_lock && !__lock_is_held(nest_lock))

In my current tree I have:

void __sched ww_mutex_unlock(struct ww_mutex *lock)
{
	/*
	 * The unlocking fastpath is the 0->1 transition from 'locked'
	 * into 'unlocked' state:
	 */
	if (lock->ctx) {
#ifdef CONFIG_DEBUG_MUTEXES
		DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
#endif
		if (lock->ctx->acquired > 0)
			lock->ctx->acquired--;
		lock->ctx = NULL;
	}

#ifndef CONFIG_DEBUG_MUTEXES
	/*
	 * When debugging is enabled we must not clear the owner before time,
	 * the slow path will always be taken, and that clears the owner field
	 * after verifying that it was indeed current.
	 */
	mutex_clear_owner(&lock->base);
#endif
	__mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath);
}

and

static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
	if (ctx)
		return __ww_mutex_lock(lock, ctx);

	mutex_lock(&lock->base);
	return 0;
}

I really don't see where we can blow up on NULL ctx when using
ww_mutex_lock/unlock. And if you look at some of the get* drm ioctls, you
can chase a drm_modeset_lock(obj, NULL) down to exactly such a
ww_mutex_lock(lock, NULL) call, and evidently X doesn't crash.

In short I still can't find how you managed to blow up on the nest_lock
being NULL anywhere, at least in ww_mutex code.
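
For illustration, the chain I have in mind is roughly the following
(sketched from memory of drm_modeset_lock.c, so treat the details as
indicative only):

int drm_modeset_lock(struct drm_modeset_lock *lock,
		     struct drm_modeset_acquire_ctx *ctx)
{
	if (ctx)
		return modeset_lock(lock, ctx, false, false);

	/* a NULL ctx takes the plain ww_mutex path, no nest_lock */
	ww_mutex_lock(&lock->mutex, NULL);
	return 0;
}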
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: [PATCH 21/22] drm/i915: Enable lockless lookup of request tracking via RCU
  2016-07-28 10:23   ` Daniel Vetter
@ 2016-07-28 20:49     ` Chris Wilson
  2016-07-29  8:41       ` Daniel Vetter
  0 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-28 20:49 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, Goel, Akash, Josh Triplett

On Thu, Jul 28, 2016 at 12:23:40PM +0200, Daniel Vetter wrote:
> I think we have a race here still: The issue is that the
> kref_get_unless_zero is an unordered atomic, and the rcu_dereference is
> only an smp_read_barrier_depends, which doesn't prevent the fetch from
> happening before the atomic_add_unless.
> 
> Well until I opened memory-barriers.txt and learned that atomic_add_unless
> is a full smp_mb() on both sides on success. That's a bit too tricky for
> my taste, what about the following comment:
> 
> 		/* When request_get_rcu succeeds the underlying
> 		 * atomic_add_unless has a full smp_mb() on both sides.
> 		 * This ensures that the rcu_dereference() below can't be
> 		 * reordered before the refcounting increase has
> 		 * happened, which prevents the request from being reused.
> 		 */
> 
> I couldn't poke any other holes into this, and we're reusing the fence rcu
> functions where appropriate. With the comment:


                /* What stops the following rcu_dereference() from occurring
                 * before the above i915_gem_request_get_rcu()? If we were
                 * to read the value before pausing to get the reference to
                 * the request, we may not notice a change in the active
                 * tracker.
                 *
                 * The rcu_dereference() is a mere read barrier, which means
                 * that operations after it will appear after, neither the
                 * CPU nor the compiler will bring them forwards. However,
                 * that does not restrict the rcu_dereference() itself. The
                 * read may be performed earlier by an out-of-order CPU, or
                 * adventurous compiler.
                 *
                 * The atomic operation at the heart of
                 * i915_gem_request_get_rcu(), see fence_get_rcu(), is
                 * atomic_inc_not_zero() which is only a full memory barrier
                 * when successful. That is, if i915_gem_request_get_rcu()
                 * returns the request (and so with the reference counted
                 * incremented) then the following read for rcu_dereference()
                 * must occur after the atomic operation and so confirm
                 * that this request is the one currently being tracked.
                 */
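
For reference, the atomic at the heart of this is just the fence
helper; a sketch from memory (kref_get_unless_zero() being
atomic_add_unless() underneath):

static inline struct fence *fence_get_rcu(struct fence *fence)
{
	if (kref_get_unless_zero(&fence->refcount))
		return fence;
	else
		return NULL;
}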

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 22/22] drm/i915: Export our request as a dma-buf fence on the reservation object
  2016-07-28 20:14               ` Daniel Vetter
@ 2016-07-28 21:08                 ` Chris Wilson
  0 siblings, 0 replies; 95+ messages in thread
From: Chris Wilson @ 2016-07-28 21:08 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Thu, Jul 28, 2016 at 10:14:36PM +0200, Daniel Vetter wrote:
> On Thu, Jul 28, 2016 at 01:45:44PM +0100, Chris Wilson wrote:
> > On Thu, Jul 28, 2016 at 02:28:10PM +0200, Daniel Vetter wrote:
> > > On Thu, Jul 28, 2016 at 01:17:39PM +0100, Chris Wilson wrote:
> > > > On Thu, Jul 28, 2016 at 01:59:45PM +0200, Daniel Vetter wrote:
> > > > > On Thu, Jul 28, 2016 at 11:40:29AM +0100, Chris Wilson wrote:
> > > > > > On Thu, Jul 28, 2016 at 12:32:42PM +0200, Daniel Vetter wrote:
> > > > > > > On Wed, Jul 27, 2016 at 12:15:00PM +0100, Chris Wilson wrote:
> > > > > > > > If the GEM objects being rendered with in this request have been
> > > > > > > > exported via dma-buf to a third party, hook ourselves into the dma-buf
> > > > > > > > reservation object so that the third party can serialise with our
> > > > > > > > rendering via the dma-buf fences.
> > > > > > > > 
> > > > > > > > Testcase: igt/prime_busy
> > > > > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > > > > 
> > > > > > > Style nit: I prefer ww_mutex_lock(&resv->lock, NULL); over
> > > > > > > mutex_lock(&resv->lock.base). The former makes it clear it's a ww mutex,
> > > > > > > but we don't bother with the multi-lock dance. The latter needles around
> > > > > > > in implementation details, which it really shouldn't. Please change.
> > > > > > 
> > > > > > Passing NULL as ww_acquire_ctx is illegal.
> > > > > 
> > > > > Hm, where exactly do you see that? kerneldoc for ww_mutex_lock clearly
> > > > > says that it can be NULL, and the static inline has a check for it and
> > > > > calls mutex_lock in the else path. Which means it /should/ boil down to
> > > > > the exact same code after gcc has pondered it enough.
> > > > 
> > > > But then explodes. Look at the lockdep. Clearly the kerneldoc is wrong.
> > > > Good job I was reading the code :-p
> > > 
> > > Hm, that'd be a bug in ww_mutex_lock, and we make plenty of use of a NULL
> > > ctx in drm_modeset_lock.c. How exactly does this blow up? Can you attach
> > > the splat please?
> > 
> > &ctx->dep_map with a NULL ctx pointer is != 0, which blows up when it
> > gets dereferenced inside lockdep, __lockdep_acquire():
> >         if (nest_lock && !__lock_is_held(nest_lock))
> 
> In my current tree I have:
> 
> void __sched ww_mutex_unlock(struct ww_mutex *lock)
> {
> 	/*
> 	 * The unlocking fastpath is the 0->1 transition from 'locked'
> 	 * into 'unlocked' state:
> 	 */
> 	if (lock->ctx) {
> #ifdef CONFIG_DEBUG_MUTEXES
> 		DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
> #endif
> 		if (lock->ctx->acquired > 0)
> 			lock->ctx->acquired--;
> 		lock->ctx = NULL;
> 	}
> 
> #ifndef CONFIG_DEBUG_MUTEXES
> 	/*
> 	 * When debugging is enabled we must not clear the owner before time,
> 	 * the slow path will always be taken, and that clears the owner field
> 	 * after verifying that it was indeed current.
> 	 */
> 	mutex_clear_owner(&lock->base);
> #endif
> 	__mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath);
> }
> 
> and
> 
> static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
> {
> 	if (ctx)
> 		return __ww_mutex_lock(lock, ctx);
> 
> 	mutex_lock(&lock->base);
> 	return 0;
> }
> 
> I really don't see where we can blow up on NULL ctx when using
> ww_mutex_lock/unlock. And if you look at some of the get* drm ioctls, you
> can chase a drm_modeset_lock(obj, NULL) down to exactly such a
> ww_mutex_lock(lock, NULL) call, and evidently X doesn't crash.
> 
> In short I still can't find how you managed to blow up on the nest_lock
> being NULL anywhere, at least in ww_mutex code.

When I first tried ww_mutex_lock, I got an oops. However, I've just
tried ww_mutex_lock() with a NULL ctx under KASAN and lockdep/RCU, and
it worked.

Pebkac.
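
So I'll switch to the form you suggest, i.e. (sketch only, with resv
being the dma-buf reservation object from the patch):

	ww_mutex_lock(&resv->lock, NULL);
	/* ... attach our request as a fence on the reservation ... */
	ww_mutex_unlock(&resv->lock);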
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 01/22] drm/i915: Combine loops within i915_gem_evict_something
  2016-07-27 11:14 ` [PATCH 01/22] drm/i915: Combine loops within i915_gem_evict_something Chris Wilson
@ 2016-07-29  6:17   ` Joonas Lahtinen
  2016-07-29  6:31     ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-29  6:17 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -34,6 +34,19 @@
>  #include "i915_trace.h"
>  
>  static bool
> +gpu_is_idle(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_engine_cs *engine;
> +
> +	for_each_engine(engine, dev_priv) {
> +		if (!list_empty(&engine->request_list))
> +			return false;
> +	}

Braces are not necessary here.
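
I.e. (the style nit spelled out, same logic):

	for_each_engine(engine, dev_priv)
		if (!list_empty(&engine->request_list))
			return false;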
 
>  	/*
>  	 * The goal is to evict objects and amalgamate space in LRU order.
>  	 * The oldest idle objects reside on the inactive list, which is in
> -	 * retirement order. The next objects to retire are those on the (per
> -	 * ring) active list that do not have an outstanding flush. Once the
> -	 * hardware reports completion (the seqno is updated after the
> -	 * batchbuffer has been finished) the clean buffer objects would
> -	 * be retired to the inactive list. Any dirty objects would be added
> -	 * to the tail of the flushing list. So after processing the clean
> -	 * active objects we need to emit a MI_FLUSH to retire the flushing
> -	 * list, hence the retirement order of the flushing list is in
> -	 * advance of the dirty objects on the active lists.
> +	 * retirement order. The next objects to retire are those in flight,
> +	 * on the active list, again in retirement order.
>  	 *
>  	 * The retirement sequence is thus:
>  	 *   1. Inactive objects (already retired)
> -	 *   2. Clean active objects
> -	 *   3. Flushing list
> -	 *   4. Dirty active objects.
> +	 *   2. Active objects (will stall on unbinding)

Not quite sure how good a sequence list is for two phases :)
 
>  found:
>  	/* drm_mm doesn't allow any other other operations while
> -	 * scanning, therefore store to be evicted objects on a
> -	 * temporary list. */
> -	INIT_LIST_HEAD(&eviction_list);
> -	while (!list_empty(&unwind_list)) {
> -		vma = list_first_entry(&unwind_list,
> -				       struct i915_vma,
> -				       exec_list);
> -		if (drm_mm_scan_remove_block(&vma->node)) {
> +	 * scanning, therefore store to-be-evicted objects on a
> +	 * temporary list and take a reference for all before
> +	 * calling unbind (which may remove the active reference
> +	 * of any of our objects, thus corrupting the list).
> +	 */
> +	list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {

s/exec_list/exec_link/ at some point in future.

> +		if (drm_mm_scan_remove_block(&vma->node))
>  			vma->pin_count++;
> -			list_move(&vma->exec_list, &eviction_list);
> -			continue;
> -		}
> -		list_del_init(&vma->exec_list);
> +		else
> +			list_del_init(&vma->exec_list);

Current behaviour is not changed, but I've gotta ask: why is the vma
not put back on the list it originated from?

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 01/22] drm/i915: Combine loops within i915_gem_evict_something
  2016-07-29  6:17   ` Joonas Lahtinen
@ 2016-07-29  6:31     ` Chris Wilson
  0 siblings, 0 replies; 95+ messages in thread
From: Chris Wilson @ 2016-07-29  6:31 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Fri, Jul 29, 2016 at 09:17:00AM +0300, Joonas Lahtinen wrote:
> On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> > --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> > @@ -34,6 +34,19 @@
> >  #include "i915_trace.h"
> >  
> >  static bool
> > +gpu_is_idle(struct drm_i915_private *dev_priv)
> > +{
> > +	struct intel_engine_cs *engine;
> > +
> > +	for_each_engine(engine, dev_priv) {
> > +		if (!list_empty(&engine->request_list))
> > +			return false;
> > +	}
> 
> Braces are not necessary here.
>  
> >  	/*
> >  	 * The goal is to evict objects and amalgamate space in LRU order.
> >  	 * The oldest idle objects reside on the inactive list, which is in
> > -	 * retirement order. The next objects to retire are those on the (per
> > -	 * ring) active list that do not have an outstanding flush. Once the
> > -	 * hardware reports completion (the seqno is updated after the
> > -	 * batchbuffer has been finished) the clean buffer objects would
> > -	 * be retired to the inactive list. Any dirty objects would be added
> > -	 * to the tail of the flushing list. So after processing the clean
> > -	 * active objects we need to emit a MI_FLUSH to retire the flushing
> > -	 * list, hence the retirement order of the flushing list is in
> > -	 * advance of the dirty objects on the active lists.
> > +	 * retirement order. The next objects to retire are those in flight,
> > +	 * on the active list, again in retirement order.
> >  	 *
> >  	 * The retirement sequence is thus:
> >  	 *   1. Inactive objects (already retired)
> > -	 *   2. Clean active objects
> > -	 *   3. Flushing list
> > -	 *   4. Dirty active objects.
> > +	 *   2. Active objects (will stall on unbinding)
> 
> Not quite sure how good a sequence list is for two phases :)
>  
> >  found:
> >  	/* drm_mm doesn't allow any other other operations while
> > -	 * scanning, therefore store to be evicted objects on a
> > -	 * temporary list. */
> > -	INIT_LIST_HEAD(&eviction_list);
> > -	while (!list_empty(&unwind_list)) {
> > -		vma = list_first_entry(&unwind_list,
> > -				       struct i915_vma,
> > -				       exec_list);
> > -		if (drm_mm_scan_remove_block(&vma->node)) {
> > +	 * scanning, therefore store to-be-evicted objects on a
> > +	 * temporary list and take a reference for all before
> > +	 * calling unbind (which may remove the active reference
> > +	 * of any of our objects, thus corrupting the list).
> > +	 */
> > +	list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
> 
> s/exec_list/exec_link/ at some point in future.

Look ahead, it becomes evict_link.

> > +		if (drm_mm_scan_remove_block(&vma->node))
> >  			vma->pin_count++;
> > -			list_move(&vma->exec_list, &eviction_list);
> > -			continue;
> > -		}
> > -		list_del_init(&vma->exec_list);
> > +		else
> > +			list_del_init(&vma->exec_list);
> 
> Current behaviour is not changed, but I've gotta ask: why is the vma
> not put back on the list it originated from?

It's not moved from the vma lists. exec_list is a slot reserved for use
in two particular non-concurrent temporary lists (exec and evict). In
the nearish future, I propose we stop using exec_list as the unique
identifier for an execobject and have separate exec_link/evict_link so
we can maintain both lists concurrently.

Trying to avoid allocating more temporary storage inside execbuf is a
pain. But using vma as temporary storage for execbuf has to die because
of the need to allow concurrency.
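
A rough sketch of where that ends up (field names only as proposed
above, purely illustrative):

struct i915_vma {
	/* other members as today, plus: */
	struct list_head exec_link;	/* for the execbuf object list */
	struct list_head evict_link;	/* for the eviction list */
};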
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 10/22] drm/i915: Record allocated vma size
  2016-07-27 11:14 ` [PATCH 10/22] drm/i915: Record allocated vma size Chris Wilson
@ 2016-07-29  6:53   ` Joonas Lahtinen
  2016-07-29  7:18     ` Chris Wilson
  2016-07-29 10:19     ` [PATCH] drm/i915: Convert 4096 alignment request to 0 for drm_mm allocations Chris Wilson
  0 siblings, 2 replies; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-29  6:53 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> -uint32_t
> -i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode);
> -uint32_t
> -i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
> -			    int tiling_mode, bool fenced);
> +uint64_t

u64 for consistency with code elsewhere. Applies to all the type
changes.

>  	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
> -	end = vm->total;
> +
> +	end = vma->vm->total;

While you're touching this, I might rename end to vm_end or so...

>  	if (flags & PIN_MAPPABLE)
>  		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
>  	if (flags & PIN_ZONE_4G)
> @@ -3030,8 +3018,7 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
>  	 * attempt to find space.
>  	 */
>  	if (size > end) {
> -		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
> -			  ggtt_view ? ggtt_view->type : 0,
> +		DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",

No view type any more?

>  		vma->node.start = offset;
>  		vma->node.size = size;
>  		vma->node.color = obj->cache_level;
> -		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> +		ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);

Not sure if dropping the vm alias makes things look any better, unless
you intend to create i915_vma_reserve_mem() or so?

> @@ -3077,37 +3060,39 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
>  			alloc_flag = DRM_MM_CREATE_DEFAULT;
>  		}
>  
> +		if (alignment <= 4096)
> +			alignment = 0; /* for efficient drm_mm searching */
> +

This is obviously not related and should be mentioned in the commit message or split.

>  void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
>  {
>  	void __iomem *ptr;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index a79015bf7261..5c3a093b83c1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -180,6 +180,7 @@ struct i915_vma {
>  	struct drm_i915_gem_object *obj;
>  	struct i915_address_space *vm;
>  	void __iomem *iomap;
> +	u64 size;

I'm certain I had the addition of this member on my list some time ago.
Good that we're moving in the right direction with the vma stuff.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 11/22] drm/i915: Wrap vma->pin_count accessors with small inline helpers
  2016-07-27 11:14 ` [PATCH 11/22] drm/i915: Wrap vma->pin_count accessors with small inline helpers Chris Wilson
@ 2016-07-29  6:59   ` Joonas Lahtinen
  2016-07-29  7:23     ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-29  6:59 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> @@ -3810,10 +3810,11 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
>  {
>  	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
>  
> -	WARN_ON(vma->pin_count == 0);
> +	GEM_BUG_ON(!vma);
> +	WARN_ON(i915_vma_is_pinned(vma));

Shouldn't this be !i915_vma_is_pinned() ?

Otherwise a fine mechanical change, with that fixed;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 10/22] drm/i915: Record allocated vma size
  2016-07-29  6:53   ` Joonas Lahtinen
@ 2016-07-29  7:18     ` Chris Wilson
  2016-07-29 10:19     ` [PATCH] drm/i915: Convert 4096 alignment request to 0 for drm_mm allocations Chris Wilson
  1 sibling, 0 replies; 95+ messages in thread
From: Chris Wilson @ 2016-07-29  7:18 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Fri, Jul 29, 2016 at 09:53:11AM +0300, Joonas Lahtinen wrote:
> On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> > -uint32_t
> > -i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode);
> > -uint32_t
> > -i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
> > -			    int tiling_mode, bool fenced);
> > +uint64_t
> 
> u64 for consistency with code elsewhere. Applies to all the type
> changes.
> 
> >  	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
> > -	end = vm->total;
> > +
> > +	end = vma->vm->total;
> 
> While touching, I might change the end to vm_end or so...

I wouldn't. It's not derived from the address space but from our request.

> >  	if (flags & PIN_MAPPABLE)
> >  		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
> >  	if (flags & PIN_ZONE_4G)
> > @@ -3030,8 +3018,7 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
> >  	 * attempt to find space.
> >  	 */
> >  	if (size > end) {
> > -		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
> > -			  ggtt_view ? ggtt_view->type : 0,
> > +		DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
> 
> No view type no more?

There will be no view type here anymore. It is less important than the
request flags; but this is a user debug message, so ideally the
information presented here would closely relate to the user entry point.

> >  		vma->node.start = offset;
> >  		vma->node.size = size;
> >  		vma->node.color = obj->cache_level;
> > -		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> > +		ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
> 
> Not sure if dropping the vm alias makes things look any better, unless
> you intend to create i915_vma_reserve_mem() or so?

We do. I'm not fond of having unnecessary offscreen locals, and here we
can see clearly how the mm relates to the vma, which makes it easier to
compare this callsite to similar code.
 
> > @@ -3077,37 +3060,39 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
> >  			alloc_flag = DRM_MM_CREATE_DEFAULT;
> >  		}
> >  
> > +		if (alignment <= 4096)
> > +			alignment = 0; /* for efficient drm_mm searching */
> > +
> 
> This is obviously not related and should be mentioned in the commit message or split.

I had wondered where that had buried itself. It is self-evident, right?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 11/22] drm/i915: Wrap vma->pin_count accessors with small inline helpers
  2016-07-29  6:59   ` Joonas Lahtinen
@ 2016-07-29  7:23     ` Chris Wilson
  0 siblings, 0 replies; 95+ messages in thread
From: Chris Wilson @ 2016-07-29  7:23 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Fri, Jul 29, 2016 at 09:59:31AM +0300, Joonas Lahtinen wrote:
> On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> > @@ -3810,10 +3810,11 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
> >  {
> >  	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
> >  
> > -	WARN_ON(vma->pin_count == 0);
> > +	GEM_BUG_ON(!vma);
> > +	WARN_ON(i915_vma_is_pinned(vma));
> 
> Shouldn't this be !i915_vma_is_pinned() ?

The joy of inventing steps afterwards. The WARN gets removed in a couple
of patches' time, which explains why it was never encountered.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 13/22] drm/i915: Combine all i915_vma bitfields into a single set of flags
  2016-07-27 11:14 ` [PATCH 13/22] drm/i915: Combine all i915_vma bitfields into a single set of flags Chris Wilson
@ 2016-07-29  7:30   ` Joonas Lahtinen
  2016-07-29  7:44     ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-29  7:30 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> @@ -2979,7 +2980,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>  	u64 min_alignment;
>  	int ret;
>  
> -	GEM_BUG_ON(vma->bound);
> +	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));

How about i915_vma_is_bound()?

>  	/* Pin early to prevent the shrinker/eviction logic from destroying
> @@ -3712,7 +3714,7 @@ i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>  	 */
>  	__i915_vma_pin(vma);
>  
> -	if (!bound) {
> +	if ((bound & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)) == 0) {

In this case especially !(bound & ...) looks far more readable. Again,
I'm against flip-flopping between styles, but I understand these are
old patches, so we can unify stuff at the end of the churn.

> @@ -3682,8 +3682,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
>  	if (WARN_ON(!vma->obj->map_and_fenceable))
>  		return IO_ERR_PTR(-ENODEV);
>  
> -	GEM_BUG_ON(!vma->is_ggtt);
> -	GEM_BUG_ON((vma->bound & GLOBAL_BIND) == 0);
> +	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> +	GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0);

Again !(vma->flags & ) is more readable.

But GEM_BUG_ON(!i915_vma_is_bound(vma)) would again be possible.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 19/22] drm/i915: Move obj->active:5 to obj->flags
  2016-07-27 11:14 ` [PATCH 19/22] drm/i915: Move obj->active:5 to obj->flags Chris Wilson
@ 2016-07-29  7:40   ` Joonas Lahtinen
  2016-07-29  8:04     ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-29  7:40 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> +static inline void
> +i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
> +{
> +	obj->flags |= 1 << (engine + I915_BO_ACTIVE_SHIFT);

BIT(engine) << I915_BO_ACTIVE_SHIFT would be more readable to my taste,
but I guess it's debatable.
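
I.e. (the suggestion spelled out; same bits, just using the BIT()
macro):

	obj->flags |= BIT(engine) << I915_BO_ACTIVE_SHIFT;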

>  /*
>   * Optimised SGL iterator for GEM objects
>   */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index bc5bc5ccdde0..ca9741525bf4 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1354,7 +1354,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>  
>  	if (!readonly) {
>  		active = obj->last_read;
> -		active_mask = obj->active;
> +		active_mask = i915_gem_object_is_active(obj);

_is_active() does not really fit being assigned to a _mask. Maybe have
object_active_mask() and then

_is_idle/inactive/whatever() { return !object_active_mask(); }

because the negation is used a lot more.
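
A sketch of that split (hypothetical helpers, names as above):

static inline unsigned int
i915_gem_object_active_mask(const struct drm_i915_gem_object *obj)
{
	return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK;
}

static inline bool
i915_gem_object_is_idle(const struct drm_i915_gem_object *obj)
{
	return !i915_gem_object_active_mask(obj);
}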

> @@ -993,7 +993,7 @@ static int
>  i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>  				struct list_head *vmas)
>  {
> -	const unsigned other_rings = ~intel_engine_flag(req->engine);
> +	const unsigned int other_rings = (~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK) << I915_BO_ACTIVE_SHIFT;

Horribly long line; is this an intermediary step?

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 13/22] drm/i915: Combine all i915_vma bitfields into a single set of flags
  2016-07-29  7:30   ` Joonas Lahtinen
@ 2016-07-29  7:44     ` Chris Wilson
  0 siblings, 0 replies; 95+ messages in thread
From: Chris Wilson @ 2016-07-29  7:44 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Fri, Jul 29, 2016 at 10:30:26AM +0300, Joonas Lahtinen wrote:
> On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> > @@ -2979,7 +2980,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> >  	u64 min_alignment;
> >  	int ret;
> >  
> > -	GEM_BUG_ON(vma->bound);
> > +	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
> 
> How bout i915_vma_is_bound?
> 
> >  	/* Pin early to prevent the shrinker/eviction logic from destroying
> > @@ -3712,7 +3714,7 @@ i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> >  	 */
> >  	__i915_vma_pin(vma);
> >  
> > -	if (!bound) {
> > +	if ((bound & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)) == 0) {
> 
> In this case especially !(bound & ...) looks far more readable. Again,
> I'm against flip-flopping between styles, but I understand these are
> old patches, so we can unify stuff at the end of churn.

Here I intentionally used GLOBAL | LOCAL for two reasons: it looks more
like the existing use inside i915_vma_bind() and the contrast is very
important for the next patch where we add a fake BIND bit.

> > @@ -3682,8 +3682,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
> >  	if (WARN_ON(!vma->obj->map_and_fenceable))
> >  		return IO_ERR_PTR(-ENODEV);
> >  
> > -	GEM_BUG_ON(!vma->is_ggtt);
> > -	GEM_BUG_ON((vma->bound & GLOBAL_BIND) == 0);
> > +	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> > +	GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0);
> 
> Again !(vma->flags & ) is more readable.

I disagree. I find the ! gets lost before the brackets, and == matches
the pattern for checking bits. So I generally prefer (value & mask) == result.
 
> But GEM_BUG_ON(!i915_vma_is_bound(vma)) would again be possible.

It would have to be GEM_BUG_ON(!i915_vma_is_bound_to_global(vma)) here
though, or GEM_BUG_ON(!i915_vma_any_bound(vma, I915_VMA_GLOBAL_BIND)).
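
A sketch of the latter (hypothetical helper, following the naming
above):

static inline bool
i915_vma_any_bound(const struct i915_vma *vma, unsigned int flags)
{
	return vma->flags & flags;
}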
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 07/22] drm/i915: Pad GTT views of exec objects up to user specified size
  2016-07-27 11:14 ` [PATCH 07/22] drm/i915: Pad GTT views of exec objects up to user specified size Chris Wilson
  2016-07-28  9:55   ` Daniel Vetter
@ 2016-07-29  7:59   ` Joonas Lahtinen
  2016-07-29  8:08     ` Chris Wilson
  1 sibling, 1 reply; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-29  7:59 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -727,11 +727,15 @@ struct drm_i915_gem_exec_object2 {
>  #define EXEC_OBJECT_WRITE		 (1<<2)
>  #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
>  #define EXEC_OBJECT_PINNED		 (1<<4)
> +#define EXEC_OBJECT_PAD_TO_SIZE		 (1<<5)
>  /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS	(-(EXEC_OBJECT_PINNED<<1))
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)

Do keep the () around, why not? With that fixed,

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 19/22] drm/i915: Move obj->active:5 to obj->flags
  2016-07-29  7:40   ` Joonas Lahtinen
@ 2016-07-29  8:04     ` Chris Wilson
  2016-07-29  8:10       ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-29  8:04 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Fri, Jul 29, 2016 at 10:40:09AM +0300, Joonas Lahtinen wrote:
> On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> > +static inline void
> > +i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
> > +{
> > +	obj->flags |= 1 << (engine + I915_BO_ACTIVE_SHIFT);
> 
> BIT(engine) << I915_BO_ACTIVE_SHIFT would be more readable to my taste,
> but I guess it's debatable.

I didn't change this to be BIT(engine + I915_BO_ACTIVE_SHIFT) because of
i915_gem_object_is_active() not following the pattern.

> >  	if (!readonly) {
> >  		active = obj->last_read;
> > -		active_mask = obj->active;
> > +		active_mask = i915_gem_object_is_active(obj);
> 
> _is_active() does not really fit to be assigned to _mask. maybe have
> object_active_mask() and then
> 
> _is_idle/inactive/whatever() { return !object_active_mask() }
> 
> Because the negation is used lot more.

10 i915_gem_object_is_active(), 1 !i915_gem_object_is_active(). Of
those, 4 use the mask and the rest treat it as a boolean.

I'm still liking i915_gem_object_is_active() over
	i915_gem_object_active
	i915_gem_object_active_mask
	i915_gem_object_has_active

> > @@ -993,7 +993,7 @@ static int
> >  i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
> >  				struct list_head *vmas)
> >  {
> > -	const unsigned other_rings = ~intel_engine_flag(req->engine);
> > +	const unsigned int other_rings = (~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK) << I915_BO_ACTIVE_SHIFT;
> 
> Horribly long line, is this intermediary?

No. Sadly not; it requires something like

/* the rings were an allusion to something that will break later */
static unsigned int eb_other_engines(struct drm_i915_gem_request *req)
{
	unsigned int mask;

	mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK;
	mask <<= I915_BO_ACTIVE_SHIFT;

	return mask;
}

to get a reasonable split.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 07/22] drm/i915: Pad GTT views of exec objects up to user specified size
  2016-07-29  7:59   ` Joonas Lahtinen
@ 2016-07-29  8:08     ` Chris Wilson
  2016-07-29  8:55       ` Joonas Lahtinen
  0 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-29  8:08 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Fri, Jul 29, 2016 at 10:59:26AM +0300, Joonas Lahtinen wrote:
> On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -727,11 +727,15 @@ struct drm_i915_gem_exec_object2 {
> >  #define EXEC_OBJECT_WRITE		 (1<<2)
> >  #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> >  #define EXEC_OBJECT_PINNED		 (1<<4)
> > +#define EXEC_OBJECT_PAD_TO_SIZE		 (1<<5)
> >  /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
> > -#define __EXEC_OBJECT_UNKNOWN_FLAGS	(-(EXEC_OBJECT_PINNED<<1))
> > +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
> 
> Do keep the () around, why not? With that fixed,

Why not? Just lost in rebasing. There's no need for the extra (), why
were they added?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 19/22] drm/i915: Move obj->active:5 to obj->flags
  2016-07-29  8:04     ` Chris Wilson
@ 2016-07-29  8:10       ` Chris Wilson
  2016-07-29  9:34         ` Joonas Lahtinen
  0 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-29  8:10 UTC (permalink / raw)
  To: Joonas Lahtinen, intel-gfx

On Fri, Jul 29, 2016 at 09:04:48AM +0100, Chris Wilson wrote:
> On Fri, Jul 29, 2016 at 10:40:09AM +0300, Joonas Lahtinen wrote:
> > _is_active() does not really fit to be assigned to _mask. maybe have
> > object_active_mask() and then
> > 
> > _is_idle/inactive/whatever() { return !object_active_mask() }
> > 
> > Because the negation is used lot more.
> 
> 10 i915_gem_object_is_active(), 1 !i915_gem_object_is_active(). Of which
> 4 use the mask and the rest as a boolean.

Plus another 5 using the READ_ONCE() variant, which only look at the
active mask.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 12/22] drm/i915: Start passing around i915_vma from execbuffer
  2016-07-27 11:14 ` [PATCH 12/22] drm/i915: Start passing around i915_vma from execbuffer Chris Wilson
@ 2016-07-29  8:23   ` Joonas Lahtinen
  2016-08-01  7:34     ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-29  8:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Mika Kuoppala

On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:

> +	if (i915_vma_misplaced(vma, size, alignment, flags)) {
> +		if (flags & PIN_NONBLOCK &&
> +		    (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
> +			return -ENOSPC;

Why ENOSPC when active? Would not EAGAIN be more appropriate?
 
>  void
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 9ea99e181997..a0759fe613f8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -45,11 +45,10 @@
>  struct i915_execbuffer_params {
>  	struct drm_device               *dev;
>  	struct drm_file                 *file;
> -	u32				 dispatch_flags;
> -	u32				 args_batch_start_offset;
> -	u32				 batch_obj_vm_offset;
> +	struct i915_vma			*batch;
> +	u32				dispatch_flags;
> +	u32				args_batch_start_offset;

Remove the superfluous indent altogether.

>  	struct intel_engine_cs          *engine;
> -	struct drm_i915_gem_object      *batch_obj;
>  	struct i915_gem_context         *ctx;
>  	struct drm_i915_gem_request     *request;
>  };
> @@ -102,6 +101,26 @@ eb_reset(struct eb_vmas *eb)
>  		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
>  }
>  
> +static struct i915_vma *
> +eb_get_batch(struct eb_vmas *eb)
> +{

Could have migrated the comment here;

/* The batch is always the LAST item in the VMA list */ 
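
i.e. something like (sketch; illustrative body, the patch may differ):

static struct i915_vma *
eb_get_batch(struct eb_vmas *eb)
{
	/* The batch is always the LAST item in the VMA list */
	return list_entry(eb->vmas.prev, struct i915_vma, exec_list);
}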

Other than that,

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 16/22] drm/i915: Make fb_tracking.lock a spinlock
  2016-07-28 10:02   ` Daniel Vetter
  2016-07-28 10:08     ` Daniel Vetter
@ 2016-07-29  8:25     ` Chris Wilson
  1 sibling, 0 replies; 95+ messages in thread
From: Chris Wilson @ 2016-07-29  8:25 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Thu, Jul 28, 2016 at 12:02:01PM +0200, Daniel Vetter wrote:
> On Wed, Jul 27, 2016 at 12:14:54PM +0100, Chris Wilson wrote:
> > We only need a very lightweight mechanism here as the locking is only
> > used for co-ordinating a bitfield.
> > 
> > v2: Move the cheap unlikely tests into the caller
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> I think the code shuffling in here badly breaks the kerneldoc. Best fix
> would be to extract a small header for frontbuffer tracking and pull that
> into the kernel doc. Much less preferred is to explicitly pull in the
> kerneldoc function-by-function (but that tends to be rather fragile when
> someone adds something new).

You mean

/**
 * DOC: Frontbuffer tracking overview
 *
 * ...
 */

in i915_drv.h before the functions?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 21/22] drm/i915: Enable lockless lookup of request tracking via RCU
  2016-07-28 20:49     ` Chris Wilson
@ 2016-07-29  8:41       ` Daniel Vetter
  2016-07-29  8:49         ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Daniel Vetter @ 2016-07-29  8:41 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx, Goel, Akash, Josh Triplett

On Thu, Jul 28, 2016 at 09:49:58PM +0100, Chris Wilson wrote:
> On Thu, Jul 28, 2016 at 12:23:40PM +0200, Daniel Vetter wrote:
> > I think we have a race here still: The issue is that the
> > kref_get_unless_zero is an unordered atomic, and the rcu_dereference is
> > only an smp_read_barrier_depends, which doesn't prevent the fetch from
> > happening before the atomic_add_unless.
> > 
> > Well until I opened memory-barriers.txt and learned that atomic_add_unless
> > is a full smp_mb() on both sides on success. That's a bit too tricky for
> > my taste, what about the following comment:
> > 
> > 		/* When request_get_rcu succeeds the underlying
> > 		 * atomic_add_unless has a full smp_mb() on both sides.
> > 		 * This ensures that the rcu_dereference() below can't be
> > 		 * reordered before the refcounting increase has
> > 		 * happened, which prevents the request from being reused.
> > 		 */
> > 
> > I couldn't poke any other holes into this, and we're reusing the fence rcu
> > functions where appropriate. With the comment:
> 

I guess it doesn't hurt to make this really, really clear. Perfect! Well
almost, one nit:

> 
>                 /* What stops the following rcu_dereference() from occuring
>                  * before the above i915_gem_request_get_rcu()? If we were
>                  * to read the value before pausing to get the reference to
>                  * the request, we may not notice a change in the active
>                  * tracker.
>                  *
>                  * The rcu_dereference() is a mere read barrier, which means

s/read barrier/barrier of depending reads/, rcu_dereference is not even a
full rmb!

>                  * that operations after it will appear after, neither the

hence also: s/operations/any operations through the read pointer/

Aside: I'm always impressed by how Alpha managed to mispredict
dependent reads somehow ... "sorry, my magic 8ball had a glitch"!?
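
(For reference, a rough sketch of what rcu_dereference() boils down to,
lockdep checks omitted:

	p = READ_ONCE(ptr);
	smp_read_barrier_depends();	/* a no-op everywhere but Alpha */

hence only a barrier for depending reads.)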
-Daniel

>                  * CPU nor the compiler will bring them forwards. However,
>                  * that does not restrict the rcu_dereference() itself. The
>                  * read may be performed earlier by an out-of-order CPU, or
> 		 * adventurous compiler.
>                  *
>                  * The atomic operation at the heart of
>                  * i915_gem_request_get_rcu(), see fence_get_rcu(), is
>                  * atomic_inc_not_zero() which is only a full memory barrier
>                  * when successful. That is, if i915_gem_request_get_rcu()
>                  * returns the request (and so with the reference counted
>                  * incremented) then the following read for rcu_dereference()
>                  * must occur after the atomic operation and so confirm
>                  * that this request is the one currently being tracked.
>                  */
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: [PATCH 21/22] drm/i915: Enable lockless lookup of request tracking via RCU
  2016-07-29  8:41       ` Daniel Vetter
@ 2016-07-29  8:49         ` Chris Wilson
  2016-07-29  9:43           ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-29  8:49 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, Goel, Akash, Josh Triplett

On Fri, Jul 29, 2016 at 10:41:14AM +0200, Daniel Vetter wrote:
> On Thu, Jul 28, 2016 at 09:49:58PM +0100, Chris Wilson wrote:
> > On Thu, Jul 28, 2016 at 12:23:40PM +0200, Daniel Vetter wrote:
> > > I think we have a race here still: The issue is that the
> > > kref_get_unless_zero is an unordered atomic, and the rcu_dereference is
> > > only an smb_read_barrier_depends, which doesn't prevent the fetch from
> > > happening before the atomic_add_unless.
> > > 
> > > Well until I opened memory-barriers.txt and learned that atomic_add_unless
> > > is a full smp_mb() on both sides on success. That's a bit too tricky for
> > > my taste, what about the following comment:
> > > 
> > > 		/* When request_get_rcu succeeds the underlying
> > > 		 * atomic_add_unless has a full smp_mb() on both sides.
> > > 		 * This ensures that the rcu_dereference() below can't be
> > > 		 * reordered before the refcounting increase has
> > > 		 * happened, which prevents the request from being reused.
> > > 		 */
> > > 
> > > I couldn't poke any other holes into this, and we're reusing the fence rcu
> > > functions where appropriate. With the comment:
> > 
> 
> I guess it doesn't hurt to make this really, really clear. Perfect! Well
> almost, one nit:
> 
> > 
> >                 /* What stops the following rcu_dereference() from occurring
> >                  * before the above i915_gem_request_get_rcu()? If we were
> >                  * to read the value before pausing to get the reference to
> >                  * the request, we may not notice a change in the active
> >                  * tracker.
> >                  *
> >                  * The rcu_dereference() is a mere read barrier, which means
> 
> s/read barrier/barrier of depending reads/, rcu_dereference is not even a
> full rmb!
> 
> >                  * that operations after it will appear after, neither the
> 
> hence also: s/operations/any operations through the read pointer/

Ah right, that needs to be dependent reads. Changes look good.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 07/22] drm/i915: Pad GTT views of exec objects up to user specified size
  2016-07-29  8:08     ` Chris Wilson
@ 2016-07-29  8:55       ` Joonas Lahtinen
  0 siblings, 0 replies; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-29  8:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On pe, 2016-07-29 at 09:08 +0100, Chris Wilson wrote:
> On Fri, Jul 29, 2016 at 10:59:26AM +0300, Joonas Lahtinen wrote:
> > 
> > On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> > > 
> > > --- a/include/uapi/drm/i915_drm.h
> > > +++ b/include/uapi/drm/i915_drm.h
> > > @@ -727,11 +727,15 @@ struct drm_i915_gem_exec_object2 {
> > >  #define EXEC_OBJECT_WRITE		 (1<<2)
> > >  #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> > >  #define EXEC_OBJECT_PINNED		 (1<<4)
> > > +#define EXEC_OBJECT_PAD_TO_SIZE		 (1<<5)
> > >  /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
> > > -#define __EXEC_OBJECT_UNKNOWN_FLAGS	(-(EXEC_OBJECT_PINNED<<1))
> > > +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
> > Do keep the () around, why not? With that fixed,
> Why not? Just lost in rebasing. There's no need for the extra (), why
> were they added?

CodingStyle; "macros defining constants using expressions must enclose
the expression in parentheses."

Regards, Joonas

> -Chris
> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 15/22] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin()
  2016-07-28 16:12         ` Chris Wilson
@ 2016-07-29  9:10           ` Joonas Lahtinen
  0 siblings, 0 replies; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-29  9:10 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On to, 2016-07-28 at 17:12 +0100, Chris Wilson wrote:
> Do you want i915_gem_obj_lookup_or_create_ggtt_vma() to inherit the
> BUG_ON(!view) removed from i915_gem_object_ggtt_pin() by this patch?

Yes, as an intermediary step.

Regards, Joonas

> -Chris
> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 19/22] drm/i915: Move obj->active:5 to obj->flags
  2016-07-29  8:10       ` Chris Wilson
@ 2016-07-29  9:34         ` Joonas Lahtinen
  0 siblings, 0 replies; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-29  9:34 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-07-29 at 09:10 +0100, Chris Wilson wrote:
> On Fri, Jul 29, 2016 at 09:04:48AM +0100, Chris Wilson wrote:
> > 
> > On Fri, Jul 29, 2016 at 10:40:09AM +0300, Joonas Lahtinen wrote:
> > > 
> > > _is_active() does not really fit to be assigned to _mask. maybe have
> > > object_active_mask() and then
> > > 
> > > _is_idle/inactive/whatever() { return !object_active_mask() }
> > > 
> > > Because the negation is used lot more.
> > 10 i915_gem_object_is_active(), 1 !i915_gem_object_is_active(). Of which
> > 4 use the mask and the rest as a boolean.
> Plus another 5 using the READ_ONCE() variant who only look at the active
> mask.

Being used more does not make it less ugly, quite the contrary...

_is_() returns a boolean; _get_() might be a mask, if you insist.

Regards, Joonas

> -Chris
> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 21/22] drm/i915: Enable lockless lookup of request tracking via RCU
  2016-07-29  8:49         ` Chris Wilson
@ 2016-07-29  9:43           ` Chris Wilson
  2016-07-29  9:45             ` Daniel Vetter
  0 siblings, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-29  9:43 UTC (permalink / raw)
  To: Daniel Vetter, intel-gfx, Goel, Akash, Josh Triplett

On Fri, Jul 29, 2016 at 09:49:54AM +0100, Chris Wilson wrote:
> On Fri, Jul 29, 2016 at 10:41:14AM +0200, Daniel Vetter wrote:
> > I guess it doesn't hurt to make this really, really clear. Perfect! Well
> > almost, one nit:
> > 
> > > 
> > >                 /* What stops the following rcu_dereference() from occuring
> > >                  * before the above i915_gem_request_get_rcu()? If we were
> > >                  * to read the value before pausing to get the reference to
> > >                  * the request, we may not notice a change in the active
> > >                  * tracker.
> > >                  *
> > >                  * The rcu_dereference() is a mere read barrier, which means
> > 
> > s/read barrier/barrier of depending reads/, rcu_dereference is not even a
> > full rmb!
> > 
> > >                  * that operations after it will appear after, neither the
> > 
> > hence also: s/operations/any operations through the read pointer/
> 
> Ah right, that needs to be dependent reads. Changes look good.


        do {
                struct drm_i915_gem_request *request;

                request = rcu_dereference(active->request);
                if (!request || i915_gem_request_completed(request))
                        return NULL;

                request = i915_gem_request_get_rcu(request);

                /* What stops the following rcu_access_pointer() from occurring
                 * before the above i915_gem_request_get_rcu()? If we were
                 * to read the value before pausing to get the reference to
                 * the request, we may not notice a change in the active
                 * tracker.
                 *
                 * The rcu_access_pointer() is a mere compiler barrier, which
                 * means both the CPU and compiler are free to perform the
                 * memory read without constraint. The compiler only has to
                 * ensure that any operations after the rcu_access_pointer()
                 * occur afterwards in program order. This means the read may
                 * be performed earlier by an out-of-order CPU, or adventurous
                 * compiler.
                 *
                 * The atomic operation at the heart of
                 * i915_gem_request_get_rcu(), see fence_get_rcu(), is
                 * atomic_inc_not_zero() which is only a full memory barrier
                 * when successful. That is, if i915_gem_request_get_rcu()
                 * returns the request (and so with the reference counted
                 * incremented) then the following read for rcu_access_pointer()
                 * must occur after the atomic operation and so confirm
                 * that this request is the one currently being tracked.
                 */
                if (!request || request == rcu_access_pointer(active->request))
                        return rcu_pointer_handoff(request);

                i915_gem_request_put(request);
        } while (1);
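
For completeness, the caller side would then look something like this
(sketch; i915_gem_active_get_rcu() is an assumed name for the helper
wrapping the loop above, and obj->last_write an assumed active
tracker):

	rcu_read_lock();
	request = i915_gem_active_get_rcu(&obj->last_write);
	rcu_read_unlock();

	/* on success we now hold a full reference outside of RCU */
	if (request)
		i915_gem_request_put(request);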


-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 21/22] drm/i915: Enable lockless lookup of request tracking via RCU
  2016-07-29  9:43           ` Chris Wilson
@ 2016-07-29  9:45             ` Daniel Vetter
  0 siblings, 0 replies; 95+ messages in thread
From: Daniel Vetter @ 2016-07-29  9:45 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx, Goel, Akash, Josh Triplett

On Fri, Jul 29, 2016 at 10:43:17AM +0100, Chris Wilson wrote:
> On Fri, Jul 29, 2016 at 09:49:54AM +0100, Chris Wilson wrote:
> > On Fri, Jul 29, 2016 at 10:41:14AM +0200, Daniel Vetter wrote:
> > > I guess it doesn't hurt to make this really, really clear. Perfect! Well
> > > almost, one nit:
> > > 
> > > > 
> > > >                 /* What stops the following rcu_dereference() from occuring
> > > >                  * before the above i915_gem_request_get_rcu()? If we were
> > > >                  * to read the value before pausing to get the reference to
> > > >                  * the request, we may not notice a change in the active
> > > >                  * tracker.
> > > >                  *
> > > >                  * The rcu_dereference() is a mere read barrier, which means
> > > 
> > > s/read barrier/barrier of depending reads/, rcu_dereference is not even a
> > > full rmb!
> > > 
> > > >                  * that operations after it will appear after, neither the
> > > 
> > > hence also: s/operations/any operations through the read pointer/
> > 
> > Ah right, that needs to be dependent reads. Changes look good.
> 
> 
>        do {
>                 struct drm_i915_gem_request *request;
> 
>                 request = rcu_dereference(active->request);
>                 if (!request || i915_gem_request_completed(request))
>                         return NULL;
> 
>                 request = i915_gem_request_get_rcu(request);
> 
>                 /* What stops the following rcu_access_pointer() from occurring
>                  * before the above i915_gem_request_get_rcu()? If we were
>                  * to read the value before pausing to get the reference to
>                  * the request, we may not notice a change in the active
>                  * tracker.
>                  *
>                  * The rcu_access_pointer() is a mere compiler barrier, which
>                  * means both the CPU and compiler are free to perform the
>                  * memory read without constraint. The compiler only has to
>                  * ensure that any operations after the rcu_access_pointer()
>                  * occur afterwards in program order. This means the read may
>                  * be performed earlier by an out-of-order CPU, or adventurous
>                  * compiler.
>                  *
>                  * The atomic operation at the heart of
>                  * i915_gem_request_get_rcu(), see fence_get_rcu(), is
>                  * atomic_inc_not_zero() which is only a full memory barrier
>                  * when successful. That is, if i915_gem_request_get_rcu()
>                  * returns the request (and so with the reference count
>                  * incremented) then the following read for rcu_access_pointer()
>                  * must occur after the atomic operation and so confirm
>                  * that this request is the one currently being tracked.
>                  */
>                 if (!request || request == rcu_access_pointer(active->request))
>                         return rcu_pointer_handoff(request);
> 
>                 i915_gem_request_put(request);
>         } while (1);

lgtm now, Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
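
For reference, a minimal caller-side sketch of how such a lockless
lookup is used (the wrapper name i915_gem_active_get_rcu() and the
surrounding details are illustrative assumptions, not code quoted from
the patch):

	struct drm_i915_gem_request *rq;

	/* The RCU read lock keeps the request memory valid while we
	 * race against retirement; the loop above then confirms that
	 * the reference we took is to the request still being tracked.
	 */
	rcu_read_lock();
	rq = i915_gem_active_get_rcu(active);
	rcu_read_unlock();

	if (rq) {
		/* We hold a full reference: the request may complete
		 * at any time, but it cannot be freed beneath us.
		 */
		i915_gem_request_put(rq);
	}
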
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* [PATCH] drm/i915: Convert 4096 alignment request to 0 for drm_mm allocations
  2016-07-29  6:53   ` Joonas Lahtinen
  2016-07-29  7:18     ` Chris Wilson
@ 2016-07-29 10:19     ` Chris Wilson
  2016-07-29 10:28       ` Joonas Lahtinen
  1 sibling, 1 reply; 95+ messages in thread
From: Chris Wilson @ 2016-07-29 10:19 UTC (permalink / raw)
  To: intel-gfx

As we always allocate in chunks of 4096 (that being both the PAGE_SIZE
and our own GTT_PAGE_SIZE), we know that all results from the drm_mm are
aligned to at least 4096. The drm_mm allocator itself is optimised for
alignment == 0, and so by converting alignments of 4096 to 0 we can
satisfy our own requirements and still hit the faster path.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 772b9739fdef..e1f1103b5d7b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3068,6 +3068,15 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 			alloc_flag = DRM_MM_CREATE_DEFAULT;
 		}
 
+		/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
+		 * so we know that we always have a minimum alignment of 4096.
+		 * The drm_mm range manager is optimised to return results
+		 * with zero alignment, so where possible use the optimal
+		 * path.
+		 */
+		if (alignment <= 4096)
+			alignment = 0;
+
 search_free:
 		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
 							  size, alignment,
-- 
2.8.1


* ✗ Ro.CI.BAT: failure for series starting with [01/22] drm/i915: Combine loops within i915_gem_evict_something (rev2)
  2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
                   ` (22 preceding siblings ...)
  2016-07-27 11:23 ` ✗ Ro.CI.BAT: failure for series starting with [01/22] drm/i915: Combine loops within i915_gem_evict_something Patchwork
@ 2016-07-29 10:20 ` Patchwork
  23 siblings, 0 replies; 95+ messages in thread
From: Patchwork @ 2016-07-29 10:20 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/22] drm/i915: Combine loops within i915_gem_evict_something (rev2)
URL   : https://patchwork.freedesktop.org/series/10315/
State : failure

== Summary ==

Applying: drm/i915: Combine loops within i915_gem_evict_something
Using index info to reconstruct a base tree...
M	drivers/gpu/drm/i915/i915_gem_evict.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/i915_gem_evict.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/i915_gem_evict.c
error: Failed to merge in the changes.
Patch failed at 0001 drm/i915: Combine loops within i915_gem_evict_something
The copy of the patch that failed is found in: .git/rebase-apply/patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


* Re: [PATCH] drm/i915: Convert 4096 alignment request to 0 for drm_mm allocations
  2016-07-29 10:19     ` [PATCH] drm/i915: Convert 4096 alignment request to 0 for drm_mm allocations Chris Wilson
@ 2016-07-29 10:28       ` Joonas Lahtinen
  2016-07-29 10:38         ` Chris Wilson
  0 siblings, 1 reply; 95+ messages in thread
From: Joonas Lahtinen @ 2016-07-29 10:28 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-07-29 at 11:19 +0100, Chris Wilson wrote:
> As we always allocate in chunks of 4096 (that being both the PAGE_SIZE
> and our own GTT_PAGE_SIZE), we know that all results from the drm_mm are

That GTT_PAGE_SIZE define would be sweet to introduce finally :P
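
For illustration, such a define might look like this (hypothetical; it
does not exist in the tree at this point):

	/* Minimum page size of the global GTT, distinct in principle
	 * from the CPU's PAGE_SIZE, although both are 4096 today.
	 */
	#define GTT_PAGE_SIZE 4096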

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH] drm/i915: Convert 4096 alignment request to 0 for drm_mm allocations
  2016-07-29 10:28       ` Joonas Lahtinen
@ 2016-07-29 10:38         ` Chris Wilson
  0 siblings, 0 replies; 95+ messages in thread
From: Chris Wilson @ 2016-07-29 10:38 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Fri, Jul 29, 2016 at 01:28:19PM +0300, Joonas Lahtinen wrote:
> On pe, 2016-07-29 at 11:19 +0100, Chris Wilson wrote:
> > As we always allocate in chunks of 4096 (that being both the PAGE_SIZE
> > and our own GTT_PAGE_SIZE), we know that all results from the drm_mm are
> 
> That GTT_PAGE_SIZE define would be sweet to introduce finally :P

Or 64k pages? :-p
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 12/22] drm/i915: Start passing around i915_vma from execbuffer
  2016-07-29  8:23   ` Joonas Lahtinen
@ 2016-08-01  7:34     ` Chris Wilson
  0 siblings, 0 replies; 95+ messages in thread
From: Chris Wilson @ 2016-08-01  7:34 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx, Mika Kuoppala

On Fri, Jul 29, 2016 at 11:23:43AM +0300, Joonas Lahtinen wrote:
> On ke, 2016-07-27 at 12:14 +0100, Chris Wilson wrote:
> 
> > +	if (i915_vma_misplaced(vma, size, alignment, flags)) {
> > +		if (flags & PIN_NONBLOCK &&
> > +		    (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
> > +			return -ENOSPC;
> 
> Why ENOSPC when active? Would not EAGAIN be more appropriate?

The interface used by execbuf is that PIN_NONBLOCK results in ENOSPC. That
allows us to do a trial-and-error first pass, with ENOSPC being resolved
later and all the other errors going back to userspace, primarily EIO,
EAGAIN, EINTR and EINVAL. On the second pass we drop the NONBLOCK, and if
there is a genuine failure it will result in ENOSPC again.
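
A rough sketch of that two-pass flow (the call site is illustrative;
only i915_vma_pin() and the PIN_NONBLOCK flag come from this series):

	/* First pass: opportunistic, refuse to wait on pinned or
	 * active neighbours; treat ENOSPC as retryable.
	 */
	ret = i915_vma_pin(vma, size, alignment, flags | PIN_NONBLOCK);
	if (ret == -ENOSPC) {
		/* Second pass: allow waiting and eviction. A genuine
		 * lack of space reports ENOSPC once more.
		 */
		ret = i915_vma_pin(vma, size, alignment, flags);
	}
	if (ret) /* EIO, EAGAIN, EINTR, EINVAL, ... go to userspace */
		return ret;
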
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

Thread overview: 95+ messages
2016-07-27 11:14 Getting to RCU and exporting fences Chris Wilson
2016-07-27 11:14 ` [PATCH 01/22] drm/i915: Combine loops within i915_gem_evict_something Chris Wilson
2016-07-29  6:17   ` Joonas Lahtinen
2016-07-29  6:31     ` Chris Wilson
2016-07-27 11:14 ` [PATCH 02/22] drm/i915: Remove surplus drm_device parameter to i915_gem_evict_something() Chris Wilson
2016-07-28  8:07   ` Joonas Lahtinen
2016-07-27 11:14 ` [PATCH 03/22] drm/i915: Double check the active status on the batch pool Chris Wilson
2016-07-28  8:14   ` Joonas Lahtinen
2016-07-27 11:14 ` [PATCH 04/22] drm/i915: Remove request retirement before each batch Chris Wilson
2016-07-28  8:32   ` Joonas Lahtinen
2016-07-28  9:32     ` Chris Wilson
2016-07-28  9:53       ` Joonas Lahtinen
2016-07-28  9:54   ` Daniel Vetter
2016-07-28 10:26     ` Chris Wilson
2016-07-28 11:52       ` Daniel Vetter
2016-07-28 12:24         ` Chris Wilson
2016-07-28 14:21           ` Chris Wilson
2016-07-27 11:14 ` [PATCH 05/22] drm/i915: Remove i915_gem_execbuffer_retire_commands() Chris Wilson
2016-07-28  8:46   ` Joonas Lahtinen
2016-07-28  8:55     ` Chris Wilson
2016-07-28  9:54       ` Joonas Lahtinen
2016-07-27 11:14 ` [PATCH 06/22] drm/i915: Fix up vma alignment to be u64 Chris Wilson
2016-07-28  8:59   ` Joonas Lahtinen
2016-07-27 11:14 ` [PATCH 07/22] drm/i915: Pad GTT views of exec objects up to user specified size Chris Wilson
2016-07-28  9:55   ` Daniel Vetter
2016-07-28 10:33     ` Chris Wilson
2016-07-29  7:59   ` Joonas Lahtinen
2016-07-29  8:08     ` Chris Wilson
2016-07-29  8:55       ` Joonas Lahtinen
2016-07-27 11:14 ` [PATCH 08/22] drm/i915: Reduce WARN(i915_gem_valid_gtt_space) to a debug-only check Chris Wilson
2016-07-28  9:18   ` Joonas Lahtinen
2016-07-27 11:14 ` [PATCH 09/22] drm/i915: Split insertion/binding of an object into the VM Chris Wilson
2016-07-28  9:25   ` Joonas Lahtinen
2016-07-28  9:34     ` Chris Wilson
2016-07-27 11:14 ` [PATCH 10/22] drm/i915: Record allocated vma size Chris Wilson
2016-07-29  6:53   ` Joonas Lahtinen
2016-07-29  7:18     ` Chris Wilson
2016-07-29 10:19     ` [PATCH] drm/i915: Convert 4096 alignment request to 0 for drm_mm allocations Chris Wilson
2016-07-29 10:28       ` Joonas Lahtinen
2016-07-29 10:38         ` Chris Wilson
2016-07-27 11:14 ` [PATCH 11/22] drm/i915: Wrap vma->pin_count accessors with small inline helpers Chris Wilson
2016-07-29  6:59   ` Joonas Lahtinen
2016-07-29  7:23     ` Chris Wilson
2016-07-27 11:14 ` [PATCH 12/22] drm/i915: Start passing around i915_vma from execbuffer Chris Wilson
2016-07-29  8:23   ` Joonas Lahtinen
2016-08-01  7:34     ` Chris Wilson
2016-07-27 11:14 ` [PATCH 13/22] drm/i915: Combine all i915_vma bitfields into a single set of flags Chris Wilson
2016-07-29  7:30   ` Joonas Lahtinen
2016-07-29  7:44     ` Chris Wilson
2016-07-27 11:14 ` [PATCH 14/22] drm/i915: Make i915_vma_pin() small and inline Chris Wilson
2016-07-28 11:06   ` Joonas Lahtinen
2016-07-27 11:14 ` [PATCH 15/22] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin() Chris Wilson
2016-07-28 10:38   ` Joonas Lahtinen
2016-07-28 11:36     ` Chris Wilson
2016-07-28 11:53       ` Joonas Lahtinen
2016-07-28 16:12         ` Chris Wilson
2016-07-29  9:10           ` Joonas Lahtinen
2016-07-27 11:14 ` [PATCH 16/22] drm/i915: Make fb_tracking.lock a spinlock Chris Wilson
2016-07-28 10:02   ` Daniel Vetter
2016-07-28 10:08     ` Daniel Vetter
2016-07-29  8:25     ` Chris Wilson
2016-07-28 10:19   ` Joonas Lahtinen
2016-07-27 11:14 ` [PATCH 17/22] drm/i915: Use atomics to manipulate obj->frontbuffer_bits Chris Wilson
2016-07-28  9:49   ` Joonas Lahtinen
2016-07-28 10:10     ` Chris Wilson
2016-07-28 10:51       ` Joonas Lahtinen
2016-07-28 10:05   ` Daniel Vetter
2016-07-27 11:14 ` [PATCH 18/22] drm/i915: Use dev_priv consistently through the intel_frontbuffer interface Chris Wilson
2016-07-28  9:36   ` Joonas Lahtinen
2016-07-28 10:06   ` Daniel Vetter
2016-07-27 11:14 ` [PATCH 19/22] drm/i915: Move obj->active:5 to obj->flags Chris Wilson
2016-07-29  7:40   ` Joonas Lahtinen
2016-07-29  8:04     ` Chris Wilson
2016-07-29  8:10       ` Chris Wilson
2016-07-29  9:34         ` Joonas Lahtinen
2016-07-27 11:14 ` [PATCH 20/22] drm/i915: Move i915_gem_object_wait_rendering() Chris Wilson
2016-07-28  9:37   ` Joonas Lahtinen
2016-07-27 11:14 ` [PATCH 21/22] drm/i915: Enable lockless lookup of request tracking via RCU Chris Wilson
2016-07-28 10:23   ` Daniel Vetter
2016-07-28 20:49     ` Chris Wilson
2016-07-29  8:41       ` Daniel Vetter
2016-07-29  8:49         ` Chris Wilson
2016-07-29  9:43           ` Chris Wilson
2016-07-29  9:45             ` Daniel Vetter
2016-07-27 11:15 ` [PATCH 22/22] drm/i915: Export our request as a dma-buf fence on the reservation object Chris Wilson
2016-07-28 10:32   ` Daniel Vetter
2016-07-28 10:40     ` Chris Wilson
2016-07-28 11:59       ` Daniel Vetter
2016-07-28 12:17         ` Chris Wilson
2016-07-28 12:28           ` Daniel Vetter
2016-07-28 12:45             ` Chris Wilson
2016-07-28 20:14               ` Daniel Vetter
2016-07-28 21:08                 ` Chris Wilson
2016-07-27 11:23 ` ✗ Ro.CI.BAT: failure for series starting with [01/22] drm/i915: Combine loops within i915_gem_evict_something Patchwork
2016-07-29 10:20 ` ✗ Ro.CI.BAT: failure for series starting with [01/22] drm/i915: Combine loops within i915_gem_evict_something (rev2) Patchwork
