All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] drm/i915: trace vm eviction instead of everything
@ 2013-09-24 16:57 Ben Widawsky
  2013-09-24 16:57 ` [PATCH 2/6] drm/i915: Provide a cheap ggtt vma lookup Ben Widawsky
                   ` (5 more replies)
  0 siblings, 6 replies; 11+ messages in thread
From: Ben Widawsky @ 2013-09-24 16:57 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

Tracing vm eviction is really the event we care about. For the cases we
evict everything, we still will get the trace.

v2: Add the drm device to the trace since we might not be the only
device in the system. (Chris)

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_gem_evict.c |  2 ++
 drivers/gpu/drm/i915/i915_trace.h     | 15 +++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 3a3981e..b737653 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -175,6 +175,8 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
 	struct i915_vma *vma, *next;
 	int ret;
 
+	trace_i915_gem_evict_vm(vm);
+
 	if (do_idle) {
 		ret = i915_gpu_idle(vm->dev);
 		if (ret)
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index e2c5ee6..403309b 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -233,6 +233,21 @@ TRACE_EVENT(i915_gem_evict_everything,
 	    TP_printk("dev=%d", __entry->dev)
 );
 
+TRACE_EVENT(i915_gem_evict_vm,
+	    TP_PROTO(struct i915_address_space *vm),
+	    TP_ARGS(vm),
+
+	    TP_STRUCT__entry(
+			     __field(struct i915_address_space *, vm)
+			    ),
+
+	    TP_fast_assign(
+			   __entry->vm = vm;
+			  ),
+
+	    TP_printk("dev=%d, vm=%p", __entry->vm->dev->primary->index, __entry->vm)
+);
+
 TRACE_EVENT(i915_gem_ring_dispatch,
 	    TP_PROTO(struct intel_ring_buffer *ring, u32 seqno, u32 flags),
 	    TP_ARGS(ring, seqno, flags),
-- 
1.8.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/6] drm/i915: Provide a cheap ggtt vma lookup
  2013-09-24 16:57 [PATCH 1/6] drm/i915: trace vm eviction instead of everything Ben Widawsky
@ 2013-09-24 16:57 ` Ben Widawsky
  2013-09-24 19:11   ` Daniel Vetter
  2013-09-24 16:57 ` [PATCH 3/6] drm/i915: Convert active API to VMA Ben Widawsky
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 11+ messages in thread
From: Ben Widawsky @ 2013-09-24 16:57 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

"We do fairly often lookup the ggtt vma for an obj." - Chris Wilson. As
such, provide a function to offer slightly cheaper access to the vma.
Not performance tested. By my quick estimation it saves at least 3
pointer dereferences from the existing mechanism.

This patch mostly matches code from Chris in
<20130911221430.GB7825@nuc-i3427.alporthouse.com>

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h |  4 +++-
 drivers/gpu/drm/i915/i915_gem.c | 17 +++++++++++++++--
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 07de53c..67a31c2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2026,6 +2026,9 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
 struct i915_vma *
 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
 				  struct i915_address_space *vm);
+
+struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj);
+
 /* Some GGTT VM helpers */
 #define obj_to_ggtt(obj) \
 	(&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base)
@@ -2062,7 +2065,6 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
 	return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment,
 				   map_and_fenceable, nonblocking);
 }
-#undef obj_to_ggtt
 
 /* i915_gem_context.c */
 void i915_gem_context_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8450724..6b134f0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3403,8 +3403,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 
 	/* And bump the LRU for this access */
 	if (i915_gem_object_is_inactive(obj)) {
-		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
-							   &dev_priv->gtt.base);
+		struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
 		if (vma)
 			list_move_tail(&vma->mm_list,
 				       &dev_priv->gtt.base.inactive_list);
@@ -4940,3 +4939,17 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
 
 	return 0;
 }
+
+struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+
+	if (WARN_ON(list_empty(&obj->vma_list)))
+		return NULL;
+
+	vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
+	if (WARN_ON(vma->vm != obj_to_ggtt(obj)))
+		return NULL;
+
+	return vma;
+}
-- 
1.8.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/6] drm/i915: Convert active API to VMA
  2013-09-24 16:57 [PATCH 1/6] drm/i915: trace vm eviction instead of everything Ben Widawsky
  2013-09-24 16:57 ` [PATCH 2/6] drm/i915: Provide a cheap ggtt vma lookup Ben Widawsky
@ 2013-09-24 16:57 ` Ben Widawsky
  2013-09-24 16:57 ` [PATCH 4/6] drm/i915: Add bind/unbind object functions to VM Ben Widawsky
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 11+ messages in thread
From: Ben Widawsky @ 2013-09-24 16:57 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky

From: Ben Widawsky <ben@bwidawsk.net>

Even though we track object activity and not VMA, because we have the
active_list be based on the VM, it makes the most sense to use VMAs in
the APIs.

NOTE: Daniel intends to eventually rip out active/inactive LRUs, but for
now, leave them be.

v2: Remove leftover hunk from the previous patch which didn't keep
i915_gem_object_move_to_active. That patch had to rely on the ring to
get the dev instead of the obj. (Chris)

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h            | 5 ++---
 drivers/gpu/drm/i915/i915_gem.c            | 9 ++++++++-
 drivers/gpu/drm/i915/i915_gem_context.c    | 5 +----
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +--
 4 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 67a31c2..8530e49 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1901,9 +1901,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
 			 struct intel_ring_buffer *to);
-void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
-				    struct intel_ring_buffer *ring);
-
+void i915_vma_move_to_active(struct i915_vma *vma,
+			     struct intel_ring_buffer *ring);
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6b134f0..f6c8b0e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1910,7 +1910,7 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
-void
+static void
 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 			       struct intel_ring_buffer *ring)
 {
@@ -1949,6 +1949,13 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 	}
 }
 
+void i915_vma_move_to_active(struct i915_vma *vma,
+			     struct intel_ring_buffer *ring)
+{
+	list_move_tail(&vma->mm_list, &vma->vm->active_list);
+	return i915_gem_object_move_to_active(vma->obj, ring);
+}
+
 static void
 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 9af3fe7..1a877a5 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -453,11 +453,8 @@ static int do_switch(struct i915_hw_context *to)
 	 * MI_SET_CONTEXT instead of when the next seqno has completed.
 	 */
 	if (from != NULL) {
-		struct drm_i915_private *dev_priv = from->obj->base.dev->dev_private;
-		struct i915_address_space *ggtt = &dev_priv->gtt.base;
 		from->obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-		list_move_tail(&i915_gem_obj_to_vma(from->obj, ggtt)->mm_list, &ggtt->active_list);
-		i915_gem_object_move_to_active(from->obj, ring);
+		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->obj), ring);
 		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
 		 * whole damn pipeline, we don't need to explicitly mark the
 		 * object dirty. The only exception is that the context must be
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index da23cfe..0ce0d47 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -872,8 +872,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 		obj->base.read_domains = obj->base.pending_read_domains;
 		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
 
-		list_move_tail(&vma->mm_list, &vma->vm->active_list);
-		i915_gem_object_move_to_active(obj, ring);
+		i915_vma_move_to_active(vma, ring);
 		if (obj->base.write_domain) {
 			obj->dirty = 1;
 			obj->last_write_seqno = intel_ring_get_seqno(ring);
-- 
1.8.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 4/6] drm/i915: Add bind/unbind object functions to VM
  2013-09-24 16:57 [PATCH 1/6] drm/i915: trace vm eviction instead of everything Ben Widawsky
  2013-09-24 16:57 ` [PATCH 2/6] drm/i915: Provide a cheap ggtt vma lookup Ben Widawsky
  2013-09-24 16:57 ` [PATCH 3/6] drm/i915: Convert active API to VMA Ben Widawsky
@ 2013-09-24 16:57 ` Ben Widawsky
  2013-09-24 16:58 ` [PATCH 5/6] drm/i915: Use the new vm [un]bind functions Ben Widawsky
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 11+ messages in thread
From: Ben Widawsky @ 2013-09-24 16:57 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky

From: Ben Widawsky <ben@bwidawsk.net>

As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.

Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.

v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding.  This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)

v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.

v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)

v5: Update the comment to not suck (Chris)

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h     |  69 ++++++++++++----------
 drivers/gpu/drm/i915/i915_gem_gtt.c | 112 ++++++++++++++++++++++++++++++++++++
 2 files changed, 151 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8530e49..9995cdb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -465,6 +465,36 @@ enum i915_cache_level {
 
 typedef uint32_t gen6_gtt_pte_t;
 
+/**
+ * A VMA represents a GEM BO that is bound into an address space. Therefore, a
+ * VMA's presence cannot be guaranteed before binding, or after unbinding the
+ * object into/from the address space.
+ *
+ * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
+ * will always be <= an objects lifetime. So object refcounting should cover us.
+ */
+struct i915_vma {
+	struct drm_mm_node node;
+	struct drm_i915_gem_object *obj;
+	struct i915_address_space *vm;
+
+	/** This object's place on the active/inactive lists */
+	struct list_head mm_list;
+
+	struct list_head vma_link; /* Link in the object's VMA list */
+
+	/** This vma's place in the batchbuffer or on the eviction list */
+	struct list_head exec_list;
+
+	/**
+	 * Used for performing relocations during execbuffer insertion.
+	 */
+	struct hlist_node exec_node;
+	unsigned long exec_handle;
+	struct drm_i915_gem_exec_object2 *exec_entry;
+
+};
+
 struct i915_address_space {
 	struct drm_mm mm;
 	struct drm_device *dev;
@@ -503,9 +533,18 @@ struct i915_address_space {
 	/* FIXME: Need a more generic return type */
 	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
 				     enum i915_cache_level level);
+
+	/** Unmap an object from an address space. This usually consists of
+	 * setting the valid PTE entries to a reserved scratch page. */
+	void (*unbind_vma)(struct i915_vma *vma);
 	void (*clear_range)(struct i915_address_space *vm,
 			    unsigned int first_entry,
 			    unsigned int num_entries);
+	/* Map an object into an address space with the given cache flags. */
+#define GLOBAL_BIND (1<<0)
+	void (*bind_vma)(struct i915_vma *vma,
+			 enum i915_cache_level cache_level,
+			 u32 flags);
 	void (*insert_entries)(struct i915_address_space *vm,
 			       struct sg_table *st,
 			       unsigned int first_entry,
@@ -552,36 +591,6 @@ struct i915_hw_ppgtt {
 	int (*enable)(struct drm_device *dev);
 };
 
-/**
- * A VMA represents a GEM BO that is bound into an address space. Therefore, a
- * VMA's presence cannot be guaranteed before binding, or after unbinding the
- * object into/from the address space.
- *
- * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
- * will always be <= an objects lifetime. So object refcounting should cover us.
- */
-struct i915_vma {
-	struct drm_mm_node node;
-	struct drm_i915_gem_object *obj;
-	struct i915_address_space *vm;
-
-	/** This object's place on the active/inactive lists */
-	struct list_head mm_list;
-
-	struct list_head vma_link; /* Link in the object's VMA list */
-
-	/** This vma's place in the batchbuffer or on the eviction list */
-	struct list_head exec_list;
-
-	/**
-	 * Used for performing relocations during execbuffer insertion.
-	 */
-	struct hlist_node exec_node;
-	unsigned long exec_handle;
-	struct drm_i915_gem_exec_object2 *exec_entry;
-
-};
-
 struct i915_ctx_hang_stats {
 	/* This context had batch pending when hang was declared */
 	unsigned batch_pending;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e999496..65b61d4 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -57,6 +57,11 @@
 #define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
 #define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
 
+static void gen6_ppgtt_bind_vma(struct i915_vma *vma,
+				enum i915_cache_level cache_level,
+				u32 flags);
+static void gen6_ppgtt_unbind_vma(struct i915_vma *vma);
+
 static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level)
 {
@@ -332,7 +337,9 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
 	ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
 	ppgtt->enable = gen6_ppgtt_enable;
+	ppgtt->base.unbind_vma = NULL;
 	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
+	ppgtt->base.bind_vma = NULL;
 	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
 	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
@@ -439,6 +446,18 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
 				   cache_level);
 }
 
+static void __always_unused
+gen6_ppgtt_bind_vma(struct i915_vma *vma,
+		    enum i915_cache_level cache_level,
+		    u32 flags)
+{
+	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
+
+	WARN_ON(flags);
+
+	gen6_ppgtt_insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
+}
+
 void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
 			      struct drm_i915_gem_object *obj)
 {
@@ -447,6 +466,14 @@ void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
 				obj->base.size >> PAGE_SHIFT);
 }
 
+static void __always_unused gen6_ppgtt_unbind_vma(struct i915_vma *vma)
+{
+	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
+
+	gen6_ppgtt_clear_range(vma->vm, entry,
+			       vma->obj->base.size >> PAGE_SHIFT);
+}
+
 extern int intel_iommu_gfx_mapped;
 /* Certain Gen5 chipsets require require idling the GPU before
  * unmapping anything from the GTT when VT-d is enabled.
@@ -592,6 +619,19 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 
 }
 
+static void i915_ggtt_bind_vma(struct i915_vma *vma,
+			       enum i915_cache_level cache_level,
+			       u32 unused)
+{
+	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
+	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+	BUG_ON(!i915_is_ggtt(vma->vm));
+	intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags);
+	vma->obj->has_global_gtt_mapping = 1;
+}
+
 static void i915_ggtt_clear_range(struct i915_address_space *vm,
 				  unsigned int first_entry,
 				  unsigned int num_entries)
@@ -599,6 +639,53 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm,
 	intel_gtt_clear_range(first_entry, num_entries);
 }
 
+static void i915_ggtt_unbind_vma(struct i915_vma *vma)
+{
+	const unsigned int first = vma->node.start >> PAGE_SHIFT;
+	const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;
+
+	BUG_ON(!i915_is_ggtt(vma->vm));
+	vma->obj->has_global_gtt_mapping = 0;
+	intel_gtt_clear_range(first, size);
+}
+
+static void gen6_ggtt_bind_vma(struct i915_vma *vma,
+			       enum i915_cache_level cache_level,
+			       u32 flags)
+{
+	struct drm_device *dev = vma->vm->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj = vma->obj;
+	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
+
+	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
+	 * or we have a global mapping already but the cacheability flags have
+	 * changed, set the global PTEs.
+	 *
+	 * If there is an aliasing PPGTT it is anecdotally faster, so use that
+	 * instead if none of the above hold true.
+	 *
+	 * NB: A global mapping should only be needed for special regions like
+	 * "gtt mappable", SNB errata, or if specified via special execbuf
+	 * flags. At all other times, the GPU will use the aliasing PPGTT.
+	 */
+	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
+		if (!obj->has_global_gtt_mapping ||
+		    (cache_level != obj->cache_level)) {
+			gen6_ggtt_insert_entries(vma->vm, obj->pages, entry,
+						 cache_level);
+			obj->has_global_gtt_mapping = 1;
+		}
+	}
+
+	if (dev_priv->mm.aliasing_ppgtt &&
+	    (!obj->has_aliasing_ppgtt_mapping ||
+	     (cache_level != obj->cache_level))) {
+		gen6_ppgtt_insert_entries(&dev_priv->mm.aliasing_ppgtt->base,
+					  vma->obj->pages, entry, cache_level);
+		vma->obj->has_aliasing_ppgtt_mapping = 1;
+	}
+}
 
 void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
 			      enum i915_cache_level cache_level)
@@ -627,6 +714,27 @@ void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
 	obj->has_global_gtt_mapping = 0;
 }
 
+static void gen6_ggtt_unbind_vma(struct i915_vma *vma)
+{
+	struct drm_device *dev = vma->vm->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj = vma->obj;
+	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
+
+	if (obj->has_global_gtt_mapping) {
+		gen6_ggtt_clear_range(vma->vm, entry,
+				      vma->obj->base.size >> PAGE_SHIFT);
+		obj->has_global_gtt_mapping = 0;
+	}
+
+	if (obj->has_aliasing_ppgtt_mapping) {
+		gen6_ppgtt_clear_range(&dev_priv->mm.aliasing_ppgtt->base,
+				       entry,
+				       obj->base.size >> PAGE_SHIFT);
+		obj->has_aliasing_ppgtt_mapping = 0;
+	}
+}
+
 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
 {
 	struct drm_device *dev = obj->base.dev;
@@ -860,7 +968,9 @@ static int gen6_gmch_probe(struct drm_device *dev,
 		DRM_ERROR("Scratch setup failed\n");
 
 	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
+	dev_priv->gtt.base.unbind_vma = gen6_ggtt_unbind_vma;
 	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
+	dev_priv->gtt.base.bind_vma = gen6_ggtt_bind_vma;
 
 	return ret;
 }
@@ -892,7 +1002,9 @@ static int i915_gmch_probe(struct drm_device *dev,
 
 	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
 	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
+	dev_priv->gtt.base.unbind_vma = i915_ggtt_unbind_vma;
 	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
+	dev_priv->gtt.base.bind_vma = i915_ggtt_bind_vma;
 
 	return 0;
 }
-- 
1.8.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 5/6] drm/i915: Use the new vm [un]bind functions
  2013-09-24 16:57 [PATCH 1/6] drm/i915: trace vm eviction instead of everything Ben Widawsky
                   ` (2 preceding siblings ...)
  2013-09-24 16:57 ` [PATCH 4/6] drm/i915: Add bind/unbind object functions to VM Ben Widawsky
@ 2013-09-24 16:58 ` Ben Widawsky
  2013-09-25 11:42   ` Daniel Vetter
  2013-09-24 16:58 ` [PATCH 6/6] drm/i915: eliminate vm->insert_entries() Ben Widawsky
  2013-09-24 19:14 ` [PATCH 1/6] drm/i915: trace vm eviction instead of everything Daniel Vetter
  5 siblings, 1 reply; 11+ messages in thread
From: Ben Widawsky @ 2013-09-24 16:58 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky

From: Ben Widawsky <ben@bwidawsk.net>

Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.

Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.

v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initialy, but we cannot.

v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)

v4: Make the code support the secure dispatch flag, which requires
special handling during execbuf. This was fixed (incorrectly) later in
the series, but having it here earlier in the series should be perfectly
acceptable. (Chris)
Move do_switch over to the new, special ggtt_vma interface.

v5: Don't use a local variable (or assertion) when setting the batch
object to the global GTT during secure dispatch (Chris)

v6: Caclulate the exec offset for the secure case (Bug fix missed on
v4). (Chris)
Remove redundant check for has_global_gtt_mapping, since it is done in
bind_vma.

v7: Remove now unused dev_priv in do_switch
Don't pass the vm to ggtt_offset (error from v6 which I should have
caught before sending).

v8: Assert, and rework the SNB workaround (to make it a bit clearer)
code to make sure the VM can't be anything but the GGTT.

v9: Fixing more bugs which can't exist yet on the behest of Chris. Make
sure that the batch object is properly bound, and added to the global
VM's active list - for when we use non-global VMs. (Chris) Note that
there is an ongoing confusion about what bugs existed, but the "known"
bugs are fixed here.

v10: Nitpicks on whitespacing etc. introduced in v9 (Chris)

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h            |  9 -----
 drivers/gpu/drm/i915/i915_gem.c            | 33 +++++++---------
 drivers/gpu/drm/i915/i915_gem_context.c    |  6 ++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 61 ++++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 48 ++---------------------
 5 files changed, 62 insertions(+), 95 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9995cdb..e8ae8fd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2102,17 +2102,8 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 
 /* i915_gem_gtt.c */
 void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev);
-void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
-			    struct drm_i915_gem_object *obj,
-			    enum i915_cache_level cache_level);
-void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
-			      struct drm_i915_gem_object *obj);
-
 void i915_gem_restore_gtt_mappings(struct drm_device *dev);
 int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
-void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
-				enum i915_cache_level cache_level);
-void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj);
 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
 void i915_gem_init_global_gtt(struct drm_device *dev);
 void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f6c8b0e..378d4ef 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2693,12 +2693,8 @@ int i915_vma_unbind(struct i915_vma *vma)
 
 	trace_i915_vma_unbind(vma);
 
-	if (obj->has_global_gtt_mapping)
-		i915_gem_gtt_unbind_object(obj);
-	if (obj->has_aliasing_ppgtt_mapping) {
-		i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
-		obj->has_aliasing_ppgtt_mapping = 0;
-	}
+	vma->vm->unbind_vma(vma);
+
 	i915_gem_gtt_finish_object(obj);
 	i915_gem_object_unpin_pages(obj);
 
@@ -3155,7 +3151,7 @@ static void i915_gem_verify_gtt(struct drm_device *dev)
 /**
  * Finds free space in the GTT aperture and binds the object there.
  */
-static int
+int
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 			   struct i915_address_space *vm,
 			   unsigned alignment,
@@ -3424,7 +3420,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 				    enum i915_cache_level cache_level)
 {
 	struct drm_device *dev = obj->base.dev;
-	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct i915_vma *vma;
 	int ret;
 
@@ -3463,11 +3458,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 				return ret;
 		}
 
-		if (obj->has_global_gtt_mapping)
-			i915_gem_gtt_bind_object(obj, cache_level);
-		if (obj->has_aliasing_ppgtt_mapping)
-			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
-					       obj, cache_level);
+		list_for_each_entry(vma, &obj->vma_list, vma_link)
+			vma->vm->bind_vma(vma, cache_level, 0);
 	}
 
 	list_for_each_entry(vma, &obj->vma_list, vma_link)
@@ -3795,6 +3787,7 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    bool map_and_fenceable,
 		    bool nonblocking)
 {
+	const u32 flags = map_and_fenceable ? GLOBAL_BIND : 0;
 	struct i915_vma *vma;
 	int ret;
 
@@ -3823,20 +3816,22 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
 	}
 
 	if (!i915_gem_obj_bound(obj, vm)) {
-		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-
 		ret = i915_gem_object_bind_to_vm(obj, vm, alignment,
 						 map_and_fenceable,
 						 nonblocking);
 		if (ret)
 			return ret;
 
-		if (!dev_priv->mm.aliasing_ppgtt)
-			i915_gem_gtt_bind_object(obj, obj->cache_level);
-	}
+		vma = i915_gem_obj_to_vma(obj, vm);
+		vm->bind_vma(vma, obj->cache_level, flags);
+	} else
+		vma = i915_gem_obj_to_vma(obj, vm);
 
+	/* Objects are created map and fenceable. If we bind an object
+	 * the first time, and we had aliasing PPGTT (and didn't request
+	 * GLOBAL), we'll need to do this on the second bind.*/
 	if (!obj->has_global_gtt_mapping && map_and_fenceable)
-		i915_gem_gtt_bind_object(obj, obj->cache_level);
+		vm->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
 
 	obj->pin_count++;
 	obj->pin_mappable |= map_and_fenceable;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 1a877a5..d4eb88a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -422,8 +422,10 @@ static int do_switch(struct i915_hw_context *to)
 		return ret;
 	}
 
-	if (!to->obj->has_global_gtt_mapping)
-		i915_gem_gtt_bind_object(to->obj, to->obj->cache_level);
+	if (!to->obj->has_global_gtt_mapping) {
+		struct i915_vma *vma = i915_gem_obj_to_ggtt(to->obj);
+		vma->vm->bind_vma(vma, to->obj->cache_level, GLOBAL_BIND);
+	}
 
 	if (!to->is_initialized || is_default_context(to))
 		hw_flags |= MI_RESTORE_INHIBIT;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0ce0d47..88c924f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -286,8 +286,14 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 	if (unlikely(IS_GEN6(dev) &&
 	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
 	    !target_i915_obj->has_global_gtt_mapping)) {
-		i915_gem_gtt_bind_object(target_i915_obj,
-					 target_i915_obj->cache_level);
+		/* SNB shall not support full PPGTT. This path can only be taken
+		 * when the VM is the GGTT (aliasing PPGTT is not a real VM, and
+		 * therefore doesn't count).
+		 */
+		BUG_ON(vm != obj_to_ggtt(target_i915_obj));
+		vm->bind_vma(i915_gem_obj_to_ggtt(target_i915_obj),
+			     target_i915_obj->cache_level,
+			     GLOBAL_BIND);
 	}
 
 	/* Validate that the target is in a valid r/w GPU domain */
@@ -464,11 +470,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 				struct intel_ring_buffer *ring,
 				bool *need_reloc)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	bool need_fence, need_mappable;
-	struct drm_i915_gem_object *obj = vma->obj;
+	u32 flags = (entry->flags & EXEC_OBJECT_NEEDS_GTT) &&
+		!vma->obj->has_global_gtt_mapping ? GLOBAL_BIND : 0;
 	int ret;
 
 	need_fence =
@@ -497,14 +504,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 		}
 	}
 
-	/* Ensure ppgtt mapping exists if needed */
-	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
-		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
-				       obj, obj->cache_level);
-
-		obj->has_aliasing_ppgtt_mapping = 1;
-	}
-
 	if (entry->offset != vma->node.start) {
 		entry->offset = vma->node.start;
 		*need_reloc = true;
@@ -515,9 +514,7 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
 	}
 
-	if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
-	    !obj->has_global_gtt_mapping)
-		i915_gem_gtt_bind_object(obj, obj->cache_level);
+	vma->vm->bind_vma(vma, obj->cache_level, flags);
 
 	return 0;
 }
@@ -936,7 +933,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct intel_ring_buffer *ring;
 	struct i915_ctx_hang_stats *hs;
 	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
-	u32 exec_start, exec_len;
+	u32 exec_len, exec_start = args->batch_start_offset;
 	u32 mask, flags;
 	int ret, mode, i;
 	bool need_relocs;
@@ -1118,8 +1115,34 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but let's be paranoid and do it
 	 * unconditionally for now. */
-	if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
-		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);
+	if (flags & I915_DISPATCH_SECURE) {
+		struct i915_address_space *ggtt = obj_to_ggtt(batch_obj);
+
+		/* Assuming all privileged batches are in the global GTT means
+		 * we need to make sure we have a global gtt offset, as well as
+		 * the PTEs mapped. As mentioned above, we can forego this on
+		 * HSW, but don't.
+		 */
+
+		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, false, false);
+		if (ret)
+			goto err;
+
+		ggtt->bind_vma(i915_gem_obj_to_ggtt(batch_obj),
+			       batch_obj->cache_level,
+			       GLOBAL_BIND);
+
+		/* Since the active list is per VM, we need to make sure this
+		 * VMA ends up on the GGTT's active list to avoid premature
+		 * eviction.
+		 */
+		i915_vma_move_to_active(i915_gem_obj_to_ggtt(batch_obj), ring);
+
+		i915_gem_object_unpin(batch_obj);
+
+		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
+	} else
+		exec_start += i915_gem_obj_offset(batch_obj, vm);
 
 	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
 	if (ret)
@@ -1161,8 +1184,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 			goto err;
 	}
 
-	exec_start = i915_gem_obj_offset(batch_obj, vm) +
-		args->batch_start_offset;
 	exec_len = args->batch_len;
 	if (cliprects) {
 		for (i = 0; i < args->num_cliprects; i++) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 65b61d4..e053f14 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -437,15 +437,6 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
 	dev_priv->mm.aliasing_ppgtt = NULL;
 }
 
-void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
-			    struct drm_i915_gem_object *obj,
-			    enum i915_cache_level cache_level)
-{
-	ppgtt->base.insert_entries(&ppgtt->base, obj->pages,
-				   i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
-				   cache_level);
-}
-
 static void __always_unused
 gen6_ppgtt_bind_vma(struct i915_vma *vma,
 		    enum i915_cache_level cache_level,
@@ -458,14 +449,6 @@ gen6_ppgtt_bind_vma(struct i915_vma *vma,
 	gen6_ppgtt_insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
 }
 
-void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
-			      struct drm_i915_gem_object *obj)
-{
-	ppgtt->base.clear_range(&ppgtt->base,
-				i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
-				obj->base.size >> PAGE_SHIFT);
-}
-
 static void __always_unused gen6_ppgtt_unbind_vma(struct i915_vma *vma)
 {
 	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
@@ -523,8 +506,10 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 				       dev_priv->gtt.base.total / PAGE_SIZE);
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
+							   &dev_priv->gtt.base);
 		i915_gem_clflush_object(obj, obj->pin_display);
-		i915_gem_gtt_bind_object(obj, obj->cache_level);
+		vma->vm->bind_vma(vma, obj->cache_level, 0);
 	}
 
 	i915_gem_chipset_flush(dev);
@@ -687,33 +672,6 @@ static void gen6_ggtt_bind_vma(struct i915_vma *vma,
 	}
 }
 
-void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
-			      enum i915_cache_level cache_level)
-{
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
-
-	dev_priv->gtt.base.insert_entries(&dev_priv->gtt.base, obj->pages,
-					  entry,
-					  cache_level);
-
-	obj->has_global_gtt_mapping = 1;
-}
-
-void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
-{
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
-
-	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
-				       entry,
-				       obj->base.size >> PAGE_SHIFT);
-
-	obj->has_global_gtt_mapping = 0;
-}
-
 static void gen6_ggtt_unbind_vma(struct i915_vma *vma)
 {
 	struct drm_device *dev = vma->vm->dev;
-- 
1.8.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 6/6] drm/i915: eliminate vm->insert_entries()
  2013-09-24 16:57 [PATCH 1/6] drm/i915: trace vm eviction instead of everything Ben Widawsky
                   ` (3 preceding siblings ...)
  2013-09-24 16:58 ` [PATCH 5/6] drm/i915: Use the new vm [un]bind functions Ben Widawsky
@ 2013-09-24 16:58 ` Ben Widawsky
  2013-09-24 19:14 ` [PATCH 1/6] drm/i915: trace vm eviction instead of everything Daniel Vetter
  5 siblings, 0 replies; 11+ messages in thread
From: Ben Widawsky @ 2013-09-24 16:58 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky

From: Ben Widawsky <ben@bwidawsk.net>

With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of place
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e053f14..e763d48 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -339,8 +339,8 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->enable = gen6_ppgtt_enable;
 	ppgtt->base.unbind_vma = NULL;
 	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
-	ppgtt->base.bind_vma = NULL;
 	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
+	ppgtt->base.bind_vma = NULL;
 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
 	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
 	ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
@@ -591,19 +591,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 	readl(gtt_base);
 }
 
-
-static void i915_ggtt_insert_entries(struct i915_address_space *vm,
-				     struct sg_table *st,
-				     unsigned int pg_start,
-				     enum i915_cache_level cache_level)
-{
-	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
-		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-
-	intel_gtt_insert_sg_entries(st, pg_start, flags);
-
-}
-
 static void i915_ggtt_bind_vma(struct i915_vma *vma,
 			       enum i915_cache_level cache_level,
 			       u32 unused)
@@ -927,7 +914,6 @@ static int gen6_gmch_probe(struct drm_device *dev,
 
 	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
 	dev_priv->gtt.base.unbind_vma = gen6_ggtt_unbind_vma;
-	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
 	dev_priv->gtt.base.bind_vma = gen6_ggtt_bind_vma;
 
 	return ret;
@@ -961,7 +947,6 @@ static int i915_gmch_probe(struct drm_device *dev,
 	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
 	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
 	dev_priv->gtt.base.unbind_vma = i915_ggtt_unbind_vma;
-	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
 	dev_priv->gtt.base.bind_vma = i915_ggtt_bind_vma;
 
 	return 0;
-- 
1.8.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/6] drm/i915: Provide a cheap ggtt vma lookup
  2013-09-24 16:57 ` [PATCH 2/6] drm/i915: Provide a cheap ggtt vma lookup Ben Widawsky
@ 2013-09-24 19:11   ` Daniel Vetter
  0 siblings, 0 replies; 11+ messages in thread
From: Daniel Vetter @ 2013-09-24 19:11 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Intel GFX, Ben Widawsky

On Tue, Sep 24, 2013 at 09:57:57AM -0700, Ben Widawsky wrote:
> "We do fairly often lookup the ggtt vma for an obj." - Chris Wilson. As
> such, provide a function to offer slightly cheaper access to the vma.
> Not performance tested. By my quick estimation it saves at least 3
> pointer dereferences from the existing mechanism.
> 
> This patch mostly matches code from Chris in
> <20130911221430.GB7825@nuc-i3427.alporthouse.com>
> 
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_drv.h |  4 +++-
>  drivers/gpu/drm/i915/i915_gem.c | 17 +++++++++++++++--
>  2 files changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 07de53c..67a31c2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2026,6 +2026,9 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
>  struct i915_vma *
>  i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
>  				  struct i915_address_space *vm);
> +
> +struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj);
> +
>  /* Some GGTT VM helpers */
>  #define obj_to_ggtt(obj) \
>  	(&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base)
> @@ -2062,7 +2065,6 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
>  	return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment,
>  				   map_and_fenceable, nonblocking);
>  }
> -#undef obj_to_ggtt

Bikeshed police: obj_to_gtt should probably be a static inline function
and also might need a bit a prefix now ...
-Daniel

>  
>  /* i915_gem_context.c */
>  void i915_gem_context_init(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8450724..6b134f0 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3403,8 +3403,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  
>  	/* And bump the LRU for this access */
>  	if (i915_gem_object_is_inactive(obj)) {
> -		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
> -							   &dev_priv->gtt.base);
> +		struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
>  		if (vma)
>  			list_move_tail(&vma->mm_list,
>  				       &dev_priv->gtt.base.inactive_list);
> @@ -4940,3 +4939,17 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
>  
>  	return 0;
>  }
> +
> +struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_vma *vma;
> +
> +	if (WARN_ON(list_empty(&obj->vma_list)))
> +		return NULL;
> +
> +	vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
> +	if (WARN_ON(vma->vm != obj_to_ggtt(obj)))
> +		return NULL;
> +
> +	return vma;
> +}
> -- 
> 1.8.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/6] drm/i915: trace vm eviction instead of everything
  2013-09-24 16:57 [PATCH 1/6] drm/i915: trace vm eviction instead of everything Ben Widawsky
                   ` (4 preceding siblings ...)
  2013-09-24 16:58 ` [PATCH 6/6] drm/i915: eliminate vm->insert_entries() Ben Widawsky
@ 2013-09-24 19:14 ` Daniel Vetter
  5 siblings, 0 replies; 11+ messages in thread
From: Daniel Vetter @ 2013-09-24 19:14 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Intel GFX, Ben Widawsky

On Tue, Sep 24, 2013 at 09:57:56AM -0700, Ben Widawsky wrote:
> Tracing vm eviction is really the event we care about. For the cases we
> evict everything, we still will get the trace.
> 
> v2: Add the drm device to the trace since we might not be the only
> device in the system. (Chris)
> 
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

I've slurped in the entire patch series. Besides the one comment I've
mentioned in my direct reply to the patch it concerns my maintainer script
spotted a few more BUG_ONs. You'll score a "I told you so" if we blow up
there and get a hard-to-debug report ;-)

Thanks for the patches,
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem_evict.c |  2 ++
>  drivers/gpu/drm/i915/i915_trace.h     | 15 +++++++++++++++
>  2 files changed, 17 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 3a3981e..b737653 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -175,6 +175,8 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
>  	struct i915_vma *vma, *next;
>  	int ret;
>  
> +	trace_i915_gem_evict_vm(vm);
> +
>  	if (do_idle) {
>  		ret = i915_gpu_idle(vm->dev);
>  		if (ret)
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index e2c5ee6..403309b 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -233,6 +233,21 @@ TRACE_EVENT(i915_gem_evict_everything,
>  	    TP_printk("dev=%d", __entry->dev)
>  );
>  
> +TRACE_EVENT(i915_gem_evict_vm,
> +	    TP_PROTO(struct i915_address_space *vm),
> +	    TP_ARGS(vm),
> +
> +	    TP_STRUCT__entry(
> +			     __field(struct i915_address_space *, vm)
> +			    ),
> +
> +	    TP_fast_assign(
> +			   __entry->vm = vm;
> +			  ),
> +
> +	    TP_printk("dev=%d, vm=%p", __entry->vm->dev->primary->index, __entry->vm)
> +);
> +
>  TRACE_EVENT(i915_gem_ring_dispatch,
>  	    TP_PROTO(struct intel_ring_buffer *ring, u32 seqno, u32 flags),
>  	    TP_ARGS(ring, seqno, flags),
> -- 
> 1.8.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 5/6] drm/i915: Use the new vm [un]bind functions
  2013-09-24 16:58 ` [PATCH 5/6] drm/i915: Use the new vm [un]bind functions Ben Widawsky
@ 2013-09-25 11:42   ` Daniel Vetter
  2013-09-25 17:19     ` Ben Widawsky
  0 siblings, 1 reply; 11+ messages in thread
From: Daniel Vetter @ 2013-09-25 11:42 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Intel GFX, Ben Widawsky

On Tue, Sep 24, 2013 at 09:58:00AM -0700, Ben Widawsky wrote:
> From: Ben Widawsky <ben@bwidawsk.net>
> 
> Building on the last patch which created the new function pointers in
> the VM for bind/unbind, here we actually put those new function pointers
> to use.
> 
> Split out as a separate patch to aid in review. I'm fine with squashing
> into the previous patch if people request it.
> 
> v2: Updated to address the smart ggtt which can do aliasing as needed
> Make sure we bind to global gtt when mappable and fenceable. I thought
> we could get away without this initialy, but we cannot.
> 
> v3: Make the global GTT binding explicitly use the ggtt VM for
> bind_vma(). While at it, use the new ggtt_vma helper (Chris)
> 
> v4: Make the code support the secure dispatch flag, which requires
> special handling during execbuf. This was fixed (incorrectly) later in
> the series, but having it here earlier in the series should be perfectly
> acceptable. (Chris)
> Move do_switch over to the new, special ggtt_vma interface.
> 
> v5: Don't use a local variable (or assertion) when setting the batch
> object to the global GTT during secure dispatch (Chris)
> 
> v6: Caclulate the exec offset for the secure case (Bug fix missed on
> v4). (Chris)
> Remove redundant check for has_global_gtt_mapping, since it is done in
> bind_vma.
> 
> v7: Remove now unused dev_priv in do_switch
> Don't pass the vm to ggtt_offset (error from v6 which I should have
> caught before sending).
> 
> v8: Assert, and rework the SNB workaround (to make it a bit clearer)
> code to make sure the VM can't be anything but the GGTT.
> 
> v9: Fixing more bugs which can't exist yet on the behest of Chris. Make
> sure that the batch object is properly bound, and added to the global
> VM's active list - for when we use non-global VMs. (Chris) Note that
> there is an ongoing confusion about what bugs existed, but the "known"
> bugs are fixed here.
> 
> v10: Nitpicks on whitespacing etc. introduced in v9 (Chris)
> 
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

Big rant here on two topics:

First splitting a patch into a "write new functions, but leave them as
dead code" (the previous patch) and "kill old code, use new functions"
(this patch) is imo pointless. It hampers reviewing (since you can't
compare old and new logic easily because it's split into two patches) and
it adds zero value for bisecting: Compile-testing dead code isn't a useful
additional checkpoint.

Check out Ville's vlv dpll refactoring for a great split-up patch series.

Second is the death of the ->insert_entries/->clear_range vfuncs. We
_need_ them in the internal tree and you really can't just kill them. If
you want to, you need to follow three steps:

1. Create new interfaces in the public tree, use the on public platforms
but keeep the old interfaces working.

2. Convert the -internal platforms over.

3. Remove old interface.

Doing 3. before 2. is bonghits and will result in the internal tree being
broken a bit in-between. Since I'm supposed to maintain that I'll undo the
damage here to be able to do a rebase.

Cheers, Daniel

> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  9 -----
>  drivers/gpu/drm/i915/i915_gem.c            | 33 +++++++---------
>  drivers/gpu/drm/i915/i915_gem_context.c    |  6 ++-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 61 ++++++++++++++++++++----------
>  drivers/gpu/drm/i915/i915_gem_gtt.c        | 48 ++---------------------
>  5 files changed, 62 insertions(+), 95 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 9995cdb..e8ae8fd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2102,17 +2102,8 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>  
>  /* i915_gem_gtt.c */
>  void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev);
> -void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
> -			    struct drm_i915_gem_object *obj,
> -			    enum i915_cache_level cache_level);
> -void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
> -			      struct drm_i915_gem_object *obj);
> -
>  void i915_gem_restore_gtt_mappings(struct drm_device *dev);
>  int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
> -void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
> -				enum i915_cache_level cache_level);
> -void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj);
>  void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
>  void i915_gem_init_global_gtt(struct drm_device *dev);
>  void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f6c8b0e..378d4ef 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2693,12 +2693,8 @@ int i915_vma_unbind(struct i915_vma *vma)
>  
>  	trace_i915_vma_unbind(vma);
>  
> -	if (obj->has_global_gtt_mapping)
> -		i915_gem_gtt_unbind_object(obj);
> -	if (obj->has_aliasing_ppgtt_mapping) {
> -		i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
> -		obj->has_aliasing_ppgtt_mapping = 0;
> -	}
> +	vma->vm->unbind_vma(vma);
> +
>  	i915_gem_gtt_finish_object(obj);
>  	i915_gem_object_unpin_pages(obj);
>  
> @@ -3155,7 +3151,7 @@ static void i915_gem_verify_gtt(struct drm_device *dev)
>  /**
>   * Finds free space in the GTT aperture and binds the object there.
>   */
> -static int
> +int
>  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  			   struct i915_address_space *vm,
>  			   unsigned alignment,
> @@ -3424,7 +3420,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>  				    enum i915_cache_level cache_level)
>  {
>  	struct drm_device *dev = obj->base.dev;
> -	drm_i915_private_t *dev_priv = dev->dev_private;
>  	struct i915_vma *vma;
>  	int ret;
>  
> @@ -3463,11 +3458,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>  				return ret;
>  		}
>  
> -		if (obj->has_global_gtt_mapping)
> -			i915_gem_gtt_bind_object(obj, cache_level);
> -		if (obj->has_aliasing_ppgtt_mapping)
> -			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
> -					       obj, cache_level);
> +		list_for_each_entry(vma, &obj->vma_list, vma_link)
> +			vma->vm->bind_vma(vma, cache_level, 0);
>  	}
>  
>  	list_for_each_entry(vma, &obj->vma_list, vma_link)
> @@ -3795,6 +3787,7 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  		    bool map_and_fenceable,
>  		    bool nonblocking)
>  {
> +	const u32 flags = map_and_fenceable ? GLOBAL_BIND : 0;
>  	struct i915_vma *vma;
>  	int ret;
>  
> @@ -3823,20 +3816,22 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  	}
>  
>  	if (!i915_gem_obj_bound(obj, vm)) {
> -		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> -
>  		ret = i915_gem_object_bind_to_vm(obj, vm, alignment,
>  						 map_and_fenceable,
>  						 nonblocking);
>  		if (ret)
>  			return ret;
>  
> -		if (!dev_priv->mm.aliasing_ppgtt)
> -			i915_gem_gtt_bind_object(obj, obj->cache_level);
> -	}
> +		vma = i915_gem_obj_to_vma(obj, vm);
> +		vm->bind_vma(vma, obj->cache_level, flags);
> +	} else
> +		vma = i915_gem_obj_to_vma(obj, vm);
>  
> +	/* Objects are created map and fenceable. If we bind an object
> +	 * the first time, and we had aliasing PPGTT (and didn't request
> +	 * GLOBAL), we'll need to do this on the second bind.*/
>  	if (!obj->has_global_gtt_mapping && map_and_fenceable)
> -		i915_gem_gtt_bind_object(obj, obj->cache_level);
> +		vm->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
>  
>  	obj->pin_count++;
>  	obj->pin_mappable |= map_and_fenceable;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 1a877a5..d4eb88a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -422,8 +422,10 @@ static int do_switch(struct i915_hw_context *to)
>  		return ret;
>  	}
>  
> -	if (!to->obj->has_global_gtt_mapping)
> -		i915_gem_gtt_bind_object(to->obj, to->obj->cache_level);
> +	if (!to->obj->has_global_gtt_mapping) {
> +		struct i915_vma *vma = i915_gem_obj_to_ggtt(to->obj);
> +		vma->vm->bind_vma(vma, to->obj->cache_level, GLOBAL_BIND);
> +	}
>  
>  	if (!to->is_initialized || is_default_context(to))
>  		hw_flags |= MI_RESTORE_INHIBIT;
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0ce0d47..88c924f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -286,8 +286,14 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
>  	if (unlikely(IS_GEN6(dev) &&
>  	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
>  	    !target_i915_obj->has_global_gtt_mapping)) {
> -		i915_gem_gtt_bind_object(target_i915_obj,
> -					 target_i915_obj->cache_level);
> +		/* SNB shall not support full PPGTT. This path can only be taken
> +		 * when the VM is the GGTT (aliasing PPGTT is not a real VM, and
> +		 * therefore doesn't count).
> +		 */
> +		BUG_ON(vm != obj_to_ggtt(target_i915_obj));
> +		vm->bind_vma(i915_gem_obj_to_ggtt(target_i915_obj),
> +			     target_i915_obj->cache_level,
> +			     GLOBAL_BIND);
>  	}
>  
>  	/* Validate that the target is in a valid r/w GPU domain */
> @@ -464,11 +470,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  				struct intel_ring_buffer *ring,
>  				bool *need_reloc)
>  {
> -	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +	struct drm_i915_gem_object *obj = vma->obj;
>  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
>  	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
>  	bool need_fence, need_mappable;
> -	struct drm_i915_gem_object *obj = vma->obj;
> +	u32 flags = (entry->flags & EXEC_OBJECT_NEEDS_GTT) &&
> +		!vma->obj->has_global_gtt_mapping ? GLOBAL_BIND : 0;
>  	int ret;
>  
>  	need_fence =
> @@ -497,14 +504,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  		}
>  	}
>  
> -	/* Ensure ppgtt mapping exists if needed */
> -	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
> -		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
> -				       obj, obj->cache_level);
> -
> -		obj->has_aliasing_ppgtt_mapping = 1;
> -	}
> -
>  	if (entry->offset != vma->node.start) {
>  		entry->offset = vma->node.start;
>  		*need_reloc = true;
> @@ -515,9 +514,7 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
>  	}
>  
> -	if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
> -	    !obj->has_global_gtt_mapping)
> -		i915_gem_gtt_bind_object(obj, obj->cache_level);
> +	vma->vm->bind_vma(vma, obj->cache_level, flags);
>  
>  	return 0;
>  }
> @@ -936,7 +933,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	struct intel_ring_buffer *ring;
>  	struct i915_ctx_hang_stats *hs;
>  	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
> -	u32 exec_start, exec_len;
> +	u32 exec_len, exec_start = args->batch_start_offset;
>  	u32 mask, flags;
>  	int ret, mode, i;
>  	bool need_relocs;
> @@ -1118,8 +1115,34 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	 * batch" bit. Hence we need to pin secure batches into the global gtt.
>  	 * hsw should have this fixed, but let's be paranoid and do it
>  	 * unconditionally for now. */
> -	if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
> -		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);
> +	if (flags & I915_DISPATCH_SECURE) {
> +		struct i915_address_space *ggtt = obj_to_ggtt(batch_obj);
> +
> +		/* Assuming all privileged batches are in the global GTT means
> +		 * we need to make sure we have a global gtt offset, as well as
> +		 * the PTEs mapped. As mentioned above, we can forego this on
> +		 * HSW, but don't.
> +		 */
> +
> +		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, false, false);
> +		if (ret)
> +			goto err;
> +
> +		ggtt->bind_vma(i915_gem_obj_to_ggtt(batch_obj),
> +			       batch_obj->cache_level,
> +			       GLOBAL_BIND);
> +
> +		/* Since the active list is per VM, we need to make sure this
> +		 * VMA ends up on the GGTT's active list to avoid premature
> +		 * eviction.
> +		 */
> +		i915_vma_move_to_active(i915_gem_obj_to_ggtt(batch_obj), ring);
> +
> +		i915_gem_object_unpin(batch_obj);
> +
> +		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
> +	} else
> +		exec_start += i915_gem_obj_offset(batch_obj, vm);
>  
>  	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
>  	if (ret)
> @@ -1161,8 +1184,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  			goto err;
>  	}
>  
> -	exec_start = i915_gem_obj_offset(batch_obj, vm) +
> -		args->batch_start_offset;
>  	exec_len = args->batch_len;
>  	if (cliprects) {
>  		for (i = 0; i < args->num_cliprects; i++) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 65b61d4..e053f14 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -437,15 +437,6 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
>  	dev_priv->mm.aliasing_ppgtt = NULL;
>  }
>  
> -void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
> -			    struct drm_i915_gem_object *obj,
> -			    enum i915_cache_level cache_level)
> -{
> -	ppgtt->base.insert_entries(&ppgtt->base, obj->pages,
> -				   i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
> -				   cache_level);
> -}
> -
>  static void __always_unused
>  gen6_ppgtt_bind_vma(struct i915_vma *vma,
>  		    enum i915_cache_level cache_level,
> @@ -458,14 +449,6 @@ gen6_ppgtt_bind_vma(struct i915_vma *vma,
>  	gen6_ppgtt_insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
>  }
>  
> -void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
> -			      struct drm_i915_gem_object *obj)
> -{
> -	ppgtt->base.clear_range(&ppgtt->base,
> -				i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
> -				obj->base.size >> PAGE_SHIFT);
> -}
> -
>  static void __always_unused gen6_ppgtt_unbind_vma(struct i915_vma *vma)
>  {
>  	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
> @@ -523,8 +506,10 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
>  				       dev_priv->gtt.base.total / PAGE_SIZE);
>  
>  	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> +		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
> +							   &dev_priv->gtt.base);
>  		i915_gem_clflush_object(obj, obj->pin_display);
> -		i915_gem_gtt_bind_object(obj, obj->cache_level);
> +		vma->vm->bind_vma(vma, obj->cache_level, 0);
>  	}
>  
>  	i915_gem_chipset_flush(dev);
> @@ -687,33 +672,6 @@ static void gen6_ggtt_bind_vma(struct i915_vma *vma,
>  	}
>  }
>  
> -void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
> -			      enum i915_cache_level cache_level)
> -{
> -	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
> -
> -	dev_priv->gtt.base.insert_entries(&dev_priv->gtt.base, obj->pages,
> -					  entry,
> -					  cache_level);
> -
> -	obj->has_global_gtt_mapping = 1;
> -}
> -
> -void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
> -{
> -	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
> -
> -	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
> -				       entry,
> -				       obj->base.size >> PAGE_SHIFT);
> -
> -	obj->has_global_gtt_mapping = 0;
> -}
> -
>  static void gen6_ggtt_unbind_vma(struct i915_vma *vma)
>  {
>  	struct drm_device *dev = vma->vm->dev;
> -- 
> 1.8.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 5/6] drm/i915: Use the new vm [un]bind functions
  2013-09-25 11:42   ` Daniel Vetter
@ 2013-09-25 17:19     ` Ben Widawsky
  0 siblings, 0 replies; 11+ messages in thread
From: Ben Widawsky @ 2013-09-25 17:19 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Intel GFX, Ben Widawsky

On Wed, Sep 25, 2013 at 01:42:21PM +0200, Daniel Vetter wrote:
> On Tue, Sep 24, 2013 at 09:58:00AM -0700, Ben Widawsky wrote:
> > From: Ben Widawsky <ben@bwidawsk.net>
> > 
> > Building on the last patch which created the new function pointers in
> > the VM for bind/unbind, here we actually put those new function pointers
> > to use.
> > 
> > Split out as a separate patch to aid in review. I'm fine with squashing
> > into the previous patch if people request it.
> > 
> > v2: Updated to address the smart ggtt which can do aliasing as needed
> > Make sure we bind to global gtt when mappable and fenceable. I thought
> > we could get away without this initialy, but we cannot.
> > 
> > v3: Make the global GTT binding explicitly use the ggtt VM for
> > bind_vma(). While at it, use the new ggtt_vma helper (Chris)
> > 
> > v4: Make the code support the secure dispatch flag, which requires
> > special handling during execbuf. This was fixed (incorrectly) later in
> > the series, but having it here earlier in the series should be perfectly
> > acceptable. (Chris)
> > Move do_switch over to the new, special ggtt_vma interface.
> > 
> > v5: Don't use a local variable (or assertion) when setting the batch
> > object to the global GTT during secure dispatch (Chris)
> > 
> > v6: Caclulate the exec offset for the secure case (Bug fix missed on
> > v4). (Chris)
> > Remove redundant check for has_global_gtt_mapping, since it is done in
> > bind_vma.
> > 
> > v7: Remove now unused dev_priv in do_switch
> > Don't pass the vm to ggtt_offset (error from v6 which I should have
> > caught before sending).
> > 
> > v8: Assert, and rework the SNB workaround (to make it a bit clearer)
> > code to make sure the VM can't be anything but the GGTT.
> > 
> > v9: Fixing more bugs which can't exist yet on the behest of Chris. Make
> > sure that the batch object is properly bound, and added to the global
> > VM's active list - for when we use non-global VMs. (Chris) Note that
> > there is an ongoing confusion about what bugs existed, but the "known"
> > bugs are fixed here.
> > 
> > v10: Nitpicks on whitespacing etc. introduced in v9 (Chris)
> > 
> > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> 
 
> Second is the death of the ->insert_entries/->clear_range vfuncs. We
> _need_ them in the internal tree and you really can't just kill them. If
> you want to, you need to follow three steps:
> 
> 1. Create new interfaces in the public tree, use the on public platforms
> but keeep the old interfaces working.
> 
> 2. Convert the -internal platforms over.
> 
> 3. Remove old interface.
> 
> Doing 3. before 2. is bonghits and will result in the internal tree being
> broken a bit in-between. Since I'm supposed to maintain that I'll undo the
> damage here to be able to do a rebase.
> 
> Cheers, Daniel

As I've now stated multiple times over the course of the last 5 months -
I am fine with you not merging this. It's the right thing to do, but
it seems neither you or I have time to do it in the right way. Sometimes
reality gets in the way what's desirable.

> 
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h            |  9 -----
> >  drivers/gpu/drm/i915/i915_gem.c            | 33 +++++++---------
> >  drivers/gpu/drm/i915/i915_gem_context.c    |  6 ++-
> >  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 61 ++++++++++++++++++++----------
> >  drivers/gpu/drm/i915/i915_gem_gtt.c        | 48 ++---------------------
> >  5 files changed, 62 insertions(+), 95 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 9995cdb..e8ae8fd 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -2102,17 +2102,8 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
> >  
> >  /* i915_gem_gtt.c */
> >  void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev);
> > -void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
> > -			    struct drm_i915_gem_object *obj,
> > -			    enum i915_cache_level cache_level);
> > -void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
> > -			      struct drm_i915_gem_object *obj);
> > -
> >  void i915_gem_restore_gtt_mappings(struct drm_device *dev);
> >  int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
> > -void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
> > -				enum i915_cache_level cache_level);
> > -void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj);
> >  void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
> >  void i915_gem_init_global_gtt(struct drm_device *dev);
> >  void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start,
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index f6c8b0e..378d4ef 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -2693,12 +2693,8 @@ int i915_vma_unbind(struct i915_vma *vma)
> >  
> >  	trace_i915_vma_unbind(vma);
> >  
> > -	if (obj->has_global_gtt_mapping)
> > -		i915_gem_gtt_unbind_object(obj);
> > -	if (obj->has_aliasing_ppgtt_mapping) {
> > -		i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
> > -		obj->has_aliasing_ppgtt_mapping = 0;
> > -	}
> > +	vma->vm->unbind_vma(vma);
> > +
> >  	i915_gem_gtt_finish_object(obj);
> >  	i915_gem_object_unpin_pages(obj);
> >  
> > @@ -3155,7 +3151,7 @@ static void i915_gem_verify_gtt(struct drm_device *dev)
> >  /**
> >   * Finds free space in the GTT aperture and binds the object there.
> >   */
> > -static int
> > +int
> >  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> >  			   struct i915_address_space *vm,
> >  			   unsigned alignment,
> > @@ -3424,7 +3420,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> >  				    enum i915_cache_level cache_level)
> >  {
> >  	struct drm_device *dev = obj->base.dev;
> > -	drm_i915_private_t *dev_priv = dev->dev_private;
> >  	struct i915_vma *vma;
> >  	int ret;
> >  
> > @@ -3463,11 +3458,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> >  				return ret;
> >  		}
> >  
> > -		if (obj->has_global_gtt_mapping)
> > -			i915_gem_gtt_bind_object(obj, cache_level);
> > -		if (obj->has_aliasing_ppgtt_mapping)
> > -			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
> > -					       obj, cache_level);
> > +		list_for_each_entry(vma, &obj->vma_list, vma_link)
> > +			vma->vm->bind_vma(vma, cache_level, 0);
> >  	}
> >  
> >  	list_for_each_entry(vma, &obj->vma_list, vma_link)
> > @@ -3795,6 +3787,7 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
> >  		    bool map_and_fenceable,
> >  		    bool nonblocking)
> >  {
> > +	const u32 flags = map_and_fenceable ? GLOBAL_BIND : 0;
> >  	struct i915_vma *vma;
> >  	int ret;
> >  
> > @@ -3823,20 +3816,22 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
> >  	}
> >  
> >  	if (!i915_gem_obj_bound(obj, vm)) {
> > -		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> > -
> >  		ret = i915_gem_object_bind_to_vm(obj, vm, alignment,
> >  						 map_and_fenceable,
> >  						 nonblocking);
> >  		if (ret)
> >  			return ret;
> >  
> > -		if (!dev_priv->mm.aliasing_ppgtt)
> > -			i915_gem_gtt_bind_object(obj, obj->cache_level);
> > -	}
> > +		vma = i915_gem_obj_to_vma(obj, vm);
> > +		vm->bind_vma(vma, obj->cache_level, flags);
> > +	} else
> > +		vma = i915_gem_obj_to_vma(obj, vm);
> >  
> > +	/* Objects are created map and fenceable. If we bind an object
> > +	 * the first time, and we had aliasing PPGTT (and didn't request
> > +	 * GLOBAL), we'll need to do this on the second bind.*/
> >  	if (!obj->has_global_gtt_mapping && map_and_fenceable)
> > -		i915_gem_gtt_bind_object(obj, obj->cache_level);
> > +		vm->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
> >  
> >  	obj->pin_count++;
> >  	obj->pin_mappable |= map_and_fenceable;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index 1a877a5..d4eb88a 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -422,8 +422,10 @@ static int do_switch(struct i915_hw_context *to)
> >  		return ret;
> >  	}
> >  
> > -	if (!to->obj->has_global_gtt_mapping)
> > -		i915_gem_gtt_bind_object(to->obj, to->obj->cache_level);
> > +	if (!to->obj->has_global_gtt_mapping) {
> > +		struct i915_vma *vma = i915_gem_obj_to_ggtt(to->obj);
> > +		vma->vm->bind_vma(vma, to->obj->cache_level, GLOBAL_BIND);
> > +	}
> >  
> >  	if (!to->is_initialized || is_default_context(to))
> >  		hw_flags |= MI_RESTORE_INHIBIT;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > index 0ce0d47..88c924f 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > @@ -286,8 +286,14 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
> >  	if (unlikely(IS_GEN6(dev) &&
> >  	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
> >  	    !target_i915_obj->has_global_gtt_mapping)) {
> > -		i915_gem_gtt_bind_object(target_i915_obj,
> > -					 target_i915_obj->cache_level);
> > +		/* SNB shall not support full PPGTT. This path can only be taken
> > +		 * when the VM is the GGTT (aliasing PPGTT is not a real VM, and
> > +		 * therefore doesn't count).
> > +		 */
> > +		BUG_ON(vm != obj_to_ggtt(target_i915_obj));
> > +		vm->bind_vma(i915_gem_obj_to_ggtt(target_i915_obj),
> > +			     target_i915_obj->cache_level,
> > +			     GLOBAL_BIND);
> >  	}
> >  
> >  	/* Validate that the target is in a valid r/w GPU domain */
> > @@ -464,11 +470,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
> >  				struct intel_ring_buffer *ring,
> >  				bool *need_reloc)
> >  {
> > -	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> > +	struct drm_i915_gem_object *obj = vma->obj;
> >  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> >  	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
> >  	bool need_fence, need_mappable;
> > -	struct drm_i915_gem_object *obj = vma->obj;
> > +	u32 flags = (entry->flags & EXEC_OBJECT_NEEDS_GTT) &&
> > +		!vma->obj->has_global_gtt_mapping ? GLOBAL_BIND : 0;
> >  	int ret;
> >  
> >  	need_fence =
> > @@ -497,14 +504,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
> >  		}
> >  	}
> >  
> > -	/* Ensure ppgtt mapping exists if needed */
> > -	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
> > -		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
> > -				       obj, obj->cache_level);
> > -
> > -		obj->has_aliasing_ppgtt_mapping = 1;
> > -	}
> > -
> >  	if (entry->offset != vma->node.start) {
> >  		entry->offset = vma->node.start;
> >  		*need_reloc = true;
> > @@ -515,9 +514,7 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
> >  		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
> >  	}
> >  
> > -	if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
> > -	    !obj->has_global_gtt_mapping)
> > -		i915_gem_gtt_bind_object(obj, obj->cache_level);
> > +	vma->vm->bind_vma(vma, obj->cache_level, flags);
> >  
> >  	return 0;
> >  }
> > @@ -936,7 +933,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> >  	struct intel_ring_buffer *ring;
> >  	struct i915_ctx_hang_stats *hs;
> >  	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
> > -	u32 exec_start, exec_len;
> > +	u32 exec_len, exec_start = args->batch_start_offset;
> >  	u32 mask, flags;
> >  	int ret, mode, i;
> >  	bool need_relocs;
> > @@ -1118,8 +1115,34 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> >  	 * batch" bit. Hence we need to pin secure batches into the global gtt.
> >  	 * hsw should have this fixed, but let's be paranoid and do it
> >  	 * unconditionally for now. */
> > -	if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
> > -		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);
> > +	if (flags & I915_DISPATCH_SECURE) {
> > +		struct i915_address_space *ggtt = obj_to_ggtt(batch_obj);
> > +
> > +		/* Assuming all privileged batches are in the global GTT means
> > +		 * we need to make sure we have a global gtt offset, as well as
> > +		 * the PTEs mapped. As mentioned above, we can forego this on
> > +		 * HSW, but don't.
> > +		 */
> > +
> > +		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, false, false);
> > +		if (ret)
> > +			goto err;
> > +
> > +		ggtt->bind_vma(i915_gem_obj_to_ggtt(batch_obj),
> > +			       batch_obj->cache_level,
> > +			       GLOBAL_BIND);
> > +
> > +		/* Since the active list is per VM, we need to make sure this
> > +		 * VMA ends up on the GGTT's active list to avoid premature
> > +		 * eviction.
> > +		 */
> > +		i915_vma_move_to_active(i915_gem_obj_to_ggtt(batch_obj), ring);
> > +
> > +		i915_gem_object_unpin(batch_obj);
> > +
> > +		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
> > +	} else
> > +		exec_start += i915_gem_obj_offset(batch_obj, vm);
> >  
> >  	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
> >  	if (ret)
> > @@ -1161,8 +1184,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> >  			goto err;
> >  	}
> >  
> > -	exec_start = i915_gem_obj_offset(batch_obj, vm) +
> > -		args->batch_start_offset;
> >  	exec_len = args->batch_len;
> >  	if (cliprects) {
> >  		for (i = 0; i < args->num_cliprects; i++) {
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index 65b61d4..e053f14 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -437,15 +437,6 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
> >  	dev_priv->mm.aliasing_ppgtt = NULL;
> >  }
> >  
> > -void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
> > -			    struct drm_i915_gem_object *obj,
> > -			    enum i915_cache_level cache_level)
> > -{
> > -	ppgtt->base.insert_entries(&ppgtt->base, obj->pages,
> > -				   i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
> > -				   cache_level);
> > -}
> > -
> >  static void __always_unused
> >  gen6_ppgtt_bind_vma(struct i915_vma *vma,
> >  		    enum i915_cache_level cache_level,
> > @@ -458,14 +449,6 @@ gen6_ppgtt_bind_vma(struct i915_vma *vma,
> >  	gen6_ppgtt_insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
> >  }
> >  
> > -void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
> > -			      struct drm_i915_gem_object *obj)
> > -{
> > -	ppgtt->base.clear_range(&ppgtt->base,
> > -				i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
> > -				obj->base.size >> PAGE_SHIFT);
> > -}
> > -
> >  static void __always_unused gen6_ppgtt_unbind_vma(struct i915_vma *vma)
> >  {
> >  	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
> > @@ -523,8 +506,10 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
> >  				       dev_priv->gtt.base.total / PAGE_SIZE);
> >  
> >  	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> > +		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
> > +							   &dev_priv->gtt.base);
> >  		i915_gem_clflush_object(obj, obj->pin_display);
> > -		i915_gem_gtt_bind_object(obj, obj->cache_level);
> > +		vma->vm->bind_vma(vma, obj->cache_level, 0);
> >  	}
> >  
> >  	i915_gem_chipset_flush(dev);
> > @@ -687,33 +672,6 @@ static void gen6_ggtt_bind_vma(struct i915_vma *vma,
> >  	}
> >  }
> >  
> > -void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
> > -			      enum i915_cache_level cache_level)
> > -{
> > -	struct drm_device *dev = obj->base.dev;
> > -	struct drm_i915_private *dev_priv = dev->dev_private;
> > -	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
> > -
> > -	dev_priv->gtt.base.insert_entries(&dev_priv->gtt.base, obj->pages,
> > -					  entry,
> > -					  cache_level);
> > -
> > -	obj->has_global_gtt_mapping = 1;
> > -}
> > -
> > -void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
> > -{
> > -	struct drm_device *dev = obj->base.dev;
> > -	struct drm_i915_private *dev_priv = dev->dev_private;
> > -	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
> > -
> > -	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
> > -				       entry,
> > -				       obj->base.size >> PAGE_SHIFT);
> > -
> > -	obj->has_global_gtt_mapping = 0;
> > -}
> > -
> >  static void gen6_ggtt_unbind_vma(struct i915_vma *vma)
> >  {
> >  	struct drm_device *dev = vma->vm->dev;
> > -- 
> > 1.8.4
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 2/6] drm/i915: Provide a cheap ggtt vma lookup
  2013-09-14 22:03 Ben Widawsky
@ 2013-09-14 22:03 ` Ben Widawsky
  0 siblings, 0 replies; 11+ messages in thread
From: Ben Widawsky @ 2013-09-14 22:03 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ben Widawsky, Ben Widawsky

"We do fairly often lookup the ggtt vma for an obj." - Chris Wilson. As
such, provide a function to offer slightly cheaper access to the vma.
Not performance tested. By my quick estimation it saves at least 3
pointer dereferences from the existing mechanism.

This patch mostly matches code from Chris in
<20130911221430.GB7825@nuc-i3427.alporthouse.com>

CC: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h |  4 +++-
 drivers/gpu/drm/i915/i915_gem.c | 17 +++++++++++++++--
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7caf71d..df43f71 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2009,6 +2009,9 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
 struct i915_vma *
 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
 				  struct i915_address_space *vm);
+
+struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj);
+
 /* Some GGTT VM helpers */
 #define obj_to_ggtt(obj) \
 	(&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base)
@@ -2045,7 +2048,6 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
 	return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment,
 				   map_and_fenceable, nonblocking);
 }
-#undef obj_to_ggtt
 
 /* i915_gem_context.c */
 void i915_gem_context_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3d3de6e..83f946c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3403,8 +3403,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 
 	/* And bump the LRU for this access */
 	if (i915_gem_object_is_inactive(obj)) {
-		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
-							   &dev_priv->gtt.base);
+		struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
 		if (vma)
 			list_move_tail(&vma->mm_list,
 				       &dev_priv->gtt.base.inactive_list);
@@ -4942,3 +4941,17 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
 
 	return 0;
 }
+
+struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+
+	if (WARN_ON(list_empty(&obj->vma_list)))
+		return NULL;
+
+	vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
+	if (WARN_ON(vma->vm != obj_to_ggtt(obj)))
+		return NULL;
+
+	return vma;
+}
-- 
1.8.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2013-09-25 17:20 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-09-24 16:57 [PATCH 1/6] drm/i915: trace vm eviction instead of everything Ben Widawsky
2013-09-24 16:57 ` [PATCH 2/6] drm/i915: Provide a cheap ggtt vma lookup Ben Widawsky
2013-09-24 19:11   ` Daniel Vetter
2013-09-24 16:57 ` [PATCH 3/6] drm/i915: Convert active API to VMA Ben Widawsky
2013-09-24 16:57 ` [PATCH 4/6] drm/i915: Add bind/unbind object functions to VM Ben Widawsky
2013-09-24 16:58 ` [PATCH 5/6] drm/i915: Use the new vm [un]bind functions Ben Widawsky
2013-09-25 11:42   ` Daniel Vetter
2013-09-25 17:19     ` Ben Widawsky
2013-09-24 16:58 ` [PATCH 6/6] drm/i915: eliminate vm->insert_entries() Ben Widawsky
2013-09-24 19:14 ` [PATCH 1/6] drm/i915: trace vm eviction instead of everything Daniel Vetter
  -- strict thread matches above, loose matches on Subject: below --
2013-09-14 22:03 Ben Widawsky
2013-09-14 22:03 ` [PATCH 2/6] drm/i915: Provide a cheap ggtt vma lookup Ben Widawsky

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.