* [PATCH 1/3] drm/i915: Use the MRU stack search after evicting
@ 2017-01-10 21:55 Chris Wilson
  2017-01-10 21:55 ` [PATCH 2/3] drm/i915: Extract reserving space in the GTT to a helper Chris Wilson
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Chris Wilson @ 2017-01-10 21:55 UTC (permalink / raw)
  To: intel-gfx

When we evict from the GTT to make room for an object, the hole we
create is put onto the MRU stack inside the drm_mm range manager. On the
next search pass, we can speed up a PIN_HIGH allocation by referencing
that stack for the new hole.

v2: Pull together the 3 identical implementations (ahem, a couple were
outdated) into a common routine for allocating a node and evicting as
necessary.
v3: Detect invalid calls to i915_gem_gtt_insert()
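
For reference, the combined flow of the new helper is roughly as follows (a
condensed sketch of the i915_gem_gtt_insert() added below, not new code); the
point is that the retry after eviction uses DRM_MM_SEARCH_DEFAULT, so drm_mm
consults its MRU hole stack, i.e. the hole the eviction just created:

	err = drm_mm_insert_node_in_range_generic(&vm->mm, node, size, alignment,
						  color, start, end,
						  search_flag, alloc_flag);
	if (err == -ENOSPC) {
		err = i915_gem_evict_something(vm, size, alignment, color,
					       start, end, flags);
		if (err == 0)
			err = drm_mm_insert_node_in_range_generic(&vm->mm, node,
								  size, alignment,
								  color, start, end,
								  DRM_MM_SEARCH_DEFAULT,
								  alloc_flag);
	}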

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/aperture_gm.c | 33 ++++---------
 drivers/gpu/drm/i915/i915_gem_gtt.c    | 88 ++++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_gtt.h    |  5 ++
 drivers/gpu/drm/i915/i915_vma.c        | 40 ++--------------
 4 files changed, 86 insertions(+), 80 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 016227d77dd4..a97d56ea3d83 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -41,47 +41,34 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
 {
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct drm_i915_private *dev_priv = gvt->dev_priv;
-	u32 alloc_flag, search_flag;
+	unsigned int flags;
 	u64 start, end, size;
 	struct drm_mm_node *node;
-	int retried = 0;
 	int ret;
 
 	if (high_gm) {
-		search_flag = DRM_MM_SEARCH_BELOW;
-		alloc_flag = DRM_MM_CREATE_TOP;
 		node = &vgpu->gm.high_gm_node;
 		size = vgpu_hidden_sz(vgpu);
 		start = gvt_hidden_gmadr_base(gvt);
 		end = gvt_hidden_gmadr_end(gvt);
+		flags = PIN_HIGH;
 	} else {
-		search_flag = DRM_MM_SEARCH_DEFAULT;
-		alloc_flag = DRM_MM_CREATE_DEFAULT;
 		node = &vgpu->gm.low_gm_node;
 		size = vgpu_aperture_sz(vgpu);
 		start = gvt_aperture_gmadr_base(gvt);
 		end = gvt_aperture_gmadr_end(gvt);
+		flags = PIN_MAPPABLE;
 	}
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
-search_again:
-	ret = drm_mm_insert_node_in_range_generic(&dev_priv->ggtt.base.mm,
-						  node, size, 4096,
-						  I915_COLOR_UNEVICTABLE,
-						  start, end, search_flag,
-						  alloc_flag);
-	if (ret) {
-		ret = i915_gem_evict_something(&dev_priv->ggtt.base,
-					       size, 4096,
-					       I915_COLOR_UNEVICTABLE,
-					       start, end, 0);
-		if (ret == 0 && ++retried < 3)
-			goto search_again;
-
-		gvt_err("fail to alloc %s gm space from host, retried %d\n",
-				high_gm ? "high" : "low", retried);
-	}
+	ret = i915_gem_gtt_insert(&dev_priv->ggtt.base, node,
+				  size, 4096, I915_COLOR_UNEVICTABLE,
+				  start, end, flags);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
+	if (ret)
+		gvt_err("fail to alloc %s gm space from host\n",
+			high_gm ? "high" : "low");
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8aca11f5f446..b5ebfd7afe1d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -23,10 +23,13 @@
  *
  */
 
+#include <linux/log2.h>
 #include <linux/seq_file.h>
 #include <linux/stop_machine.h>
+
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
 #include "i915_vgpu.h"
 #include "i915_trace.h"
@@ -2032,7 +2035,6 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
 	struct i915_address_space *vm = &ppgtt->base;
 	struct drm_i915_private *dev_priv = ppgtt->base.i915;
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	bool retried = false;
 	int ret;
 
 	/* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
@@ -2045,29 +2047,14 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
 	if (ret)
 		return ret;
 
-alloc:
-	ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, &ppgtt->node,
-						  GEN6_PD_SIZE, GEN6_PD_ALIGN,
-						  I915_COLOR_UNEVICTABLE,
-						  0, ggtt->base.total,
-						  DRM_MM_TOPDOWN);
-	if (ret == -ENOSPC && !retried) {
-		ret = i915_gem_evict_something(&ggtt->base,
-					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
-					       I915_COLOR_UNEVICTABLE,
-					       0, ggtt->base.total,
-					       0);
-		if (ret)
-			goto err_out;
-
-		retried = true;
-		goto alloc;
-	}
-
+	ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node,
+				  GEN6_PD_SIZE, GEN6_PD_ALIGN,
+				  I915_COLOR_UNEVICTABLE,
+				  0, ggtt->base.total,
+				  PIN_HIGH);
 	if (ret)
 		goto err_out;
 
-
 	if (ppgtt->node.start < ggtt->mappable_end)
 		DRM_DEBUG("Forced to use aperture for PDEs\n");
 
@@ -3567,3 +3554,62 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
 	return ret;
 }
 
+int i915_gem_gtt_insert(struct i915_address_space *vm,
+			struct drm_mm_node *node,
+			u64 size, u64 alignment, unsigned long color,
+			u64 start, u64 end, unsigned int flags)
+{
+	u32 search_flag, alloc_flag;
+	int err;
+
+	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	GEM_BUG_ON(!size);
+	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
+	GEM_BUG_ON(alignment && !is_power_of_2(alignment));
+	GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
+	GEM_BUG_ON(start >= end);
+	GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
+	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
+
+	if (unlikely(range_overflows(start, size, end)))
+		return -ENOSPC;
+
+	if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
+		return -ENOSPC;
+
+	if (flags & PIN_HIGH) {
+		search_flag = DRM_MM_SEARCH_BELOW;
+		alloc_flag = DRM_MM_CREATE_TOP;
+	} else {
+		search_flag = DRM_MM_SEARCH_DEFAULT;
+		alloc_flag = DRM_MM_CREATE_DEFAULT;
+	}
+
+	/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
+	 * so we know that we always have a minimum alignment of 4096.
+	 * The drm_mm range manager is optimised to return results
+	 * with zero alignment, so where possible use the optimal
+	 * path.
+	 */
+	GEM_BUG_ON(size & (I915_GTT_MIN_ALIGNMENT - 1));
+	if (alignment <= I915_GTT_MIN_ALIGNMENT)
+		alignment = 0;
+
+	err = drm_mm_insert_node_in_range_generic(&vm->mm, node,
+						  size, alignment, color,
+						  start, end,
+						  search_flag, alloc_flag);
+	if (err != -ENOSPC)
+		return err;
+
+	err = i915_gem_evict_something(vm, size, alignment, color,
+				       start, end, flags);
+	if (err)
+		return err;
+
+	search_flag = DRM_MM_SEARCH_DEFAULT;
+	return drm_mm_insert_node_in_range_generic(&vm->mm, node,
+						   size, alignment, color,
+						   start, end,
+						   search_flag, alloc_flag);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 34a4fd560fa2..79198352a491 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -532,6 +532,11 @@ int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 			       struct sg_table *pages);
 
+int i915_gem_gtt_insert(struct i915_address_space *vm,
+			struct drm_mm_node *node,
+			u64 size, u64 alignment, unsigned long color,
+			u64 start, u64 end, unsigned int flags);
+
 /* Flags used by pin/bind&friends. */
 #define PIN_NONBLOCK		BIT(0)
 #define PIN_MAPPABLE		BIT(1)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 490914f89663..df3750d4c907 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -431,43 +431,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 				goto err_unpin;
 		}
 	} else {
-		u32 search_flag, alloc_flag;
-
-		if (flags & PIN_HIGH) {
-			search_flag = DRM_MM_SEARCH_BELOW;
-			alloc_flag = DRM_MM_CREATE_TOP;
-		} else {
-			search_flag = DRM_MM_SEARCH_DEFAULT;
-			alloc_flag = DRM_MM_CREATE_DEFAULT;
-		}
-
-		/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
-		 * so we know that we always have a minimum alignment of 4096.
-		 * The drm_mm range manager is optimised to return results
-		 * with zero alignment, so where possible use the optimal
-		 * path.
-		 */
-		if (alignment <= I915_GTT_MIN_ALIGNMENT)
-			alignment = 0;
-
-search_free:
-		ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
-							  &vma->node,
-							  size, alignment,
-							  obj->cache_level,
-							  start, end,
-							  search_flag,
-							  alloc_flag);
-		if (ret) {
-			ret = i915_gem_evict_something(vma->vm, size, alignment,
-						       obj->cache_level,
-						       start, end,
-						       flags);
-			if (ret == 0)
-				goto search_free;
-
+		ret = i915_gem_gtt_insert(vma->vm, &vma->node,
+					  size, alignment, obj->cache_level,
+					  start, end, flags);
+		if (ret)
 			goto err_unpin;
-		}
 
 		GEM_BUG_ON(vma->node.start < start);
 		GEM_BUG_ON(vma->node.start + vma->node.size > end);
-- 
2.11.0


* [PATCH 2/3] drm/i915: Extract reserving space in the GTT to a helper
  2017-01-10 21:55 [PATCH 1/3] drm/i915: Use the MRU stack search after evicting Chris Wilson
@ 2017-01-10 21:55 ` Chris Wilson
  2017-01-11  7:06   ` Joonas Lahtinen
  2017-01-10 21:55 ` [PATCH 3/3] drm/i915: Prefer random replacement before eviction search Chris Wilson
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2017-01-10 21:55 UTC (permalink / raw)
  To: intel-gfx

Extract drm_mm_reserve_node + calling i915_gem_evict_for_node into its
own routine so that it can be shared rather than duplicated.
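
In outline, the shared routine is (a condensed sketch of the
i915_gem_gtt_reserve() helper added below, shown here only as a summary):

	node->start = offset;
	node->size = size;
	node->color = color;

	err = drm_mm_reserve_node(&vm->mm, node);
	if (err == -ENOSPC) {
		err = i915_gem_evict_for_node(vm, node, flags);
		if (err == 0)
			err = drm_mm_reserve_node(&vm->mm, node);
	}
	return err;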

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h        |  5 +++--
 drivers/gpu/drm/i915/i915_gem_evict.c  | 33 ++++++++++++++++++---------------
 drivers/gpu/drm/i915/i915_gem_gtt.c    | 27 +++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_gtt.h    |  5 +++++
 drivers/gpu/drm/i915/i915_gem_stolen.c |  7 +++----
 drivers/gpu/drm/i915/i915_trace.h      | 16 ++++++++--------
 drivers/gpu/drm/i915/i915_vgpu.c       | 33 ++++++++++++---------------------
 drivers/gpu/drm/i915/i915_vma.c        | 16 +++++-----------
 8 files changed, 81 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 89e0038ea26b..a29d138b6906 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3468,8 +3468,9 @@ int __must_check i915_gem_evict_something(struct i915_address_space *vm,
 					  unsigned cache_level,
 					  u64 start, u64 end,
 					  unsigned flags);
-int __must_check i915_gem_evict_for_vma(struct i915_vma *vma,
-					unsigned int flags);
+int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
+					 struct drm_mm_node *node,
+					 unsigned int flags);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 
 /* belongs in i915_gem_gtt.h */
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 6a5415e31acf..50b4645bf627 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -231,7 +231,8 @@ i915_gem_evict_something(struct i915_address_space *vm,
 
 /**
  * i915_gem_evict_for_vma - Evict vmas to make room for binding a new one
- * @target: address space and range to evict for
+ * @vm: address space to evict from
+ * @target: range (and color) to evict for
  * @flags: additional flags to control the eviction algorithm
  *
  * This function will try to evict vmas that overlap the target node.
@@ -239,18 +240,20 @@ i915_gem_evict_something(struct i915_address_space *vm,
  * To clarify: This is for freeing up virtual address space, not for freeing
  * memory in e.g. the shrinker.
  */
-int i915_gem_evict_for_vma(struct i915_vma *target, unsigned int flags)
+int i915_gem_evict_for_node(struct i915_address_space *vm,
+			    struct drm_mm_node *target,
+			    unsigned int flags)
 {
 	LIST_HEAD(eviction_list);
 	struct drm_mm_node *node;
-	u64 start = target->node.start;
-	u64 end = start + target->node.size;
+	u64 start = target->start;
+	u64 end = start + target->size;
 	struct i915_vma *vma, *next;
 	bool check_color;
 	int ret = 0;
 
-	lockdep_assert_held(&target->vm->i915->drm.struct_mutex);
-	trace_i915_gem_evict_vma(target, flags);
+	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	trace_i915_gem_evict_node(vm, target, flags);
 
 	/* Retire before we search the active list. Although we have
 	 * reasonable accuracy in our retirement lists, we may have
@@ -258,18 +261,18 @@ int i915_gem_evict_for_vma(struct i915_vma *target, unsigned int flags)
 	 * retiring.
 	 */
 	if (!(flags & PIN_NONBLOCK))
-		i915_gem_retire_requests(target->vm->i915);
+		i915_gem_retire_requests(vm->i915);
 
-	check_color = target->vm->mm.color_adjust;
+	check_color = vm->mm.color_adjust;
 	if (check_color) {
 		/* Expand search to cover neighbouring guard pages (or lack!) */
-		if (start > target->vm->start)
+		if (start > vm->start)
 			start -= I915_GTT_PAGE_SIZE;
-		if (end < target->vm->start + target->vm->total)
+		if (end < vm->start + vm->total)
 			end += I915_GTT_PAGE_SIZE;
 	}
 
-	drm_mm_for_each_node_in_range(node, &target->vm->mm, start, end) {
+	drm_mm_for_each_node_in_range(node, &vm->mm, start, end) {
 		/* If we find any non-objects (!vma), we cannot evict them */
 		if (node->color == I915_COLOR_UNEVICTABLE) {
 			ret = -ENOSPC;
@@ -285,12 +288,12 @@ int i915_gem_evict_for_vma(struct i915_vma *target, unsigned int flags)
 		 * those as well to make room for our guard pages.
 		 */
 		if (check_color) {
-			if (vma->node.start + vma->node.size == target->node.start) {
-				if (vma->node.color == target->node.color)
+			if (vma->node.start + vma->node.size == target->start) {
+				if (vma->node.color == target->color)
 					continue;
 			}
-			if (vma->node.start == target->node.start + target->node.size) {
-				if (vma->node.color == target->node.color)
+			if (vma->node.start == target->start + target->size) {
+				if (vma->node.color == target->color)
 					continue;
 			}
 		}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index b5ebfd7afe1d..a3ea32f79d86 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3554,6 +3554,33 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
 	return ret;
 }
 
+int i915_gem_gtt_reserve(struct i915_address_space *vm,
+			 struct drm_mm_node *node,
+			 u64 size, u64 offset, unsigned long color,
+			 unsigned int flags)
+{
+	int err;
+
+	GEM_BUG_ON(!size);
+	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
+	GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
+	GEM_BUG_ON(range_overflows(offset, size, vm->total));
+
+	node->size = size;
+	node->start = offset;
+	node->color = color;
+
+	err = drm_mm_reserve_node(&vm->mm, node);
+	if (err != -ENOSPC)
+		return err;
+
+	err = i915_gem_evict_for_node(vm, node, flags);
+	if (err == 0)
+		err = drm_mm_reserve_node(&vm->mm, node);
+
+	return err;
+}
+
 int i915_gem_gtt_insert(struct i915_address_space *vm,
 			struct drm_mm_node *node,
 			u64 size, u64 alignment, unsigned long color,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 79198352a491..3e031a057f78 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -532,6 +532,11 @@ int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 			       struct sg_table *pages);
 
+int i915_gem_gtt_reserve(struct i915_address_space *vm,
+			 struct drm_mm_node *node,
+			 u64 size, u64 offset, unsigned long color,
+			 unsigned int flags);
+
 int i915_gem_gtt_insert(struct i915_address_space *vm,
 			struct drm_mm_node *node,
 			u64 size, u64 alignment, unsigned long color,
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index e5be8e04bf3b..52dbb9bab268 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -694,10 +694,9 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	 * setting up the GTT space. The actual reservation will occur
 	 * later.
 	 */
-	vma->node.start = gtt_offset;
-	vma->node.size = size;
-
-	ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node);
+	ret = i915_gem_gtt_reserve(&ggtt->base, &vma->node,
+				   size, gtt_offset, obj->cache_level,
+				   0);
 	if (ret) {
 		DRM_DEBUG_KMS("failed to allocate stolen GTT space\n");
 		goto err_pages;
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 18ae37c411fd..4461df5a94fe 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -450,9 +450,9 @@ TRACE_EVENT(i915_gem_evict_vm,
 	    TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
 );
 
-TRACE_EVENT(i915_gem_evict_vma,
-	    TP_PROTO(struct i915_vma *vma, unsigned int flags),
-	    TP_ARGS(vma, flags),
+TRACE_EVENT(i915_gem_evict_node,
+	    TP_PROTO(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags),
+	    TP_ARGS(vm, node, flags),
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
@@ -464,11 +464,11 @@ TRACE_EVENT(i915_gem_evict_vma,
 			    ),
 
 	    TP_fast_assign(
-			   __entry->dev = vma->vm->i915->drm.primary->index;
-			   __entry->vm = vma->vm;
-			   __entry->start = vma->node.start;
-			   __entry->size = vma->node.size;
-			   __entry->color = vma->node.color;
+			   __entry->dev = vm->i915->drm.primary->index;
+			   __entry->vm = vm;
+			   __entry->start = node->start;
+			   __entry->size = node->size;
+			   __entry->color = node->color;
 			   __entry->flags = flags;
 			  ),
 
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index dae340cfc6c7..f1ad4fbb5ba7 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -116,22 +116,20 @@ void intel_vgt_deballoon(struct drm_i915_private *dev_priv)
 	memset(&bl_info, 0, sizeof(bl_info));
 }
 
-static int vgt_balloon_space(struct drm_mm *mm,
+static int vgt_balloon_space(struct i915_ggtt *ggtt,
 			     struct drm_mm_node *node,
 			     unsigned long start, unsigned long end)
 {
 	unsigned long size = end - start;
 
-	if (start == end)
+	if (start >= end)
 		return -EINVAL;
 
 	DRM_INFO("balloon space: range [ 0x%lx - 0x%lx ] %lu KiB.\n",
 		 start, end, size / 1024);
-
-	node->start = start;
-	node->size = size;
-
-	return drm_mm_reserve_node(mm, node);
+	return i915_gem_gtt_reserve(&ggtt->base, node,
+				    size, start, I915_COLOR_UNEVICTABLE,
+				    0);
 }
 
 /**
@@ -214,10 +212,8 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv)
 
 	/* Unmappable graphic memory ballooning */
 	if (unmappable_base > ggtt->mappable_end) {
-		ret = vgt_balloon_space(&ggtt->base.mm,
-					&bl_info.space[2],
-					ggtt->mappable_end,
-					unmappable_base);
+		ret = vgt_balloon_space(ggtt, &bl_info.space[2],
+					ggtt->mappable_end, unmappable_base);
 
 		if (ret)
 			goto err;
@@ -228,18 +224,15 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv)
 	 * because it is reserved to the guard page.
 	 */
 	if (unmappable_end < ggtt_end - PAGE_SIZE) {
-		ret = vgt_balloon_space(&ggtt->base.mm,
-					&bl_info.space[3],
-					unmappable_end,
-					ggtt_end - PAGE_SIZE);
+		ret = vgt_balloon_space(ggtt, &bl_info.space[3],
+					unmappable_end, ggtt_end - PAGE_SIZE);
 		if (ret)
 			goto err;
 	}
 
 	/* Mappable graphic memory ballooning */
 	if (mappable_base > ggtt->base.start) {
-		ret = vgt_balloon_space(&ggtt->base.mm,
-					&bl_info.space[0],
+		ret = vgt_balloon_space(ggtt, &bl_info.space[0],
 					ggtt->base.start, mappable_base);
 
 		if (ret)
@@ -247,10 +240,8 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv)
 	}
 
 	if (mappable_end < ggtt->mappable_end) {
-		ret = vgt_balloon_space(&ggtt->base.mm,
-					&bl_info.space[1],
-					mappable_end,
-					ggtt->mappable_end);
+		ret = vgt_balloon_space(ggtt, &bl_info.space[1],
+					mappable_end, ggtt->mappable_end);
 
 		if (ret)
 			goto err;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index df3750d4c907..b74eeb73ae41 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -419,17 +419,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 			goto err_unpin;
 		}
 
-		vma->node.start = offset;
-		vma->node.size = size;
-		vma->node.color = obj->cache_level;
-		ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
-		if (ret) {
-			ret = i915_gem_evict_for_vma(vma, flags);
-			if (ret == 0)
-				ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
-			if (ret)
-				goto err_unpin;
-		}
+		ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
+					   size, offset, obj->cache_level,
+					   flags);
+		if (ret)
+			goto err_unpin;
 	} else {
 		ret = i915_gem_gtt_insert(vma->vm, &vma->node,
 					  size, alignment, obj->cache_level,
-- 
2.11.0


* [PATCH 3/3] drm/i915: Prefer random replacement before eviction search
  2017-01-10 21:55 [PATCH 1/3] drm/i915: Use the MRU stack search after evicting Chris Wilson
  2017-01-10 21:55 ` [PATCH 2/3] drm/i915: Extract reserving space in the GTT to a helper Chris Wilson
@ 2017-01-10 21:55 ` Chris Wilson
  2017-01-11  7:47   ` Joonas Lahtinen
  2017-01-10 22:23 ` ✓ Fi.CI.BAT: success for series starting with [1/3] drm/i915: Use the MRU stack search after evicting Patchwork
  2017-01-11 10:53 ` ✗ Fi.CI.BAT: warning for series starting with [1/3] drm/i915: Use the MRU stack search after evicting (rev3) Patchwork
  3 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2017-01-10 21:55 UTC (permalink / raw)
  To: intel-gfx

Performing an eviction search can be very, very slow especially for a
range restricted replacement. For example, a workload like
gem_concurrent_blit will populate the entire GTT and then cause aperture
thrashing. Since the GTT is a mix of active and inactive tiny objects,
we have to search through almost 400k objects before finding anything
inside the mappable region, and as this search is required before every
operation performance falls off a cliff.

Instead of performing the full search, we do a trial replacement of the
node at a random location fitting the specified restrictions. We lose
the strict LRU property of the GTT in exchange for avoiding the slow
search (several orders of magnitude improvement in runtime for gem_concurrent_blit
4KiB-global-gtt, e.g. from 5000s to 20s). The loss of LRU replacement is
(later) mitigated firstly by only doing replacement if we find no
freespace and secondly by execbuf doing a PIN_NONBLOCK search first before
it starts thrashing (i.e. the random replacement will only occur from the
already inactive set of objects).
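
The resulting fallback order inside i915_gem_gtt_insert() is, in outline (a
condensed sketch of the change below):

	/* 1. Ordinary drm_mm search for free space */
	err = drm_mm_insert_node_in_range_generic(&vm->mm, node, size, alignment,
						  color, start, end,
						  search_flag, alloc_flag);
	if (err != -ENOSPC)
		return err;

	/* 2. No free space: try a single randomly chosen, aligned slot */
	err = i915_gem_gtt_reserve(vm, node, size,
				   random_offset(start, end, size, alignment),
				   color, flags);
	if (err != -ENOSPC)
		return err;

	/* 3. The random slot was pinned: fall back to the full eviction scan */
	err = i915_gem_evict_something(vm, size, alignment, color,
				       start, end, flags);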

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a3ea32f79d86..9ccc49ef0008 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -24,6 +24,7 @@
  */
 
 #include <linux/log2.h>
+#include <linux/random.h>
 #include <linux/seq_file.h>
 #include <linux/stop_machine.h>
 
@@ -3581,6 +3582,31 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm,
 	return err;
 }
 
+static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
+{
+	u64 range, addr;
+
+	if (align == 0)
+		align = I915_GTT_MIN_ALIGNMENT;
+
+	range = round_down(end - len, align) - round_up(start, align);
+	if (range) {
+		if (sizeof(unsigned long) == sizeof(u64)) {
+			addr = get_random_long();
+		} else {
+			addr = get_random_int();
+			if (range > U32_MAX) {
+				addr <<= 32;
+				addr |= get_random_int();
+			}
+		}
+		div64_u64_rem(addr, range, &addr);
+		start += addr;
+	}
+
+	return round_up(start, align);
+}
+
 int i915_gem_gtt_insert(struct i915_address_space *vm,
 			struct drm_mm_node *node,
 			u64 size, u64 alignment, unsigned long color,
@@ -3629,6 +3655,16 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	if (err != -ENOSPC)
 		return err;
 
+	/* No free space, pick a slot at random */
+	err = i915_gem_gtt_reserve(vm, node,
+				   size,
+				   random_offset(start, end, size, alignment),
+				   color,
+				   flags);
+	if (err != -ENOSPC)
+		return err;
+
+	/* Randomly selected placement is pinned, do a search */
 	err = i915_gem_evict_something(vm, size, alignment, color,
 				       start, end, flags);
 	if (err)
-- 
2.11.0


* ✓ Fi.CI.BAT: success for series starting with [1/3] drm/i915: Use the MRU stack search after evicting
  2017-01-10 21:55 [PATCH 1/3] drm/i915: Use the MRU stack search after evicting Chris Wilson
  2017-01-10 21:55 ` [PATCH 2/3] drm/i915: Extract reserving space in the GTT to a helper Chris Wilson
  2017-01-10 21:55 ` [PATCH 3/3] drm/i915: Prefer random replacement before eviction search Chris Wilson
@ 2017-01-10 22:23 ` Patchwork
  2017-01-11 10:53 ` ✗ Fi.CI.BAT: warning for series starting with [1/3] drm/i915: Use the MRU stack search after evicting (rev3) Patchwork
  3 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2017-01-10 22:23 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Use the MRU stack search after evicting
URL   : https://patchwork.freedesktop.org/series/17784/
State : success

== Summary ==

Series 17784v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/17784/revisions/1/mbox/


fi-bdw-5557u     total:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-bsw-n3050     total:246  pass:207  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:246  pass:224  dwarn:0   dfail:0   fail:0   skip:22 
fi-bxt-t5700     total:82   pass:69   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:246  pass:219  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-hsw-4770r     total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-ivb-3520m     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-kbl-7500u     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6260u     total:246  pass:233  dwarn:0   dfail:0   fail:0   skip:13 
fi-skl-6700hq    total:246  pass:226  dwarn:0   dfail:0   fail:0   skip:20 
fi-skl-6700k     total:246  pass:222  dwarn:3   dfail:0   fail:0   skip:21 
fi-skl-6770hq    total:246  pass:233  dwarn:0   dfail:0   fail:0   skip:13 
fi-snb-2520m     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-snb-2600      total:246  pass:214  dwarn:0   dfail:0   fail:0   skip:32 

3449456d6100818aa0f569f2d8a4e1134ae0a5de drm-tip: 2017y-01m-10d-20h-56m-50s UTC integration manifest
e028665 drm/i915: Prefer random replacement before eviction search
18ee830 drm/i915: Extract reserving space in the GTT to a helper
ab2406f drm/i915: Use the MRU stack search after evicting

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3471/

* Re: [PATCH 2/3] drm/i915: Extract reserving space in the GTT to a helper
  2017-01-10 21:55 ` [PATCH 2/3] drm/i915: Extract reserving space in the GTT to a helper Chris Wilson
@ 2017-01-11  7:06   ` Joonas Lahtinen
  0 siblings, 0 replies; 10+ messages in thread
From: Joonas Lahtinen @ 2017-01-11  7:06 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ti, 2017-01-10 at 21:55 +0000, Chris Wilson wrote:
> Extract drm_mm_reserve_node + calling i915_gem_evict_for_node into its
> own routine so that it can be shared rather than duplicated.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Cc: the gvt m-l

<SNIP>

Add kerneldoc here.

> +int i915_gem_gtt_reserve(struct i915_address_space *vm,
> +			 struct drm_mm_node *node,
> +			 u64 size, u64 offset, unsigned long color,
> +			 unsigned int flags)
> +{

With kerneldoc and Cc:

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 3/3] drm/i915: Prefer random replacement before eviction search
  2017-01-10 21:55 ` [PATCH 3/3] drm/i915: Prefer random replacement before eviction search Chris Wilson
@ 2017-01-11  7:47   ` Joonas Lahtinen
  2017-01-11  9:37     ` Chris Wilson
  2017-01-11 10:02     ` [PATCH] " Chris Wilson
  0 siblings, 2 replies; 10+ messages in thread
From: Joonas Lahtinen @ 2017-01-11  7:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ti, 2017-01-10 at 21:55 +0000, Chris Wilson wrote:
> Performing an eviction search can be very, very slow especially for a
> range restricted replacement. For example, a workload like
> gem_concurrent_blit will populate the entire GTT and then cause aperture
> thrashing. Since the GTT is a mix of active and inactive tiny objects,
> we have to search through almost 400k objects before finding anything
> inside the mappable region, and as this search is required before every
> operation performance falls off a cliff.
> 
> Instead of performing the full search, we do a trial replacement of the
> node at a random location fitting the specified restrictions. We lose
> the strict LRU property of the GTT in exchange for avoiding the slow
> search (several orders of runtime improvement for gem_concurrent_blit
> 4KiB-global-gtt, e.g. from 5000s to 20s). The loss of LRU replacement is
> (later) mitigated firstly by only doing replacement if we find no
> freespace and secondly by execbuf doing a PIN_NONBLOCK search first before
> it starts thrashing (i.e. the random replacement will only occur from the
> already inactive set of objects).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

<SNIP>

> +static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
> +{

The usual GEM_BUG_ON dance to make sure the inputs make some sense. Or
are you relying on the upper level callers?

> +	u64 range, addr;
> +
> +	if (align == 0)
> +		align = I915_GTT_MIN_ALIGNMENT;
> +
> +	range = round_down(end - len, align) - round_up(start, align);

For example this may cause an odd result.
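
(To make the odd result concrete, with hypothetical inputs such as
start = 4096, end = 12288, len = 8192, align = 8192 we get
round_down(end - len, align) = 0 but round_up(start, align) = 8192, so the
u64 subtraction wraps to an enormous range. The current checks in
i915_gem_gtt_insert() happen to reject that case before calling in, but
asserting the preconditions inside the helper documents the requirement.)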

> @@ -3629,6 +3655,16 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
>  	if (err != -ENOSPC)
>  		return err;
>  
> +	/* No free space, pick a slot at random */
> +	err = i915_gem_gtt_reserve(vm, node,
> +				   size,
> +				   random_offset(start, end, size, alignment),

I'd pull this to a line above just to make it more humane to read.

> +				   color,
> +				   flags);
> +	if (err != -ENOSPC)
> +		return err;
> +
> +	/* Randomly selected placement is pinned, do a search */
>  	err = i915_gem_evict_something(vm, size, alignment, color,
>  				       start, end, flags);
>  	if (err)

I'm a bit unsure why it would make such a big difference, but if you've
been running the numbers. Code itself is all good, so this is;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 3/3] drm/i915: Prefer random replacement before eviction search
  2017-01-11  7:47   ` Joonas Lahtinen
@ 2017-01-11  9:37     ` Chris Wilson
  2017-01-11 10:02     ` [PATCH] " Chris Wilson
  1 sibling, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2017-01-11  9:37 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Wed, Jan 11, 2017 at 09:47:41AM +0200, Joonas Lahtinen wrote:
> On ti, 2017-01-10 at 21:55 +0000, Chris Wilson wrote:
> > Performing an eviction search can be very, very slow especially for a
> > range restricted replacement. For example, a workload like
> > gem_concurrent_blit will populate the entire GTT and then cause aperture
> > thrashing. Since the GTT is a mix of active and inactive tiny objects,
> > we have to search through almost 400k objects before finding anything
> > inside the mappable region, and as this search is required before every
> > operation performance falls off a cliff.
> > 
> > Instead of performing the full search, we do a trial replacement of the
> > node at a random location fitting the specified restrictions. We lose
> > the strict LRU property of the GTT in exchange for avoiding the slow
> > search (several orders of runtime improvement for gem_concurrent_blit
> > 4KiB-global-gtt, e.g. from 5000s to 20s). The loss of LRU replacement is
> > (later) mitigated firstly by only doing replacement if we find no
> > freespace and secondly by execbuf doing a PIN_NONBLOCK search first before
> > it starts thrashing (i.e. the random replacement will only occur from the
> > already inactive set of objects).
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> 
> <SNIP>
> 
> > +static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
> > +{
> 
> The usual GEM_BUG_ON dance to make sure the inputs make some sense. Or
> are you relying on the upper level callers?

It was static and the callers were checking, but yeah might as well catch
them whilst we think about it.
 
> > +	u64 range, addr;
> > +
> > +	if (align == 0)
> > +		align = I915_GTT_MIN_ALIGNMENT;
> > +
> > +	range = round_down(end - len, align) - round_up(start, align);
> 
> For example this may cause an odd result.
> 
> > @@ -3629,6 +3655,16 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
> >  	if (err != -ENOSPC)
> >  		return err;
> >  
> > +	/* No free space, pick a slot at random */
> > +	err = i915_gem_gtt_reserve(vm, node,
> > +				   size,
> > +				   random_offset(start, end, size, alignment),
> 
> I'd pull this to a line above just to make it more humane to read.

> > +				   color,
> > +				   flags);
> > +	if (err != -ENOSPC)
> > +		return err;
> > +
> > +	/* Randomly selected placement is pinned, do a search */
> >  	err = i915_gem_evict_something(vm, size, alignment, color,
> >  				       start, end, flags);
> >  	if (err)
> 
> I'm bit unsure why it would make such a big difference, but if you've
> been running the numbers. Code itself is all good, so this is;


The pathological case we have is

	|<-- 256 MiB aperture (64k objects) -->||<-- 1792 MiB unmappable (448k objects) -->|

Now imagine that the eviction LRU is ordered top-down (just because
pathology meets real life), and that we need to evict an object to make
room inside the aperture. The eviction scan then has to walk 448k entries
of the list before it finds one within range. And now imagine that it has to
search for a new hole between every byte inside the memcpy, for several
simultaneous clients.
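
(As a back-of-the-envelope check on those figures, assuming 4 KiB objects:
256 MiB / 4 KiB = 65536, i.e. ~64k objects in the aperture, and
1792 MiB / 4 KiB = 458752, i.e. ~448k objects in the unmappable region.)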

If there are a few holes in the unmappable region, we also have a
similar problem with hole skipping inside the drm_mm range search. This
is mitigated by using DRM_MM_INSERT_LOW, but only once we have that
support in drm_mm. Right now, the drm_mm search is also having to walk
the MRU rejecting the holes above the full aperture.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* [PATCH] drm/i915: Prefer random replacement before eviction search
  2017-01-11  7:47   ` Joonas Lahtinen
  2017-01-11  9:37     ` Chris Wilson
@ 2017-01-11 10:02     ` Chris Wilson
  2017-01-11 10:28       ` [PATCH v3] " Chris Wilson
  1 sibling, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2017-01-11 10:02 UTC (permalink / raw)
  To: intel-gfx

Performing an eviction search can be very, very slow especially for a
range restricted replacement. For example, a workload like
gem_concurrent_blit will populate the entire GTT and then cause aperture
thrashing. Since the GTT is a mix of active and inactive tiny objects,
we have to search through almost 400k objects before finding anything
inside the mappable region, and as this search is required before every
operation performance falls off a cliff.

Instead of performing the full search, we do a trial replacement of the
node at a random location fitting the specified restrictions. We lose
the strict LRU property of the GTT in exchange for avoiding the slow
search (several orders of magnitude improvement in runtime for gem_concurrent_blit
4KiB-global-gtt, e.g. from 5000s to 20s). The loss of LRU replacement is
(later) mitigated firstly by only doing replacement if we find no
freespace and secondly by execbuf doing a PIN_NONBLOCK search first before
it starts thrashing (i.e. the random replacement will only occur from the
already inactive set of objects).

v2: Ascii-art, and check preconditions

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 52 +++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a3ea32f79d86..9aa53bdf5a48 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -24,6 +24,7 @@
  */
 
 #include <linux/log2.h>
+#include <linux/random.h>
 #include <linux/seq_file.h>
 #include <linux/stop_machine.h>
 
@@ -3581,12 +3582,38 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm,
 	return err;
 }
 
+static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
+{
+	u64 range, addr;
+
+	GEM_BUG_ON(range_overflows(start, len, end));
+	GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));
+
+	range = round_down(end - len, align) - round_up(start, align);
+	if (range) {
+		if (sizeof(unsigned long) == sizeof(u64)) {
+			addr = get_random_long();
+		} else {
+			addr = get_random_int();
+			if (range > U32_MAX) {
+				addr <<= 32;
+				addr |= get_random_int();
+			}
+		}
+		div64_u64_rem(addr, range, &addr);
+		start += addr;
+	}
+
+	return round_up(start, align);
+}
+
 int i915_gem_gtt_insert(struct i915_address_space *vm,
 			struct drm_mm_node *node,
 			u64 size, u64 alignment, unsigned long color,
 			u64 start, u64 end, unsigned int flags)
 {
 	u32 search_flag, alloc_flag;
+	u64 offset;
 	int err;
 
 	lockdep_assert_held(&vm->i915->drm.struct_mutex);
@@ -3629,6 +3656,31 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	if (err != -ENOSPC)
 		return err;
 
+	/* No free space, pick a slot at random.
+	 *
+	 * There is a pathological case here using a GTT shared between
+	 * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
+	 *
+	 *    |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
+	 *         (64k objects)             (448k objects)
+	 *
+	 * Now imagine that the eviction LRU is ordered top-down (just because
+	 * pathology meets real life), and that we need to evict an object to
+	 * make room inside the aperture. The eviction scan then has to walk
+	 * the list 448k before it finds one within range. And now imagine that
+	 * it has to search for a new hole between every byte inside the memcpy,
+	 * for several simultaneous clients.
+	 *
+	 * On a full-ppgtt system, if we have run out of available space, there
+	 * will be lots and lots of objects in the eviction list!
+	 */
+	offset = random_offset(start, end,
+			       size, alignment ?: I915_GTT_MIN_ALIGNMENT);
+	err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
+	if (err != -ENOSPC)
+		return err;
+
+	/* Randomly selected placement is pinned, do a search */
 	err = i915_gem_evict_something(vm, size, alignment, color,
 				       start, end, flags);
 	if (err)
-- 
2.11.0


* [PATCH v3] drm/i915: Prefer random replacement before eviction search
  2017-01-11 10:02     ` [PATCH] " Chris Wilson
@ 2017-01-11 10:28       ` Chris Wilson
  0 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2017-01-11 10:28 UTC (permalink / raw)
  To: intel-gfx

Performing an eviction search can be very, very slow especially for a
range restricted replacement. For example, a workload like
gem_concurrent_blit will populate the entire GTT and then cause aperture
thrashing. Since the GTT is a mix of active and inactive tiny objects,
we have to search through almost 400k objects before finding anything
inside the mappable region, and as this search is required before every
operation performance falls off a cliff.

Instead of performing the full search, we do a trial replacement of the
node at a random location fitting the specified restrictions. We lose
the strict LRU property of the GTT in exchange for avoiding the slow
search (several orders of magnitude improvement in runtime for gem_concurrent_blit
4KiB-global-gtt, e.g. from 5000s to 20s). The loss of LRU replacement is
(later) mitigated firstly by only doing replacement if we find no
freespace and secondly by execbuf doing a PIN_NONBLOCK search first before
it starts thrashing (i.e. the random replacement will only occur from the
already inactive set of objects).

v2: Ascii-art, and check preconditions
v3: Rephrase final sentence in comment to explain why we don't bother with
if (i915_is_ggtt(vm)) for preferring random replacement.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 56 +++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a3ea32f79d86..b320cdd22f2f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -24,6 +24,7 @@
  */
 
 #include <linux/log2.h>
+#include <linux/random.h>
 #include <linux/seq_file.h>
 #include <linux/stop_machine.h>
 
@@ -3581,12 +3582,38 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm,
 	return err;
 }
 
+static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
+{
+	u64 range, addr;
+
+	GEM_BUG_ON(range_overflows(start, len, end));
+	GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));
+
+	range = round_down(end - len, align) - round_up(start, align);
+	if (range) {
+		if (sizeof(unsigned long) == sizeof(u64)) {
+			addr = get_random_long();
+		} else {
+			addr = get_random_int();
+			if (range > U32_MAX) {
+				addr <<= 32;
+				addr |= get_random_int();
+			}
+		}
+		div64_u64_rem(addr, range, &addr);
+		start += addr;
+	}
+
+	return round_up(start, align);
+}
+
 int i915_gem_gtt_insert(struct i915_address_space *vm,
 			struct drm_mm_node *node,
 			u64 size, u64 alignment, unsigned long color,
 			u64 start, u64 end, unsigned int flags)
 {
 	u32 search_flag, alloc_flag;
+	u64 offset;
 	int err;
 
 	lockdep_assert_held(&vm->i915->drm.struct_mutex);
@@ -3629,6 +3656,35 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	if (err != -ENOSPC)
 		return err;
 
+	/* No free space, pick a slot at random.
+	 *
+	 * There is a pathological case here using a GTT shared between
+	 * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
+	 *
+	 *    |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
+	 *         (64k objects)             (448k objects)
+	 *
+	 * Now imagine that the eviction LRU is ordered top-down (just because
+	 * pathology meets real life), and that we need to evict an object to
+	 * make room inside the aperture. The eviction scan then has to walk
+	 * the 448k list before it finds one within range. And now imagine that
+	 * it has to search for a new hole between every byte inside the memcpy,
+	 * for several simultaneous clients.
+	 *
+	 * On a full-ppgtt system, if we have run out of available space, there
+	 * will be lots and lots of objects in the eviction list! Again,
+	 * searching that LRU list may be slow if we are also applying any
+	 * range restrictions (e.g. restriction to low 4GiB) and so, for
+	 * simplicity and similarity between different GTTs, try the single
+	 * random replacement first.
+	 */
+	offset = random_offset(start, end,
+			       size, alignment ?: I915_GTT_MIN_ALIGNMENT);
+	err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
+	if (err != -ENOSPC)
+		return err;
+
+	/* Randomly selected placement is pinned, do a search */
 	err = i915_gem_evict_something(vm, size, alignment, color,
 				       start, end, flags);
 	if (err)
-- 
2.11.0


* ✗ Fi.CI.BAT: warning for series starting with [1/3] drm/i915: Use the MRU stack search after evicting (rev3)
  2017-01-10 21:55 [PATCH 1/3] drm/i915: Use the MRU stack search after evicting Chris Wilson
                   ` (2 preceding siblings ...)
  2017-01-10 22:23 ` ✓ Fi.CI.BAT: success for series starting with [1/3] drm/i915: Use the MRU stack search after evicting Patchwork
@ 2017-01-11 10:53 ` Patchwork
  3 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2017-01-11 10:53 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Use the MRU stack search after evicting (rev3)
URL   : https://patchwork.freedesktop.org/series/17784/
State : warning

== Summary ==

Series 17784v3 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/17784/revisions/3/mbox/

Test kms_force_connector_basic:
        Subgroup force-connector-state:
                pass       -> DMESG-WARN (fi-snb-2520m)

fi-bdw-5557u     total:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-bsw-n3050     total:246  pass:207  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:246  pass:224  dwarn:0   dfail:0   fail:0   skip:22 
fi-bxt-t5700     total:82   pass:69   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:246  pass:219  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-hsw-4770r     total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-ivb-3520m     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-ivb-3770      total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-kbl-7500u     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6260u     total:246  pass:233  dwarn:0   dfail:0   fail:0   skip:13 
fi-skl-6700hq    total:246  pass:226  dwarn:0   dfail:0   fail:0   skip:20 
fi-skl-6700k     total:246  pass:222  dwarn:3   dfail:0   fail:0   skip:21 
fi-skl-6770hq    total:246  pass:233  dwarn:0   dfail:0   fail:0   skip:13 
fi-snb-2520m     total:246  pass:214  dwarn:1   dfail:0   fail:0   skip:31 
fi-snb-2600      total:246  pass:214  dwarn:0   dfail:0   fail:0   skip:32 

abf5260be6dda4ade94e8edf66e133260083f29b drm-tip: 2017y-01m-10d-23h-42m-21s UTC integration manifest
cee7403 drm/i915: Prefer random replacement before eviction search
df15dc1 drm/i915: Extract reserving space in the GTT to a helper
c4a94c9 drm/i915: Use the MRU stack search after evicting

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3475/

Thread overview: 10+ messages
2017-01-10 21:55 [PATCH 1/3] drm/i915: Use the MRU stack search after evicting Chris Wilson
2017-01-10 21:55 ` [PATCH 2/3] drm/i915: Extract reserving space in the GTT to a helper Chris Wilson
2017-01-11  7:06   ` Joonas Lahtinen
2017-01-10 21:55 ` [PATCH 3/3] drm/i915: Prefer random replacement before eviction search Chris Wilson
2017-01-11  7:47   ` Joonas Lahtinen
2017-01-11  9:37     ` Chris Wilson
2017-01-11 10:02     ` [PATCH] " Chris Wilson
2017-01-11 10:28       ` [PATCH v3] " Chris Wilson
2017-01-10 22:23 ` ✓ Fi.CI.BAT: success for series starting with [1/3] drm/i915: Use the MRU stack search after evicting Patchwork
2017-01-11 10:53 ` ✗ Fi.CI.BAT: warning for series starting with [1/3] drm/i915: Use the MRU stack search after evicting (rev3) Patchwork
