All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/3] drm/i915: Use the MRU stack search after evicting
@ 2017-01-11 11:23 Chris Wilson
  2017-01-11 11:23 ` [PATCH v2 2/3] drm/i915: Extract reserving space in the GTT to a helper Chris Wilson
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Chris Wilson @ 2017-01-11 11:23 UTC (permalink / raw)
  To: intel-gfx

When we evict from the GTT to make room for an object, the hole we
create is put onto the MRU stack inside the drm_mm range manager. On the
next search pass, we can speed up a PIN_HIGH allocation by referencing
that stack for the new hole.

v2: Pull together the 3 identical implementations (ahem, a couple were
outdated) into a common routine for allocating a node and evicting as
necessary.
v3: Detect invalid calls to i915_gem_gtt_insert()
v4: kerneldoc

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/aperture_gm.c |  33 +++------
 drivers/gpu/drm/i915/i915_gem_gtt.c    | 121 +++++++++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_gem_gtt.h    |   5 ++
 drivers/gpu/drm/i915/i915_vma.c        |  40 ++---------
 4 files changed, 119 insertions(+), 80 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 016227d77dd4..a97d56ea3d83 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -41,47 +41,34 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
 {
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct drm_i915_private *dev_priv = gvt->dev_priv;
-	u32 alloc_flag, search_flag;
+	unsigned int flags;
 	u64 start, end, size;
 	struct drm_mm_node *node;
-	int retried = 0;
 	int ret;
 
 	if (high_gm) {
-		search_flag = DRM_MM_SEARCH_BELOW;
-		alloc_flag = DRM_MM_CREATE_TOP;
 		node = &vgpu->gm.high_gm_node;
 		size = vgpu_hidden_sz(vgpu);
 		start = gvt_hidden_gmadr_base(gvt);
 		end = gvt_hidden_gmadr_end(gvt);
+		flags = PIN_HIGH;
 	} else {
-		search_flag = DRM_MM_SEARCH_DEFAULT;
-		alloc_flag = DRM_MM_CREATE_DEFAULT;
 		node = &vgpu->gm.low_gm_node;
 		size = vgpu_aperture_sz(vgpu);
 		start = gvt_aperture_gmadr_base(gvt);
 		end = gvt_aperture_gmadr_end(gvt);
+		flags = PIN_MAPPABLE;
 	}
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
-search_again:
-	ret = drm_mm_insert_node_in_range_generic(&dev_priv->ggtt.base.mm,
-						  node, size, 4096,
-						  I915_COLOR_UNEVICTABLE,
-						  start, end, search_flag,
-						  alloc_flag);
-	if (ret) {
-		ret = i915_gem_evict_something(&dev_priv->ggtt.base,
-					       size, 4096,
-					       I915_COLOR_UNEVICTABLE,
-					       start, end, 0);
-		if (ret == 0 && ++retried < 3)
-			goto search_again;
-
-		gvt_err("fail to alloc %s gm space from host, retried %d\n",
-				high_gm ? "high" : "low", retried);
-	}
+	ret = i915_gem_gtt_insert(&dev_priv->ggtt.base, node,
+				  size, 4096, I915_COLOR_UNEVICTABLE,
+				  start, end, flags);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
+	if (ret)
+		gvt_err("fail to alloc %s gm space from host\n",
+			high_gm ? "high" : "low");
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8aca11f5f446..136f90ba95ab 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -23,10 +23,13 @@
  *
  */
 
+#include <linux/log2.h>
 #include <linux/seq_file.h>
 #include <linux/stop_machine.h>
+
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
 #include "i915_vgpu.h"
 #include "i915_trace.h"
@@ -2032,7 +2035,6 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
 	struct i915_address_space *vm = &ppgtt->base;
 	struct drm_i915_private *dev_priv = ppgtt->base.i915;
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	bool retried = false;
 	int ret;
 
 	/* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
@@ -2045,29 +2047,14 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
 	if (ret)
 		return ret;
 
-alloc:
-	ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, &ppgtt->node,
-						  GEN6_PD_SIZE, GEN6_PD_ALIGN,
-						  I915_COLOR_UNEVICTABLE,
-						  0, ggtt->base.total,
-						  DRM_MM_TOPDOWN);
-	if (ret == -ENOSPC && !retried) {
-		ret = i915_gem_evict_something(&ggtt->base,
-					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
-					       I915_COLOR_UNEVICTABLE,
-					       0, ggtt->base.total,
-					       0);
-		if (ret)
-			goto err_out;
-
-		retried = true;
-		goto alloc;
-	}
-
+	ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node,
+				  GEN6_PD_SIZE, GEN6_PD_ALIGN,
+				  I915_COLOR_UNEVICTABLE,
+				  0, ggtt->base.total,
+				  PIN_HIGH);
 	if (ret)
 		goto err_out;
 
-
 	if (ppgtt->node.start < ggtt->mappable_end)
 		DRM_DEBUG("Forced to use aperture for PDEs\n");
 
@@ -3567,3 +3554,95 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
 	return ret;
 }
 
+/**
+ * i915_gem_gtt_insert - insert a node into an address_space (GTT)
+ * @vm - the &struct i915_address_space
+ * @node - the &struct drm_mm_node (typically i915_vma.node)
+ * @size - how much space to allocate inside the GTT,
+ *         must be #I915_GTT_PAGE_SIZE aligned
+ * @alignment - required alignment of starting offset, may be 0 but
+ *              if specified, this must be a power-of-two and at least
+ *              #I915_GTT_MIN_ALIGNMENT
+ * @color - color to apply to node
+ * @start - start of any range restriction inside GTT (0 for all),
+ *          must be #I915_GTT_PAGE_SIZE aligned
+ * @end - end of any range restriction inside GTT (U64_MAX for all),
+ *        must be #I915_GTT_PAGE_SIZE aligned
+ * @flags - control search and eviction behaviour
+ *
+ * i915_gem_gtt_insert() first searches for an available hole into which
+ * it can insert the node. The hole address is aligned to @alignment and
+ * its @size must then fit entirely within the [@start, @end] bounds. The
+ * nodes on either side of the hole must match @color, or else a guard page
+ * will be inserted between the two nodes (or the node evicted). If no
+ * suitable hole is found, then the LRU list of objects within the GTT
+ * is scanned to find the first set of replacement nodes to create the hole.
+ * Those old overlapping nodes are evicted from the GTT (and so must be
+ * rebound before any future use). Any node that is currently pinned cannot
+ * be evicted (see i915_vma_pin()). Similarly, if the node's VMA is currently
+ * active and #PIN_NONBLOCK is specified, that node is also skipped when
+ * searching for an eviction candidate. See i915_gem_evict_something() for
+ * the gory details on the eviction algorithm.
+ *
+ * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
+ * asked to wait for eviction and interrupted.
+ */
+int i915_gem_gtt_insert(struct i915_address_space *vm,
+			struct drm_mm_node *node,
+			u64 size, u64 alignment, unsigned long color,
+			u64 start, u64 end, unsigned int flags)
+{
+	u32 search_flag, alloc_flag;
+	int err;
+
+	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	GEM_BUG_ON(!size);
+	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
+	GEM_BUG_ON(alignment && !is_power_of_2(alignment));
+	GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
+	GEM_BUG_ON(start >= end);
+	GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
+	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
+
+	if (unlikely(range_overflows(start, size, end)))
+		return -ENOSPC;
+
+	if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
+		return -ENOSPC;
+
+	if (flags & PIN_HIGH) {
+		search_flag = DRM_MM_SEARCH_BELOW;
+		alloc_flag = DRM_MM_CREATE_TOP;
+	} else {
+		search_flag = DRM_MM_SEARCH_DEFAULT;
+		alloc_flag = DRM_MM_CREATE_DEFAULT;
+	}
+
+	/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
+	 * so we know that we always have a minimum alignment of 4096.
+	 * The drm_mm range manager is optimised to return results
+	 * with zero alignment, so where possible use the optimal
+	 * path.
+	 */
+	GEM_BUG_ON(size & (I915_GTT_MIN_ALIGNMENT - 1));
+	if (alignment <= I915_GTT_MIN_ALIGNMENT)
+		alignment = 0;
+
+	err = drm_mm_insert_node_in_range_generic(&vm->mm, node,
+						  size, alignment, color,
+						  start, end,
+						  search_flag, alloc_flag);
+	if (err != -ENOSPC)
+		return err;
+
+	err = i915_gem_evict_something(vm, size, alignment, color,
+				       start, end, flags);
+	if (err)
+		return err;
+
+	search_flag = DRM_MM_SEARCH_DEFAULT;
+	return drm_mm_insert_node_in_range_generic(&vm->mm, node,
+						   size, alignment, color,
+						   start, end,
+						   search_flag, alloc_flag);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 34a4fd560fa2..79198352a491 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -532,6 +532,11 @@ int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 			       struct sg_table *pages);
 
+int i915_gem_gtt_insert(struct i915_address_space *vm,
+			struct drm_mm_node *node,
+			u64 size, u64 alignment, unsigned long color,
+			u64 start, u64 end, unsigned int flags);
+
 /* Flags used by pin/bind&friends. */
 #define PIN_NONBLOCK		BIT(0)
 #define PIN_MAPPABLE		BIT(1)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 490914f89663..df3750d4c907 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -431,43 +431,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 				goto err_unpin;
 		}
 	} else {
-		u32 search_flag, alloc_flag;
-
-		if (flags & PIN_HIGH) {
-			search_flag = DRM_MM_SEARCH_BELOW;
-			alloc_flag = DRM_MM_CREATE_TOP;
-		} else {
-			search_flag = DRM_MM_SEARCH_DEFAULT;
-			alloc_flag = DRM_MM_CREATE_DEFAULT;
-		}
-
-		/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
-		 * so we know that we always have a minimum alignment of 4096.
-		 * The drm_mm range manager is optimised to return results
-		 * with zero alignment, so where possible use the optimal
-		 * path.
-		 */
-		if (alignment <= I915_GTT_MIN_ALIGNMENT)
-			alignment = 0;
-
-search_free:
-		ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
-							  &vma->node,
-							  size, alignment,
-							  obj->cache_level,
-							  start, end,
-							  search_flag,
-							  alloc_flag);
-		if (ret) {
-			ret = i915_gem_evict_something(vma->vm, size, alignment,
-						       obj->cache_level,
-						       start, end,
-						       flags);
-			if (ret == 0)
-				goto search_free;
-
+		ret = i915_gem_gtt_insert(vma->vm, &vma->node,
+					  size, alignment, obj->cache_level,
+					  start, end, flags);
+		if (ret)
 			goto err_unpin;
-		}
 
 		GEM_BUG_ON(vma->node.start < start);
 		GEM_BUG_ON(vma->node.start + vma->node.size > end);
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 2/3] drm/i915: Extract reserving space in the GTT to a helper
  2017-01-11 11:23 [PATCH v2 1/3] drm/i915: Use the MRU stack search after evicting Chris Wilson
@ 2017-01-11 11:23 ` Chris Wilson
  2017-01-12  2:13   ` [igvt-g-dev] " Zhenyu Wang
  2017-01-11 11:23 ` [PATCH v2 3/3] drm/i915: Prefer random replacement before eviction search Chris Wilson
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 7+ messages in thread
From: Chris Wilson @ 2017-01-11 11:23 UTC (permalink / raw)
  To: intel-gfx; +Cc: igvt-g-dev

Extract drm_mm_reserve_node + calling i915_gem_evict_for_node into its
own routine so that it can be shared rather than duplicated.

v2: Kerneldoc

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: igvt-g-dev@lists.01.org
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h        |  5 ++--
 drivers/gpu/drm/i915/i915_gem_evict.c  | 33 ++++++++++++----------
 drivers/gpu/drm/i915/i915_gem_gtt.c    | 51 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_gtt.h    |  5 ++++
 drivers/gpu/drm/i915/i915_gem_stolen.c |  7 ++---
 drivers/gpu/drm/i915/i915_trace.h      | 16 +++++------
 drivers/gpu/drm/i915/i915_vgpu.c       | 33 ++++++++--------------
 drivers/gpu/drm/i915/i915_vma.c        | 16 ++++-------
 8 files changed, 105 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 89e0038ea26b..a29d138b6906 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3468,8 +3468,9 @@ int __must_check i915_gem_evict_something(struct i915_address_space *vm,
 					  unsigned cache_level,
 					  u64 start, u64 end,
 					  unsigned flags);
-int __must_check i915_gem_evict_for_vma(struct i915_vma *vma,
-					unsigned int flags);
+int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
+					 struct drm_mm_node *node,
+					 unsigned int flags);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 
 /* belongs in i915_gem_gtt.h */
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 6a5415e31acf..50b4645bf627 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -231,7 +231,8 @@ i915_gem_evict_something(struct i915_address_space *vm,
 
 /**
  * i915_gem_evict_for_vma - Evict vmas to make room for binding a new one
- * @target: address space and range to evict for
+ * @vm: address space to evict from
+ * @target: range (and color) to evict for
  * @flags: additional flags to control the eviction algorithm
  *
  * This function will try to evict vmas that overlap the target node.
@@ -239,18 +240,20 @@ i915_gem_evict_something(struct i915_address_space *vm,
  * To clarify: This is for freeing up virtual address space, not for freeing
  * memory in e.g. the shrinker.
  */
-int i915_gem_evict_for_vma(struct i915_vma *target, unsigned int flags)
+int i915_gem_evict_for_node(struct i915_address_space *vm,
+			    struct drm_mm_node *target,
+			    unsigned int flags)
 {
 	LIST_HEAD(eviction_list);
 	struct drm_mm_node *node;
-	u64 start = target->node.start;
-	u64 end = start + target->node.size;
+	u64 start = target->start;
+	u64 end = start + target->size;
 	struct i915_vma *vma, *next;
 	bool check_color;
 	int ret = 0;
 
-	lockdep_assert_held(&target->vm->i915->drm.struct_mutex);
-	trace_i915_gem_evict_vma(target, flags);
+	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	trace_i915_gem_evict_node(vm, target, flags);
 
 	/* Retire before we search the active list. Although we have
 	 * reasonable accuracy in our retirement lists, we may have
@@ -258,18 +261,18 @@ int i915_gem_evict_for_vma(struct i915_vma *target, unsigned int flags)
 	 * retiring.
 	 */
 	if (!(flags & PIN_NONBLOCK))
-		i915_gem_retire_requests(target->vm->i915);
+		i915_gem_retire_requests(vm->i915);
 
-	check_color = target->vm->mm.color_adjust;
+	check_color = vm->mm.color_adjust;
 	if (check_color) {
 		/* Expand search to cover neighbouring guard pages (or lack!) */
-		if (start > target->vm->start)
+		if (start > vm->start)
 			start -= I915_GTT_PAGE_SIZE;
-		if (end < target->vm->start + target->vm->total)
+		if (end < vm->start + vm->total)
 			end += I915_GTT_PAGE_SIZE;
 	}
 
-	drm_mm_for_each_node_in_range(node, &target->vm->mm, start, end) {
+	drm_mm_for_each_node_in_range(node, &vm->mm, start, end) {
 		/* If we find any non-objects (!vma), we cannot evict them */
 		if (node->color == I915_COLOR_UNEVICTABLE) {
 			ret = -ENOSPC;
@@ -285,12 +288,12 @@ int i915_gem_evict_for_vma(struct i915_vma *target, unsigned int flags)
 		 * those as well to make room for our guard pages.
 		 */
 		if (check_color) {
-			if (vma->node.start + vma->node.size == target->node.start) {
-				if (vma->node.color == target->node.color)
+			if (vma->node.start + vma->node.size == node->start) {
+				if (vma->node.color == node->color)
 					continue;
 			}
-			if (vma->node.start == target->node.start + target->node.size) {
-				if (vma->node.color == target->node.color)
+			if (vma->node.start == node->start + node->size) {
+				if (vma->node.color == node->color)
 					continue;
 			}
 		}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 136f90ba95ab..92b907f27986 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3555,6 +3555,57 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
 }
 
 /**
+ * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
+ * @vm - the &struct i915_address_space
+ * @node - the &struct drm_mm_node (typically i915_vma.node)
+ * @size - how much space to allocate inside the GTT,
+ *         must be #I915_GTT_PAGE_SIZE aligned
+ * @offset - where to insert inside the GTT,
+ *           must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
+ *           (@offset + @size) must fit within the address space
+ * @color - color to apply to node
+ * @flags - control search and eviction behaviour
+ *
+ * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
+ * the address space (using @size and @color). If the @node does not fit, it
+ * tries to evict any overlapping nodes from the GTT, including any
+ * neighbouring nodes if the colors do not match (to ensure guard pages between
+ * differing domains). See i915_gem_evict_for_node() for the gory details
+ * on the eviction algorithm. #PIN_NONBLOCK may be used to prevent waiting on
+ * evicting active overlapping objects, and any overlapping node that is pinned
+ * or marked as unevictable will also result in failure.
+ *
+ * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
+ * asked to wait for eviction and interrupted.
+ */
+int i915_gem_gtt_reserve(struct i915_address_space *vm,
+			 struct drm_mm_node *node,
+			 u64 size, u64 offset, unsigned long color,
+			 unsigned int flags)
+{
+	int err;
+
+	GEM_BUG_ON(!size);
+	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
+	GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
+	GEM_BUG_ON(range_overflows(offset, size, vm->total));
+
+	node->size = size;
+	node->start = offset;
+	node->color = color;
+
+	err = drm_mm_reserve_node(&vm->mm, node);
+	if (err != -ENOSPC)
+		return err;
+
+	err = i915_gem_evict_for_node(vm, node, flags);
+	if (err == 0)
+		err = drm_mm_reserve_node(&vm->mm, node);
+
+	return err;
+}
+
+/**
  * i915_gem_gtt_insert - insert a node into an address_space (GTT)
  * @vm - the &struct i915_address_space
+ * @node - the &struct drm_mm_node (typically i915_vma.node)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 79198352a491..3e031a057f78 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -532,6 +532,11 @@ int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 			       struct sg_table *pages);
 
+int i915_gem_gtt_reserve(struct i915_address_space *vm,
+			 struct drm_mm_node *node,
+			 u64 size, u64 offset, unsigned long color,
+			 unsigned int flags);
+
 int i915_gem_gtt_insert(struct i915_address_space *vm,
 			struct drm_mm_node *node,
 			u64 size, u64 alignment, unsigned long color,
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index e5be8e04bf3b..52dbb9bab268 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -694,10 +694,9 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	 * setting up the GTT space. The actual reservation will occur
 	 * later.
 	 */
-	vma->node.start = gtt_offset;
-	vma->node.size = size;
-
-	ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node);
+	ret = i915_gem_gtt_reserve(&ggtt->base, &vma->node,
+				   size, gtt_offset, obj->cache_level,
+				   0);
 	if (ret) {
 		DRM_DEBUG_KMS("failed to allocate stolen GTT space\n");
 		goto err_pages;
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 18ae37c411fd..4461df5a94fe 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -450,9 +450,9 @@ TRACE_EVENT(i915_gem_evict_vm,
 	    TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
 );
 
-TRACE_EVENT(i915_gem_evict_vma,
-	    TP_PROTO(struct i915_vma *vma, unsigned int flags),
-	    TP_ARGS(vma, flags),
+TRACE_EVENT(i915_gem_evict_node,
+	    TP_PROTO(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags),
+	    TP_ARGS(vm, node, flags),
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
@@ -464,11 +464,11 @@ TRACE_EVENT(i915_gem_evict_vma,
 			    ),
 
 	    TP_fast_assign(
-			   __entry->dev = vma->vm->i915->drm.primary->index;
-			   __entry->vm = vma->vm;
-			   __entry->start = vma->node.start;
-			   __entry->size = vma->node.size;
-			   __entry->color = vma->node.color;
+			   __entry->dev = vm->i915->drm.primary->index;
+			   __entry->vm = vm;
+			   __entry->start = node->start;
+			   __entry->size = node->size;
+			   __entry->color = node->color;
 			   __entry->flags = flags;
 			  ),
 
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index dae340cfc6c7..f1ad4fbb5ba7 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -116,22 +116,20 @@ void intel_vgt_deballoon(struct drm_i915_private *dev_priv)
 	memset(&bl_info, 0, sizeof(bl_info));
 }
 
-static int vgt_balloon_space(struct drm_mm *mm,
+static int vgt_balloon_space(struct i915_ggtt *ggtt,
 			     struct drm_mm_node *node,
 			     unsigned long start, unsigned long end)
 {
 	unsigned long size = end - start;
 
-	if (start == end)
+	if (start >= end)
 		return -EINVAL;
 
 	DRM_INFO("balloon space: range [ 0x%lx - 0x%lx ] %lu KiB.\n",
 		 start, end, size / 1024);
-
-	node->start = start;
-	node->size = size;
-
-	return drm_mm_reserve_node(mm, node);
+	return i915_gem_gtt_reserve(&ggtt->base, node,
+				    size, start, I915_COLOR_UNEVICTABLE,
+				    0);
 }
 
 /**
@@ -214,10 +212,8 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv)
 
 	/* Unmappable graphic memory ballooning */
 	if (unmappable_base > ggtt->mappable_end) {
-		ret = vgt_balloon_space(&ggtt->base.mm,
-					&bl_info.space[2],
-					ggtt->mappable_end,
-					unmappable_base);
+		ret = vgt_balloon_space(ggtt, &bl_info.space[2],
+					ggtt->mappable_end, unmappable_base);
 
 		if (ret)
 			goto err;
@@ -228,18 +224,15 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv)
 	 * because it is reserved to the guard page.
 	 */
 	if (unmappable_end < ggtt_end - PAGE_SIZE) {
-		ret = vgt_balloon_space(&ggtt->base.mm,
-					&bl_info.space[3],
-					unmappable_end,
-					ggtt_end - PAGE_SIZE);
+		ret = vgt_balloon_space(ggtt, &bl_info.space[3],
+					unmappable_end, ggtt_end - PAGE_SIZE);
 		if (ret)
 			goto err;
 	}
 
 	/* Mappable graphic memory ballooning */
 	if (mappable_base > ggtt->base.start) {
-		ret = vgt_balloon_space(&ggtt->base.mm,
-					&bl_info.space[0],
+		ret = vgt_balloon_space(ggtt, &bl_info.space[0],
 					ggtt->base.start, mappable_base);
 
 		if (ret)
@@ -247,10 +240,8 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv)
 	}
 
 	if (mappable_end < ggtt->mappable_end) {
-		ret = vgt_balloon_space(&ggtt->base.mm,
-					&bl_info.space[1],
-					mappable_end,
-					ggtt->mappable_end);
+		ret = vgt_balloon_space(ggtt, &bl_info.space[1],
+					mappable_end, ggtt->mappable_end);
 
 		if (ret)
 			goto err;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index df3750d4c907..b74eeb73ae41 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -419,17 +419,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 			goto err_unpin;
 		}
 
-		vma->node.start = offset;
-		vma->node.size = size;
-		vma->node.color = obj->cache_level;
-		ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
-		if (ret) {
-			ret = i915_gem_evict_for_vma(vma, flags);
-			if (ret == 0)
-				ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
-			if (ret)
-				goto err_unpin;
-		}
+		ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
+					   size, offset, obj->cache_level,
+					   flags);
+		if (ret)
+			goto err_unpin;
 	} else {
 		ret = i915_gem_gtt_insert(vma->vm, &vma->node,
 					  size, alignment, obj->cache_level,
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 3/3] drm/i915: Prefer random replacement before eviction search
  2017-01-11 11:23 [PATCH v2 1/3] drm/i915: Use the MRU stack search after evicting Chris Wilson
  2017-01-11 11:23 ` [PATCH v2 2/3] drm/i915: Extract reserving space in the GTT to a helper Chris Wilson
@ 2017-01-11 11:23 ` Chris Wilson
  2017-01-11 11:53 ` ✓ Fi.CI.BAT: success for series starting with [v2,1/3] drm/i915: Use the MRU stack search after evicting Patchwork
  2017-01-11 12:04 ` [PATCH v2 1/3] " Joonas Lahtinen
  3 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2017-01-11 11:23 UTC (permalink / raw)
  To: intel-gfx

Performing an eviction search can be very, very slow especially for a
range restricted replacement. For example, a workload like
gem_concurrent_blit will populate the entire GTT and then cause aperture
thrashing. Since the GTT is a mix of active and inactive tiny objects,
we have to search through almost 400k objects before finding anything
inside the mappable region, and as this search is required before every
operation performance falls off a cliff.

Instead of performing the full search, we do a trial replacement of the
node at a random location fitting the specified restrictions. We lose
the strict LRU property of the GTT in exchange for avoiding the slow
search (several orders of runtime improvement for gem_concurrent_blit
4KiB-global-gtt, e.g. from 5000s to 20s). The loss of LRU replacement is
(later) mitigated firstly by only doing replacement if we find no
freespace and secondly by execbuf doing a PIN_NONBLOCK search first before
it starts thrashing (i.e. the random replacement will only occur from the
already inactive set of objects).

v2: Ascii-art, and check preconditions
v3: Rephrase final sentence in comment to explain why we don't bother with
if (i915_is_ggtt(vm)) for preferring random replacement.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 59 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 92b907f27986..0c48d6286419 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -24,6 +24,7 @@
  */
 
 #include <linux/log2.h>
+#include <linux/random.h>
 #include <linux/seq_file.h>
 #include <linux/stop_machine.h>
 
@@ -3605,6 +3606,31 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm,
 	return err;
 }
 
+static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
+{
+	u64 range, addr;
+
+	GEM_BUG_ON(range_overflows(start, len, end));
+	GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));
+
+	range = round_down(end - len, align) - round_up(start, align);
+	if (range) {
+		if (sizeof(unsigned long) == sizeof(u64)) {
+			addr = get_random_long();
+		} else {
+			addr = get_random_int();
+			if (range > U32_MAX) {
+				addr <<= 32;
+				addr |= get_random_int();
+			}
+		}
+		div64_u64_rem(addr, range, &addr);
+		start += addr;
+	}
+
+	return round_up(start, align);
+}
+
 /**
  * i915_gem_gtt_insert - insert a node into an address_space (GTT)
  * @vm - the &struct i915_address_space
@@ -3626,7 +3652,8 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm,
  * its @size must then fit entirely within the [@start, @end] bounds. The
  * nodes on either side of the hole must match @color, or else a guard page
  * will be inserted between the two nodes (or the node evicted). If no
- * suitable hole is found, then the LRU list of objects within the GTT
+ * suitable hole is found, first a victim is randomly selected and tested
+ * for eviction, otherwise the LRU list of objects within the GTT
  * is scanned to find the first set of replacement nodes to create the hole.
  * Those old overlapping nodes are evicted from the GTT (and so must be
  * rebound before any future use). Any node that is current pinned cannot
@@ -3644,6 +3671,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 			u64 start, u64 end, unsigned int flags)
 {
 	u32 search_flag, alloc_flag;
+	u64 offset;
 	int err;
 
 	lockdep_assert_held(&vm->i915->drm.struct_mutex);
@@ -3686,6 +3714,35 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	if (err != -ENOSPC)
 		return err;
 
+	/* No free space, pick a slot at random.
+	 *
+	 * There is a pathological case here using a GTT shared between
+	 * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
+	 *
+	 *    |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
+	 *         (64k objects)             (448k objects)
+	 *
+	 * Now imagine that the eviction LRU is ordered top-down (just because
+	 * pathology meets real life), and that we need to evict an object to
+	 * make room inside the aperture. The eviction scan then has to walk
+	 * the 448k list before it finds one within range. And now imagine that
+	 * it has to search for a new hole between every byte inside the memcpy,
+	 * for several simultaneous clients.
+	 *
+	 * On a full-ppgtt system, if we have run out of available space, there
+	 * will be lots and lots of objects in the eviction list! Again,
+	 * searching that LRU list may be slow if we are also applying any
+	 * range restrictions (e.g. restriction to low 4GiB) and so, for
+ * simplicity and similarity between different GTT, try the single
+	 * random replacement first.
+	 */
+	offset = random_offset(start, end,
+			       size, alignment ?: I915_GTT_MIN_ALIGNMENT);
+	err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
+	if (err != -ENOSPC)
+		return err;
+
+	/* Randomly selected placement is pinned, do a search */
 	err = i915_gem_evict_something(vm, size, alignment, color,
 				       start, end, flags);
 	if (err)
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [v2,1/3] drm/i915: Use the MRU stack search after evicting
  2017-01-11 11:23 [PATCH v2 1/3] drm/i915: Use the MRU stack search after evicting Chris Wilson
  2017-01-11 11:23 ` [PATCH v2 2/3] drm/i915: Extract reserving space in the GTT to a helper Chris Wilson
  2017-01-11 11:23 ` [PATCH v2 3/3] drm/i915: Prefer random replacement before eviction search Chris Wilson
@ 2017-01-11 11:53 ` Patchwork
  2017-01-11 12:04 ` [PATCH v2 1/3] " Joonas Lahtinen
  3 siblings, 0 replies; 7+ messages in thread
From: Patchwork @ 2017-01-11 11:53 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2,1/3] drm/i915: Use the MRU stack search after evicting
URL   : https://patchwork.freedesktop.org/series/17822/
State : success

== Summary ==

Series 17822v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/17822/revisions/1/mbox/


fi-bdw-5557u     total:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-bsw-n3050     total:246  pass:207  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:246  pass:224  dwarn:0   dfail:0   fail:0   skip:22 
fi-bxt-t5700     total:82   pass:69   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:246  pass:219  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-hsw-4770r     total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-ivb-3520m     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-ivb-3770      total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-kbl-7500u     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6260u     total:246  pass:233  dwarn:0   dfail:0   fail:0   skip:13 
fi-skl-6700hq    total:246  pass:226  dwarn:0   dfail:0   fail:0   skip:20 
fi-skl-6700k     total:246  pass:222  dwarn:3   dfail:0   fail:0   skip:21 
fi-skl-6770hq    total:246  pass:233  dwarn:0   dfail:0   fail:0   skip:13 
fi-snb-2520m     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-snb-2600      total:246  pass:214  dwarn:0   dfail:0   fail:0   skip:32 

abf5260be6dda4ade94e8edf66e133260083f29b drm-tip: 2017y-01m-10d-23h-42m-21s UTC integration manifest
0db2997 drm/i915: Prefer random replacement before eviction search
a2d9bf3 drm/i915: Extract reserving space in the GTT to a helper
be9c2e1 drm/i915: Use the MRU stack search after evicting

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3476/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 1/3] drm/i915: Use the MRU stack search after evicting
  2017-01-11 11:23 [PATCH v2 1/3] drm/i915: Use the MRU stack search after evicting Chris Wilson
                   ` (2 preceding siblings ...)
  2017-01-11 11:53 ` ✓ Fi.CI.BAT: success for series starting with [v2,1/3] drm/i915: Use the MRU stack search after evicting Patchwork
@ 2017-01-11 12:04 ` Joonas Lahtinen
  2017-01-11 12:25   ` Chris Wilson
  3 siblings, 1 reply; 7+ messages in thread
From: Joonas Lahtinen @ 2017-01-11 12:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2017-01-11 at 11:23 +0000, Chris Wilson wrote:
> When we evict from the GTT to make room for an object, the hole we
> create is put onto the MRU stack inside the drm_mm range manager. On the
> next search pass, we can speed up a PIN_HIGH allocation by referencing
> that stack for the new hole.
> 
> v2: Pull together the 3 identical implements (ahem, a couple were
> outdated) into a common routine for allocating a node and evicting as
> necessary.
> v3: Detect invalid calls to i915_gem_gtt_insert()
> v4: kerneldoc
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

<SNIP>

> +/**
> + * i915_gem_gtt_insert - insert a node into an address_space (GTT)
> + * @vm - the &struct i915_address_space

mixing &struct and @struct, I guess you meant &struct in later line
too.

> + * @node - the @struct drm_mm_node (typicallay i915_vma.mode)

"typically" and "i915_vma.node"

> + * @size - how much space to allocate inside the GTT,
> + *         must be #I915_GTT_PAGE_SIZE aligned
> + * @alignment - required alignment of starting offset, may be 0 but
> + *              if specified, this must be a power-of-two and at least
> + *              #I915_GTT_MIN_ALIGNMENT
> + * @color - color to apply to node
> + * @start - start of any range restriction inside GTT (0 for all),
> + *          must be #I915_GTT_PAGE_SIZE aligned
> + * @end - end of any range restriction inside GTT (U64_MAX for all),
> + *        must be #I915_GTT_PAGE_SIZE aligned
> + * @flags - control search and eviction behaviour
> + *
> + * i915_gem_gtt_insert() first searches for an available hole into which
> + * it can insert the node. The hole address is aligned to @alignment and
> + * its @size must then fit entirely within the [@start, @end] bounds. The
> + * nodes on either side of the hole must match @color, or else a guard page
> + * will be inserted between the two nodes (or the node evicted). If no
> + * suitable hole is found, then the LRU list of objects within the GTT
> + * is scanned to find the first set of replacement nodes to create the hole.
> + * Those old overlapping nodes are evicted from the GTT (and so must be
> + * rebound before any future use). Any node that is current pinned cannot

"currently"

> + * be evicted (see i915_vma_pin()). Similarly, if the node's VMA is currently
> + * active and #PIN_NONBLOCK is specified, that node is also skipped when
> + * searching for an eviction candidate. See i915_gem_evict_something() for
> + * the gory details on the eviction algorithm.
> + *
> + * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
> + * asked to wait for eviction and interrupted.
> + */

With those fixed, good to merge.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 1/3] drm/i915: Use the MRU stack search after evicting
  2017-01-11 12:04 ` [PATCH v2 1/3] " Joonas Lahtinen
@ 2017-01-11 12:25   ` Chris Wilson
  0 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2017-01-11 12:25 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Wed, Jan 11, 2017 at 02:04:53PM +0200, Joonas Lahtinen wrote:
> On ke, 2017-01-11 at 11:23 +0000, Chris Wilson wrote:
> > When we evict from the GTT to make room for an object, the hole we
> > create is put onto the MRU stack inside the drm_mm range manager. On the
> > next search pass, we can speed up a PIN_HIGH allocation by referencing
> > that stack for the new hole.
> > 
> > v2: Pull together the 3 identical implements (ahem, a couple were
> > outdated) into a common routine for allocating a node and evicting as
> > necessary.
> > v3: Detect invalid calls to i915_gem_gtt_insert()
> > v4: kerneldoc
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> 
> <SNIP>
> 
> > +/**
> > + * i915_gem_gtt_insert - insert a node into an address_space (GTT)
> > + * @vm - the &struct i915_address_space
> 
> mixing &struct and @struct, I guess you meant &struct in later line
> too.
> 
> > + * @node - the @struct drm_mm_node (typicallay i915_vma.mode)
> 
> "typically" and "i915_vma.node"
> 
> > + * @size - how much space to allocate inside the GTT,
> > + *         must be #I915_GTT_PAGE_SIZE aligned
> > + * @alignment - required alignment of starting offset, may be 0 but
> > + *              if specified, this must be a power-of-two and at least
> > + *              #I915_GTT_MIN_ALIGNMENT
> > + * @color - color to apply to node
> > + * @start - start of any range restriction inside GTT (0 for all),
> > + *          must be #I915_GTT_PAGE_SIZE aligned
> > + * @end - end of any range restriction inside GTT (U64_MAX for all),
> > + *        must be #I915_GTT_PAGE_SIZE aligned
> > + * @flags - control search and eviction behaviour
> > + *
> > + * i915_gem_gtt_insert() first searches for an available hole into which
> > + * it can insert the node. The hole address is aligned to @alignment and
> > + * its @size must then fit entirely within the [@start, @end] bounds. The
> > + * nodes on either side of the hole must match @color, or else a guard page
> > + * will be inserted between the two nodes (or the node evicted). If no
> > + * suitable hole is found, then the LRU list of objects within the GTT
> > + * is scanned to find the first set of replacement nodes to create the hole.
> > + * Those old overlapping nodes are evicted from the GTT (and so must be
> > + * rebound before any future use). Any node that is current pinned cannot
> 
> "currently"
> 
> > + * be evicted (see i915_vma_pin()). Similarly, if the node's VMA is currently
> > + * active and #PIN_NONBLOCK is specified, that node is also skipped when
> > + * searching for an eviction candidate. See i915_gem_evict_something() for
> > + * the gory details on the eviction algorithm.
> > + *
> > + * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
> > + * asked to wait for eviction and interrupted.
> > + */
> 
> With those fixed, good to merge.

Thanks for proof reading.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [igvt-g-dev] [PATCH v2 2/3] drm/i915: Extract reserving space in the GTT to a helper
  2017-01-11 11:23 ` [PATCH v2 2/3] drm/i915: Extract reserving space in the GTT to a helper Chris Wilson
@ 2017-01-12  2:13   ` Zhenyu Wang
  0 siblings, 0 replies; 7+ messages in thread
From: Zhenyu Wang @ 2017-01-12  2:13 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, igvt-g-dev


[-- Attachment #1.1: Type: text/plain, Size: 14520 bytes --]

On 2017.01.11 11:23:11 +0000, Chris Wilson wrote:
> Extract drm_mm_reserve_node + calling i915_gem_evict_for_node into its
> own routine so that it can be shared rather than duplicated.
> 
> v2: Kerneldoc
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: igvt-g-dev@lists.01.org
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---

Looks good for vgpu balloon change.

Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>

>  drivers/gpu/drm/i915/i915_drv.h        |  5 ++--
>  drivers/gpu/drm/i915/i915_gem_evict.c  | 33 ++++++++++++----------
>  drivers/gpu/drm/i915/i915_gem_gtt.c    | 51 ++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_gtt.h    |  5 ++++
>  drivers/gpu/drm/i915/i915_gem_stolen.c |  7 ++---
>  drivers/gpu/drm/i915/i915_trace.h      | 16 +++++------
>  drivers/gpu/drm/i915/i915_vgpu.c       | 33 ++++++++--------------
>  drivers/gpu/drm/i915/i915_vma.c        | 16 ++++-------
>  8 files changed, 105 insertions(+), 61 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 89e0038ea26b..a29d138b6906 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3468,8 +3468,9 @@ int __must_check i915_gem_evict_something(struct i915_address_space *vm,
>  					  unsigned cache_level,
>  					  u64 start, u64 end,
>  					  unsigned flags);
> -int __must_check i915_gem_evict_for_vma(struct i915_vma *vma,
> -					unsigned int flags);
> +int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
> +					 struct drm_mm_node *node,
> +					 unsigned int flags);
>  int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>  
>  /* belongs in i915_gem_gtt.h */
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 6a5415e31acf..50b4645bf627 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -231,7 +231,8 @@ i915_gem_evict_something(struct i915_address_space *vm,
>  
>  /**
>   * i915_gem_evict_for_vma - Evict vmas to make room for binding a new one
> - * @target: address space and range to evict for
> + * @vm: address space to evict from
> + * @target: range (and color) to evict for
>   * @flags: additional flags to control the eviction algorithm
>   *
>   * This function will try to evict vmas that overlap the target node.
> @@ -239,18 +240,20 @@ i915_gem_evict_something(struct i915_address_space *vm,
>   * To clarify: This is for freeing up virtual address space, not for freeing
>   * memory in e.g. the shrinker.
>   */
> -int i915_gem_evict_for_vma(struct i915_vma *target, unsigned int flags)
> +int i915_gem_evict_for_node(struct i915_address_space *vm,
> +			    struct drm_mm_node *target,
> +			    unsigned int flags)
>  {
>  	LIST_HEAD(eviction_list);
>  	struct drm_mm_node *node;
> -	u64 start = target->node.start;
> -	u64 end = start + target->node.size;
> +	u64 start = target->start;
> +	u64 end = start + target->size;
>  	struct i915_vma *vma, *next;
>  	bool check_color;
>  	int ret = 0;
>  
> -	lockdep_assert_held(&target->vm->i915->drm.struct_mutex);
> -	trace_i915_gem_evict_vma(target, flags);
> +	lockdep_assert_held(&vm->i915->drm.struct_mutex);
> +	trace_i915_gem_evict_node(vm, target, flags);
>  
>  	/* Retire before we search the active list. Although we have
>  	 * reasonable accuracy in our retirement lists, we may have
> @@ -258,18 +261,18 @@ int i915_gem_evict_for_vma(struct i915_vma *target, unsigned int flags)
>  	 * retiring.
>  	 */
>  	if (!(flags & PIN_NONBLOCK))
> -		i915_gem_retire_requests(target->vm->i915);
> +		i915_gem_retire_requests(vm->i915);
>  
> -	check_color = target->vm->mm.color_adjust;
> +	check_color = vm->mm.color_adjust;
>  	if (check_color) {
>  		/* Expand search to cover neighbouring guard pages (or lack!) */
> -		if (start > target->vm->start)
> +		if (start > vm->start)
>  			start -= I915_GTT_PAGE_SIZE;
> -		if (end < target->vm->start + target->vm->total)
> +		if (end < vm->start + vm->total)
>  			end += I915_GTT_PAGE_SIZE;
>  	}
>  
> -	drm_mm_for_each_node_in_range(node, &target->vm->mm, start, end) {
> +	drm_mm_for_each_node_in_range(node, &vm->mm, start, end) {
>  		/* If we find any non-objects (!vma), we cannot evict them */
>  		if (node->color == I915_COLOR_UNEVICTABLE) {
>  			ret = -ENOSPC;
> @@ -285,12 +288,12 @@ int i915_gem_evict_for_vma(struct i915_vma *target, unsigned int flags)
>  		 * those as well to make room for our guard pages.
>  		 */
>  		if (check_color) {
> -			if (vma->node.start + vma->node.size == target->node.start) {
> -				if (vma->node.color == target->node.color)
> +			if (vma->node.start + vma->node.size == node->start) {
> +				if (vma->node.color == node->color)
>  					continue;
>  			}
> -			if (vma->node.start == target->node.start + target->node.size) {
> -				if (vma->node.color == target->node.color)
> +			if (vma->node.start == node->start + node->size) {
> +				if (vma->node.color == node->color)
>  					continue;
>  			}
>  		}
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 136f90ba95ab..92b907f27986 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -3555,6 +3555,57 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
>  }
>  
>  /**
> + * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
> + * @vm - the &struct i915_address_space
> + * @node - the @struct drm_mm_node (typicallay i915_vma.mode)
> + * @size - how much space to allocate inside the GTT,
> + *         must be #I915_GTT_PAGE_SIZE aligned
> + * @offset - where to insert inside the GTT,
> + *           must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
> + *           (@offset + @size) must fit within the address space
> + * @color - color to apply to node
> + * @flags - control search and eviction behaviour
> + *
> + * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
> + * the address space (using @size and @color). If the @node does not fit, it
> + * tries to evict any overlapping nodes from the GTT, including any
> + * neighbouring nodes if the colors do not match (to ensure guard pages between
> + * differing domains). See i915_gem_evict_for_node() for the gory details
> + * on the eviction algorithm. #PIN_NONBLOCK may used to prevent waiting on
> + * evicting active overlapping objects, and any overlapping node that is pinned
> + * or marked as unevictable will also result in failure.
> + *
> + * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
> + * asked to wait for eviction and interrupted.
> + */
> +int i915_gem_gtt_reserve(struct i915_address_space *vm,
> +			 struct drm_mm_node *node,
> +			 u64 size, u64 offset, unsigned long color,
> +			 unsigned int flags)
> +{
> +	int err;
> +
> +	GEM_BUG_ON(!size);
> +	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
> +	GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
> +	GEM_BUG_ON(range_overflows(offset, size, vm->total));
> +
> +	node->size = size;
> +	node->start = offset;
> +	node->color = color;
> +
> +	err = drm_mm_reserve_node(&vm->mm, node);
> +	if (err != -ENOSPC)
> +		return err;
> +
> +	err = i915_gem_evict_for_node(vm, node, flags);
> +	if (err == 0)
> +		err = drm_mm_reserve_node(&vm->mm, node);
> +
> +	return err;
> +}
> +
> +/**
>   * i915_gem_gtt_insert - insert a node into an address_space (GTT)
>   * @vm - the &struct i915_address_space
>   * @node - the @struct drm_mm_node (typicallay i915_vma.mode)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 79198352a491..3e031a057f78 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -532,6 +532,11 @@ int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
>  void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
>  			       struct sg_table *pages);
>  
> +int i915_gem_gtt_reserve(struct i915_address_space *vm,
> +			 struct drm_mm_node *node,
> +			 u64 size, u64 offset, unsigned long color,
> +			 unsigned int flags);
> +
>  int i915_gem_gtt_insert(struct i915_address_space *vm,
>  			struct drm_mm_node *node,
>  			u64 size, u64 alignment, unsigned long color,
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index e5be8e04bf3b..52dbb9bab268 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -694,10 +694,9 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
>  	 * setting up the GTT space. The actual reservation will occur
>  	 * later.
>  	 */
> -	vma->node.start = gtt_offset;
> -	vma->node.size = size;
> -
> -	ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node);
> +	ret = i915_gem_gtt_reserve(&ggtt->base, &vma->node,
> +				   size, gtt_offset, obj->cache_level,
> +				   0);
>  	if (ret) {
>  		DRM_DEBUG_KMS("failed to allocate stolen GTT space\n");
>  		goto err_pages;
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 18ae37c411fd..4461df5a94fe 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -450,9 +450,9 @@ TRACE_EVENT(i915_gem_evict_vm,
>  	    TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
>  );
>  
> -TRACE_EVENT(i915_gem_evict_vma,
> -	    TP_PROTO(struct i915_vma *vma, unsigned int flags),
> -	    TP_ARGS(vma, flags),
> +TRACE_EVENT(i915_gem_evict_node,
> +	    TP_PROTO(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags),
> +	    TP_ARGS(vm, node, flags),
>  
>  	    TP_STRUCT__entry(
>  			     __field(u32, dev)
> @@ -464,11 +464,11 @@ TRACE_EVENT(i915_gem_evict_vma,
>  			    ),
>  
>  	    TP_fast_assign(
> -			   __entry->dev = vma->vm->i915->drm.primary->index;
> -			   __entry->vm = vma->vm;
> -			   __entry->start = vma->node.start;
> -			   __entry->size = vma->node.size;
> -			   __entry->color = vma->node.color;
> +			   __entry->dev = vm->i915->drm.primary->index;
> +			   __entry->vm = vm;
> +			   __entry->start = node->start;
> +			   __entry->size = node->size;
> +			   __entry->color = node->color;
>  			   __entry->flags = flags;
>  			  ),
>  
> diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
> index dae340cfc6c7..f1ad4fbb5ba7 100644
> --- a/drivers/gpu/drm/i915/i915_vgpu.c
> +++ b/drivers/gpu/drm/i915/i915_vgpu.c
> @@ -116,22 +116,20 @@ void intel_vgt_deballoon(struct drm_i915_private *dev_priv)
>  	memset(&bl_info, 0, sizeof(bl_info));
>  }
>  
> -static int vgt_balloon_space(struct drm_mm *mm,
> +static int vgt_balloon_space(struct i915_ggtt *ggtt,
>  			     struct drm_mm_node *node,
>  			     unsigned long start, unsigned long end)
>  {
>  	unsigned long size = end - start;
>  
> -	if (start == end)
> +	if (start <= end)
>  		return -EINVAL;
>  
>  	DRM_INFO("balloon space: range [ 0x%lx - 0x%lx ] %lu KiB.\n",
>  		 start, end, size / 1024);
> -
> -	node->start = start;
> -	node->size = size;
> -
> -	return drm_mm_reserve_node(mm, node);
> +	return i915_gem_gtt_reserve(&ggtt->base, node,
> +				    size, start, I915_COLOR_UNEVICTABLE,
> +				    0);
>  }
>  
>  /**
> @@ -214,10 +212,8 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv)
>  
>  	/* Unmappable graphic memory ballooning */
>  	if (unmappable_base > ggtt->mappable_end) {
> -		ret = vgt_balloon_space(&ggtt->base.mm,
> -					&bl_info.space[2],
> -					ggtt->mappable_end,
> -					unmappable_base);
> +		ret = vgt_balloon_space(ggtt, &bl_info.space[2],
> +					ggtt->mappable_end, unmappable_base);
>  
>  		if (ret)
>  			goto err;
> @@ -228,18 +224,15 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv)
>  	 * because it is reserved to the guard page.
>  	 */
>  	if (unmappable_end < ggtt_end - PAGE_SIZE) {
> -		ret = vgt_balloon_space(&ggtt->base.mm,
> -					&bl_info.space[3],
> -					unmappable_end,
> -					ggtt_end - PAGE_SIZE);
> +		ret = vgt_balloon_space(ggtt, &bl_info.space[3],
> +					unmappable_end, ggtt_end - PAGE_SIZE);
>  		if (ret)
>  			goto err;
>  	}
>  
>  	/* Mappable graphic memory ballooning */
>  	if (mappable_base > ggtt->base.start) {
> -		ret = vgt_balloon_space(&ggtt->base.mm,
> -					&bl_info.space[0],
> +		ret = vgt_balloon_space(ggtt, &bl_info.space[0],
>  					ggtt->base.start, mappable_base);
>  
>  		if (ret)
> @@ -247,10 +240,8 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv)
>  	}
>  
>  	if (mappable_end < ggtt->mappable_end) {
> -		ret = vgt_balloon_space(&ggtt->base.mm,
> -					&bl_info.space[1],
> -					mappable_end,
> -					ggtt->mappable_end);
> +		ret = vgt_balloon_space(ggtt, &bl_info.space[1],
> +					mappable_end, ggtt->mappable_end);
>  
>  		if (ret)
>  			goto err;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index df3750d4c907..b74eeb73ae41 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -419,17 +419,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>  			goto err_unpin;
>  		}
>  
> -		vma->node.start = offset;
> -		vma->node.size = size;
> -		vma->node.color = obj->cache_level;
> -		ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
> -		if (ret) {
> -			ret = i915_gem_evict_for_vma(vma, flags);
> -			if (ret == 0)
> -				ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
> -			if (ret)
> -				goto err_unpin;
> -		}
> +		ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
> +					   size, offset, obj->cache_level,
> +					   flags);
> +		if (ret)
> +			goto err_unpin;
>  	} else {
>  		ret = i915_gem_gtt_insert(vma->vm, &vma->node,
>  					  size, alignment, obj->cache_level,
> -- 
> 2.11.0
> 
> _______________________________________________
> igvt-g-dev mailing list
> igvt-g-dev@lists.01.org
> https://lists.01.org/mailman/listinfo/igvt-g-dev

-- 
Open Source Technology Center, Intel ltd.

$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 163 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-01-12  2:18 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-11 11:23 [PATCH v2 1/3] drm/i915: Use the MRU stack search after evicting Chris Wilson
2017-01-11 11:23 ` [PATCH v2 2/3] drm/i915: Extract reserving space in the GTT to a helper Chris Wilson
2017-01-12  2:13   ` [igvt-g-dev] " Zhenyu Wang
2017-01-11 11:23 ` [PATCH v2 3/3] drm/i915: Prefer random replacement before eviction search Chris Wilson
2017-01-11 11:53 ` ✓ Fi.CI.BAT: success for series starting with [v2,1/3] drm/i915: Use the MRU stack search after evicting Patchwork
2017-01-11 12:04 ` [PATCH v2 1/3] " Joonas Lahtinen
2017-01-11 12:25   ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.