All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH] drm/i915: Segregate memory domains in the GTT using coloring
Date: Thu, 26 Jul 2012 11:49:32 +0100	[thread overview]
Message-ID: <1343299772-26138-1-git-send-email-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20120726103419.GB5326@phenom.ffwll.local>

Several functions of the GPU have the restriction that differing memory
domains cannot be placed next to each other (as the GPU may prefetch
beyond the end of one domain and hang as it crosses into the other
domain). We use the facility of the drm_mm to mark ranges with a
particular color that corresponds to the cache attributes of those pages
in order to prevent allocating adjacent blocks of differing memory
types.

v2: Rebase ontop of drm_mm coloring v2.
v3: Fix rebinding existing gtt_space and add a verification routine.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---

So I found another bug in testing that I forgot to forward on in a timely
manner.
-Chris

---
 drivers/gpu/drm/i915/i915_drv.h       |    5 +-
 drivers/gpu/drm/i915/i915_gem.c       |  111 ++++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_gem_evict.c |    7 ++-
 drivers/gpu/drm/i915/i915_gem_gtt.c   |   19 ++++++
 4 files changed, 128 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f176589..d6c0d0e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -109,6 +109,7 @@ struct intel_pch_pll {
 
 #define WATCH_COHERENCY	0
 #define WATCH_LISTS	0
+#define WATCH_GTT	0
 
 #define I915_GEM_PHYS_CURSOR_0 1
 #define I915_GEM_PHYS_CURSOR_1 2
@@ -1404,7 +1405,9 @@ void i915_gem_init_global_gtt(struct drm_device *dev,
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct drm_device *dev, int min_size,
-					  unsigned alignment, bool mappable);
+					  unsigned alignment,
+					  unsigned cache_level,
+					  bool mappable);
 int i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only);
 
 /* i915_gem_stolen.c */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b274810..19bdc24 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2586,6 +2586,76 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
+static bool i915_gem_valid_gtt_space(struct drm_device *dev,
+				     struct drm_mm_node *gtt_space,
+				     unsigned long cache_level)
+{
+	struct drm_mm_node *other;
+
+	/* On non-LLC machines we have to be careful when putting differing
+	 * types of snoopable memory together to avoid the prefetcher
+	 * crossing memory domains and dieing.
+	 */
+	if (HAS_LLC(dev))
+		return true;
+
+	if (gtt_space == NULL)
+		return true;
+
+	if (list_empty(&gtt_space->node_list))
+		return true;
+
+	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
+	if (other->allocated && !other->hole_follows && other->color != cache_level)
+		return false;
+
+	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
+	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
+		return false;
+
+	return true;
+}
+
+static void i915_gem_verify_gtt(struct drm_device *dev)
+{
+#if WATCH_GTT
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj;
+	int err = 0;
+
+	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
+		if (obj->gtt_space == NULL) {
+			printk(KERN_ERR "object found on GTT list with no space reserved\n");
+			err++;
+			continue;
+		}
+
+		if (obj->cache_level != obj->gtt_space->color) {
+			printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
+			       obj->gtt_space->start,
+			       obj->gtt_space->start + obj->gtt_space->size,
+			       obj->cache_level,
+			       obj->gtt_space->color);
+			err++;
+			continue;
+		}
+
+		if (!i915_gem_valid_gtt_space(dev,
+					      obj->gtt_space,
+					      obj->cache_level)) {
+			printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
+			       obj->gtt_space->start,
+			       obj->gtt_space->start + obj->gtt_space->size,
+			       obj->cache_level);
+			err++;
+			continue;
+		}
+	}
+
+	WARN_ON(err);
+#endif
+}
+
 /**
  * Finds free space in the GTT aperture and binds the object there.
  */
@@ -2640,36 +2710,47 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
  search_free:
 	if (map_and_fenceable)
 		free_space =
-			drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
-						    size, alignment,
-						    0, dev_priv->mm.gtt_mappable_end,
-						    0);
+			drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space,
+							  size, alignment, obj->cache_level,
+							  0, dev_priv->mm.gtt_mappable_end,
+							  false);
 	else
-		free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
-						size, alignment, 0);
+		free_space = drm_mm_search_free_color(&dev_priv->mm.gtt_space,
+						      size, alignment, obj->cache_level,
+						      false);
 
 	if (free_space != NULL) {
 		if (map_and_fenceable)
 			obj->gtt_space =
 				drm_mm_get_block_range_generic(free_space,
-							       size, alignment, 0,
+							       size, alignment, obj->cache_level,
 							       0, dev_priv->mm.gtt_mappable_end,
-							       0);
+							       false);
 		else
 			obj->gtt_space =
-				drm_mm_get_block(free_space, size, alignment);
+				drm_mm_get_block_generic(free_space,
+							 size, alignment, obj->cache_level,
+							 false);
 	}
 	if (obj->gtt_space == NULL) {
 		/* If the gtt is empty and we're still having trouble
 		 * fitting our object in, we're out of memory.
 		 */
 		ret = i915_gem_evict_something(dev, size, alignment,
+					       obj->cache_level,
 					       map_and_fenceable);
 		if (ret)
 			return ret;
 
 		goto search_free;
 	}
+	if (WARN_ON(!i915_gem_valid_gtt_space(dev,
+					      obj->gtt_space,
+					      obj->cache_level))) {
+		drm_mm_put_block(obj->gtt_space);
+		obj->gtt_space = NULL;
+		return -EINVAL;
+	}
 
 	ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
 	if (ret) {
@@ -2732,6 +2813,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 	obj->map_and_fenceable = mappable && fenceable;
 
 	trace_i915_gem_object_bind(obj, map_and_fenceable);
+	i915_gem_verify_gtt(dev);
 	return 0;
 }
 
@@ -2873,6 +2955,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		return -EBUSY;
 	}
 
+	if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) {
+		ret = i915_gem_object_unbind(obj);
+		if (ret)
+			return ret;
+	}
+
 	if (obj->gtt_space) {
 		ret = i915_gem_object_finish_gpu(obj);
 		if (ret)
@@ -2884,7 +2972,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		 * registers with snooped memory, so relinquish any fences
 		 * currently pointing to our region in the aperture.
 		 */
-		if (INTEL_INFO(obj->base.dev)->gen < 6) {
+		if (INTEL_INFO(dev)->gen < 6) {
 			ret = i915_gem_object_put_fence(obj);
 			if (ret)
 				return ret;
@@ -2895,6 +2983,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		if (obj->has_aliasing_ppgtt_mapping)
 			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
 					       obj, cache_level);
+
+		obj->gtt_space->color = cache_level;
 	}
 
 	if (cache_level == I915_CACHE_NONE) {
@@ -2921,6 +3011,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	}
 
 	obj->cache_level = cache_level;
+	i915_gem_verify_gtt(dev);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 51e547c..7279c31 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -44,7 +44,8 @@ mark_free(struct drm_i915_gem_object *obj, struct list_head *unwind)
 
 int
 i915_gem_evict_something(struct drm_device *dev, int min_size,
-			 unsigned alignment, bool mappable)
+			 unsigned alignment, unsigned cache_level,
+			 bool mappable)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct list_head eviction_list, unwind_list;
@@ -79,11 +80,11 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
 	INIT_LIST_HEAD(&unwind_list);
 	if (mappable)
 		drm_mm_init_scan_with_range(&dev_priv->mm.gtt_space,
-					    min_size, alignment, 0,
+					    min_size, alignment, cache_level,
 					    0, dev_priv->mm.gtt_mappable_end);
 	else
 		drm_mm_init_scan(&dev_priv->mm.gtt_space,
-				 min_size, alignment, 0);
+				 min_size, alignment, cache_level);
 
 	/* First see if there is a large enough contiguous idle region... */
 	list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9fd25a4..4584f7f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -422,6 +422,23 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
 	undo_idling(dev_priv, interruptible);
 }
 
+static void i915_gtt_color_adjust(struct drm_mm_node *node,
+				  unsigned long color,
+				  unsigned long *start,
+				  unsigned long *end)
+{
+	if (node->color != color)
+		*start += 4096;
+
+	if (!list_empty(&node->node_list)) {
+		node = list_entry(node->node_list.next,
+				  struct drm_mm_node,
+				  node_list);
+		if (node->allocated && node->color != color)
+			*end -= 4096;
+	}
+}
+
 void i915_gem_init_global_gtt(struct drm_device *dev,
 			      unsigned long start,
 			      unsigned long mappable_end,
@@ -431,6 +448,8 @@ void i915_gem_init_global_gtt(struct drm_device *dev,
 
 	/* Substract the guard page ... */
 	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE);
+	if (!HAS_LLC(dev))
+		dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;
 
 	dev_priv->mm.gtt_start = start;
 	dev_priv->mm.gtt_mappable_end = mappable_end;
-- 
1.7.10.4

  reply	other threads:[~2012-07-26 10:49 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-07-09 11:34 [RFC] Set cache level ioctl Chris Wilson
2012-07-09 11:34 ` [PATCH 1/3] drm: Add colouring to the range allocator Chris Wilson
2012-07-10  9:21   ` Daniel Vetter
2012-07-10  9:29     ` Chris Wilson
2012-07-10  9:40       ` Daniel Vetter
2012-07-10 10:15         ` [PATCH 1/2] " Chris Wilson
2012-07-10 10:15           ` [PATCH 2/2] drm/i915: Segregate memory domains in the GTT using coloring Chris Wilson
2012-07-12 12:19             ` Daniel Vetter
2012-07-12 12:15           ` [PATCH 1/2] drm: Add colouring to the range allocator Daniel Vetter
2012-07-09 11:34 ` [PATCH 2/3] drm/i915: Segregate memory domains in the GTT using coloring Chris Wilson
2012-07-09 11:34 ` [PATCH 3/3] drm/i915: Export ability of changing cache levels to userspace Chris Wilson
2012-07-10  8:54   ` Daniel Vetter
2012-07-10  9:00     ` Chris Wilson
2012-07-10  9:27       ` [PATCH] " Chris Wilson
2012-07-18 18:06         ` Daniel Vetter
2012-07-26 10:34         ` Daniel Vetter
2012-07-26 10:49           ` Chris Wilson [this message]
2012-07-26 11:00             ` [PATCH] drm/i915: Segregate memory domains in the GTT using coloring Daniel Vetter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1343299772-26138-1-git-send-email-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.