* [PATCH 1/6] drm/i915: Refactor testing obj->mm.pages
@ 2017-08-12 11:51 Chris Wilson
  2017-08-12 11:51 ` [PATCH 2/6] drm/i915: Remove walk over obj->vma_list for the shrinker Chris Wilson
                   ` (7 more replies)
  0 siblings, 8 replies; 24+ messages in thread
From: Chris Wilson @ 2017-08-12 11:51 UTC (permalink / raw)
  To: intel-gfx

Since we occasionally stuff an error pointer into obj->mm.pages to mark
a semi-permanent or even permanent failure, we have to be more careful
than a simple NULL test when deciding whether the object has a complete
set of backing pages.
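
For illustration, a minimal sketch of the failure mode the new helper
guards against; use_pages() is a hypothetical stand-in, not a real i915
function, and IS_ERR_OR_NULL() comes from <linux/err.h>:

  struct sg_table *pages = READ_ONCE(obj->mm.pages);

  if (pages)                        /* buggy: ERR_PTR(-EIO) is non-NULL */
          use_pages(pages);

  if (!IS_ERR_OR_NULL(pages))       /* correct: only a real sg_table */
          use_pages(pages);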

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h              | 10 ++++++++--
 drivers/gpu/drm/i915/i915_gem.c              | 19 ++++++++++---------
 drivers/gpu/drm/i915/i915_gem_clflush.c      |  1 +
 drivers/gpu/drm/i915/i915_gem_render_state.c |  2 +-
 drivers/gpu/drm/i915/i915_gem_shrinker.c     | 10 +++++-----
 drivers/gpu/drm/i915/i915_gem_tiling.c       |  2 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c      |  2 +-
 7 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 907603cba447..9b66ff3c49d6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3431,10 +3431,16 @@ i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 	return __i915_gem_object_get_pages(obj);
 }
 
+static inline bool
+i915_gem_object_has_pages(struct drm_i915_gem_object *obj)
+{
+	return !IS_ERR_OR_NULL(READ_ONCE(obj->mm.pages));
+}
+
 static inline void
 __i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
-	GEM_BUG_ON(!obj->mm.pages);
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
 
 	atomic_inc(&obj->mm.pages_pin_count);
 }
@@ -3448,8 +3454,8 @@ i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
 static inline void
 __i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 {
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-	GEM_BUG_ON(!obj->mm.pages);
 
 	atomic_dec(&obj->mm.pages_pin_count);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 000a764ee8d9..b259ca2535f3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2163,7 +2163,7 @@ void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
 	struct address_space *mapping;
 
 	lockdep_assert_held(&obj->mm.lock);
-	GEM_BUG_ON(obj->mm.pages);
+	GEM_BUG_ON(i915_gem_object_has_pages(obj));
 
 	switch (obj->mm.madv) {
 	case I915_MADV_DONTNEED:
@@ -2226,7 +2226,7 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 		return;
 
 	GEM_BUG_ON(obj->bind_count);
-	if (!READ_ONCE(obj->mm.pages))
+	if (!i915_gem_object_has_pages(obj))
 		return;
 
 	/* May be called by shrinker from within get_pages() (on another bo) */
@@ -2505,7 +2505,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 	if (err)
 		return err;
 
-	if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) {
+	if (unlikely(!i915_gem_object_has_pages(obj))) {
 		err = ____i915_gem_object_get_pages(obj);
 		if (err)
 			goto unlock;
@@ -2583,7 +2583,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 
 	pinned = true;
 	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
-		if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) {
+		if (unlikely(!i915_gem_object_has_pages(obj))) {
 			ret = ____i915_gem_object_get_pages(obj);
 			if (ret)
 				goto err_unlock;
@@ -2593,7 +2593,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 		atomic_inc(&obj->mm.pages_pin_count);
 		pinned = false;
 	}
-	GEM_BUG_ON(!obj->mm.pages);
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
 
 	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
 	if (ptr && has_type != type) {
@@ -2648,7 +2648,7 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
 	 * allows it to avoid the cost of retrieving a page (either swapin
 	 * or clearing-before-use) before it is overwritten.
 	 */
-	if (READ_ONCE(obj->mm.pages))
+	if (i915_gem_object_has_pages(obj))
 		return -ENODEV;
 
 	/* Before the pages are instantiated the object is treated as being
@@ -4199,7 +4199,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 	if (err)
 		goto out;
 
-	if (obj->mm.pages &&
+	if (i915_gem_object_has_pages(obj) &&
 	    i915_gem_object_is_tiled(obj) &&
 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
 		if (obj->mm.madv == I915_MADV_WILLNEED) {
@@ -4218,7 +4218,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 		obj->mm.madv = args->madv;
 
 	/* if the object is no longer attached, discard its backing storage */
-	if (obj->mm.madv == I915_MADV_DONTNEED && !obj->mm.pages)
+	if (obj->mm.madv == I915_MADV_DONTNEED &&
+	    !i915_gem_object_has_pages(obj))
 		i915_gem_object_truncate(obj);
 
 	args->retained = obj->mm.madv != __I915_MADV_PURGED;
@@ -4409,7 +4410,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 		if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
 			atomic_set(&obj->mm.pages_pin_count, 0);
 		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
-		GEM_BUG_ON(obj->mm.pages);
+		GEM_BUG_ON(i915_gem_object_has_pages(obj));
 
 		if (obj->base.import_attach)
 			drm_prime_gem_destroy(&obj->base, NULL);
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
index 348b29a845c9..3bc4d6b080f7 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -70,6 +70,7 @@ static const struct dma_fence_ops i915_clflush_ops = {
 
 static void __i915_do_clflush(struct drm_i915_gem_object *obj)
 {
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
 	drm_clflush_sg(obj->mm.pages);
 	intel_fb_obj_flush(obj, ORIGIN_CPU);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 4dd4c2159a92..3703dc91eeda 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -229,7 +229,7 @@ int i915_gem_render_state_emit(struct drm_i915_gem_request *req)
 		return 0;
 
 	/* Recreate the page after shrinking */
-	if (!so->vma->obj->mm.pages)
+	if (!i915_gem_object_has_pages(so->vma->obj))
 		so->batch_offset = -1;
 
 	ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 77fb39808131..80caeadb9d34 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -97,7 +97,7 @@ static bool swap_available(void)
 
 static bool can_release_pages(struct drm_i915_gem_object *obj)
 {
-	if (!obj->mm.pages)
+	if (!i915_gem_object_has_pages(obj))
 		return false;
 
 	/* Consider only shrinkable objects. */
@@ -129,7 +129,7 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
 {
 	if (i915_gem_object_unbind(obj) == 0)
 		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
-	return !READ_ONCE(obj->mm.pages);
+	return !i915_gem_object_has_pages(obj);
 }
 
 /**
@@ -243,7 +243,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 				/* May arrive from get_pages on another bo */
 				mutex_lock_nested(&obj->mm.lock,
 						  I915_MM_SHRINKER);
-				if (!obj->mm.pages) {
+				if (!i915_gem_object_has_pages(obj)) {
 					__i915_gem_object_invalidate(obj);
 					list_del_init(&obj->global_link);
 					count += obj->base.size >> PAGE_SHIFT;
@@ -401,7 +401,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 	 */
 	unbound = bound = unevictable = 0;
 	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) {
-		if (!obj->mm.pages)
+		if (!i915_gem_object_has_pages(obj))
 			continue;
 
 		if (!can_release_pages(obj))
@@ -410,7 +410,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 			unbound += obj->base.size >> PAGE_SHIFT;
 	}
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) {
-		if (!obj->mm.pages)
+		if (!i915_gem_object_has_pages(obj))
 			continue;
 
 		if (!can_release_pages(obj))
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index fb5231f98c0d..1294cf695df0 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -269,7 +269,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 	 * due to the change in swizzling.
 	 */
 	mutex_lock(&obj->mm.lock);
-	if (obj->mm.pages &&
+	if (i915_gem_object_has_pages(obj) &&
 	    obj->mm.madv == I915_MADV_WILLNEED &&
 	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
 		if (tiling == I915_TILING_NONE) {
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index ccd09e8419f5..f5d4f9875e32 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -82,7 +82,7 @@ static void cancel_userptr(struct work_struct *work)
 	/* We are inside a kthread context and can't be interrupted */
 	if (i915_gem_object_unbind(obj) == 0)
 		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
-	WARN_ONCE(obj->mm.pages,
+	WARN_ONCE(i915_gem_object_has_pages(obj),
 		  "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_display=%d\n",
 		  obj->bind_count,
 		  atomic_read(&obj->mm.pages_pin_count),
-- 
2.13.3


* [PATCH 2/6] drm/i915: Remove walk over obj->vma_list for the shrinker
  2017-08-12 11:51 [PATCH 1/6] drm/i915: Refactor testing obj->mm.pages Chris Wilson
@ 2017-08-12 11:51 ` Chris Wilson
  2017-08-15 14:56   ` Joonas Lahtinen
  2017-08-12 11:51 ` [PATCH 3/6] drm/i915: Move dev_priv->mm.[un]bound_list to its own lock Chris Wilson
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2017-08-12 11:51 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we want to reduce the lock coverage within the
shrinker, and one of the dangerous walks we have is over obj->vma_list.
We are only walking the obj->vma_list in order to check whether the
object has been permanently pinned by HW access, typically via use on
the scanout. But we have a couple of other long-term pins, the context
objects, for which we currently have to check the individual vma
pin_count. If we instead mark these using obj->pin_display, we can
forgo the dangerous and sometimes slow list iteration.
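
To illustrate the intent, a sketch of how the check in
can_release_pages() shrinks, paraphrasing the hunks below rather than
quoting them verbatim:

  /* Before: walk every GGTT vma hunting for a permanent pin. */
  list_for_each_entry(vma, &obj->vma_list, obj_link) {
          if (!i915_vma_is_ggtt(vma))
                  break;
          if (i915_vma_is_pinned(vma))
                  return false;
  }

  /* After: the few long-term users bump a single counter instead. */
  if (obj->pin_display)
          return false;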

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 27 +++++++--------------------
 drivers/gpu/drm/i915/intel_lrc.c         |  2 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  7 ++++++-
 3 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 80caeadb9d34..89b62b8045a5 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -71,25 +71,6 @@ static void shrinker_unlock(struct drm_i915_private *dev_priv, bool unlock)
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 }
 
-static bool any_vma_pinned(struct drm_i915_gem_object *obj)
-{
-	struct i915_vma *vma;
-
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
-		/* Only GGTT vma may be permanently pinned, and are always
-		 * at the start of the list. We can stop hunting as soon
-		 * as we see a ppGTT vma.
-		 */
-		if (!i915_vma_is_ggtt(vma))
-			break;
-
-		if (i915_vma_is_pinned(vma))
-			return true;
-	}
-
-	return false;
-}
-
 static bool swap_available(void)
 {
 	return get_nr_swap_pages() > 0;
@@ -115,7 +96,13 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 	if (atomic_read(&obj->mm.pages_pin_count) > obj->bind_count)
 		return false;
 
-	if (any_vma_pinned(obj))
+	/* If any vma are permanently pinned, they will prevent us from reclaiming
+	 * the obj->mm.pages. We only allow scanout objects to claim a permanent
+	 * pin, along with a few others like the reserved context object. To
+	 * simplify the scan, and to avoid walking the list of vma under the
+	 * object, we just check its count of permanent pins.
+	 */
+	if (obj->pin_display)
 		return false;
 
 	/* We can only return physical pages to the system if we can either
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b0738d2b2a7f..874562bd59ae 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -866,6 +866,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
 		i915_ggtt_offset(ce->ring->vma);
 
 	ce->state->obj->mm.dirty = true;
+	ce->state->obj->pin_display++;
 
 	i915_gem_context_get(ctx);
 out:
@@ -892,6 +893,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine,
 		return;
 
 	intel_ring_unpin(ce->ring);
+	ce->state->obj->pin_display--;
 
 	i915_gem_object_unpin_map(ce->state->obj);
 	i915_vma_unpin(ce->state);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index cdf084ef5aae..c90d3ea951e8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1321,6 +1321,7 @@ int intel_ring_pin(struct intel_ring *ring,
 	if (IS_ERR(addr))
 		goto err;
 
+	vma->obj->pin_display++;
 	ring->vaddr = addr;
 	return 0;
 
@@ -1352,6 +1353,7 @@ void intel_ring_unpin(struct intel_ring *ring)
 		i915_gem_object_unpin_map(ring->vma->obj);
 	ring->vaddr = NULL;
 
+	ring->vma->obj->pin_display--;
 	i915_vma_unpin(ring->vma);
 }
 
@@ -1515,6 +1517,7 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
 		if (ret)
 			goto err;
 
+		ce->state->obj->pin_display++;
 		ce->state->obj->mm.dirty = true;
 	}
 
@@ -1550,8 +1553,10 @@ static void intel_ring_context_unpin(struct intel_engine_cs *engine,
 	if (--ce->pin_count)
 		return;
 
-	if (ce->state)
+	if (ce->state) {
+		ce->state->obj->pin_display--;
 		i915_vma_unpin(ce->state);
+	}
 
 	i915_gem_context_put(ctx);
 }
-- 
2.13.3


* [PATCH 3/6] drm/i915: Move dev_priv->mm.[un]bound_list to its own lock
  2017-08-12 11:51 [PATCH 1/6] drm/i915: Refactor testing obj->mm.pages Chris Wilson
  2017-08-12 11:51 ` [PATCH 2/6] drm/i915: Remove walk over obj->vma_list for the shrinker Chris Wilson
@ 2017-08-12 11:51 ` Chris Wilson
  2017-08-16 13:13   ` Joonas Lahtinen
  2017-08-12 11:51 ` [PATCH 4/6] drm/i915: Set our shrinker->batch to 4096 (~16MiB) Chris Wilson
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2017-08-12 11:51 UTC (permalink / raw)
  To: intel-gfx

Remove the struct_mutex requirement around dev_priv->mm.bound_list and
dev_priv->mm.unbound_list by giving them their own spinlock. This
removes one more dependency on struct_mutex and in the process gives us
slightly more accurate unbound_list tracking, which should improve the
shrinker. The drawback is that we drop the retirement before counting,
so i915_gem_object_is_active() may be stale and lead us to
underestimate the number of objects that may be shrunk (see commit
bed50aea61df ("drm/i915/shrinker: Flush active on objects before
counting")).

v2: Crosslink the spinlock to the lists it protects, and btw this
changes s/obj->global_link/obj->mm.link/
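
For illustration, the list manipulations all follow the same pattern
after this change, nesting under the new spinlock instead of
struct_mutex (a sketch condensed from the hunks below):

  spin_lock(&i915->mm.obj_lock);
  list_move_tail(&obj->mm.link, &i915->mm.bound_list);
  spin_unlock(&i915->mm.obj_lock);

Since the spinlock covers only the list links themselves, the heavier
page-freeing work can proceed outside it.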

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c               | 39 +++++++++++---
 drivers/gpu/drm/i915/i915_drv.h                   |  3 ++
 drivers/gpu/drm/i915/i915_gem.c                   | 41 ++++++++++++---
 drivers/gpu/drm/i915/i915_gem_gtt.c               |  3 +-
 drivers/gpu/drm/i915/i915_gem_object.h            |  7 ++-
 drivers/gpu/drm/i915/i915_gem_shrinker.c          | 64 +++++++++--------------
 drivers/gpu/drm/i915/i915_gem_stolen.c            |  5 +-
 drivers/gpu/drm/i915/i915_vma.c                   | 16 ++++--
 drivers/gpu/drm/i915/selftests/i915_gem_context.c |  2 +-
 drivers/gpu/drm/i915/selftests/i915_gem_evict.c   |  8 ++-
 10 files changed, 122 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 329fb3649dc3..18bee25cf6da 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -238,7 +238,9 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
 		goto out;
 
 	total_obj_size = total_gtt_size = count = 0;
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) {
+
+	spin_lock(&dev_priv->mm.obj_lock);
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) {
 		if (count == total)
 			break;
 
@@ -250,7 +252,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
 		total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
 
 	}
-	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) {
+	list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) {
 		if (count == total)
 			break;
 
@@ -260,6 +262,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
 		objects[count++] = obj;
 		total_obj_size += obj->base.size;
 	}
+	spin_unlock(&dev_priv->mm.obj_lock);
 
 	sort(objects, count, sizeof(*objects), obj_rank_by_stolen, NULL);
 
@@ -418,7 +421,9 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 	size = count = 0;
 	mapped_size = mapped_count = 0;
 	purgeable_size = purgeable_count = 0;
-	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) {
+
+	spin_lock(&dev_priv->mm.obj_lock);
+	list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) {
 		size += obj->base.size;
 		++count;
 
@@ -435,7 +440,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 	seq_printf(m, "%u unbound objects, %llu bytes\n", count, size);
 
 	size = count = dpy_size = dpy_count = 0;
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) {
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) {
 		size += obj->base.size;
 		++count;
 
@@ -454,6 +459,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 			mapped_size += obj->base.size;
 		}
 	}
+	spin_unlock(&dev_priv->mm.obj_lock);
+
 	seq_printf(m, "%u bound objects, %llu bytes\n",
 		   count, size);
 	seq_printf(m, "%u purgeable objects, %llu bytes\n",
@@ -514,31 +521,49 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data)
 	struct drm_i915_private *dev_priv = node_to_i915(node);
 	struct drm_device *dev = &dev_priv->drm;
 	bool show_pin_display_only = !!node->info_ent->data;
+	struct drm_i915_gem_object **objects;
 	struct drm_i915_gem_object *obj;
 	u64 total_obj_size, total_gtt_size;
+	unsigned long nobject, n;
 	int count, ret;
 
+	nobject = READ_ONCE(dev_priv->mm.object_count);
+	objects = kvmalloc_array(nobject, sizeof(*objects), GFP_KERNEL);
+	if (!objects)
+		return -ENOMEM;
+
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
 		return ret;
 
-	total_obj_size = total_gtt_size = count = 0;
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) {
+	count = 0;
+	spin_lock(&dev_priv->mm.obj_lock);
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) {
 		if (show_pin_display_only && !obj->pin_display)
 			continue;
 
+		objects[count++] = obj;
+		if (count == nobject)
+			break;
+	}
+	spin_unlock(&dev_priv->mm.obj_lock);
+
+	total_obj_size = total_gtt_size = 0;
+	for (n = 0;  n < count; n++) {
+		obj = objects[n];
+
 		seq_puts(m, "   ");
 		describe_obj(m, obj);
 		seq_putc(m, '\n');
 		total_obj_size += obj->base.size;
 		total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
-		count++;
 	}
 
 	mutex_unlock(&dev->struct_mutex);
 
 	seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
 		   count, total_obj_size, total_gtt_size);
+	kvfree(objects);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9b66ff3c49d6..8f9e751f36a5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1443,6 +1443,9 @@ struct i915_gem_mm {
 	 * always the inner lock when overlapping with struct_mutex. */
 	struct mutex stolen_lock;
 
+	/* Protects bound_list/unbound_list and #drm_i915_gem_object.mm.link */
+	spinlock_t obj_lock;
+
 	/** List of all objects in gtt_space. Used to restore gtt
 	 * mappings on resume */
 	struct list_head bound_list;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b259ca2535f3..2c0ad7ee6ec2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1536,9 +1536,12 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 	}
 
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 	i915 = to_i915(obj->base.dev);
+	spin_lock(&i915->mm.obj_lock);
 	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
-	list_move_tail(&obj->global_link, list);
+	list_move_tail(&obj->mm.link, list);
+	spin_unlock(&i915->mm.obj_lock);
 }
 
 /**
@@ -2220,6 +2223,7 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 				 enum i915_mm_subclass subclass)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct sg_table *pages;
 
 	if (i915_gem_object_has_pinned_pages(obj))
@@ -2240,6 +2244,10 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 	pages = fetch_and_zero(&obj->mm.pages);
 	GEM_BUG_ON(!pages);
 
+	spin_lock(&i915->mm.obj_lock);
+	list_del(&obj->mm.link);
+	spin_unlock(&i915->mm.obj_lock);
+
 	if (obj->mm.mapping) {
 		void *ptr;
 
@@ -2456,6 +2464,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
 	lockdep_assert_held(&obj->mm.lock);
 
 	obj->mm.get_page.sg_pos = pages->sgl;
@@ -2464,11 +2474,15 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 	obj->mm.pages = pages;
 
 	if (i915_gem_object_is_tiled(obj) &&
-	    to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
+	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
 		GEM_BUG_ON(obj->mm.quirked);
 		__i915_gem_object_pin_pages(obj);
 		obj->mm.quirked = true;
 	}
+
+	spin_lock(&i915->mm.obj_lock);
+	list_add(&obj->mm.link, &i915->mm.unbound_list);
+	spin_unlock(&i915->mm.obj_lock);
 }
 
 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
@@ -4245,7 +4259,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 {
 	mutex_init(&obj->mm.lock);
 
-	INIT_LIST_HEAD(&obj->global_link);
 	INIT_LIST_HEAD(&obj->userfault_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
@@ -4393,7 +4406,18 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 		GEM_BUG_ON(!list_empty(&obj->vma_list));
 		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
 
-		list_del(&obj->global_link);
+		/* This serializes freeing with the shrinker. Since the free
+		 * is delayed, first by RCU then by the workqueue, we want the
+		 * shrinker to be able to free pages of unreferenced objects,
+		 * or else we may oom whilst there are plenty of deferred
+		 * freed objects.
+		 */
+		if (i915_gem_object_has_pages(obj)) {
+			spin_lock(&i915->mm.obj_lock);
+			list_del_init(&obj->mm.link);
+			spin_unlock(&i915->mm.obj_lock);
+		}
+
 	}
 	intel_runtime_pm_put(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -4909,11 +4933,14 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 		goto err_priorities;
 
 	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
+
+	spin_lock_init(&dev_priv->mm.obj_lock);
 	init_llist_head(&dev_priv->mm.free_list);
 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
 	INIT_LIST_HEAD(&dev_priv->mm.userfault_list);
+
 	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
 			  i915_gem_retire_work_handler);
 	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
@@ -4998,12 +5025,12 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
 	i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
 	i915_gem_drain_freed_objects(dev_priv);
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	spin_lock(&dev_priv->mm.obj_lock);
 	for (p = phases; *p; p++) {
-		list_for_each_entry(obj, *p, global_link)
+		list_for_each_entry(obj, *p, mm.link)
 			__start_cpu_write(obj);
 	}
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	spin_unlock(&dev_priv->mm.obj_lock);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 10aa7762d9a6..fd564431155d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3109,8 +3109,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 	ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */
 
 	/* clflush objects bound into the GGTT and rebind them. */
-	list_for_each_entry_safe(obj, on,
-				 &dev_priv->mm.bound_list, global_link) {
+	list_for_each_entry_safe(obj, on, &dev_priv->mm.bound_list, mm.link) {
 		bool ggtt_bound = false;
 		struct i915_vma *vma;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 5b19a4916a4d..b6425f163c8e 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -90,7 +90,6 @@ struct drm_i915_gem_object {
 
 	/** Stolen memory for this object, instead of being backed by shmem. */
 	struct drm_mm_node *stolen;
-	struct list_head global_link;
 	union {
 		struct rcu_head rcu;
 		struct llist_node freed;
@@ -152,6 +151,12 @@ struct drm_i915_gem_object {
 		} get_page;
 
 		/**
+		 * Element within i915->mm.unbound_list or i915->mm.bound_list,
+		 * locked by i915->mm.obj_lock.
+		 */
+		struct list_head link;
+
+		/**
 		 * Advice: are the backing pages purgeable?
 		 */
 		unsigned int madv:2;
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 89b62b8045a5..5b8bc0e4f336 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -78,9 +78,6 @@ static bool swap_available(void)
 
 static bool can_release_pages(struct drm_i915_gem_object *obj)
 {
-	if (!i915_gem_object_has_pages(obj))
-		return false;
-
 	/* Consider only shrinkable objects. */
 	if (!i915_gem_object_is_shrinkable(obj))
 		return false;
@@ -102,7 +99,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 	 * simplify the scan, and to avoid walking the list of vma under the
 	 * object, we just check the count of its permanently pinned.
 	 */
-	if (obj->pin_display)
+	if (READ_ONCE(obj->pin_display))
 		return false;
 
 	/* We can only return physical pages to the system if we can either
@@ -200,15 +197,20 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 			continue;
 
 		INIT_LIST_HEAD(&still_in_list);
+
+		/*
+		 * We serialize our access to unreferenced objects through
+		 * the use of the struct_mutex. While the objects are not
+		 * yet freed (due to RCU then a workqueue) we still want
+		 * to be able to shrink their pages, so they remain on
+		 * the unbound/bound list until actually freed.
+		 */
+		spin_lock(&dev_priv->mm.obj_lock);
 		while (count < target &&
 		       (obj = list_first_entry_or_null(phase->list,
 						       typeof(*obj),
-						       global_link))) {
-			list_move_tail(&obj->global_link, &still_in_list);
-			if (!obj->mm.pages) {
-				list_del_init(&obj->global_link);
-				continue;
-			}
+						       mm.link))) {
+			list_move_tail(&obj->mm.link, &still_in_list);
 
 			if (flags & I915_SHRINK_PURGEABLE &&
 			    obj->mm.madv != I915_MADV_DONTNEED)
@@ -226,19 +228,23 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 			if (!can_release_pages(obj))
 				continue;
 
+			spin_unlock(&dev_priv->mm.obj_lock);
+
 			if (unsafe_drop_pages(obj)) {
 				/* May arrive from get_pages on another bo */
 				mutex_lock_nested(&obj->mm.lock,
 						  I915_MM_SHRINKER);
 				if (!i915_gem_object_has_pages(obj)) {
 					__i915_gem_object_invalidate(obj);
-					list_del_init(&obj->global_link);
 					count += obj->base.size >> PAGE_SHIFT;
 				}
 				mutex_unlock(&obj->mm.lock);
 			}
+
+			spin_lock(&dev_priv->mm.obj_lock);
 		}
 		list_splice_tail(&still_in_list, phase->list);
+		spin_unlock(&dev_priv->mm.obj_lock);
 	}
 
 	if (flags & I915_SHRINK_BOUND)
@@ -285,25 +291,17 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 	struct drm_i915_private *dev_priv =
 		container_of(shrinker, struct drm_i915_private, mm.shrinker);
 	struct drm_i915_gem_object *obj;
-	unsigned long count;
-	bool unlock;
-
-	if (!shrinker_lock(dev_priv, &unlock))
-		return 0;
-
-	i915_gem_retire_requests(dev_priv);
+	unsigned long count = 0;
 
-	count = 0;
-	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link)
+	spin_lock(&dev_priv->mm.obj_lock);
+	list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link)
 		if (can_release_pages(obj))
 			count += obj->base.size >> PAGE_SHIFT;
 
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) {
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link)
 		if (!i915_gem_object_is_active(obj) && can_release_pages(obj))
 			count += obj->base.size >> PAGE_SHIFT;
-	}
-
-	shrinker_unlock(dev_priv, unlock);
+	spin_unlock(&dev_priv->mm.obj_lock);
 
 	return count;
 }
@@ -375,10 +373,6 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 		container_of(nb, struct drm_i915_private, mm.oom_notifier);
 	struct drm_i915_gem_object *obj;
 	unsigned long unevictable, bound, unbound, freed_pages;
-	bool unlock;
-
-	if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000))
-		return NOTIFY_DONE;
 
 	freed_pages = i915_gem_shrink_all(dev_priv);
 
@@ -387,26 +381,20 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 	 * being pointed to by hardware.
 	 */
 	unbound = bound = unevictable = 0;
-	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) {
-		if (!i915_gem_object_has_pages(obj))
-			continue;
-
+	spin_lock(&dev_priv->mm.obj_lock);
+	list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) {
 		if (!can_release_pages(obj))
 			unevictable += obj->base.size >> PAGE_SHIFT;
 		else
 			unbound += obj->base.size >> PAGE_SHIFT;
 	}
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) {
-		if (!i915_gem_object_has_pages(obj))
-			continue;
-
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) {
 		if (!can_release_pages(obj))
 			unevictable += obj->base.size >> PAGE_SHIFT;
 		else
 			bound += obj->base.size >> PAGE_SHIFT;
 	}
-
-	shrinker_unlock(dev_priv, unlock);
+	spin_unlock(&dev_priv->mm.obj_lock);
 
 	if (freed_pages || unbound || bound)
 		pr_info("Purging GPU memory, %lu pages freed, "
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index c11c915382e7..a6e49d1bd389 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -717,8 +717,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	vma->flags |= I915_VMA_GLOBAL_BIND;
 	__i915_vma_set_map_and_fenceable(vma);
 	list_move_tail(&vma->vm_link, &ggtt->base.inactive_list);
-	list_move_tail(&obj->global_link, &dev_priv->mm.bound_list);
+
+	spin_lock(&dev_priv->mm.obj_lock);
+	list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list);
 	obj->bind_count++;
+	spin_unlock(&dev_priv->mm.obj_lock);
 
 	return obj;
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 958be0a95960..35330df38e6f 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -58,8 +58,10 @@ i915_vma_retire(struct i915_gem_active *active,
 	 * so that we don't steal from recently used but inactive objects
 	 * (unless we are forced to ofc!)
 	 */
+	spin_lock(&rq->i915->mm.obj_lock);
 	if (obj->bind_count)
-		list_move_tail(&obj->global_link, &rq->i915->mm.bound_list);
+		list_move_tail(&obj->mm.link, &rq->i915->mm.bound_list);
+	spin_unlock(&rq->i915->mm.obj_lock);
 
 	obj->mm.dirty = true; /* be paranoid  */
 
@@ -497,9 +499,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
 
-	list_move_tail(&obj->global_link, &dev_priv->mm.bound_list);
 	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+
+	spin_lock(&dev_priv->mm.obj_lock);
+	list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list);
 	obj->bind_count++;
+	spin_unlock(&dev_priv->mm.obj_lock);
+
 	GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count);
 
 	return 0;
@@ -512,6 +518,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 static void
 i915_vma_remove(struct i915_vma *vma)
 {
+	struct drm_i915_private *i915 = vma->vm->i915;
 	struct drm_i915_gem_object *obj = vma->obj;
 
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
@@ -523,9 +530,10 @@ i915_vma_remove(struct i915_vma *vma)
 	/* Since the unbound list is global, only move to that list if
 	 * no more VMAs exist.
 	 */
+	spin_lock(&i915->mm.obj_lock);
 	if (--obj->bind_count == 0)
-		list_move_tail(&obj->global_link,
-			       &to_i915(obj->base.dev)->mm.unbound_list);
+		list_move_tail(&obj->mm.link, &i915->mm.unbound_list);
+	spin_unlock(&i915->mm.obj_lock);
 
 	/* And finally now the object is completely decoupled from this vma,
 	 * we can drop its hold on the backing storage and allow it to be
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 12b85b3278cd..6a98a5e86d49 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -415,7 +415,7 @@ static int fake_aliasing_ppgtt_enable(struct drm_i915_private *i915)
 	if (err)
 		return err;
 
-	list_for_each_entry(obj, &i915->mm.bound_list, global_link) {
+	list_for_each_entry(obj, &i915->mm.bound_list, mm.link) {
 		struct i915_vma *vma;
 
 		vma = i915_vma_instance(obj, &i915->ggtt.base, NULL);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 5ea373221f49..d423a46bf019 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -47,7 +47,7 @@ static int populate_ggtt(struct drm_i915_private *i915)
 
 	if (!list_empty(&i915->mm.unbound_list)) {
 		size = 0;
-		list_for_each_entry(obj, &i915->mm.unbound_list, global_link)
+		list_for_each_entry(obj, &i915->mm.unbound_list, mm.link)
 			size++;
 
 		pr_err("Found %lld objects unbound!\n", size);
@@ -74,10 +74,10 @@ static void cleanup_objects(struct drm_i915_private *i915)
 {
 	struct drm_i915_gem_object *obj, *on;
 
-	list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, global_link)
+	list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, mm.link)
 		i915_gem_object_put(obj);
 
-	list_for_each_entry_safe(obj, on, &i915->mm.bound_list, global_link)
+	list_for_each_entry_safe(obj, on, &i915->mm.bound_list, mm.link)
 		i915_gem_object_put(obj);
 
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -149,8 +149,6 @@ static int igt_overcommit(void *arg)
 		goto cleanup;
 	}
 
-	list_move(&obj->global_link, &i915->mm.unbound_list);
-
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
 	if (!IS_ERR(vma) || PTR_ERR(vma) != -ENOSPC) {
 		pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR(vma));
-- 
2.13.3


* [PATCH 4/6] drm/i915: Set our shrinker->batch to 4096 (~16MiB)
  2017-08-12 11:51 [PATCH 1/6] drm/i915: Refactor testing obj->mm.pages Chris Wilson
  2017-08-12 11:51 ` [PATCH 2/6] drm/i915: Remove walk over obj->vma_list for the shrinker Chris Wilson
  2017-08-12 11:51 ` [PATCH 3/6] drm/i915: Move dev_priv->mm.[un]bound_list to its own lock Chris Wilson
@ 2017-08-12 11:51 ` Chris Wilson
  2017-08-16 13:39   ` Joonas Lahtinen
  2017-08-12 11:51 ` [PATCH 5/6] drm/i915: Trim struct_mutex hold duration for i915_gem_free_objects Chris Wilson
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2017-08-12 11:51 UTC (permalink / raw)
  To: intel-gfx

Prefer to defer activating our GEM shrinker until we have a few
megabytes to free, or until we have accumulated sufficient memory
pressure from deferred reclaim to force a shrink. The intent is that
because our objects are typically large, we are too effective at
shrinking and are not rewarded for freeing more pages than the batch
requests. This also defers the initial shrink, hopefully placing it at
a lower priority than, say, the buffer cache (although it balances out
over a number of reclaims, with GEM being more bursty).
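
For reference, the shrinker here counts in pages (obj->base.size >>
PAGE_SHIFT), so with 4KiB pages the batch works out to
4096 * 4KiB = 16MiB, hence the title.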

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 5b8bc0e4f336..8bb17e9a52de 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -461,6 +461,7 @@ void i915_gem_shrinker_init(struct drm_i915_private *dev_priv)
 	dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
 	dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count;
 	dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS;
+	dev_priv->mm.shrinker.batch = 4096;
 	WARN_ON(register_shrinker(&dev_priv->mm.shrinker));
 
 	dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
-- 
2.13.3


* [PATCH 5/6] drm/i915: Trim struct_mutex hold duration for i915_gem_free_objects
  2017-08-12 11:51 [PATCH 1/6] drm/i915: Refactor testing obj->mm.pages Chris Wilson
                   ` (2 preceding siblings ...)
  2017-08-12 11:51 ` [PATCH 4/6] drm/i915: Set our shrinker->batch to 4096 (~16MiB) Chris Wilson
@ 2017-08-12 11:51 ` Chris Wilson
  2017-08-12 11:51 ` [PATCH 6/6] drm/i915: Only free the oldest stale object before a fresh allocation Chris Wilson
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 24+ messages in thread
From: Chris Wilson @ 2017-08-12 11:51 UTC (permalink / raw)
  To: intel-gfx

We free objects in bulk after their RCU grace period has elapsed.
Currently, we take struct_mutex and unbind all the objects. This can
lead to a long lock duration, during which those objects' pages are
unfreeable (i.e. the shrinker is prevented from reaping them). If we
instead process a single object under struct_mutex and then free its
pages, the number of objects locked away from the shrinker is minimal
and we allow regular clients better access to struct_mutex if they
need it.
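
A sketch of the reworked loop, condensed from the diff below (the
unbind and unlink steps are elided):

  llist_for_each_entry_safe(obj, on, freed, freed) {
          mutex_lock(&i915->drm.struct_mutex);
          /* ... unbind the vma and unlink the object ... */
          mutex_unlock(&i915->drm.struct_mutex);

          /* ... free the pages and the object outside the lock ... */
          if (on)
                  cond_resched();
  }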

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2c0ad7ee6ec2..646457c16be2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4389,13 +4389,14 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 {
 	struct drm_i915_gem_object *obj, *on;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	intel_runtime_pm_get(i915);
-	llist_for_each_entry(obj, freed, freed) {
+	llist_for_each_entry_safe(obj, on, freed, freed) {
 		struct i915_vma *vma, *vn;
 
 		trace_i915_gem_object_destroy(obj);
 
+		mutex_lock(&i915->drm.struct_mutex);
+
 		GEM_BUG_ON(i915_gem_object_is_active(obj));
 		list_for_each_entry_safe(vma, vn,
 					 &obj->vma_list, obj_link) {
@@ -4418,13 +4419,8 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 			spin_unlock(&i915->mm.obj_lock);
 		}
 
-	}
-	intel_runtime_pm_put(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
+		mutex_unlock(&i915->drm.struct_mutex);
 
-	cond_resched();
-
-	llist_for_each_entry_safe(obj, on, freed, freed) {
 		GEM_BUG_ON(obj->bind_count);
 		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
 
@@ -4445,7 +4441,11 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 
 		kfree(obj->bit_17);
 		i915_gem_object_free(obj);
+
+		if (on)
+			cond_resched();
 	}
+	intel_runtime_pm_put(i915);
 }
 
 static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
-- 
2.13.3


* [PATCH 6/6] drm/i915: Only free the oldest stale object before a fresh allocation
  2017-08-12 11:51 [PATCH 1/6] drm/i915: Refactor testing obj->mm.pages Chris Wilson
                   ` (3 preceding siblings ...)
  2017-08-12 11:51 ` [PATCH 5/6] drm/i915: Trim struct_mutex hold duration for i915_gem_free_objects Chris Wilson
@ 2017-08-12 11:51 ` Chris Wilson
  2017-08-16 13:53   ` Joonas Lahtinen
  2017-08-12 12:09 ` ✓ Fi.CI.BAT: success for series starting with [1/6] drm/i915: Refactor testing obj->mm.pages Patchwork
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2017-08-12 11:51 UTC (permalink / raw)
  To: intel-gfx

Inspired by Tvrtko's critique of the reaping of the stale contexts
before allocating a new one, also limit the freed-object reaping to the
oldest stale object before allocating a fresh object. Unlike contexts,
objects may have radically different sizes of backing storage, but
similar to contexts, whilst we want to prevent starvation due to an
excessively long freed list, we also do not want to delay fresh
allocations for too long. Only freeing the oldest object on the freed
list before each allocation is a reasonable compromise.

v2: Only a single consumer of llist_del_first() is allowed (although
multiple llist_add are still allowed in parallel). Unlike
i915_gem_context, i915_gem_flush_free_objects() is itself not serialized
and so we need to add our own spinlock. Otherwise KASAN eventually spots
a use-after-free for the race on *first->next.
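
A sketch of the resulting consumer side, condensed from the hunk below:

  /* llist_add() may race freely; llist_del_first() must be serialized. */
  spin_lock(&i915->mm.free_lock);
  freed = llist_del_first(&i915->mm.free_list);
  spin_unlock(&i915->mm.free_lock);

  if (freed) {
          freed->next = NULL; /* detach just the one node */
          __i915_gem_free_objects(i915, freed);
  }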

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> #v1
---
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_gem.c | 14 ++++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8f9e751f36a5..9e92af6f72fd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1466,6 +1466,7 @@ struct i915_gem_mm {
 	 */
 	struct llist_head free_list;
 	struct work_struct free_work;
+	spinlock_t free_lock;
 
 	/** Usable portion of the GTT for GEM */
 	dma_addr_t stolen_base; /* limited to low memory (32-bit) */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 646457c16be2..300be6251c67 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4452,9 +4452,18 @@ static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
 {
 	struct llist_node *freed;
 
-	freed = llist_del_all(&i915->mm.free_list);
-	if (unlikely(freed))
+	/* Free the oldest, most stale object to keep the free_list short */
+	freed = NULL;
+	if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
+		/* Only one consumer of llist_del_first() allowed */
+		spin_lock(&i915->mm.free_lock);
+		freed = llist_del_first(&i915->mm.free_list);
+		spin_unlock(&i915->mm.free_lock);
+	}
+	if (unlikely(freed)) {
+		freed->next = NULL;
 		__i915_gem_free_objects(i915, freed);
+	}
 }
 
 static void __i915_gem_free_work(struct work_struct *work)
@@ -4935,6 +4944,7 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
 
 	spin_lock_init(&dev_priv->mm.obj_lock);
+	spin_lock_init(&dev_priv->mm.free_lock);
 	init_llist_head(&dev_priv->mm.free_list);
 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
-- 
2.13.3


* ✓ Fi.CI.BAT: success for series starting with [1/6] drm/i915: Refactor testing obj->mm.pages
  2017-08-12 11:51 [PATCH 1/6] drm/i915: Refactor testing obj->mm.pages Chris Wilson
                   ` (4 preceding siblings ...)
  2017-08-12 11:51 ` [PATCH 6/6] drm/i915: Only free the oldest stale object before a fresh allocation Chris Wilson
@ 2017-08-12 12:09 ` Patchwork
  2017-08-15 16:46 ` ✗ Fi.CI.BAT: failure for series starting with [1/6] drm/i915: Refactor testing obj->mm.pages (rev3) Patchwork
  2017-08-16 14:54 ` ✓ Fi.CI.BAT: success for series starting with [1/6] drm/i915: Refactor testing obj->mm.pages (rev4) Patchwork
  7 siblings, 0 replies; 24+ messages in thread
From: Patchwork @ 2017-08-12 12:09 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/6] drm/i915: Refactor testing obj->mm.pages
URL   : https://patchwork.freedesktop.org/series/28709/
State : success

== Summary ==

Series 28709v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/28709/revisions/1/mbox/

Test gem_exec_flush:
        Subgroup basic-batch-kernel-default-uc:
                pass       -> FAIL       (fi-snb-2600) fdo#100007
Test kms_flip:
        Subgroup basic-flip-vs-modeset:
                skip       -> PASS       (fi-skl-x1585l) fdo#101781
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                pass       -> DMESG-WARN (fi-byt-n2820) fdo#101705

fdo#100007 https://bugs.freedesktop.org/show_bug.cgi?id=100007
fdo#101781 https://bugs.freedesktop.org/show_bug.cgi?id=101781
fdo#101705 https://bugs.freedesktop.org/show_bug.cgi?id=101705

fi-bdw-5557u     total:279  pass:268  dwarn:0   dfail:0   fail:0   skip:11  time:446s
fi-bdw-gvtdvm    total:279  pass:265  dwarn:0   dfail:0   fail:0   skip:14  time:435s
fi-blb-e6850     total:279  pass:224  dwarn:1   dfail:0   fail:0   skip:54  time:363s
fi-bsw-n3050     total:279  pass:243  dwarn:0   dfail:0   fail:0   skip:36  time:547s
fi-bxt-j4205     total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:514s
fi-byt-j1900     total:279  pass:254  dwarn:1   dfail:0   fail:0   skip:24  time:523s
fi-byt-n2820     total:279  pass:250  dwarn:1   dfail:0   fail:0   skip:28  time:514s
fi-glk-2a        total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:611s
fi-hsw-4770      total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:441s
fi-hsw-4770r     total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:421s
fi-ilk-650       total:279  pass:229  dwarn:0   dfail:0   fail:0   skip:50  time:426s
fi-ivb-3520m     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:505s
fi-ivb-3770      total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:481s
fi-kbl-7500u     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:474s
fi-kbl-7560u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:585s
fi-kbl-r         total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:589s
fi-pnv-d510      total:279  pass:223  dwarn:1   dfail:0   fail:0   skip:55  time:522s
fi-skl-6260u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:462s
fi-skl-6700k     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:472s
fi-skl-6770hq    total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:480s
fi-skl-gvtdvm    total:279  pass:266  dwarn:0   dfail:0   fail:0   skip:13  time:440s
fi-skl-x1585l    total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:499s
fi-snb-2520m     total:279  pass:251  dwarn:0   dfail:0   fail:0   skip:28  time:548s
fi-snb-2600      total:279  pass:249  dwarn:0   dfail:0   fail:1   skip:29  time:407s

0689b6b1aa3edec5d99f35902c9b38c0e6b701b9 drm-tip: 2017y-08m-11d-18h-55m-01s UTC integration manifest
87ca896ae8e1 drm/i915: Only free the oldest stale object before a fresh allocation
aff535b3087a drm/i915: Trim struct_mutex hold duration for i915_gem_free_objects
652925ad453d drm/i915: Set our shrinker->batch to 4096 (~16MiB)
dfcf75cdf03d drm/i915: Move dev_priv->mm.[un]bound_list to its own lock
83e5c8bd1a23 drm/i915: Remove walk over obj->vma_list for the shrinker
4aba17df83b9 drm/i915: Refactor testing obj->mm.pages

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_5389/

* Re: [PATCH 2/6] drm/i915: Remove walk over obj->vma_list for the shrinker
  2017-08-12 11:51 ` [PATCH 2/6] drm/i915: Remove walk over obj->vma_list for the shrinker Chris Wilson
@ 2017-08-15 14:56   ` Joonas Lahtinen
  2017-08-15 15:05     ` Chris Wilson
  2017-08-15 16:17     ` [PATCH 1/2] drm/i915: Rename obj->pin_display to obj->pin_global Chris Wilson
  0 siblings, 2 replies; 24+ messages in thread
From: Joonas Lahtinen @ 2017-08-15 14:56 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Sat, 2017-08-12 at 12:51 +0100, Chris Wilson wrote:
> In the next patch, we want to reduce the lock coverage within the
> shrinker, and one of the dangerous walks we have is over obj->vma_list.
> We are only walking the obj->vma_list in order to check whether the
> object has been permanently pinned by HW access, typically via use on
> the scanout. But we have a couple of other long-term pins, the context
> objects, for which we currently have to check the individual vma
> pin_count. If we instead mark these using obj->pin_display, we can
> forgo the dangerous and sometimes slow list iteration.

s/pin_display/pin_permanent/g

> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

<SNIP>

>  static bool swap_available(void)
>  {
>  	return get_nr_swap_pages() > 0;
> @@ -115,7 +96,13 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
>  	if (atomic_read(&obj->mm.pages_pin_count) > obj->bind_count)
>  		return false;
>  
> -	if (any_vma_pinned(obj))
> +	/* If any vma are permanently pinned, they will prevent us from reclaiming

Oh, it sounds so much better here when the variable is 'pin_permanent'.

> +	 * the obj->mm.pages. We only allow scanout objects to claim a permanent
> +	 * pin, along with a few others like the reserved context object. To
> +	 * simplify the scan, and to avoid walking the list of vma under the
> +	 * object, we just check its count of permanent pins.
> +	 */
> +	if (obj->pin_display)
>  		return false;
>  
>  	/* We can only return physical pages to the system if we can either
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b0738d2b2a7f..874562bd59ae 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -866,6 +866,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
>  		i915_ggtt_offset(ce->ring->vma);
>  
>  	ce->state->obj->mm.dirty = true;
> +	ce->state->obj->pin_display++;

This should be closer to intel_ring_pin (my preference).

>  
>  	i915_gem_context_get(ctx);
>  out:
> @@ -892,6 +893,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine,
>  		return;
>  
>  	intel_ring_unpin(ce->ring);
> +	ce->state->obj->pin_display--;

Or this should be closer to i915_gem_context_put. Just make it
symmetric.
 
> @@ -1515,6 +1517,7 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
>  		if (ret)
>  			goto err;
>  
> +		ce->state->obj->pin_display++;
>  		ce->state->obj->mm.dirty = true;

This is rather symmetric; the above can have mm.dirty after it, too.

As 'pin_permanent', this is:

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 2/6] drm/i915: Remove walk over obj->vma_list for the shrinker
  2017-08-15 14:56   ` Joonas Lahtinen
@ 2017-08-15 15:05     ` Chris Wilson
  2017-08-15 16:17     ` [PATCH 1/2] drm/i915: Rename obj->pin_display to obj->pin_global Chris Wilson
  1 sibling, 0 replies; 24+ messages in thread
From: Chris Wilson @ 2017-08-15 15:05 UTC (permalink / raw)
  To: Joonas Lahtinen, intel-gfx

Quoting Joonas Lahtinen (2017-08-15 15:56:07)
> On Sat, 2017-08-12 at 12:51 +0100, Chris Wilson wrote:
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index b0738d2b2a7f..874562bd59ae 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -866,6 +866,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
> >               i915_ggtt_offset(ce->ring->vma);
> >  
> >       ce->state->obj->mm.dirty = true;
> > +     ce->state->obj->pin_display++;
> 
> This should be closer to intel_ring_pin (my preference).
> 
> >  
> >       i915_gem_context_get(ctx);
> >  out:
> > @@ -892,6 +893,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine,
> >               return;
> >  
> >       intel_ring_unpin(ce->ring);
> > +     ce->state->obj->pin_display--;
> 
> Or this should be closer to i915_gem_context_put. Just make it
> symmetric.

Ah, I see how that would add confusion. Gotcha.
-Chris

* [PATCH 1/2] drm/i915: Rename obj->pin_display to obj->pin_global
  2017-08-15 14:56   ` Joonas Lahtinen
  2017-08-15 15:05     ` Chris Wilson
@ 2017-08-15 16:17     ` Chris Wilson
  2017-08-15 16:17       ` [PATCH 2/2] drm/i915: Drop debugfs/i915_gem_pin_display Chris Wilson
  1 sibling, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2017-08-15 16:17 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we want to extend use of the global pin counter to
cover semi-permanent pinning of context/ring objects. Given that we
plan to extend the usage to encompass a disparate set of objects, we
want a name that reflects both uses and entails less confusion.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     | 12 ++++++------
 drivers/gpu/drm/i915/i915_gem.c         | 20 ++++++++++----------
 drivers/gpu/drm/i915/i915_gem_object.h  |  3 ++-
 drivers/gpu/drm/i915/i915_gem_userptr.c |  4 ++--
 4 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 329fb3649dc3..f8c888ed017b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -81,7 +81,7 @@ static char get_active_flag(struct drm_i915_gem_object *obj)
 
 static char get_pin_flag(struct drm_i915_gem_object *obj)
 {
-	return obj->pin_display ? 'p' : ' ';
+	return obj->pin_global ? 'p' : ' ';
 }
 
 static char get_tiling_flag(struct drm_i915_gem_object *obj)
@@ -148,8 +148,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 			pin_count++;
 	}
 	seq_printf(m, " (pinned x %d)", pin_count);
-	if (obj->pin_display)
-		seq_printf(m, " (display)");
+	if (obj->pin_global)
+		seq_printf(m, " (global)");
 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
@@ -439,7 +439,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 		size += obj->base.size;
 		++count;
 
-		if (obj->pin_display) {
+		if (obj->pin_global) {
 			dpy_size += obj->base.size;
 			++dpy_count;
 		}
@@ -460,7 +460,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 		   purgeable_count, purgeable_size);
 	seq_printf(m, "%u mapped objects, %llu bytes\n",
 		   mapped_count, mapped_size);
-	seq_printf(m, "%u display objects (pinned), %llu bytes\n",
+	seq_printf(m, "%u display objects (globally pinned), %llu bytes\n",
 		   dpy_count, dpy_size);
 
 	seq_printf(m, "%llu [%llu] gtt total\n",
@@ -524,7 +524,7 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data)
 
 	total_obj_size = total_gtt_size = count = 0;
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) {
-		if (show_pin_display_only && !obj->pin_display)
+		if (show_pin_display_only && !obj->pin_global)
 			continue;
 
 		seq_puts(m, "   ");
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bd8ae96527b4..2c10eeb2dfe0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -55,7 +55,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 		return true;
 
-	return obj->pin_display;
+	return obj->pin_global; /* currently in use by HW, keep flushed */
 }
 
 static int
@@ -3424,7 +3424,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
 {
-	if (!READ_ONCE(obj->pin_display))
+	if (!READ_ONCE(obj->pin_global))
 		return;
 
 	mutex_lock(&obj->base.dev->struct_mutex);
@@ -3791,10 +3791,10 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-	/* Mark the pin_display early so that we account for the
+	/* Mark the global pin early so that we account for the
 	 * display coherency whilst setting up the cache domains.
 	 */
-	obj->pin_display++;
+	obj->pin_global++;
 
 	/* The display engine is not coherent with the LLC cache on gen6.  As
 	 * a result, we make sure that the pinning that is about to occur is
@@ -3810,7 +3810,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 					      I915_CACHE_WT : I915_CACHE_NONE);
 	if (ret) {
 		vma = ERR_PTR(ret);
-		goto err_unpin_display;
+		goto err_unpin_global;
 	}
 
 	/* As the user may map the buffer once pinned in the display plane
@@ -3841,7 +3841,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
 	}
 	if (IS_ERR(vma))
-		goto err_unpin_display;
+		goto err_unpin_global;
 
 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
@@ -3856,8 +3856,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 
 	return vma;
 
-err_unpin_display:
-	obj->pin_display--;
+err_unpin_global:
+	obj->pin_global--;
 	return vma;
 }
 
@@ -3866,10 +3866,10 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 {
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
 
-	if (WARN_ON(vma->obj->pin_display == 0))
+	if (WARN_ON(vma->obj->pin_global == 0))
 		return;
 
-	if (--vma->obj->pin_display == 0)
+	if (--vma->obj->pin_global == 0)
 		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
 	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 3baa341432db..b299497bbbdc 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -139,7 +139,8 @@ struct drm_i915_gem_object {
 	/** Count of VMA actually bound by this object */
 	unsigned int bind_count;
 	unsigned int active_count;
-	unsigned int pin_display;
+	/** Count of how many global VMA are currently pinned for use by HW */
+	unsigned int pin_global;
 
 	struct {
 		struct mutex lock; /* protects the pages and their use */
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index c29f4ce6baf6..7cdcb586f148 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -83,10 +83,10 @@ static void cancel_userptr(struct work_struct *work)
 	if (i915_gem_object_unbind(obj) == 0)
 		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
 	WARN_ONCE(i915_gem_object_has_pages(obj),
-		  "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_display=%d\n",
+		  "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_global=%d\n",
 		  obj->bind_count,
 		  atomic_read(&obj->mm.pages_pin_count),
-		  obj->pin_display);
+		  obj->pin_global);
 
 	mutex_unlock(&obj->base.dev->struct_mutex);
 
-- 
2.13.3

* [PATCH 2/2] drm/i915: Drop debugfs/i915_gem_pin_display
  2017-08-15 16:17     ` [PATCH 1/2] drm/i915: Rename obj->pin_display to obj->pin_global Chris Wilson
@ 2017-08-15 16:17       ` Chris Wilson
  0 siblings, 0 replies; 24+ messages in thread
From: Chris Wilson @ 2017-08-15 16:17 UTC (permalink / raw)
  To: intel-gfx

It has now lost its meaning (it shows more than just pin_display), and I
do not believe that we are using it in preference to the complete listing
from i915_gem_gtt, the listing from i915_gem_framebuffer, or the listing
of active display objects in i915_display_info.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f8c888ed017b..0c1bb6963c18 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -513,7 +513,6 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data)
 	struct drm_info_node *node = m->private;
 	struct drm_i915_private *dev_priv = node_to_i915(node);
 	struct drm_device *dev = &dev_priv->drm;
-	bool show_pin_display_only = !!node->info_ent->data;
 	struct drm_i915_gem_object *obj;
 	u64 total_obj_size, total_gtt_size;
 	int count, ret;
@@ -524,9 +523,6 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data)
 
 	total_obj_size = total_gtt_size = count = 0;
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) {
-		if (show_pin_display_only && !obj->pin_global)
-			continue;
-
 		seq_puts(m, "   ");
 		describe_obj(m, obj);
 		seq_putc(m, '\n');
@@ -4788,7 +4784,6 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_capabilities", i915_capabilities, 0},
 	{"i915_gem_objects", i915_gem_object_info, 0},
 	{"i915_gem_gtt", i915_gem_gtt_info, 0},
-	{"i915_gem_pin_display", i915_gem_gtt_info, 0, (void *)1},
 	{"i915_gem_stolen", i915_gem_stolen_list_info },
 	{"i915_gem_request", i915_gem_request_info, 0},
 	{"i915_gem_seqno", i915_gem_seqno_info, 0},
-- 
2.13.3

* ✗ Fi.CI.BAT: failure for series starting with [1/6] drm/i915: Refactor testing obj->mm.pages (rev3)
  2017-08-12 11:51 [PATCH 1/6] drm/i915: Refactor testing obj->mm.pages Chris Wilson
                   ` (5 preceding siblings ...)
  2017-08-12 12:09 ` ✓ Fi.CI.BAT: success for series starting with [1/6] drm/i915: Refactor testing obj->mm.pages Patchwork
@ 2017-08-15 16:46 ` Patchwork
  2017-08-16 14:54 ` ✓ Fi.CI.BAT: success for series starting with [1/6] drm/i915: Refactor testing obj->mm.pages (rev4) Patchwork
  7 siblings, 0 replies; 24+ messages in thread
From: Patchwork @ 2017-08-15 16:46 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/6] drm/i915: Refactor testing obj->mm.pages (rev3)
URL   : https://patchwork.freedesktop.org/series/28709/
State : failure

== Summary ==

Series 28709v3 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/28709/revisions/3/mbox/

Test gem_exec_flush:
        Subgroup basic-batch-kernel-default-uc:
                fail       -> PASS       (fi-snb-2600) fdo#100007
Test gem_exec_store:
        Subgroup basic-bsd:
                pass       -> INCOMPLETE (fi-byt-j1900)
Test kms_flip:
        Subgroup basic-flip-vs-modeset:
                skip       -> PASS       (fi-skl-x1585l) fdo#101781

fdo#100007 https://bugs.freedesktop.org/show_bug.cgi?id=100007
fdo#101781 https://bugs.freedesktop.org/show_bug.cgi?id=101781

fi-bdw-5557u     total:279  pass:268  dwarn:0   dfail:0   fail:0   skip:11  time:445s
fi-bdw-gvtdvm    total:279  pass:265  dwarn:0   dfail:0   fail:0   skip:14  time:442s
fi-blb-e6850     total:279  pass:224  dwarn:1   dfail:0   fail:0   skip:54  time:356s
fi-bsw-n3050     total:279  pass:243  dwarn:0   dfail:0   fail:0   skip:36  time:541s
fi-bxt-j4205     total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:521s
fi-byt-j1900     total:102  pass:92   dwarn:0   dfail:0   fail:0   skip:9  
fi-byt-n2820     total:279  pass:250  dwarn:1   dfail:0   fail:0   skip:28  time:513s
fi-glk-2a        total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:610s
fi-hsw-4770      total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:439s
fi-hsw-4770r     total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:417s
fi-ilk-650       total:279  pass:229  dwarn:0   dfail:0   fail:0   skip:50  time:417s
fi-ivb-3520m     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:510s
fi-ivb-3770      total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:475s
fi-kbl-7500u     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:470s
fi-kbl-7560u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:579s
fi-kbl-r         total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:596s
fi-skl-6260u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:463s
fi-skl-6700k     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:475s
fi-skl-6770hq    total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:478s
fi-skl-gvtdvm    total:279  pass:266  dwarn:0   dfail:0   fail:0   skip:13  time:439s
fi-skl-x1585l    total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:501s
fi-snb-2520m     total:279  pass:251  dwarn:0   dfail:0   fail:0   skip:28  time:555s
fi-snb-2600      total:279  pass:250  dwarn:0   dfail:0   fail:0   skip:29  time:408s

134bab7e0e7c147edb39a904d310ff88e3408442 drm-tip: 2017y-08m-15d-15h-48m-26s UTC integration manifest
160bf07c7cb7 drm/i915: Refactor testing obj->mm.pages

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_5405/

* Re: [PATCH 3/6] drm/i915: Move dev_priv->mm.[un]bound_list to its own lock
  2017-08-12 11:51 ` [PATCH 3/6] drm/i915: Move dev_priv->mm.[un]bound_list to its own lock Chris Wilson
@ 2017-08-16 13:13   ` Joonas Lahtinen
  0 siblings, 0 replies; 24+ messages in thread
From: Joonas Lahtinen @ 2017-08-16 13:13 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Sat, 2017-08-12 at 12:51 +0100, Chris Wilson wrote:
> Remove the struct_mutex requirement around dev_priv->mm.bound_list and
> dev_priv->mm.unbound_list by giving it its own spinlock. This reduces
> one more requirement for struct_mutex and in the process gives us
> slightly more accurate unbound_list tracking, which should improve the
> shrinker - but the drawback is that we drop the retirement before
> counting so i915_gem_object_is_active() may be stale and lead us to
> underestimate the number of objects that may be shrunk (see commit
> bed50aea61df ("drm/i915/shrinker: Flush active on objects before
> counting")).
> 
> v2: Crosslink the spinlock to the lists it protects, and btw this
> changes s/obj->global_link/obj->mm.link/
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

<SNIP>

> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1536,9 +1536,12 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
>  		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
>  	}
>  
> +	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

Lift this precondition to the beginning of this func, there's no need
for the loop to be in front.
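
I.e. (sketch):

static void
i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	/* ... then walk the vma list and move to the inactive lists ... */
}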

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 4/6] drm/i915: Set our shrinker->batch to 4096 (~16MiB)
  2017-08-12 11:51 ` [PATCH 4/6] drm/i915: Set our shrinker->batch to 4096 (~16MiB) Chris Wilson
@ 2017-08-16 13:39   ` Joonas Lahtinen
  2017-08-16 13:55     ` Chris Wilson
  0 siblings, 1 reply; 24+ messages in thread
From: Joonas Lahtinen @ 2017-08-16 13:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Sat, 2017-08-12 at 12:51 +0100, Chris Wilson wrote:
> Prefer to defer activating our GEM shrinker until we have a few
> megabytes to free; or we have accumulated sufficient mempressure by
> deferring the reclaim to force a shrink. The intent is that because our
> objects may typically be large, we are too effective at shrinking and
> are not rewarded for freeing more pages than the batch. It will also
> defer the initial shrinking to hopefully put it at a lower priority than
> say the buffer cache (although it will balance out over a number of
> reclaims, with GEM being more bursty).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_shrinker.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 5b8bc0e4f336..8bb17e9a52de 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -461,6 +461,7 @@ void i915_gem_shrinker_init(struct drm_i915_private *dev_priv)
>  	dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
>  	dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count;
>  	dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS;
> +	dev_priv->mm.shrinker.batch = 4096;

Did you measure how this alone affects the runtime of two consecutive
gem.testlist runs? Is there some specific test/usecase that benefits
from this? We'd be the first to set this; md/raid5.c sets it to 128,
which is the default (a .batch of 0 also means 128).
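
For reference, reclaim drains each shrinker's reported count in ->batch
sized nr_to_scan chunks and defers any remainder to the next pass,
roughly like this toy model (illustrative only, not the mm/vmscan.c
code; 4096 pages is ~16MiB with 4KiB pages):

#include <stdio.h>

int main(void)
{
	unsigned long total_scan = 10000;	/* from ->count_objects() */
	unsigned long batch = 4096;		/* 0 means SHRINK_BATCH (128) */

	/* scan_objects() is only invoked in whole batches; the tail is
	 * carried over as deferred work for the next reclaim pass.
	 */
	while (total_scan >= batch) {
		printf("scan_objects(nr_to_scan=%lu)\n", batch);
		total_scan -= batch;
	}
	printf("deferring %lu until the next pass\n", total_scan);

	return 0;
}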

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 6/6] drm/i915: Only free the oldest stale object before a fresh allocation
  2017-08-12 11:51 ` [PATCH 6/6] drm/i915: Only free the oldest stale object before a fresh allocation Chris Wilson
@ 2017-08-16 13:53   ` Joonas Lahtinen
  0 siblings, 0 replies; 24+ messages in thread
From: Joonas Lahtinen @ 2017-08-16 13:53 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Sat, 2017-08-12 at 12:51 +0100, Chris Wilson wrote:
> Inspired by Tvrtko's critique of the reaping of the stale contexts
> before allocating a new one, also limit the freed object reaping to the
> oldest stale object before allocating a fresh object. Unlike contexts,
> objects may have radically different sizes of backing storage, but
> similar to contexts, whilst we want to prevent starvation due to
> excessive freed lists, we also do not want to delay fresh
> allocations for too long. Only freeing the oldest on the freed object
> list before each allocation is a reasonable compromise.
> 
> v2: Only a single consumer of llist_del_first() is allowed (although
> multiple llist_add are still allowed in parallel). Unlike
> i915_gem_context, i915_gem_flush_free_objects() is itself not serialized
> and so we need to add our own spinlock. Otherwise KASAN eventually spots
> a use-after-free for the race on *first->next.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> #v1

<SNIP>

> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1466,6 +1466,7 @@ struct i915_gem_mm {
>  	 */
>  	struct llist_head free_list;
>  	struct work_struct free_work;
> +	spinlock_t free_lock;

Please document the scope of this lock here.
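
E.g. something along these lines (wording is mine):

	struct llist_head free_list;
	struct work_struct free_work;
	/* free_lock serialises the single consumer calling
	 * llist_del_first() on free_list; llist_add() callers stay
	 * lock-free and may run concurrently.
	 */
	spinlock_t free_lock;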

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 4/6] drm/i915: Set our shrinker->batch to 4096 (~16MiB)
  2017-08-16 13:39   ` Joonas Lahtinen
@ 2017-08-16 13:55     ` Chris Wilson
  2017-08-16 14:23       ` [PATCH v2] " Chris Wilson
  2017-09-20 13:28       ` [PATCH 4/6] " Joonas Lahtinen
  0 siblings, 2 replies; 24+ messages in thread
From: Chris Wilson @ 2017-08-16 13:55 UTC (permalink / raw)
  To: Joonas Lahtinen, intel-gfx

Quoting Joonas Lahtinen (2017-08-16 14:39:00)
> On Sat, 2017-08-12 at 12:51 +0100, Chris Wilson wrote:
> > Prefer to defer activating our GEM shrinker until we have a few
> > megabytes to free; or we have accumulated sufficient mempressure by
> > deferring the reclaim to force a shrink. The intent is that because our
> > objects may typically be large, we are too effective at shrinking and
> > are not rewarded for freeing more pages than the batch. It will also
> > defer the initial shrinking to hopefully put it at a lower priority than
> > say the buffer cache (although it will balance out over a number of
> > reclaims, with GEM being more bursty).
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_gem_shrinker.c | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > index 5b8bc0e4f336..8bb17e9a52de 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > @@ -461,6 +461,7 @@ void i915_gem_shrinker_init(struct drm_i915_private *dev_priv)
> >       dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
> >       dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count;
> >       dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS;
> > +     dev_priv->mm.shrinker.batch = 4096;
> 
> Did you measure how this alone affects the runtime of two consecutive
> gem.testlist runs? Is there some specific test/usecase that benefits
> from this? We'd be the first to set this; md/raid5.c sets it to 128,
> which is the default (a .batch of 0 also means 128).

My testing was trying to play a game that was hitting swap on an old
hdd. So not very quantifiable, and vmscan is very unintuitive. 

Note also that we are special in that we don't report objects but pages.
Not that it makes any difference: upon reclaim every slab is basically
asked to give up some %% of what it reports, with some hysteresis thrown
in on top.

The only way we can do anything here is to throw it at lots of systems
and see how that helps. My gut feeling says that the batch size should
be approximately the typical object size in the freeable list, to try to
reduce the amount of inefficient work. Now, the value is read before
scan->count is called, but we can always improve the estimate for the
next pass.
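
I.e. something along these lines; a userspace sketch of the feedback
idea only (estimate_batch() is a made-up name, not the patch):

#include <stdio.h>

/* Blend the previous batch with twice the mean object size in pages,
 * never dropping below the vmscan default of SHRINK_BATCH (128).
 */
static unsigned long estimate_batch(unsigned long prev_batch,
				    unsigned long count_pages,
				    unsigned long num_objects)
{
	unsigned long avg;

	if (!num_objects)
		return prev_batch; /* nothing freeable, keep the estimate */

	avg = 2 * count_pages / num_objects;
	prev_batch = (prev_batch + avg) / 2;

	return prev_batch > 128 ? prev_batch : 128;
}

int main(void)
{
	unsigned long batch = 4096; /* initial guess, ~16MiB in pages */
	int pass;

	/* e.g. 1024 freeable objects totalling 4M pages: the estimate
	 * converges towards twice the 4096 page mean object size.
	 */
	for (pass = 0; pass < 5; pass++) {
		batch = estimate_batch(batch, 4 << 20, 1024);
		printf("pass %d: batch = %lu pages\n", pass, batch);
	}

	return 0;
}
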
-Chris

* [PATCH v2] drm/i915: Set our shrinker->batch to 4096 (~16MiB)
  2017-08-16 13:55     ` Chris Wilson
@ 2017-08-16 14:23       ` Chris Wilson
  2017-08-18 12:56         ` Chris Wilson
  2017-09-20 13:28       ` [PATCH 4/6] " Joonas Lahtinen
  1 sibling, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2017-08-16 14:23 UTC (permalink / raw)
  To: intel-gfx

Prefer to defer activating our GEM shrinker until we have a few
megabytes to free; or we have accumulated sufficient mempressure by
deferring the reclaim to force a shrink. The intent is that because our
objects may typically be large, we are too effective at shrinking and
are not rewarded for freeing more pages than the batch. It will also
defer the initial shrinking to hopefully put it at a lower priority than
say the buffer cache (although it will balance out over a number of
reclaims, with GEM being more bursty).

v2: Give it a feedback system to try and tune the batch size towards
an effective size for the available objects.
v3: Start keeping track of shrinker stats in debugfs

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c      | 11 +++++++++++
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 34 +++++++++++++++++++++++++++-------
 2 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 52173e92864f..6bad53f89738 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3414,6 +3414,16 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 	return 0;
 }
 
+static int i915_shrinker_info(struct seq_file *m, void *unused)
+{
+	struct drm_i915_private *i915 = node_to_i915(m->private);
+
+	seq_printf(m, "seeks = %d\n", i915->mm.shrinker.seeks);
+	seq_printf(m, "batch = %lu\n", i915->mm.shrinker.batch);
+
+	return 0;
+}
+
 static int i915_semaphore_status(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
@@ -4842,6 +4852,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_dmc_info", i915_dmc_info, 0},
 	{"i915_display_info", i915_display_info, 0},
 	{"i915_engine_info", i915_engine_info, 0},
+	{"i915_shrinker_info", i915_shrinker_info, 0},
 	{"i915_semaphore_status", i915_semaphore_status, 0},
 	{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
 	{"i915_dp_mst_info", i915_dp_mst_info, 0},
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 2afca480bbea..ca30a182787e 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -288,20 +288,39 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 static unsigned long
 i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 {
-	struct drm_i915_private *dev_priv =
+	struct drm_i915_private *i915 =
 		container_of(shrinker, struct drm_i915_private, mm.shrinker);
 	struct drm_i915_gem_object *obj;
+	unsigned long num_objects = 0;
 	unsigned long count = 0;
 
-	spin_lock(&dev_priv->mm.obj_lock);
-	list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link)
-		if (can_release_pages(obj))
+	spin_lock(&i915->mm.obj_lock);
+	list_for_each_entry(obj, &i915->mm.unbound_list, mm.link)
+		if (can_release_pages(obj)) {
 			count += obj->base.size >> PAGE_SHIFT;
+			num_objects++;
+		}
 
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link)
-		if (!i915_gem_object_is_active(obj) && can_release_pages(obj))
+	list_for_each_entry(obj, &i915->mm.bound_list, mm.link)
+		if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) {
 			count += obj->base.size >> PAGE_SHIFT;
-	spin_unlock(&dev_priv->mm.obj_lock);
+			num_objects++;
+		}
+	spin_unlock(&i915->mm.obj_lock);
+
+	/* Update our preferred vmscan batch size for the next pass.
+	 * Our rough guess for an effective batch size is roughly 2
+	 * available GEM objects worth of pages. That is, we don't want
+	 * the shrinker to fire until it is worth the cost of freeing an
+	 * entire GEM object.
+	 */
+	if (num_objects) {
+		unsigned long avg = 2 * count / num_objects;
+
+		i915->mm.shrinker.batch =
+			max((i915->mm.shrinker.batch + avg) >> 1,
+			    128ul /* default SHRINK_BATCH */);
+	}
 
 	return count;
 }
@@ -461,6 +476,7 @@ void i915_gem_shrinker_init(struct drm_i915_private *dev_priv)
 	dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
 	dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count;
 	dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS;
+	dev_priv->mm.shrinker.batch = 4096;
 	WARN_ON(register_shrinker(&dev_priv->mm.shrinker));
 
 	dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
-- 
2.14.1

* ✓ Fi.CI.BAT: success for series starting with [1/6] drm/i915: Refactor testing obj->mm.pages (rev4)
  2017-08-12 11:51 [PATCH 1/6] drm/i915: Refactor testing obj->mm.pages Chris Wilson
                   ` (6 preceding siblings ...)
  2017-08-15 16:46 ` ✗ Fi.CI.BAT: failure for series starting with [1/6] drm/i915: Refactor testing obj->mm.pages (rev3) Patchwork
@ 2017-08-16 14:54 ` Patchwork
  7 siblings, 0 replies; 24+ messages in thread
From: Patchwork @ 2017-08-16 14:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/6] drm/i915: Refactor testing obj->mm.pages (rev4)
URL   : https://patchwork.freedesktop.org/series/28709/
State : success

== Summary ==

Series 28709v4 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/28709/revisions/4/mbox/

fi-bdw-5557u     total:279  pass:268  dwarn:0   dfail:0   fail:0   skip:11  time:447s
fi-blb-e6850     total:279  pass:224  dwarn:1   dfail:0   fail:0   skip:54  time:363s
fi-bsw-n3050     total:279  pass:243  dwarn:0   dfail:0   fail:0   skip:36  time:541s
fi-bxt-j4205     total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:535s
fi-byt-j1900     total:279  pass:254  dwarn:1   dfail:0   fail:0   skip:24  time:519s
fi-byt-n2820     total:279  pass:251  dwarn:0   dfail:0   fail:0   skip:28  time:512s
fi-glk-2a        total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:608s
fi-hsw-4770      total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:445s
fi-hsw-4770r     total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:421s
fi-ilk-650       total:279  pass:229  dwarn:0   dfail:0   fail:0   skip:50  time:416s
fi-ivb-3520m     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:509s
fi-ivb-3770      total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:481s
fi-kbl-7500u     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:474s
fi-kbl-7560u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:595s
fi-kbl-r         total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:596s
fi-pnv-d510      total:279  pass:223  dwarn:1   dfail:0   fail:0   skip:55  time:523s
fi-skl-6260u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:479s
fi-skl-6700k     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:470s
fi-skl-6770hq    total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:487s
fi-skl-gvtdvm    total:279  pass:266  dwarn:0   dfail:0   fail:0   skip:13  time:441s
fi-skl-x1585l    total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:493s
fi-snb-2520m     total:279  pass:251  dwarn:0   dfail:0   fail:0   skip:28  time:551s
fi-snb-2600      total:279  pass:250  dwarn:0   dfail:0   fail:0   skip:29  time:409s

5118c7fa9c878351d1ed17a36b97966f68da72df drm-tip: 2017y-08m-15d-23h-58m-24s UTC integration manifest
1ddc743fc0ba drm/i915: Refactor testing obj->mm.pages

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_5414/

* Re: [PATCH v2] drm/i915: Set our shrinker->batch to 4096 (~16MiB)
  2017-08-16 14:23       ` [PATCH v2] " Chris Wilson
@ 2017-08-18 12:56         ` Chris Wilson
  2017-08-18 22:48           ` Chris Wilson
  0 siblings, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2017-08-18 12:56 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2017-08-16 15:23:06)
> Prefer to defer activating our GEM shrinker until we have a few
> megabytes to free; or we have accumulated sufficient mempressure by
> deferring the reclaim to force a shrink. The intent is that because our
> objects may typically be large, we are too effective at shrinking and
> are not rewarded for freeing more pages than the batch. It will also
> defer the initial shrinking to hopefully put it at a lower priority than
> say the buffer cache (although it will balance out over a number of
> reclaims, with GEM being more bursty).
> 
> v2: Give it a feedback system to try and tune the batch size towards
> an effective size for the available objects.
> v3: Start keeping track of shrinker stats in debugfs

I think this is helping a treat. Still we get shrinker stalls, but
(anecdotally sadly) they do not feel as bad. Hmm, I wonder if latencytop
helps, but I also need a consistent workload+environment to replay.

One task fills the buffercache (-> vmpressure, triggering
reclaim/kswapd), the other task does something simple like copying
between a ring of buffers slightly too large for memory? Hmm, we can wrap
this up as a mode of gem_syslatency. Then we measure the latency of a
third party to wakeup events? Or something engineered to hit the vm?
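
For the third party, something as dumb as measuring how far we oversleep
a periodic nanosleep() should do (a sketch, not gem_syslatency itself):

#include <stdio.h>
#include <time.h>

int main(void)
{
	for (;;) {
		struct timespec req = { 0, 1000000 }; /* nominal 1ms sleep */
		struct timespec t0, t1;
		long lat_us;

		clock_gettime(CLOCK_MONOTONIC, &t0);
		nanosleep(&req, NULL);
		clock_gettime(CLOCK_MONOTONIC, &t1);

		/* wakeup latency = actual elapsed time - requested sleep */
		lat_us = (t1.tv_sec - t0.tv_sec) * 1000000 +
			 (t1.tv_nsec - t0.tv_nsec) / 1000 - 1000;
		printf("wakeup latency: %ldus\n", lat_us);
	}
}
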
-Chris

* Re: [PATCH v2] drm/i915: Set our shrinker->batch to 4096 (~16MiB)
  2017-08-18 12:56         ` Chris Wilson
@ 2017-08-18 22:48           ` Chris Wilson
  0 siblings, 0 replies; 24+ messages in thread
From: Chris Wilson @ 2017-08-18 22:48 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2017-08-18 13:56:08)
> Quoting Chris Wilson (2017-08-16 15:23:06)
> > Prefer to defer activating our GEM shrinker until we have a few
> > megabytes to free; or we have accumulated sufficient mempressure by
> > deferring the reclaim to force a shrink. The intent is that because our
> > objects may typically be large, we are too effective at shrinking and
> > are not rewarded for freeing more pages than the batch. It will also
> > defer the initial shrinking to hopefully put it at a lower priority than
> > say the buffer cache (although it will balance out over a number of
> > reclaims, with GEM being more bursty).
> > 
> > v2: Give it a feedback system to try and tune the batch size towards
> > an effective size for the available objects.
> > v3: Start keeping track of shrinker stats in debugfs
> 
> I think this is helping a treat. Still we get shrinker stalls, but
> (anecdotally sadly) they do not feel as bad. Hmm, I wonder if latencytop
> helps, but I also need a consistent workload+environment to replay.
> 
> One task fills the buffercache (-> vmpressure, triggering
> reclaim/kswapd), the other task does something simple like copying
> between a ring of buffers slightly too large for memory? Hmm, we can wrap
> this up as a mode of gem_syslatency. Then we measure the latency of a
> third party to wakeup events? Or something engineered to hit the vm?

Hmm, this didn't make as big a difference (to the buffercache vs i915) as
I hoped, but

[RFC] mm,drm/i915: Mark pinned shmemfs pages as unevictable
https://patchwork.freedesktop.org/patch/160075/

did!
-Chris

* Re: [PATCH 4/6] drm/i915: Set our shrinker->batch to 4096 (~16MiB)
  2017-08-16 13:55     ` Chris Wilson
  2017-08-16 14:23       ` [PATCH v2] " Chris Wilson
@ 2017-09-20 13:28       ` Joonas Lahtinen
  2017-09-26 15:02         ` Chris Wilson
  1 sibling, 1 reply; 24+ messages in thread
From: Joonas Lahtinen @ 2017-09-20 13:28 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Wed, 2017-08-16 at 14:55 +0100, Chris Wilson wrote:
> Quoting Joonas Lahtinen (2017-08-16 14:39:00)
> > On Sat, 2017-08-12 at 12:51 +0100, Chris Wilson wrote:
> > > Prefer to defer activating our GEM shrinker until we have a few
> > > megabytes to free; or we have accumulated sufficient mempressure by
> > > deferring the reclaim to force a shrink. The intent is that because our
> > > objects may typically be large, we are too effective at shrinking and
> > > are not rewarded for freeing more pages than the batch. It will also
> > > defer the initial shrinking to hopefully put it at a lower priority than
> > > say the buffer cache (although it will balance out over a number of
> > > reclaims, with GEM being more bursty).
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > ---
> > >  drivers/gpu/drm/i915/i915_gem_shrinker.c | 1 +
> > >  1 file changed, 1 insertion(+)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > > index 5b8bc0e4f336..8bb17e9a52de 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > > @@ -461,6 +461,7 @@ void i915_gem_shrinker_init(struct drm_i915_private *dev_priv)
> > >       dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
> > >       dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count;
> > >       dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS;
> > > +     dev_priv->mm.shrinker.batch = 4096;
> > 
> > Did you measure how this alone affects the runtime of two consecutive
> > gem.testlist runs? Is there some specific test/usecase that benefits
> > from this? We'd be the first to set this; md/raid5.c sets it to 128,
> > which is the default (a .batch of 0 also means 128).
> 
> My testing was trying to play a game that was hitting swap on an old
> hdd. So not very quantifiable, and vmscan is very unintuitive. 
> 
> Note also that we are special in that we don't report objects but pages.
> Not that it makes any difference, upon reclaim every slab is basically
> asked to give up some %% of what it reports, with some hysteresis thrown
> in on top.
> 
> The only way we can do anything here is to throw it at lots of systems
> and see how that helps. My gut feeling says that the batch size should
> be approximately the typical object size in the freeable list, to try to
> reduce the amount of inefficient work. Now, the value is read before
> scan->count is called, but we can always improve the estimate for the
> next pass.

For documentation purposes, from IRC, this is;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 4/6] drm/i915: Set our shrinker->batch to 4096 (~16MiB)
  2017-09-20 13:28       ` [PATCH 4/6] " Joonas Lahtinen
@ 2017-09-26 15:02         ` Chris Wilson
  2017-09-27  8:38           ` Joonas Lahtinen
  0 siblings, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2017-09-26 15:02 UTC (permalink / raw)
  To: Joonas Lahtinen, intel-gfx

Quoting Joonas Lahtinen (2017-09-20 14:28:40)
> On Wed, 2017-08-16 at 14:55 +0100, Chris Wilson wrote:
> > Quoting Joonas Lahtinen (2017-08-16 14:39:00)
> > > On Sat, 2017-08-12 at 12:51 +0100, Chris Wilson wrote:
> > > > Prefer to defer activating our GEM shrinker until we have a few
> > > > megabytes to free; or we have accumulated sufficient mempressure by
> > > > deferring the reclaim to force a shrink. The intent is that because our
> > > > objects may typically be large, we are too effective at shrinking and
> > > > are not rewarded for freeing more pages than the batch. It will also
> > > > defer the initial shrinking to hopefully put it at a lower priority than
> > > > say the buffer cache (although it will balance out over a number of
> > > > reclaims, with GEM being more bursty).
> > > > 
> > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > ---
> > > >  drivers/gpu/drm/i915/i915_gem_shrinker.c | 1 +
> > > >  1 file changed, 1 insertion(+)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > > > index 5b8bc0e4f336..8bb17e9a52de 100644
> > > > --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > > > +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > > > @@ -461,6 +461,7 @@ void i915_gem_shrinker_init(struct drm_i915_private *dev_priv)
> > > >       dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
> > > >       dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count;
> > > >       dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS;
> > > > +     dev_priv->mm.shrinker.batch = 4096;
> > > 
> > > Did you measure how this alone affects the runtime of two consecutive
> > > gem.testlist runs? Is there some specific test/usecase that benefits
> > > from this? We'd be the first to set this; md/raid5.c sets it to 128,
> > > which is the default (a .batch of 0 also means 128).
> > 
> > My testing was trying to play a game that was hitting swap on an old
> > hdd. So not very quantifiable, and vmscan is very unintuitive. 
> > 
> > Note also that we are special in that we don't report objects but pages.
> > Not that it makes any difference: upon reclaim every slab is basically
> > asked to give up some %% of what it reports, with some hysteresis thrown
> > in on top.
> > 
> > The only way we can do anything here is to throw it at lots of systems
> > and see how that helps. My gut feeling says that the batch size should
> > be approximately the typical object size in the freeable list, to try to
> > reduce the amount of inefficient work. Now, the value is read before
> > scan->count is called, but we can always improve the estimate for the
> > next pass.
> 
> For documentation purposes, from IRC, this is;
> 
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

And for reference, I took a stab at measuring vmpressure with gem_syslatency
https://patchwork.freedesktop.org/patch/178777/
-Chris

* Re: [PATCH 4/6] drm/i915: Set our shrinker->batch to 4096 (~16MiB)
  2017-09-26 15:02         ` Chris Wilson
@ 2017-09-27  8:38           ` Joonas Lahtinen
  2017-09-27  8:54             ` Chris Wilson
  0 siblings, 1 reply; 24+ messages in thread
From: Joonas Lahtinen @ 2017-09-27  8:38 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Tue, 2017-09-26 at 16:02 +0100, Chris Wilson wrote:
> Quoting Joonas Lahtinen (2017-09-20 14:28:40)
> > On Wed, 2017-08-16 at 14:55 +0100, Chris Wilson wrote:
> > > Quoting Joonas Lahtinen (2017-08-16 14:39:00)
> > > > On Sat, 2017-08-12 at 12:51 +0100, Chris Wilson wrote:
> > > > > Prefer to defer activating our GEM shrinker until we have a few
> > > > > megabytes to free; or we have accumulated sufficient mempressure by
> > > > > deferring the reclaim to force a shrink. The intent is that because our
> > > > > objects may typically be large, we are too effective at shrinking and
> > > > > are not rewarded for freeing more pages than the batch. It will also
> > > > > defer the initial shrinking to hopefully put it at a lower priority than
> > > > > say the buffer cache (although it will balance out over a number of
> > > > > reclaims, with GEM being more bursty).
> > > > > 
> > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > > ---
> > > > >  drivers/gpu/drm/i915/i915_gem_shrinker.c | 1 +
> > > > >  1 file changed, 1 insertion(+)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > > > > index 5b8bc0e4f336..8bb17e9a52de 100644
> > > > > --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > > > > +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > > > > @@ -461,6 +461,7 @@ void i915_gem_shrinker_init(struct drm_i915_private *dev_priv)
> > > > >       dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
> > > > >       dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count;
> > > > >       dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS;
> > > > > +     dev_priv->mm.shrinker.batch = 4096;
> > > > 
> > > > Did you measure how this alone affects the runtime of two consecutive
> > > > gem.testlist runs? Is there some specific test/usecase that benefits
> > > > from this? We'd be the first to set this; md/raid5.c sets it to 128,
> > > > which is the default (a .batch of 0 also means 128).
> > > 
> > > My testing was trying to play a game that was hitting swap on an old
> > > hdd. So not very quantifiable, and vmscan is very unintuitive. 
> > > 
> > > Note also that we are special in that we don't report objects but pages.
> > > Not that it makes any difference: upon reclaim every slab is basically
> > > asked to give up some %% of what it reports, with some hysteresis thrown
> > > in on top.
> > > 
> > > The only way we can do anything here is to throw it at lots of systems
> > > and see how that helps. My gut feeling says that the batch size should
> > > be approximately the typical object size in the freeable list, to try to
> > > reduce the amount of inefficient work. Now, the value is read before
> > > scan->count is called, but we can always improve the estimate for the
> > > next pass.
> > 
> > For documentation purposes, from IRC, this is;
> > 
> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> 
> And for reference, I took a stab at measuring vmpressure with gem_syslatency
> https://patchwork.freedesktop.org/patch/178777/

And, did we get any results? :)

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 4/6] drm/i915: Set our shrinker->batch to 4096 (~16MiB)
  2017-09-27  8:38           ` Joonas Lahtinen
@ 2017-09-27  8:54             ` Chris Wilson
  0 siblings, 0 replies; 24+ messages in thread
From: Chris Wilson @ 2017-09-27  8:54 UTC (permalink / raw)
  To: Joonas Lahtinen, intel-gfx

Quoting Joonas Lahtinen (2017-09-27 09:38:20)
> On Tue, 2017-09-26 at 16:02 +0100, Chris Wilson wrote:
> > Quoting Joonas Lahtinen (2017-09-20 14:28:40)
> > > On Wed, 2017-08-16 at 14:55 +0100, Chris Wilson wrote:
> > > > Quoting Joonas Lahtinen (2017-08-16 14:39:00)
> > > > > On Sat, 2017-08-12 at 12:51 +0100, Chris Wilson wrote:
> > > > > > Prefer to defer activating our GEM shrinker until we have a few
> > > > > > megabytes to free; or we have accumulated sufficient mempressure by
> > > > > > deferring the reclaim to force a shrink. The intent is that because our
> > > > > > objects may typically be large, we are too effective at shrinking and
> > > > > > are not rewarded for freeing more pages than the batch. It will also
> > > > > > defer the initial shrinking to hopefully put it at a lower priority than
> > > > > > say the buffer cache (although it will balance out over a number of
> > > > > > reclaims, with GEM being more bursty).
> > > > > > 
> > > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > > > ---
> > > > > >  drivers/gpu/drm/i915/i915_gem_shrinker.c | 1 +
> > > > > >  1 file changed, 1 insertion(+)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > > > > > index 5b8bc0e4f336..8bb17e9a52de 100644
> > > > > > --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > > > > > +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > > > > > @@ -461,6 +461,7 @@ void i915_gem_shrinker_init(struct drm_i915_private *dev_priv)
> > > > > >       dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
> > > > > >       dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count;
> > > > > >       dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS;
> > > > > > +     dev_priv->mm.shrinker.batch = 4096;
> > > > > 
> > > > > Did you measure how this alone affects the runtime of two consecutive
> > > > > gem.testlist runs? Is there some specific test/usecase that benefits
> > > > > from this? We'd be the first to set this; md/raid5.c sets it to 128,
> > > > > which is the default (a .batch of 0 also means 128).
> > > > 
> > > > My testing was trying to play a game that was hitting swap on an old
> > > > hdd. So not very quantifiable, and vmscan is very unintuitive. 
> > > > 
> > > > Note also that we are special in that we don't report objects but pages.
> > > > Not that it makes any difference: upon reclaim every slab is basically
> > > > asked to give up some %% of what it reports, with some hysteresis thrown
> > > > in on top.
> > > > 
> > > > The only way we can do anything here is to throw it at lots of systems
> > > > and see how that helps. My gut feeling says that the batch size should
> > > > be approximately the typical object size in the freeable list, to try to
> > > > reduce the amount of inefficient work. Now, the value is read before
> > > > scan->count is called, but we can always improve the estimate for the
> > > > next pass.
> > > 
> > > For documentation purposes, from IRC, this is;
> > > 
> > > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > 
> > And for reference, I took a stab at measuring vmpressure with gem_syslatency
> > https://patchwork.freedesktop.org/patch/178777/
> 
> And, did we get any results? :)

Elsewhere I reported that the only thing that made a dramatic
difference was mlock_vma_pages(). That patch completely eradicates the
system stalls under vmpressure with memory full of i915 objects and fs
buffer cache.
-Chris
