All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] drm/i915: Split i915_gem_batch_pool into its own header
@ 2015-03-09  9:55 Chris Wilson
  2015-03-09  9:55 ` [PATCH 2/6] drm/i915: Tidy batch pool logic Chris Wilson
                   ` (5 more replies)
  0 siblings, 6 replies; 32+ messages in thread
From: Chris Wilson @ 2015-03-09  9:55 UTC (permalink / raw)
  To: intel-gfx

In the next patch, I want to use the structure elsewhere and so require
it defined earlier. Rather than move the definition to an earlier location
where it feels very odd, place it in its own header file.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            | 13 +--------
 drivers/gpu/drm/i915/i915_gem_batch_pool.c |  1 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.h | 42 ++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 12 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.h

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c986e355e4d0..b62e3c478221 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -37,6 +37,7 @@
 #include "intel_bios.h"
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
+#include "i915_gem_batch_pool.h"
 #include "i915_gem_gtt.h"
 #include "i915_gem_render_state.h"
 #include <linux/io-mapping.h>
@@ -1135,11 +1136,6 @@ struct intel_l3_parity {
 	int which_slice;
 };
 
-struct i915_gem_batch_pool {
-	struct drm_device *dev;
-	struct list_head cache_list;
-};
-
 struct i915_gem_mm {
 	/** Memory allocator for GTT stolen memory */
 	struct drm_mm stolen;
@@ -3048,13 +3044,6 @@ void i915_destroy_error_state(struct drm_device *dev);
 void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 
-/* i915_gem_batch_pool.c */
-void i915_gem_batch_pool_init(struct drm_device *dev,
-			      struct i915_gem_batch_pool *pool);
-void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
-struct drm_i915_gem_object*
-i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
-
 /* i915_cmd_parser.c */
 int i915_cmd_parser_get_version(void);
 int i915_cmd_parser_init_ring(struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index c690170a1c4f..564be7c5ea7e 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -23,6 +23,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_gem_batch_pool.h"
 
 /**
  * DOC: batch pool
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
new file mode 100644
index 000000000000..5ed70ef6a887
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef I915_GEM_BATCH_POOL_H
+#define I915_GEM_BATCH_POOL_H
+
+#include "i915_drv.h"
+
+struct i915_gem_batch_pool {
+	struct drm_device *dev;
+	struct list_head cache_list;
+};
+
+/* i915_gem_batch_pool.c */
+void i915_gem_batch_pool_init(struct drm_device *dev,
+			      struct i915_gem_batch_pool *pool);
+void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
+struct drm_i915_gem_object*
+i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
+
+#endif /* I915_GEM_BATCH_POOL_H */
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 2/6] drm/i915: Tidy batch pool logic
  2015-03-09  9:55 [PATCH 1/6] drm/i915: Split i915_gem_batch_pool into its own header Chris Wilson
@ 2015-03-09  9:55 ` Chris Wilson
  2015-03-17 17:42   ` Tvrtko Ursulin
  2015-03-09  9:55 ` [PATCH 3/6] drm/i915: Split the batch pool by engine Chris Wilson
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2015-03-09  9:55 UTC (permalink / raw)
  To: intel-gfx

Move the madvise logic out of the execbuffer main path into the
relatively rare allocation path, making the execbuffer manipulation less
fragile.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c     | 12 +++------
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 39 +++++++++++++++---------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 ++++------
 3 files changed, 27 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 9a6da3536ae5..3e5e8cb54a88 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -866,6 +866,9 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
 	    batch_len + batch_start_offset > src_obj->base.size)
 		return ERR_PTR(-E2BIG);
 
+	if (WARN_ON(dest_obj->pages_pin_count == 0))
+		return ERR_PTR(-ENODEV);
+
 	ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
 	if (ret) {
 		DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n");
@@ -879,13 +882,6 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
 		goto unpin_src;
 	}
 
-	ret = i915_gem_object_get_pages(dest_obj);
-	if (ret) {
-		DRM_DEBUG_DRIVER("CMD: Failed to get pages for shadow batch\n");
-		goto unmap_src;
-	}
-	i915_gem_object_pin_pages(dest_obj);
-
 	ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
 	if (ret) {
 		DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n");
@@ -895,7 +891,6 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
 	dst = vmap_batch(dest_obj, 0, batch_len);
 	if (!dst) {
 		DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
-		i915_gem_object_unpin_pages(dest_obj);
 		ret = -ENOMEM;
 		goto unmap_src;
 	}
@@ -1126,7 +1121,6 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 	}
 
 	vunmap(batch_base);
-	i915_gem_object_unpin_pages(shadow_batch_obj);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index 564be7c5ea7e..21f3356cc0ab 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -67,25 +67,23 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
 					 struct drm_i915_gem_object,
 					 batch_pool_list);
 
-		WARN_ON(obj->active);
-
-		list_del_init(&obj->batch_pool_list);
+		list_del(&obj->batch_pool_list);
 		drm_gem_object_unreference(&obj->base);
 	}
 }
 
 /**
- * i915_gem_batch_pool_get() - select a buffer from the pool
+ * i915_gem_batch_pool_get() - allocate a buffer from the pool
  * @pool: the batch buffer pool
  * @size: the minimum desired size of the returned buffer
  *
- * Finds or allocates a batch buffer in the pool with at least the requested
- * size. The caller is responsible for any domain, active/inactive, or
- * purgeability management for the returned buffer.
+ * Returns an inactive buffer from @pool with at least @size bytes,
+ * with the pages pinned. The caller must i915_gem_object_unpin_pages()
+ * on the returned object.
  *
  * Note: Callers must hold the struct_mutex
  *
- * Return: the selected batch buffer object
+ * Return: the buffer object or an error pointer
  */
 struct drm_i915_gem_object *
 i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
@@ -97,8 +95,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
 
 	list_for_each_entry_safe(tmp, next,
-			&pool->cache_list, batch_pool_list) {
-
+				 &pool->cache_list, batch_pool_list) {
 		if (tmp->active)
 			continue;
 
@@ -114,25 +111,27 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 		 * but not 'too much' bigger. A better way to do this
 		 * might be to bucket the pool objects based on size.
 		 */
-		if (tmp->base.size >= size &&
-		    tmp->base.size <= (2 * size)) {
+		if (tmp->base.size >= size && tmp->base.size <= 2 * size) {
 			obj = tmp;
 			break;
 		}
 	}
 
-	if (!obj) {
+	if (obj == NULL) {
+		int ret;
+
 		obj = i915_gem_alloc_object(pool->dev, size);
-		if (!obj)
+		if (obj == NULL)
 			return ERR_PTR(-ENOMEM);
 
-		list_add_tail(&obj->batch_pool_list, &pool->cache_list);
-	}
-	else
-		/* Keep list in LRU order */
-		list_move_tail(&obj->batch_pool_list, &pool->cache_list);
+		ret = i915_gem_object_get_pages(obj);
+		if (ret)
+			return ERR_PTR(ret);
 
-	obj->madv = I915_MADV_WILLNEED;
+		obj->madv = I915_MADV_DONTNEED;
+	}
 
+	list_move_tail(&obj->batch_pool_list, &pool->cache_list);
+	i915_gem_object_pin_pages(obj);
 	return obj;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 58537079d3d7..36f8f05928d1 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -37,7 +37,6 @@
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
 #define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
-#define  __EXEC_OBJECT_PURGEABLE (1<<27)
 
 #define BATCH_OFFSET_BIAS (256*1024)
 
@@ -224,12 +223,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
 		vma->pin_count--;
 
-	if (entry->flags & __EXEC_OBJECT_PURGEABLE)
-		obj->madv = I915_MADV_DONTNEED;
-
-	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE |
-			  __EXEC_OBJECT_HAS_PIN |
-			  __EXEC_OBJECT_PURGEABLE);
+	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 }
 
 static void eb_destroy(struct eb_vmas *eb)
@@ -1181,11 +1175,13 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
 	if (ret)
 		goto err;
 
+	i915_gem_object_unpin_pages(shadow_batch_obj);
+
 	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
 
 	vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
 	vma->exec_entry = shadow_exec_entry;
-	vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE | __EXEC_OBJECT_HAS_PIN;
+	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
 	drm_gem_object_reference(&shadow_batch_obj->base);
 	list_add_tail(&vma->exec_list, &eb->vmas);
 
@@ -1194,6 +1190,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
 	return shadow_batch_obj;
 
 err:
+	i915_gem_object_unpin_pages(shadow_batch_obj);
 	if (ret == -EACCES) /* unhandled chained batch */
 		return batch_obj;
 	else
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 3/6] drm/i915: Split the batch pool by engine
  2015-03-09  9:55 [PATCH 1/6] drm/i915: Split i915_gem_batch_pool into its own header Chris Wilson
  2015-03-09  9:55 ` [PATCH 2/6] drm/i915: Tidy batch pool logic Chris Wilson
@ 2015-03-09  9:55 ` Chris Wilson
  2015-03-18 16:51   ` Tvrtko Ursulin
  2015-03-09  9:55 ` [PATCH 4/6] drm/i915: Free batch pool when idle Chris Wilson
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2015-03-09  9:55 UTC (permalink / raw)
  To: intel-gfx

I woke up one morning and found 50k objects sitting in the batch pool
and every search seemed to iterate the entire list... Painting the
screen in oils would provide a more fluid display.

One issue with the current design is that we only check for retirements
on the current ring when preparing to submit a new batch. This means
that we can have thousands of "active" batches on another ring that we
have to walk over. The simplest way to avoid that is to split the pools
per ring and then our LRU execution ordering will also ensure that the
inactive buffers remain at the front.

v2: execlists still requires duplicate code.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        | 33 ++++++++++++++++++------------
 drivers/gpu/drm/i915/i915_dma.c            |  1 -
 drivers/gpu/drm/i915/i915_drv.h            |  8 --------
 drivers/gpu/drm/i915/i915_gem.c            |  2 --
 drivers/gpu/drm/i915/i915_gem_batch_pool.c |  3 ++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3 +--
 drivers/gpu/drm/i915/intel_lrc.c           |  1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  2 ++
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  8 ++++++++
 9 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d2e6475b0466..d8ca5c89647c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -377,13 +377,17 @@ static void print_batch_pool_stats(struct seq_file *m,
 {
 	struct drm_i915_gem_object *obj;
 	struct file_stats stats;
+	struct intel_engine_cs *ring;
+	int i;
 
 	memset(&stats, 0, sizeof(stats));
 
-	list_for_each_entry(obj,
-			    &dev_priv->mm.batch_pool.cache_list,
-			    batch_pool_list)
-		per_file_stats(0, obj, &stats);
+	for_each_ring(ring, dev_priv, i) {
+		list_for_each_entry(obj,
+				    &ring->batch_pool.cache_list,
+				    batch_pool_list)
+			per_file_stats(0, obj, &stats);
+	}
 
 	print_file_stats(m, "batch pool", stats);
 }
@@ -619,21 +623,24 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
+	struct intel_engine_cs *ring;
 	int count = 0;
-	int ret;
+	int ret, i;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
 		return ret;
 
-	seq_puts(m, "cache:\n");
-	list_for_each_entry(obj,
-			    &dev_priv->mm.batch_pool.cache_list,
-			    batch_pool_list) {
-		seq_puts(m, "   ");
-		describe_obj(m, obj);
-		seq_putc(m, '\n');
-		count++;
+	for_each_ring(ring, dev_priv, i) {
+		seq_printf(m, "%s cache:\n", ring->name);
+		list_for_each_entry(obj,
+				    &ring->batch_pool.cache_list,
+				    batch_pool_list) {
+			seq_puts(m, "   ");
+			describe_obj(m, obj);
+			seq_putc(m, '\n');
+			count++;
+		}
 	}
 
 	seq_printf(m, "total: %d\n", count);
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 8e914303b831..70e050ac02b4 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1062,7 +1062,6 @@ int i915_driver_unload(struct drm_device *dev)
 
 	mutex_lock(&dev->struct_mutex);
 	i915_gem_cleanup_ringbuffer(dev);
-	i915_gem_batch_pool_fini(&dev_priv->mm.batch_pool);
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
 	i915_gem_cleanup_stolen(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b62e3c478221..9fe1274cfe59 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -37,7 +37,6 @@
 #include "intel_bios.h"
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
-#include "i915_gem_batch_pool.h"
 #include "i915_gem_gtt.h"
 #include "i915_gem_render_state.h"
 #include <linux/io-mapping.h>
@@ -1149,13 +1148,6 @@ struct i915_gem_mm {
 	 */
 	struct list_head unbound_list;
 
-	/*
-	 * A pool of objects to use as shadow copies of client batch buffers
-	 * when the command parser is enabled. Prevents the client from
-	 * modifying the batch contents after software parsing.
-	 */
-	struct i915_gem_batch_pool batch_pool;
-
 	/** Usable portion of the GTT for GEM */
 	unsigned long stolen_base; /* limited to low memory (32-bit) */
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4d8fb3f4a7a1..9f33005bdfd1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5039,8 +5039,6 @@ i915_gem_load(struct drm_device *dev)
 	dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
 	register_oom_notifier(&dev_priv->mm.oom_notifier);
 
-	i915_gem_batch_pool_init(dev, &dev_priv->mm.batch_pool);
-
 	mutex_init(&dev_priv->fb_tracking.lock);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index 21f3356cc0ab..1287abf55b84 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -96,8 +96,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 
 	list_for_each_entry_safe(tmp, next,
 				 &pool->cache_list, batch_pool_list) {
+		/* The batches are strictly LRU ordered */
 		if (tmp->active)
-			continue;
+			break;
 
 		/* While we're looping, do some clean up */
 		if (tmp->madv == __I915_MADV_PURGED) {
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 36f8f05928d1..8129a8c75a21 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1152,12 +1152,11 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
 			  u32 batch_len,
 			  bool is_master)
 {
-	struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
 	struct drm_i915_gem_object *shadow_batch_obj;
 	struct i915_vma *vma;
 	int ret;
 
-	shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
+	shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool,
 						   PAGE_ALIGN(batch_len));
 	if (IS_ERR(shadow_batch_obj))
 		return shadow_batch_obj;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fcb074bd55dc..a6d4d5502188 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1421,6 +1421,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
+	i915_gem_batch_pool_init(dev, &ring->batch_pool);
 	init_waitqueue_head(&ring->irq_queue);
 
 	INIT_LIST_HEAD(&ring->execlist_queue);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 441e2502b889..a351178913f7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1972,6 +1972,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	INIT_LIST_HEAD(&ring->execlist_queue);
+	i915_gem_batch_pool_init(dev, &ring->batch_pool);
 	ringbuf->size = 32 * PAGE_SIZE;
 	ringbuf->ring = ring;
 	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
@@ -2050,6 +2051,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 	cleanup_status_page(ring);
 
 	i915_cmd_parser_fini_ring(ring);
+	i915_gem_batch_pool_fini(&ring->batch_pool);
 
 	kfree(ringbuf);
 	ring->buffer = NULL;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c761fe05ad6f..1d08d8f9149d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -2,6 +2,7 @@
 #define _INTEL_RINGBUFFER_H_
 
 #include <linux/hashtable.h>
+#include "i915_gem_batch_pool.h"
 
 #define I915_CMD_HASH_ORDER 9
 
@@ -133,6 +134,13 @@ struct  intel_engine_cs {
 	struct		drm_device *dev;
 	struct intel_ringbuffer *buffer;
 
+	/*
+	 * A pool of objects to use as shadow copies of client batch buffers
+	 * when the command parser is enabled. Prevents the client from
+	 * modifying the batch contents after software parsing.
+	 */
+	struct i915_gem_batch_pool batch_pool;
+
 	struct intel_hw_status_page status_page;
 
 	unsigned irq_refcount; /* protected by dev_priv->irq_lock */
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 4/6] drm/i915: Free batch pool when idle
  2015-03-09  9:55 [PATCH 1/6] drm/i915: Split i915_gem_batch_pool into its own header Chris Wilson
  2015-03-09  9:55 ` [PATCH 2/6] drm/i915: Tidy batch pool logic Chris Wilson
  2015-03-09  9:55 ` [PATCH 3/6] drm/i915: Split the batch pool by engine Chris Wilson
@ 2015-03-09  9:55 ` Chris Wilson
  2015-03-19 17:03   ` Tvrtko Ursulin
  2015-03-09  9:55 ` [PATCH 5/6] drm/i915: Split batch pool into size buckets Chris Wilson
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2015-03-09  9:55 UTC (permalink / raw)
  To: intel-gfx

At runtime, this helps ensure that the batch pools are kept trim and
fast. Then at suspend, this releases memory that we do not need to
restore. It also ties into the oom-notifier to ensure that we recover as
much kernel memory as possible during OOM.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9f33005bdfd1..efb5545251c7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2849,8 +2849,19 @@ i915_gem_idle_work_handler(struct work_struct *work)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(work, typeof(*dev_priv), mm.idle_work.work);
+	struct drm_device *dev = dev_priv->dev;
+
+	intel_mark_idle(dev);
 
-	intel_mark_idle(dev_priv->dev);
+	if (mutex_trylock(&dev->struct_mutex)) {
+		struct intel_engine_cs *ring;
+		int i;
+
+		for_each_ring(ring, dev_priv, i)
+			i915_gem_batch_pool_fini(&ring->batch_pool);
+
+		mutex_unlock(&dev->struct_mutex);
+	}
 }
 
 /**
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 5/6] drm/i915: Split batch pool into size buckets
  2015-03-09  9:55 [PATCH 1/6] drm/i915: Split i915_gem_batch_pool into its own header Chris Wilson
                   ` (2 preceding siblings ...)
  2015-03-09  9:55 ` [PATCH 4/6] drm/i915: Free batch pool when idle Chris Wilson
@ 2015-03-09  9:55 ` Chris Wilson
  2015-03-19 17:35   ` Tvrtko Ursulin
  2015-03-09  9:55 ` [PATCH 6/6] drm/i915: Include active flag when describing objects in debugfs Chris Wilson
  2015-03-19 17:37 ` [PATCH 1/6] drm/i915: Split i915_gem_batch_pool into its own header Tvrtko Ursulin
  5 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2015-03-09  9:55 UTC (permalink / raw)
  To: intel-gfx

Now with the trimmed memcpy before the command parser, we try to
allocate many different sizes of batches, predominantly one or two
pages. We can therefore speed up searching for a good-sized batch by
keeping the objects in buckets of roughly the same size.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        | 46 +++++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_drv.h            |  2 +-
 drivers/gpu/drm/i915/i915_gem.c            |  2 +-
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 45 +++++++++++++++++------------
 drivers/gpu/drm/i915/i915_gem_batch_pool.h |  2 +-
 5 files changed, 60 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d8ca5c89647c..042ad2fec484 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -378,15 +378,17 @@ static void print_batch_pool_stats(struct seq_file *m,
 	struct drm_i915_gem_object *obj;
 	struct file_stats stats;
 	struct intel_engine_cs *ring;
-	int i;
+	int i, j;
 
 	memset(&stats, 0, sizeof(stats));
 
 	for_each_ring(ring, dev_priv, i) {
-		list_for_each_entry(obj,
-				    &ring->batch_pool.cache_list,
-				    batch_pool_list)
-			per_file_stats(0, obj, &stats);
+		for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) {
+			list_for_each_entry(obj,
+					    &ring->batch_pool.cache_list[j],
+					    batch_pool_link)
+				per_file_stats(0, obj, &stats);
+		}
 	}
 
 	print_file_stats(m, "batch pool", stats);
@@ -624,26 +626,38 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
 	struct intel_engine_cs *ring;
-	int count = 0;
-	int ret, i;
+	int total = 0;
+	int ret, i, j;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
 		return ret;
 
 	for_each_ring(ring, dev_priv, i) {
-		seq_printf(m, "%s cache:\n", ring->name);
-		list_for_each_entry(obj,
-				    &ring->batch_pool.cache_list,
-				    batch_pool_list) {
-			seq_puts(m, "   ");
-			describe_obj(m, obj);
-			seq_putc(m, '\n');
-			count++;
+		for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) {
+			int count;
+
+			count = 0;
+			list_for_each_entry(obj,
+					    &ring->batch_pool.cache_list[j],
+					    batch_pool_link)
+				count++;
+			seq_printf(m, "%s cache[%d]: %d objects\n",
+				   ring->name, j, count);
+
+			list_for_each_entry(obj,
+					    &ring->batch_pool.cache_list[j],
+					    batch_pool_link) {
+				seq_puts(m, "   ");
+				describe_obj(m, obj);
+				seq_putc(m, '\n');
+			}
+
+			total += count;
 		}
 	}
 
-	seq_printf(m, "total: %d\n", count);
+	seq_printf(m, "total: %d\n", total);
 
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9fe1274cfe59..5c7b89bd138d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1893,7 +1893,7 @@ struct drm_i915_gem_object {
 	/** Used in execbuf to temporarily hold a ref */
 	struct list_head obj_exec_link;
 
-	struct list_head batch_pool_list;
+	struct list_head batch_pool_link;
 
 	/**
 	 * This is set if the object is on the active lists (has pending
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index efb5545251c7..a0c35f80836c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4439,7 +4439,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&obj->ring_list);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
-	INIT_LIST_HEAD(&obj->batch_pool_list);
+	INIT_LIST_HEAD(&obj->batch_pool_link);
 
 	obj->ops = ops;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index 1287abf55b84..23374352d8eb 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -47,8 +47,12 @@
 void i915_gem_batch_pool_init(struct drm_device *dev,
 			      struct i915_gem_batch_pool *pool)
 {
+	int n;
+
 	pool->dev = dev;
-	INIT_LIST_HEAD(&pool->cache_list);
+
+	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
+		INIT_LIST_HEAD(&pool->cache_list[n]);
 }
 
 /**
@@ -59,16 +63,20 @@ void i915_gem_batch_pool_init(struct drm_device *dev,
  */
 void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
 {
+	int n;
+
 	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
 
-	while (!list_empty(&pool->cache_list)) {
-		struct drm_i915_gem_object *obj =
-			list_first_entry(&pool->cache_list,
-					 struct drm_i915_gem_object,
-					 batch_pool_list);
+	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+		while (!list_empty(&pool->cache_list[n])) {
+			struct drm_i915_gem_object *obj =
+				list_first_entry(&pool->cache_list[n],
+						 struct drm_i915_gem_object,
+						 batch_pool_link);
 
-		list_del(&obj->batch_pool_list);
-		drm_gem_object_unreference(&obj->base);
+			list_del(&obj->batch_pool_link);
+			drm_gem_object_unreference(&obj->base);
+		}
 	}
 }
 
@@ -91,28 +99,29 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 {
 	struct drm_i915_gem_object *obj = NULL;
 	struct drm_i915_gem_object *tmp, *next;
+	struct list_head *list;
+	int n;
 
 	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
 
-	list_for_each_entry_safe(tmp, next,
-				 &pool->cache_list, batch_pool_list) {
+	n = fls(size >> PAGE_SHIFT) - 1;
+	if (n >= ARRAY_SIZE(pool->cache_list))
+		n = ARRAY_SIZE(pool->cache_list) - 1;
+	list = &pool->cache_list[n];
+
+	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
 		/* The batches are strictly LRU ordered */
 		if (tmp->active)
 			break;
 
 		/* While we're looping, do some clean up */
 		if (tmp->madv == __I915_MADV_PURGED) {
-			list_del(&tmp->batch_pool_list);
+			list_del(&tmp->batch_pool_link);
 			drm_gem_object_unreference(&tmp->base);
 			continue;
 		}
 
-		/*
-		 * Select a buffer that is at least as big as needed
-		 * but not 'too much' bigger. A better way to do this
-		 * might be to bucket the pool objects based on size.
-		 */
-		if (tmp->base.size >= size && tmp->base.size <= 2 * size) {
+		if (tmp->base.size >= size) {
 			obj = tmp;
 			break;
 		}
@@ -132,7 +141,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 		obj->madv = I915_MADV_DONTNEED;
 	}
 
-	list_move_tail(&obj->batch_pool_list, &pool->cache_list);
+	list_move_tail(&obj->batch_pool_link, list);
 	i915_gem_object_pin_pages(obj);
 	return obj;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
index 5ed70ef6a887..848e90703eed 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -29,7 +29,7 @@
 
 struct i915_gem_batch_pool {
 	struct drm_device *dev;
-	struct list_head cache_list;
+	struct list_head cache_list[4];
 };
 
 /* i915_gem_batch_pool.c */
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 6/6] drm/i915: Include active flag when describing objects in debugfs
  2015-03-09  9:55 [PATCH 1/6] drm/i915: Split i915_gem_batch_pool into its own header Chris Wilson
                   ` (3 preceding siblings ...)
  2015-03-09  9:55 ` [PATCH 5/6] drm/i915: Split batch pool into size buckets Chris Wilson
@ 2015-03-09  9:55 ` Chris Wilson
  2015-03-09 14:59   ` shuang.he
  2015-03-19 17:41   ` Tvrtko Ursulin
  2015-03-19 17:37 ` [PATCH 1/6] drm/i915: Split i915_gem_batch_pool into its own header Tvrtko Ursulin
  5 siblings, 2 replies; 32+ messages in thread
From: Chris Wilson @ 2015-03-09  9:55 UTC (permalink / raw)
  To: intel-gfx

Since we use obj->active as a hint in many places throughout the code,
knowing its state in debugfs is extremely useful.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 042ad2fec484..809f6eadc10c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -123,8 +123,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	struct i915_vma *vma;
 	int pin_count = 0;
 
-	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
+	seq_printf(m, "%pK: %s%s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
 		   &obj->base,
+		   obj->active ? "*" : " ",
 		   get_pin_flag(obj),
 		   get_tiling_flag(obj),
 		   get_global_flag(obj),
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH 6/6] drm/i915: Include active flag when describing objects in debugfs
  2015-03-09  9:55 ` [PATCH 6/6] drm/i915: Include active flag when describing objects in debugfs Chris Wilson
@ 2015-03-09 14:59   ` shuang.he
  2015-03-19 17:41   ` Tvrtko Ursulin
  1 sibling, 0 replies; 32+ messages in thread
From: shuang.he @ 2015-03-09 14:59 UTC (permalink / raw)
  To: shuang.he, ethan.gao, intel-gfx, chris

Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 5916
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
PNV                 -2              282/282              280/282
ILK                                  308/308              308/308
SNB                                  307/307              307/307
IVB                 -2              375/375              373/375
BYT                 -2              294/294              292/294
HSW                 -3              385/385              382/385
BDW                                  315/315              315/315
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
*PNV  igt_drv_debugfs_reader      PASS(2)      DMESG_WARN(1)PASS(1)
*PNV  igt_drv_hangman_error-state-sysfs-entry      PASS(2)      TIMEOUT(1)PASS(1)
*IVB  igt_gem_flink_race_flink_close      PASS(2)      FAIL(2)
*IVB  igt_prime_self_import_reimport-vs-gem_close-race      PASS(2)      FAIL(2)
*BYT  igt_gem_flink_race_flink_close      PASS(2)      FAIL(2)
*BYT  igt_prime_self_import_reimport-vs-gem_close-race      PASS(2)      FAIL(2)
*HSW  igt_gem_flink_race_flink_close      PASS(2)      FAIL(2)
*HSW  igt_gem_storedw_loop_bsd      PASS(2)      DMESG_WARN(1)PASS(1)
*HSW  igt_prime_self_import_reimport-vs-gem_close-race      PASS(2)      FAIL(2)
Note: You need to pay more attention to line start with '*'
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 2/6] drm/i915: Tidy batch pool logic
  2015-03-09  9:55 ` [PATCH 2/6] drm/i915: Tidy batch pool logic Chris Wilson
@ 2015-03-17 17:42   ` Tvrtko Ursulin
  2015-03-17 20:53     ` Chris Wilson
  0 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-03-17 17:42 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


Hi,

Tidy is a small understatement but OK.. :) inline..

On 03/09/2015 09:55 AM, Chris Wilson wrote:
> Move the madvise logic out of the execbuffer main path into the
> relatively rare allocation path, making the execbuffer manipulation less
> fragile.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_cmd_parser.c     | 12 +++------
>   drivers/gpu/drm/i915/i915_gem_batch_pool.c | 39 +++++++++++++++---------------
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 ++++------
>   3 files changed, 27 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> index 9a6da3536ae5..3e5e8cb54a88 100644
> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> @@ -866,6 +866,9 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
>   	    batch_len + batch_start_offset > src_obj->base.size)
>   		return ERR_PTR(-E2BIG);
>
> +	if (WARN_ON(dest_obj->pages_pin_count == 0))
> +		return ERR_PTR(-ENODEV);
> +
>   	ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
>   	if (ret) {
>   		DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n");
> @@ -879,13 +882,6 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
>   		goto unpin_src;
>   	}
>
> -	ret = i915_gem_object_get_pages(dest_obj);
> -	if (ret) {
> -		DRM_DEBUG_DRIVER("CMD: Failed to get pages for shadow batch\n");
> -		goto unmap_src;
> -	}
> -	i915_gem_object_pin_pages(dest_obj);
> -
>   	ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
>   	if (ret) {
>   		DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n");
> @@ -895,7 +891,6 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
>   	dst = vmap_batch(dest_obj, 0, batch_len);
>   	if (!dst) {
>   		DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
> -		i915_gem_object_unpin_pages(dest_obj);
>   		ret = -ENOMEM;
>   		goto unmap_src;
>   	}

My tree, or maybe the current tree, has another unpin under unpin_src 
label, so perhaps you'll need to rebase?

> @@ -1126,7 +1121,6 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
>   	}
>
>   	vunmap(batch_base);
> -	i915_gem_object_unpin_pages(shadow_batch_obj);
>
>   	return ret;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> index 564be7c5ea7e..21f3356cc0ab 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> @@ -67,25 +67,23 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
>   					 struct drm_i915_gem_object,
>   					 batch_pool_list);
>
> -		WARN_ON(obj->active);
> -
> -		list_del_init(&obj->batch_pool_list);
> +		list_del(&obj->batch_pool_list);
>   		drm_gem_object_unreference(&obj->base);
>   	}
>   }
>
>   /**
> - * i915_gem_batch_pool_get() - select a buffer from the pool
> + * i915_gem_batch_pool_get() - allocate a buffer from the pool
>    * @pool: the batch buffer pool
>    * @size: the minimum desired size of the returned buffer
>    *
> - * Finds or allocates a batch buffer in the pool with at least the requested
> - * size. The caller is responsible for any domain, active/inactive, or
> - * purgeability management for the returned buffer.
> + * Returns an inactive buffer from @pool with at least @size bytes,
> + * with the pages pinned. The caller must i915_gem_object_unpin_pages()
> + * on the returned object.
>    *
>    * Note: Callers must hold the struct_mutex
>    *
> - * Return: the selected batch buffer object
> + * Return: the buffer object or an error pointer
>    */
>   struct drm_i915_gem_object *
>   i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
> @@ -97,8 +95,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>   	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
>
>   	list_for_each_entry_safe(tmp, next,
> -			&pool->cache_list, batch_pool_list) {
> -
> +				 &pool->cache_list, batch_pool_list) {
>   		if (tmp->active)
>   			continue;
>
> @@ -114,25 +111,27 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>   		 * but not 'too much' bigger. A better way to do this
>   		 * might be to bucket the pool objects based on size.
>   		 */
> -		if (tmp->base.size >= size &&
> -		    tmp->base.size <= (2 * size)) {
> +		if (tmp->base.size >= size && tmp->base.size <= 2 * size) {
>   			obj = tmp;
>   			break;
>   		}
>   	}
>
> -	if (!obj) {
> +	if (obj == NULL) {
> +		int ret;
> +
>   		obj = i915_gem_alloc_object(pool->dev, size);
> -		if (!obj)
> +		if (obj == NULL)
>   			return ERR_PTR(-ENOMEM);
>
> -		list_add_tail(&obj->batch_pool_list, &pool->cache_list);
> -	}
> -	else
> -		/* Keep list in LRU order */
> -		list_move_tail(&obj->batch_pool_list, &pool->cache_list);
> +		ret = i915_gem_object_get_pages(obj);
> +		if (ret)
> +			return ERR_PTR(ret);
>
> -	obj->madv = I915_MADV_WILLNEED;
> +		obj->madv = I915_MADV_DONTNEED;
> +	}
>
> +	list_move_tail(&obj->batch_pool_list, &pool->cache_list);
> +	i915_gem_object_pin_pages(obj);
>   	return obj;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 58537079d3d7..36f8f05928d1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -37,7 +37,6 @@
>   #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
>   #define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
>   #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
> -#define  __EXEC_OBJECT_PURGEABLE (1<<27)
>
>   #define BATCH_OFFSET_BIAS (256*1024)
>
> @@ -224,12 +223,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
>   	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
>   		vma->pin_count--;
>
> -	if (entry->flags & __EXEC_OBJECT_PURGEABLE)
> -		obj->madv = I915_MADV_DONTNEED;
> -
> -	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE |
> -			  __EXEC_OBJECT_HAS_PIN |
> -			  __EXEC_OBJECT_PURGEABLE);
> +	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
>   }
>
>   static void eb_destroy(struct eb_vmas *eb)
> @@ -1181,11 +1175,13 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
>   	if (ret)
>   		goto err;
>
> +	i915_gem_object_unpin_pages(shadow_batch_obj);
> +
>   	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
>
>   	vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
>   	vma->exec_entry = shadow_exec_entry;
> -	vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE | __EXEC_OBJECT_HAS_PIN;
> +	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
>   	drm_gem_object_reference(&shadow_batch_obj->base);
>   	list_add_tail(&vma->exec_list, &eb->vmas);
>
> @@ -1194,6 +1190,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
>   	return shadow_batch_obj;
>
>   err:
> +	i915_gem_object_unpin_pages(shadow_batch_obj);
>   	if (ret == -EACCES) /* unhandled chained batch */
>   		return batch_obj;
>   	else
>

Lets see if I got it right: By making the returned object pinned and 
having the caller unpin when done with it, it now can always be in 
"dontneed" state, and will be a shrinker candidate only (or as soon) 
when it is unpinned, correct?

I just don't get at the moment, if previously there was some race or bad 
interaction with less pinning and that signaling using 
_EXEC_OBJECT_PURGEABLE?

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 2/6] drm/i915: Tidy batch pool logic
  2015-03-17 17:42   ` Tvrtko Ursulin
@ 2015-03-17 20:53     ` Chris Wilson
  0 siblings, 0 replies; 32+ messages in thread
From: Chris Wilson @ 2015-03-17 20:53 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Tue, Mar 17, 2015 at 05:42:12PM +0000, Tvrtko Ursulin wrote:
> Lets see if I got it right: By making the returned object pinned and
> having the caller unpin when done with it, it now can always be in
> "dontneed" state, and will be a shrinker candidate only (or as soon)
> when it is unpinned, correct?
> 
> I just don't get at the moment, if previously there was some race or
> bad interaction with less pinning and that signaling using
> _EXEC_OBJECT_PURGEABLE?

The code was using overzealous pinning (too much too early), did too
much work manipulating tricky state and introduced an unneeded flag into
a user API field. It is very clumsy.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 3/6] drm/i915: Split the batch pool by engine
  2015-03-09  9:55 ` [PATCH 3/6] drm/i915: Split the batch pool by engine Chris Wilson
@ 2015-03-18 16:51   ` Tvrtko Ursulin
  2015-03-18 17:27     ` Chris Wilson
  0 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-03-18 16:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/09/2015 09:55 AM, Chris Wilson wrote:
> I woke up one morning and found 50k objects sitting in the batch pool
> and every search seemed to iterate the entire list... Painting the
> screen in oils would provide a more fluid display.
>
> One issue with the current design is that we only check for retirements
> on the current ring when preparing to submit a new batch. This means
> that we can have thousands of "active" batches on another ring that we
> have to walk over. The simplest way to avoid that is to split the pools
> per ring and then our LRU execution ordering will also ensure that the
> inactive buffers remain at the front.

Is the retire work handler not keeping up? Regularly or under some 
specific circumstances?

> v2: execlists still requires duplicate code.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c        | 33 ++++++++++++++++++------------
>   drivers/gpu/drm/i915/i915_dma.c            |  1 -
>   drivers/gpu/drm/i915/i915_drv.h            |  8 --------
>   drivers/gpu/drm/i915/i915_gem.c            |  2 --
>   drivers/gpu/drm/i915/i915_gem_batch_pool.c |  3 ++-
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3 +--
>   drivers/gpu/drm/i915/intel_lrc.c           |  1 +
>   drivers/gpu/drm/i915/intel_ringbuffer.c    |  2 ++
>   drivers/gpu/drm/i915/intel_ringbuffer.h    |  8 ++++++++
>   9 files changed, 34 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index d2e6475b0466..d8ca5c89647c 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -377,13 +377,17 @@ static void print_batch_pool_stats(struct seq_file *m,
>   {
>   	struct drm_i915_gem_object *obj;
>   	struct file_stats stats;
> +	struct intel_engine_cs *ring;
> +	int i;
>
>   	memset(&stats, 0, sizeof(stats));
>
> -	list_for_each_entry(obj,
> -			    &dev_priv->mm.batch_pool.cache_list,
> -			    batch_pool_list)
> -		per_file_stats(0, obj, &stats);
> +	for_each_ring(ring, dev_priv, i) {
> +		list_for_each_entry(obj,
> +				    &ring->batch_pool.cache_list,
> +				    batch_pool_list)
> +			per_file_stats(0, obj, &stats);
> +	}
>
>   	print_file_stats(m, "batch pool", stats);
>   }
> @@ -619,21 +623,24 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
>   	struct drm_device *dev = node->minor->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct drm_i915_gem_object *obj;
> +	struct intel_engine_cs *ring;
>   	int count = 0;
> -	int ret;
> +	int ret, i;
>
>   	ret = mutex_lock_interruptible(&dev->struct_mutex);
>   	if (ret)
>   		return ret;
>
> -	seq_puts(m, "cache:\n");
> -	list_for_each_entry(obj,
> -			    &dev_priv->mm.batch_pool.cache_list,
> -			    batch_pool_list) {
> -		seq_puts(m, "   ");
> -		describe_obj(m, obj);
> -		seq_putc(m, '\n');
> -		count++;
> +	for_each_ring(ring, dev_priv, i) {
> +		seq_printf(m, "%s cache:\n", ring->name);
> +		list_for_each_entry(obj,
> +				    &ring->batch_pool.cache_list,
> +				    batch_pool_list) {
> +			seq_puts(m, "   ");
> +			describe_obj(m, obj);
> +			seq_putc(m, '\n');
> +			count++;
> +		}
>   	}
>
>   	seq_printf(m, "total: %d\n", count);
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 8e914303b831..70e050ac02b4 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1062,7 +1062,6 @@ int i915_driver_unload(struct drm_device *dev)
>
>   	mutex_lock(&dev->struct_mutex);
>   	i915_gem_cleanup_ringbuffer(dev);
> -	i915_gem_batch_pool_fini(&dev_priv->mm.batch_pool);
>   	i915_gem_context_fini(dev);
>   	mutex_unlock(&dev->struct_mutex);
>   	i915_gem_cleanup_stolen(dev);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index b62e3c478221..9fe1274cfe59 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -37,7 +37,6 @@
>   #include "intel_bios.h"
>   #include "intel_ringbuffer.h"
>   #include "intel_lrc.h"
> -#include "i915_gem_batch_pool.h"
>   #include "i915_gem_gtt.h"
>   #include "i915_gem_render_state.h"
>   #include <linux/io-mapping.h>
> @@ -1149,13 +1148,6 @@ struct i915_gem_mm {
>   	 */
>   	struct list_head unbound_list;
>
> -	/*
> -	 * A pool of objects to use as shadow copies of client batch buffers
> -	 * when the command parser is enabled. Prevents the client from
> -	 * modifying the batch contents after software parsing.
> -	 */
> -	struct i915_gem_batch_pool batch_pool;
> -
>   	/** Usable portion of the GTT for GEM */
>   	unsigned long stolen_base; /* limited to low memory (32-bit) */
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 4d8fb3f4a7a1..9f33005bdfd1 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5039,8 +5039,6 @@ i915_gem_load(struct drm_device *dev)
>   	dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
>   	register_oom_notifier(&dev_priv->mm.oom_notifier);
>
> -	i915_gem_batch_pool_init(dev, &dev_priv->mm.batch_pool);
> -
>   	mutex_init(&dev_priv->fb_tracking.lock);
>   }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> index 21f3356cc0ab..1287abf55b84 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> @@ -96,8 +96,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>
>   	list_for_each_entry_safe(tmp, next,
>   				 &pool->cache_list, batch_pool_list) {
> +		/* The batches are strictly LRU ordered */
>   		if (tmp->active)
> -			continue;
> +			break;

This hunk would be a better fit for 2/6, correct?

>   		/* While we're looping, do some clean up */
>   		if (tmp->madv == __I915_MADV_PURGED) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 36f8f05928d1..8129a8c75a21 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1152,12 +1152,11 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
>   			  u32 batch_len,
>   			  bool is_master)
>   {
> -	struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
>   	struct drm_i915_gem_object *shadow_batch_obj;
>   	struct i915_vma *vma;
>   	int ret;
>
> -	shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
> +	shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool,
>   						   PAGE_ALIGN(batch_len));
>   	if (IS_ERR(shadow_batch_obj))
>   		return shadow_batch_obj;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index fcb074bd55dc..a6d4d5502188 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1421,6 +1421,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
>   	ring->dev = dev;
>   	INIT_LIST_HEAD(&ring->active_list);
>   	INIT_LIST_HEAD(&ring->request_list);
> +	i915_gem_batch_pool_init(dev, &ring->batch_pool);
>   	init_waitqueue_head(&ring->irq_queue);
>
>   	INIT_LIST_HEAD(&ring->execlist_queue);


Cleanup part for _lrc ?

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 3/6] drm/i915: Split the batch pool by engine
  2015-03-18 16:51   ` Tvrtko Ursulin
@ 2015-03-18 17:27     ` Chris Wilson
  2015-03-18 17:33       ` Tvrtko Ursulin
  0 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2015-03-18 17:27 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Wed, Mar 18, 2015 at 04:51:58PM +0000, Tvrtko Ursulin wrote:
> 
> On 03/09/2015 09:55 AM, Chris Wilson wrote:
> >I woke up one morning and found 50k objects sitting in the batch pool
> >and every search seemed to iterate the entire list... Painting the
> >screen in oils would provide a more fluid display.
> >
> >One issue with the current design is that we only check for retirements
> >on the current ring when preparing to submit a new batch. This means
> >that we can have thousands of "active" batches on another ring that we
> >have to walk over. The simplest way to avoid that is to split the pools
> >per ring and then our LRU execution ordering will also ensure that the
> >inactive buffers remain at the front.
> 
> Is the retire work handler not keeping up? Regularly or under some
> specific circumstances?

The retire_work handler that runs at ~1Hz? Yes, it fails to keep up.
Which is why we explicitly run retire_ring before each execbuffer.

> >diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> >index 21f3356cc0ab..1287abf55b84 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> >@@ -96,8 +96,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
> >
> >  	list_for_each_entry_safe(tmp, next,
> >  				 &pool->cache_list, batch_pool_list) {
> >+		/* The batches are strictly LRU ordered */
> >  		if (tmp->active)
> >-			continue;
> >+			break;
> 
> This hunk would be a better fit for 2/6, correct?

No. The explanation is given by the comment + changelog.

> >diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >index fcb074bd55dc..a6d4d5502188 100644
> >--- a/drivers/gpu/drm/i915/intel_lrc.c
> >+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >@@ -1421,6 +1421,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
> >  	ring->dev = dev;
> >  	INIT_LIST_HEAD(&ring->active_list);
> >  	INIT_LIST_HEAD(&ring->request_list);
> >+	i915_gem_batch_pool_init(dev, &ring->batch_pool);
> >  	init_waitqueue_head(&ring->irq_queue);
> >
> >  	INIT_LIST_HEAD(&ring->execlist_queue);
> 
> 
> Cleanup part for _lrc ?

Probably. Becomes redundant later anyway.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 3/6] drm/i915: Split the batch pool by engine
  2015-03-18 17:27     ` Chris Wilson
@ 2015-03-18 17:33       ` Tvrtko Ursulin
  2015-03-19  9:36         ` Tvrtko Ursulin
  0 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-03-18 17:33 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/18/2015 05:27 PM, Chris Wilson wrote:
> On Wed, Mar 18, 2015 at 04:51:58PM +0000, Tvrtko Ursulin wrote:
>> On 03/09/2015 09:55 AM, Chris Wilson wrote:
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
>>> index 21f3356cc0ab..1287abf55b84 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
>>> @@ -96,8 +96,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>>>
>>>   	list_for_each_entry_safe(tmp, next,
>>>   				 &pool->cache_list, batch_pool_list) {
>>> +		/* The batches are strictly LRU ordered */
>>>   		if (tmp->active)
>>> -			continue;
>>> +			break;
>>
>> This hunk would be a better fit for 2/6, correct?
>
> No. The explanation is given by the comment + changelog.

I don't see this patch introducing this strict LRU ordering, rather it 
was there before this patch. Am I missing something? If I am not, then I 
see this hunk as a better fit with "Tidy batch pool logic", than "Split 
the batch pool by engine".

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 3/6] drm/i915: Split the batch pool by engine
  2015-03-18 17:33       ` Tvrtko Ursulin
@ 2015-03-19  9:36         ` Tvrtko Ursulin
  2015-03-19 10:06           ` Chris Wilson
  0 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-03-19  9:36 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/18/2015 05:33 PM, Tvrtko Ursulin wrote:
>
> On 03/18/2015 05:27 PM, Chris Wilson wrote:
>> On Wed, Mar 18, 2015 at 04:51:58PM +0000, Tvrtko Ursulin wrote:
>>> On 03/09/2015 09:55 AM, Chris Wilson wrote:
>>>> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
>>>> b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
>>>> index 21f3356cc0ab..1287abf55b84 100644
>>>> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
>>>> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
>>>> @@ -96,8 +96,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool
>>>> *pool,
>>>>
>>>>       list_for_each_entry_safe(tmp, next,
>>>>                    &pool->cache_list, batch_pool_list) {
>>>> +        /* The batches are strictly LRU ordered */
>>>>           if (tmp->active)
>>>> -            continue;
>>>> +            break;
>>>
>>> This hunk would be a better fit for 2/6, correct?
>>
>> No. The explanation is given by the comment + changelog.
>
> I don't see this patch introducing this strict LRU ordering, rather it
> was there before this patch. Am I missing something? If I am not, then I
> see this hunk as a better fit with "Tidy batch pool logic", than "Split
> the batch pool by engine".

Oh right, I got it, but not sure I like it that much. I don't think 
batch pool implementation is well enough decoupled from the users. Well 
in a way at least where when we talk about LRU ordering, it depends on 
retiring working properly and that is not obvious from code layout and 
module separation.

And then with this we move traversal inefficiency to possibly more 
resource use. Would it be better to fix the cause rather than symptoms? 
Is it feasible? What would be the downside of retiring all rings before 
submission?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 3/6] drm/i915: Split the batch pool by engine
  2015-03-19  9:36         ` Tvrtko Ursulin
@ 2015-03-19 10:06           ` Chris Wilson
  2015-03-19 11:39             ` Tvrtko Ursulin
  0 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2015-03-19 10:06 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Thu, Mar 19, 2015 at 09:36:14AM +0000, Tvrtko Ursulin wrote:
> Oh right, I got it, but not sure I like it that much. I don't think
> batch pool implementation is well enough decoupled from the users.

"batch pool" Splitting it this way actually improves decoupling.

> Well in a way at least where when we talk about LRU ordering, it
> depends on retiring working properly and that is not obvious from
> code layout and module separation.

I've lost you. The list is in LRU submission order. With this split, the
list is both in LRU submission and LRU retirement order. That the two
are not the same originally is not a fault of retiring not working
properly, but that the hardware is split into different units and
timelines.
 
> And then with this me move traversal inefficiency to possible more
> resource use. Would it be better to fix the cause rather than
> symptoms? Is it feasible? What would be the downside of retiring all
> rings before submission?

Not really. Inefficient userspace is inefficient. All we want to be sure
is that one abusive client doesn't cause a DoS on another, whilst making
sure that good clients are not penalized.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 3/6] drm/i915: Split the batch pool by engine
  2015-03-19 10:06           ` Chris Wilson
@ 2015-03-19 11:39             ` Tvrtko Ursulin
  2015-03-19 11:46               ` Chris Wilson
  0 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-03-19 11:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 03/19/2015 10:06 AM, Chris Wilson wrote:
> On Thu, Mar 19, 2015 at 09:36:14AM +0000, Tvrtko Ursulin wrote:
>> Oh right, I got it, but not sure I like it that much. I don't think
>> batch pool implementation is well enough decoupled from the users.
>
> "batch pool" Splitting it this way actually improves decoupling.

Of the resulting system yes, of the actual modules no in my opinion.

>> Well in a way at least where when we talk about LRU ordering, it
>> depends on retiring working properly and that is not obvious from
>> code layout and module separation.
>
> I've lost you. The list is in LRU submission order. With this split, the
> list is both in LRU submission and LRU retirememnt order. That the two
> are not the same originally is not a fault of retiring not working
> properly, but that the hardware is split into different units and
> timelines.
>
>> And then with this me move traversal inefficiency to possible more
>> resource use. Would it be better to fix the cause rather than
>> symptoms? Is it feasible? What would be the downside of retiring all
>> rings before submission?
>
> Not really. Inefficient userspace is inefficient. All we want to be sure
> is that one abusive client doesn't cause a DoS on another, whilst making
> sure that good clients are not penalized.

Not sure which of my questions your "not really" was the answer to.

I understood that this is about the completed work which hasn't been 
retired due the latter only happening on submission to the same ring, or 
with too low frequency from retire work handler.

If this is true, could we just not do a retire pass on all rings on any 
submission?

Otherwise, how do per ring pools help abusive and good client on the 
same ring?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 3/6] drm/i915: Split the batch pool by engine
  2015-03-19 11:39             ` Tvrtko Ursulin
@ 2015-03-19 11:46               ` Chris Wilson
  2015-03-19 11:58                 ` Tvrtko Ursulin
  0 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2015-03-19 11:46 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Thu, Mar 19, 2015 at 11:39:16AM +0000, Tvrtko Ursulin wrote:
> On 03/19/2015 10:06 AM, Chris Wilson wrote:
> >On Thu, Mar 19, 2015 at 09:36:14AM +0000, Tvrtko Ursulin wrote:
> >>Well in a way at least where when we talk about LRU ordering, it
> >>depends on retiring working properly and that is not obvious from
> >>code layout and module separation.
> >
> >I've lost you. The list is in LRU submission order. With this split, the
> >list is both in LRU submission and LRU retirement order. That the two
> >are not the same originally is not a fault of retiring not working
> >properly, but that the hardware is split into different units and
> >timelines.
> >
> >>And then with this we move traversal inefficiency to possibly more
> >>resource use. Would it be better to fix the cause rather than
> >>symptoms? Is it feasible? What would be the downside of retiring all
> >>rings before submission?
> >
> >Not really. Inefficient userspace is inefficient. All we want to be sure
> >is that one abusive client doesn't cause a DoS on another, whilst making
> >sure that good clients are not penalized.
> 
> Not sure to which of my question your "not really" was the answer.

We do "fix" the cause later, and I've amended the throttling in mesa to
prevent a reoccurrence.  So I was thinking of why we only retire on
the current ring.
 
> I understood that this is about the completed work which hasn't been
> retired due to the latter only happening on submission to the same
> ring, or with too low frequency from retire work handler.
> 
> If this is true, could we just not do a retire pass on all rings on
> any submission?

No. The problem is that rings retire out of order. So a global LRU
submission list is not strictly separated between inactive and active
objects (in contrast to the per-engine list where it is true).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 3/6] drm/i915: Split the batch pool by engine
  2015-03-19 11:46               ` Chris Wilson
@ 2015-03-19 11:58                 ` Tvrtko Ursulin
  2015-03-19 12:04                   ` Chris Wilson
  0 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-03-19 11:58 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/19/2015 11:46 AM, Chris Wilson wrote:
> On Thu, Mar 19, 2015 at 11:39:16AM +0000, Tvrtko Ursulin wrote:
>> On 03/19/2015 10:06 AM, Chris Wilson wrote:
>>> On Thu, Mar 19, 2015 at 09:36:14AM +0000, Tvrtko Ursulin wrote:
>>>> Well in a way at least where when we talk about LRU ordering, it
>>>> depends on retiring working properly and that is not obvious from
>>>> code layout and module separation.
>>>
>>> I've lost you. The list is in LRU submission order. With this split, the
>>> list is both in LRU submission and LRU retirement order. That the two
>>> are not the same originally is not a fault of retiring not working
>>> properly, but that the hardware is split into different units and
>>> timelines.
>>>
>>>> And then with this we move traversal inefficiency to possibly more
>>>> resource use. Would it be better to fix the cause rather than
>>>> symptoms? Is it feasible? What would be the downside of retiring all
>>>> rings before submission?
>>>
>>> Not really. Inefficient userspace is inefficient. All we want to be sure
>>> is that one abusive client doesn't cause a DoS on another, whilst making
>>> sure that good clients are not penalized.
>>
>> Not sure to which of my question your "not really" was the answer.
>
> We do "fix" the cause later, and I've amended the throttling in mesa to
> prevent a reoccurrence.  So I was thinking of why we only retire on
> the current ring.
>
>> I understood that this is about the completed work which hasn't been
>> retired due to the latter only happening on submission to the same
>> ring, or with too low frequency from retire work handler.
>>
>> If this is true, could we just not do a retire pass on all rings on
>> any submission?
>
> No. The problem is that rings retire out of order. So a global LRU
> submission list is not strictly separated between inactive and active
> objects (in contrast to the per-engine list where it is true).

How about retire all rings and then the inactive batch search with a 
global pool becomes only O(num_rings) at worst? Might be worth saving 
memory resource (multiple pools) vs. trivial traversal like that?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 3/6] drm/i915: Split the batch pool by engine
  2015-03-19 11:58                 ` Tvrtko Ursulin
@ 2015-03-19 12:04                   ` Chris Wilson
  2015-03-19 14:01                     ` Tvrtko Ursulin
  0 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2015-03-19 12:04 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Thu, Mar 19, 2015 at 11:58:17AM +0000, Tvrtko Ursulin wrote:
> How about retire all rings and then the inactive batch search with a
> global pool becomes only O(num_rings) at worst? Might be worth
> saving memory resource (multiple pools) vs. trivial traversal like
> that?

There isn't a memory resource issue here though. The pool is made out of
easily reclaimable objects, and is ultimately limited by just how many
batches can be submitted whilst the GPU is active. The principal issue
is finding a new buffer to use for the next batch. Splitting by engine
is also likely to have nice secondary effects like grouping of batch
sizes.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 3/6] drm/i915: Split the batch pool by engine
  2015-03-19 12:04                   ` Chris Wilson
@ 2015-03-19 14:01                     ` Tvrtko Ursulin
  2015-03-19 14:34                       ` Chris Wilson
  0 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-03-19 14:01 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/19/2015 12:04 PM, Chris Wilson wrote:
> On Thu, Mar 19, 2015 at 11:58:17AM +0000, Tvrtko Ursulin wrote:
>> How about retire all rings and then the inactive batch search with a
>> global pool becomes only O(num_rings) at worst? Might be worth
>> saving memory resource (multiple pools) vs. trivial traversal like
>> that?
>
> There isn't a memory resource issue here though. The pool is made out of
> easily reclaimable objects, and is ultimately limited by just how many
> batches can be submitted whilst the GPU is active. The principal issue
> is finding a new buffer to use for the next batch. Splitting by engine
> is also likely to have nice secondary effects like grouping of batch
> sizes.

True on the last bit, yes.

Also, I was under the wrong impression that only backing storage gets 
discarded and pool objects remain. Maybe it was like that some time back 
in some initial version, no idea now.

Anyway with this misconception cleared I agree resource problem is much 
smaller, although I still wonder how big or small exactly would be a 
difference in dynamic numbers of allocated pool batches between 
global-pool-but-fixed-inactive-lookup and per-ring-pool scenarios.

Especially since you'll later add buckets and then per ring pools with 
buckets sounds not as optimal, both from design point of view and from 
resource usage point of view, as single bucketed pool with efficient 
object lookup would be.

So I still don't like it, but it will work and behaves better than 
before, so will r-b it when you add the missing destructor.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 3/6] drm/i915: Split the batch pool by engine
  2015-03-19 14:01                     ` Tvrtko Ursulin
@ 2015-03-19 14:34                       ` Chris Wilson
  0 siblings, 0 replies; 32+ messages in thread
From: Chris Wilson @ 2015-03-19 14:34 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Thu, Mar 19, 2015 at 02:01:37PM +0000, Tvrtko Ursulin wrote:
> 
> On 03/19/2015 12:04 PM, Chris Wilson wrote:
> >On Thu, Mar 19, 2015 at 11:58:17AM +0000, Tvrtko Ursulin wrote:
> >>How about retire all rings and then the inactive batch search with a
> >>global pool becomes only O(num_rings) at worst? Might be worth
> >>saving memory resource (multiple pools) vs. trivial traversal like
> >>that?
> >
> >There isn't a memory resource issue here though. The pool is made out of
> >easily reclaimable objects, and is ultimately limited by just how many
> >batches can be submitted whilst the GPU is active. The principal issue
> >is finding a new buffer to use for the next batch. Splitting by engine
> >is also likely to have nice secondary effects like grouping of batch
> >sizes.
> 
> True on the last bit, yes.
> 
> Also, I was under the wrong impression that only backing storage
> gets discarded and pool objects remain. Maybe it was like that some
> time back in some initial version, no idea now.
> 
> Anyway with this misconception cleared I agree resource problem is
> much smaller, although I still wonder how big or small exactly would
> be a difference in dynamic numbers of allocated pool batches between
> global-pool-but-fixed-inactive-lookup and per-ring-pool scenarios.

My guess is that really it will only be a few buffers in it. If you have
active buffers on a particular ring, you will be likely to reuse them
again very shortly. So the only real question is how many inactive
buffers do you have on that ring that you could be using on the other
before they are reaped by the idle ring. And if you really, really wanted
you could always just search other rings, which (modulo doing the
retire) would be a quick search because of the buckets + strict
ordering.
 
> Especially since you'll later add buckets and then per ring pools
> with buckets sounds not as optimal, both from design point of view
> and from resource usage point of view, as single bucketed pool with
> efficient object lookup would be.

The buckets have the same memory efficiency as the list. The tradeoff
there is greater static allocation (4 lists instead of 1) to avoid
having the test inside the iterator.

Really the only drawback is that we don't allow buffers to cross between
rings, and so can end up with more temporarily unused buffers. Otoh, the
cmdparser is only for gen7 and is a pita.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/6] drm/i915: Free batch pool when idle
  2015-03-09  9:55 ` [PATCH 4/6] drm/i915: Free batch pool when idle Chris Wilson
@ 2015-03-19 17:03   ` Tvrtko Ursulin
  2015-03-19 17:14     ` Chris Wilson
  0 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-03-19 17:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/09/2015 09:55 AM, Chris Wilson wrote:
> At runtime, this helps ensure that the batch pools are kept trim and
> fast. Then at suspend, this releases memory that we do not need to
> restore. It also ties into the oom-notifier to ensure that we recover as
> much kernel memory as possible during OOM.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem.c | 13 ++++++++++++-
>   1 file changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 9f33005bdfd1..efb5545251c7 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2849,8 +2849,19 @@ i915_gem_idle_work_handler(struct work_struct *work)
>   {
>   	struct drm_i915_private *dev_priv =
>   		container_of(work, typeof(*dev_priv), mm.idle_work.work);
> +	struct drm_device *dev = dev_priv->dev;
> +
> +	intel_mark_idle(dev);
>
> -	intel_mark_idle(dev_priv->dev);
> +	if (mutex_trylock(&dev->struct_mutex)) {
> +		struct intel_engine_cs *ring;
> +		int i;
> +
> +		for_each_ring(ring, dev_priv, i)
> +			i915_gem_batch_pool_fini(&ring->batch_pool);

This is sooo bad... destructor in the idle handler, what message does 
that send? :D

> +
> +		mutex_unlock(&dev->struct_mutex);
> +	}

Would it be worth self-re-arming if trylock fails? I couldn't 
immediately figure out if last retirement can somehow race with a 
struct_mutex holder somewhere.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/6] drm/i915: Free batch pool when idle
  2015-03-19 17:03   ` Tvrtko Ursulin
@ 2015-03-19 17:14     ` Chris Wilson
  2015-03-19 17:27       ` Tvrtko Ursulin
  0 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2015-03-19 17:14 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Thu, Mar 19, 2015 at 05:03:56PM +0000, Tvrtko Ursulin wrote:
> 
> On 03/09/2015 09:55 AM, Chris Wilson wrote:
> >At runtime, this helps ensure that the batch pools are kept trim and
> >fast. Then at suspend, this releases memory that we do not need to
> >restore. It also ties into the oom-notifier to ensure that we recover as
> >much kernel memory as possible during OOM.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> >  drivers/gpu/drm/i915/i915_gem.c | 13 ++++++++++++-
> >  1 file changed, 12 insertions(+), 1 deletion(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >index 9f33005bdfd1..efb5545251c7 100644
> >--- a/drivers/gpu/drm/i915/i915_gem.c
> >+++ b/drivers/gpu/drm/i915/i915_gem.c
> >@@ -2849,8 +2849,19 @@ i915_gem_idle_work_handler(struct work_struct *work)
> >  {
> >  	struct drm_i915_private *dev_priv =
> >  		container_of(work, typeof(*dev_priv), mm.idle_work.work);
> >+	struct drm_device *dev = dev_priv->dev;
> >+
> >+	intel_mark_idle(dev);
> >
> >-	intel_mark_idle(dev_priv->dev);
> >+	if (mutex_trylock(&dev->struct_mutex)) {
> >+		struct intel_engine_cs *ring;
> >+		int i;
> >+
> >+		for_each_ring(ring, dev_priv, i)
> >+			i915_gem_batch_pool_fini(&ring->batch_pool);
> 
> This is sooo bad... destructor in the idle handler, what message
> does that send? :D

That I noticed the function could also be used to cleanup the lists at
runtime, and I wasn't a fan of say i915_gem_batch_pool_cleanup().

#define i915_gem_batch_pool_reset i915_gem_batch_pool_fini ?

> >+
> >+		mutex_unlock(&dev->struct_mutex);
> >+	}
> 
> Would it be worth self-re-arming if trylock fails?

Possibly. I wasn't too concerned, holding the struct_mutex generally
involves work being emitted shortly afterwards. And I regarded this as
a performance optimisation rather than preventing leaks.

> I couldn't
> immediately figure out if last retirement can somehow race with a
> struct_mutex holder somewhere.

On suspend, we drop the struct mutex before flushing the idle work. Then
we explicitly free the lists anyway on shutdown.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/6] drm/i915: Free batch pool when idle
  2015-03-19 17:14     ` Chris Wilson
@ 2015-03-19 17:27       ` Tvrtko Ursulin
  0 siblings, 0 replies; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-03-19 17:27 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/19/2015 05:14 PM, Chris Wilson wrote:
> On Thu, Mar 19, 2015 at 05:03:56PM +0000, Tvrtko Ursulin wrote:
>>
>> On 03/09/2015 09:55 AM, Chris Wilson wrote:
>>> At runtime, this helps ensure that the batch pools are kept trim and
>>> fast. Then at suspend, this releases memory that we do not need to
>>> restore. It also ties into the oom-notifier to ensure that we recover as
>>> much kernel memory as possible during OOM.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>   drivers/gpu/drm/i915/i915_gem.c | 13 ++++++++++++-
>>>   1 file changed, 12 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>> index 9f33005bdfd1..efb5545251c7 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>> @@ -2849,8 +2849,19 @@ i915_gem_idle_work_handler(struct work_struct *work)
>>>   {
>>>   	struct drm_i915_private *dev_priv =
>>>   		container_of(work, typeof(*dev_priv), mm.idle_work.work);
>>> +	struct drm_device *dev = dev_priv->dev;
>>> +
>>> +	intel_mark_idle(dev);
>>>
>>> -	intel_mark_idle(dev_priv->dev);
>>> +	if (mutex_trylock(&dev->struct_mutex)) {
>>> +		struct intel_engine_cs *ring;
>>> +		int i;
>>> +
>>> +		for_each_ring(ring, dev_priv, i)
>>> +			i915_gem_batch_pool_fini(&ring->batch_pool);
>>
>> This is sooo bad... destructor in the idle handler, what message
>> does that send? :D
>
> That I noticed the function could also be used to cleanup the lists at
> runtime, and I wasn't a fan of say i915_gem_batch_pool_cleanup().
>
> #define i915_gem_batch_pool_reset i915_gem_batch_pool_fini ?

Obviously I noticed the same or I wouldn't have given my r-b. :)

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/6] drm/i915: Split batch pool into size buckets
  2015-03-09  9:55 ` [PATCH 5/6] drm/i915: Split batch pool into size buckets Chris Wilson
@ 2015-03-19 17:35   ` Tvrtko Ursulin
  2015-03-19 21:09     ` Chris Wilson
  0 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-03-19 17:35 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/09/2015 09:55 AM, Chris Wilson wrote:
> Now with the trimmed memcpy before the command parser, we try to
> allocate many different sizes of batches, predominantly one or two
> pages. We can therefore speed up searching for a good sized batch by
> keeping the objects in buckets of roughly the same size.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c        | 46 +++++++++++++++++++-----------
>   drivers/gpu/drm/i915/i915_drv.h            |  2 +-
>   drivers/gpu/drm/i915/i915_gem.c            |  2 +-
>   drivers/gpu/drm/i915/i915_gem_batch_pool.c | 45 +++++++++++++++++------------
>   drivers/gpu/drm/i915/i915_gem_batch_pool.h |  2 +-
>   5 files changed, 60 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index d8ca5c89647c..042ad2fec484 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -378,15 +378,17 @@ static void print_batch_pool_stats(struct seq_file *m,
>   	struct drm_i915_gem_object *obj;
>   	struct file_stats stats;
>   	struct intel_engine_cs *ring;
> -	int i;
> +	int i, j;
>
>   	memset(&stats, 0, sizeof(stats));
>
>   	for_each_ring(ring, dev_priv, i) {
> -		list_for_each_entry(obj,
> -				    &ring->batch_pool.cache_list,
> -				    batch_pool_list)
> -			per_file_stats(0, obj, &stats);
> +		for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) {
> +			list_for_each_entry(obj,
> +					    &ring->batch_pool.cache_list[j],
> +					    batch_pool_link)
> +				per_file_stats(0, obj, &stats);
> +		}
>   	}
>
>   	print_file_stats(m, "batch pool", stats);
> @@ -624,26 +626,38 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct drm_i915_gem_object *obj;
>   	struct intel_engine_cs *ring;
> -	int count = 0;
> -	int ret, i;
> +	int total = 0;
> +	int ret, i, j;
>
>   	ret = mutex_lock_interruptible(&dev->struct_mutex);
>   	if (ret)
>   		return ret;
>
>   	for_each_ring(ring, dev_priv, i) {
> -		seq_printf(m, "%s cache:\n", ring->name);
> -		list_for_each_entry(obj,
> -				    &ring->batch_pool.cache_list,
> -				    batch_pool_list) {
> -			seq_puts(m, "   ");
> -			describe_obj(m, obj);
> -			seq_putc(m, '\n');
> -			count++;
> +		for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) {
> +			int count;
> +
> +			count = 0;
> +			list_for_each_entry(obj,
> +					    &ring->batch_pool.cache_list[j],
> +					    batch_pool_link)
> +				count++;
> +			seq_printf(m, "%s cache[%d]: %d objects\n",
> +				   ring->name, j, count);
> +
> +			list_for_each_entry(obj,
> +					    &ring->batch_pool.cache_list[j],
> +					    batch_pool_link) {
> +				seq_puts(m, "   ");
> +				describe_obj(m, obj);
> +				seq_putc(m, '\n');
> +			}
> +
> +			total += count;
>   		}
>   	}
>
> -	seq_printf(m, "total: %d\n", count);
> +	seq_printf(m, "total: %d\n", total);
>
>   	mutex_unlock(&dev->struct_mutex);
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 9fe1274cfe59..5c7b89bd138d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1893,7 +1893,7 @@ struct drm_i915_gem_object {
>   	/** Used in execbuf to temporarily hold a ref */
>   	struct list_head obj_exec_link;
>
> -	struct list_head batch_pool_list;
> +	struct list_head batch_pool_link;
>
>   	/**
>   	 * This is set if the object is on the active lists (has pending
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index efb5545251c7..a0c35f80836c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4439,7 +4439,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>   	INIT_LIST_HEAD(&obj->ring_list);
>   	INIT_LIST_HEAD(&obj->obj_exec_link);
>   	INIT_LIST_HEAD(&obj->vma_list);
> -	INIT_LIST_HEAD(&obj->batch_pool_list);
> +	INIT_LIST_HEAD(&obj->batch_pool_link);
>
>   	obj->ops = ops;
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> index 1287abf55b84..23374352d8eb 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> @@ -47,8 +47,12 @@
>   void i915_gem_batch_pool_init(struct drm_device *dev,
>   			      struct i915_gem_batch_pool *pool)
>   {
> +	int n;
> +
>   	pool->dev = dev;
> -	INIT_LIST_HEAD(&pool->cache_list);
> +
> +	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
> +		INIT_LIST_HEAD(&pool->cache_list[n]);
>   }
>
>   /**
> @@ -59,16 +63,20 @@ void i915_gem_batch_pool_init(struct drm_device *dev,
>    */
>   void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
>   {
> +	int n;
> +
>   	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
>
> -	while (!list_empty(&pool->cache_list)) {
> -		struct drm_i915_gem_object *obj =
> -			list_first_entry(&pool->cache_list,
> -					 struct drm_i915_gem_object,
> -					 batch_pool_list);
> +	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
> +		while (!list_empty(&pool->cache_list[n])) {
> +			struct drm_i915_gem_object *obj =
> +				list_first_entry(&pool->cache_list[n],
> +						 struct drm_i915_gem_object,
> +						 batch_pool_link);
>
> -		list_del(&obj->batch_pool_list);
> -		drm_gem_object_unreference(&obj->base);
> +			list_del(&obj->batch_pool_link);
> +			drm_gem_object_unreference(&obj->base);
> +		}
>   	}
>   }
>
> @@ -91,28 +99,29 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>   {
>   	struct drm_i915_gem_object *obj = NULL;
>   	struct drm_i915_gem_object *tmp, *next;
> +	struct list_head *list;
> +	int n;
>
>   	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
>
> -	list_for_each_entry_safe(tmp, next,
> -				 &pool->cache_list, batch_pool_list) {
> +	n = fls(size >> PAGE_SHIFT) - 1;

Is this better in some way than size / PAGE_SIZE - 1? Or I went from 
trusting compiler smartness too little to too much? :)

> +	if (n >= ARRAY_SIZE(pool->cache_list))
> +		n = ARRAY_SIZE(pool->cache_list) - 1;
> +	list = &pool->cache_list[n];
> +
> +	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
>   		/* The batches are strictly LRU ordered */
>   		if (tmp->active)
>   			break;
>
>   		/* While we're looping, do some clean up */
>   		if (tmp->madv == __I915_MADV_PURGED) {
> -			list_del(&tmp->batch_pool_list);
> +			list_del(&tmp->batch_pool_link);
>   			drm_gem_object_unreference(&tmp->base);
>   			continue;
>   		}
>
> -		/*
> -		 * Select a buffer that is at least as big as needed
> -		 * but not 'too much' bigger. A better way to do this
> -		 * might be to bucket the pool objects based on size.
> -		 */
> -		if (tmp->base.size >= size && tmp->base.size <= 2 * size) {
> +		if (tmp->base.size >= size) {
>   			obj = tmp;
>   			break;
>   		}
> @@ -132,7 +141,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>   		obj->madv = I915_MADV_DONTNEED;
>   	}
>
> -	list_move_tail(&obj->batch_pool_list, &pool->cache_list);
> +	list_move_tail(&obj->batch_pool_link, list);
>   	i915_gem_object_pin_pages(obj);
>   	return obj;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> index 5ed70ef6a887..848e90703eed 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> @@ -29,7 +29,7 @@
>
>   struct i915_gem_batch_pool {
>   	struct drm_device *dev;
> -	struct list_head cache_list;
> +	struct list_head cache_list[4];
>   };
>
>   /* i915_gem_batch_pool.c */
>

Pretty straightforward and trusting you on bucket sizes,

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 1/6] drm/i915: Split i915_gem_batch_pool into its own header
  2015-03-09  9:55 [PATCH 1/6] drm/i915: Split i915_gem_batch_pool into its own header Chris Wilson
                   ` (4 preceding siblings ...)
  2015-03-09  9:55 ` [PATCH 6/6] drm/i915: Include active flag when describing objects in debugfs Chris Wilson
@ 2015-03-19 17:37 ` Tvrtko Ursulin
  2015-03-19 21:06   ` Chris Wilson
  5 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-03-19 17:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/09/2015 09:55 AM, Chris Wilson wrote:
> In the next patch, I want to use the structure elsewhere and so require
> it defined earlier. Rather than move the definition to an earlier location
> where it feels very odd, place it in its own header file.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h            | 13 +--------
>   drivers/gpu/drm/i915/i915_gem_batch_pool.c |  1 +
>   drivers/gpu/drm/i915/i915_gem_batch_pool.h | 42 ++++++++++++++++++++++++++++++
>   3 files changed, 44 insertions(+), 12 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.h
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c986e355e4d0..b62e3c478221 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -37,6 +37,7 @@
>   #include "intel_bios.h"
>   #include "intel_ringbuffer.h"
>   #include "intel_lrc.h"
> +#include "i915_gem_batch_pool.h"
>   #include "i915_gem_gtt.h"
>   #include "i915_gem_render_state.h"
>   #include <linux/io-mapping.h>
> @@ -1135,11 +1136,6 @@ struct intel_l3_parity {
>   	int which_slice;
>   };
>
> -struct i915_gem_batch_pool {
> -	struct drm_device *dev;
> -	struct list_head cache_list;
> -};
> -
>   struct i915_gem_mm {
>   	/** Memory allocator for GTT stolen memory */
>   	struct drm_mm stolen;
> @@ -3048,13 +3044,6 @@ void i915_destroy_error_state(struct drm_device *dev);
>   void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
>   const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
>
> -/* i915_gem_batch_pool.c */
> -void i915_gem_batch_pool_init(struct drm_device *dev,
> -			      struct i915_gem_batch_pool *pool);
> -void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
> -struct drm_i915_gem_object*
> -i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
> -
>   /* i915_cmd_parser.c */
>   int i915_cmd_parser_get_version(void);
>   int i915_cmd_parser_init_ring(struct intel_engine_cs *ring);
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> index c690170a1c4f..564be7c5ea7e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> @@ -23,6 +23,7 @@
>    */
>
>   #include "i915_drv.h"
> +#include "i915_gem_batch_pool.h"
>
>   /**
>    * DOC: batch pool
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> new file mode 100644
> index 000000000000..5ed70ef6a887
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> @@ -0,0 +1,42 @@
> +/*
> + * Copyright © 2014 Intel Corporation

Maybe 2014-15. or whatever the guidelines are?

> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef I915_GEM_BATCH_POOL_H
> +#define I915_GEM_BATCH_POOL_H
> +
> +#include "i915_drv.h"
> +
> +struct i915_gem_batch_pool {
> +	struct drm_device *dev;
> +	struct list_head cache_list;
> +};
> +
> +/* i915_gem_batch_pool.c */
> +void i915_gem_batch_pool_init(struct drm_device *dev,
> +			      struct i915_gem_batch_pool *pool);
> +void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
> +struct drm_i915_gem_object*
> +i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
> +
> +#endif /* I915_GEM_BATCH_POOL_H */
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko



_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 6/6] drm/i915: Include active flag when describing objects in debugfs
  2015-03-09  9:55 ` [PATCH 6/6] drm/i915: Include active flag when describing objects in debugfs Chris Wilson
  2015-03-09 14:59   ` shuang.he
@ 2015-03-19 17:41   ` Tvrtko Ursulin
  2015-03-19 21:05     ` Chris Wilson
  1 sibling, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-03-19 17:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/09/2015 09:55 AM, Chris Wilson wrote:
> Since we use obj->active as a hint in many places throughout the code,
> knowing its state in debugfs is extremely useful.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c | 3 ++-
>   1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 042ad2fec484..809f6eadc10c 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -123,8 +123,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>   	struct i915_vma *vma;
>   	int pin_count = 0;
>
> -	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
> +	seq_printf(m, "%pK: %s%s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
>   		   &obj->base,
> +		   obj->active ? "*" : " ",

%c etc would maybe be more compact code? (Hey I have to earn my 
bike-shedding badge somehow! ;) Anyway,

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

This now completes the series apart from I think two respins - rebase 
for possible extra unpin and missing lrc destructor.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 6/6] drm/i915: Include active flag when describing objects in debugfs
  2015-03-19 17:41   ` Tvrtko Ursulin
@ 2015-03-19 21:05     ` Chris Wilson
  2015-03-20  9:23       ` Tvrtko Ursulin
  0 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2015-03-19 21:05 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Thu, Mar 19, 2015 at 05:41:26PM +0000, Tvrtko Ursulin wrote:
> 
> On 03/09/2015 09:55 AM, Chris Wilson wrote:
> >Since we use obj->active as a hint in many places throughout the code,
> >knowing its state in debugfs is extremely useful.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> >  drivers/gpu/drm/i915/i915_debugfs.c | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> >index 042ad2fec484..809f6eadc10c 100644
> >--- a/drivers/gpu/drm/i915/i915_debugfs.c
> >+++ b/drivers/gpu/drm/i915/i915_debugfs.c
> >@@ -123,8 +123,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> >  	struct i915_vma *vma;
> >  	int pin_count = 0;
> >
> >-	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
> >+	seq_printf(m, "%pK: %s%s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
> >  		   &obj->base,
> >+		   obj->active ? "*" : " ",
> 
> %c etc would maybe be more compact code? (Hey I have to earn my
> bike-shedding badge somehow! ;) Anyway,

The rationale for the empty flags to use " " was to keep the fields
aligned. I'm still about 60:40 whether that was a good idea in terms of
formatting the debugfs files.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 1/6] drm/i915: Split i915_gem_batch_pool into its own header
  2015-03-19 17:37 ` [PATCH 1/6] drm/i915: Split i915_gem_batch_pool into its own header Tvrtko Ursulin
@ 2015-03-19 21:06   ` Chris Wilson
  0 siblings, 0 replies; 32+ messages in thread
From: Chris Wilson @ 2015-03-19 21:06 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Thu, Mar 19, 2015 at 05:37:30PM +0000, Tvrtko Ursulin wrote:
> >diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> >new file mode 100644
> >index 000000000000..5ed70ef6a887
> >--- /dev/null
> >+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> >@@ -0,0 +1,42 @@
> >+/*
> >+ * Copyright © 2014 Intel Corporation
> 
> Maybe 2014-15. or whatever the guidelines are?

I didn't touch it here since I didn't change anything, but yeah we could
update it for the next set of patches. Good thing we don't track this at
all in git.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/6] drm/i915: Split batch pool into size buckets
  2015-03-19 17:35   ` Tvrtko Ursulin
@ 2015-03-19 21:09     ` Chris Wilson
  2015-03-20  9:25       ` Tvrtko Ursulin
  0 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2015-03-19 21:09 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Thu, Mar 19, 2015 at 05:35:35PM +0000, Tvrtko Ursulin wrote:
> >-	list_for_each_entry_safe(tmp, next,
> >-				 &pool->cache_list, batch_pool_list) {
> >+	n = fls(size >> PAGE_SHIFT) - 1;
> 
> Is this better in some way than size / PAGE_SIZE - 1? Or did I go from
> trusting compiler smartness too little to too much? :)

The month has 31 days in it, we use >> PAGE_SHIFT, otherwise /
PAGE_SIZE. Note the fls() puts these into power-of-two buckets not pure
page-count buckets.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 6/6] drm/i915: Include active flag when describing objects in debugfs
  2015-03-19 21:05     ` Chris Wilson
@ 2015-03-20  9:23       ` Tvrtko Ursulin
  0 siblings, 0 replies; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-03-20  9:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/19/2015 09:05 PM, Chris Wilson wrote:
> On Thu, Mar 19, 2015 at 05:41:26PM +0000, Tvrtko Ursulin wrote:
>>
>> On 03/09/2015 09:55 AM, Chris Wilson wrote:
>>> Since we use obj->active as a hint in many places throughout the code,
>>> knowing its state in debugfs is extremely useful.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>   drivers/gpu/drm/i915/i915_debugfs.c | 3 ++-
>>>   1 file changed, 2 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index 042ad2fec484..809f6eadc10c 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -123,8 +123,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>>>   	struct i915_vma *vma;
>>>   	int pin_count = 0;
>>>
>>> -	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
>>> +	seq_printf(m, "%pK: %s%s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
>>>   		   &obj->base,
>>> +		   obj->active ? "*" : " ",
>>
>> %c etc would maybe be more compact code? (Hey I have to earn my
>> bike-shedding badge somehow! ;) Anyway,
>
> The rationale for the empty flags to use " " was to keep the fields
> aligned. I'm still about 60:40 whether that was a good idea in terms of
> formatting the debugfs files.

Doesn't matter really, my joke was only about two chars being smaller 
than two one-char strings, weak joke yes. :)

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/6] drm/i915: Split batch pool into size buckets
  2015-03-19 21:09     ` Chris Wilson
@ 2015-03-20  9:25       ` Tvrtko Ursulin
  0 siblings, 0 replies; 32+ messages in thread
From: Tvrtko Ursulin @ 2015-03-20  9:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/19/2015 09:09 PM, Chris Wilson wrote:
> On Thu, Mar 19, 2015 at 05:35:35PM +0000, Tvrtko Ursulin wrote:
>>> -	list_for_each_entry_safe(tmp, next,
>>> -				 &pool->cache_list, batch_pool_list) {
>>> +	n = fls(size >> PAGE_SHIFT) - 1;
>>
>> Is this better in some way than size / PAGE_SIZE - 1? Or did I go from
>> trusting compiler smartness too little to too much? :)
>
> The month has 31 days in it, we use >> PAGE_SHIFT, otherwise /
> PAGE_SIZE. Note the fls() puts these into power-of-two buckets not pure
> page-count buckets.

Yeah silly me.. was thinking about 1-2 page buckets from the commit 
message only. Perhaps add a comment about bucket sizes above the n = fls 
line if you feel like it. Shouldn't harm.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH 1/6] drm/i915: Split i915_gem_batch_pool into its own header
@ 2015-02-26 10:05 Chris Wilson
  0 siblings, 0 replies; 32+ messages in thread
From: Chris Wilson @ 2015-02-26 10:05 UTC (permalink / raw)
  To: intel-gfx

In the next patch, I want to use the structure elsewhere and so require
it defined earlier. Rather than move the definition to an earlier location
where it feels very odd, place it in its own header file.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            | 13 +--------
 drivers/gpu/drm/i915/i915_gem_batch_pool.c |  1 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.h | 42 ++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 12 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.h

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 60a66f6cef78..6246a224f15b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -37,6 +37,7 @@
 #include "intel_bios.h"
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
+#include "i915_gem_batch_pool.h"
 #include "i915_gem_gtt.h"
 #include "i915_gem_render_state.h"
 #include <linux/io-mapping.h>
@@ -1251,11 +1252,6 @@ struct intel_l3_parity {
 	int which_slice;
 };
 
-struct i915_gem_batch_pool {
-	struct drm_device *dev;
-	struct list_head cache_list;
-};
-
 struct i915_gem_mm {
 	/** Memory allocator for GTT stolen memory */
 	struct drm_mm stolen;
@@ -3145,13 +3141,6 @@ void i915_destroy_error_state(struct drm_device *dev);
 void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 
-/* i915_gem_batch_pool.c */
-void i915_gem_batch_pool_init(struct drm_device *dev,
-			      struct i915_gem_batch_pool *pool);
-void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
-struct drm_i915_gem_object*
-i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
-
 /* i915_cmd_parser.c */
 int i915_cmd_parser_get_version(void);
 int i915_cmd_parser_init_ring(struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index aa477ddf12f1..40ab2207fcb9 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -23,6 +23,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_gem_batch_pool.h"
 
 /**
  * DOC: batch pool
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
new file mode 100644
index 000000000000..5ed70ef6a887
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef I915_GEM_BATCH_POOL_H
+#define I915_GEM_BATCH_POOL_H
+
+#include "i915_drv.h"
+
+struct i915_gem_batch_pool {
+	struct drm_device *dev;
+	struct list_head cache_list;
+};
+
+/* i915_gem_batch_pool.c */
+void i915_gem_batch_pool_init(struct drm_device *dev,
+			      struct i915_gem_batch_pool *pool);
+void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
+struct drm_i915_gem_object*
+i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
+
+#endif /* I915_GEM_BATCH_POOL_H */
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2015-03-20  9:25 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-09  9:55 [PATCH 1/6] drm/i915: Split i915_gem_batch_pool into its own header Chris Wilson
2015-03-09  9:55 ` [PATCH 2/6] drm/i915: Tidy batch pool logic Chris Wilson
2015-03-17 17:42   ` Tvrtko Ursulin
2015-03-17 20:53     ` Chris Wilson
2015-03-09  9:55 ` [PATCH 3/6] drm/i915: Split the batch pool by engine Chris Wilson
2015-03-18 16:51   ` Tvrtko Ursulin
2015-03-18 17:27     ` Chris Wilson
2015-03-18 17:33       ` Tvrtko Ursulin
2015-03-19  9:36         ` Tvrtko Ursulin
2015-03-19 10:06           ` Chris Wilson
2015-03-19 11:39             ` Tvrtko Ursulin
2015-03-19 11:46               ` Chris Wilson
2015-03-19 11:58                 ` Tvrtko Ursulin
2015-03-19 12:04                   ` Chris Wilson
2015-03-19 14:01                     ` Tvrtko Ursulin
2015-03-19 14:34                       ` Chris Wilson
2015-03-09  9:55 ` [PATCH 4/6] drm/i915: Free batch pool when idle Chris Wilson
2015-03-19 17:03   ` Tvrtko Ursulin
2015-03-19 17:14     ` Chris Wilson
2015-03-19 17:27       ` Tvrtko Ursulin
2015-03-09  9:55 ` [PATCH 5/6] drm/i915: Split batch pool into size buckets Chris Wilson
2015-03-19 17:35   ` Tvrtko Ursulin
2015-03-19 21:09     ` Chris Wilson
2015-03-20  9:25       ` Tvrtko Ursulin
2015-03-09  9:55 ` [PATCH 6/6] drm/i915: Include active flag when describing objects in debugfs Chris Wilson
2015-03-09 14:59   ` shuang.he
2015-03-19 17:41   ` Tvrtko Ursulin
2015-03-19 21:05     ` Chris Wilson
2015-03-20  9:23       ` Tvrtko Ursulin
2015-03-19 17:37 ` [PATCH 1/6] drm/i915: Split i915_gem_batch_pool into its own header Tvrtko Ursulin
2015-03-19 21:06   ` Chris Wilson
  -- strict thread matches above, loose matches on Subject: below --
2015-02-26 10:05 Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.