* [PATCH 1/3] lib: Export interval_tree
@ 2014-03-17 12:21 Chris Wilson
  2014-03-17 12:21 ` [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering Chris Wilson
                   ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Chris Wilson @ 2014-03-17 12:21 UTC (permalink / raw)
  To: intel-gfx
  Cc: Andrea Arcangeli, Rik van Riel, Peter Zijlstra, Andrew Morton,
	Michel Lespinasse

lib/interval_tree.c provides a simple interface for an interval-tree
(an augmented red-black tree) but is only built when testing the generic
macros for building interval-trees. For drivers with modest needs,
export the simple interval-tree library as is.
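
For illustration, a minimal sketch of how a driver with such modest
needs might use the exported interface (the node layout and function
signatures follow lib/interval_tree.c and the test module below; the
helper names and the tree root here are placeholders):

#include <linux/interval_tree.h>
#include <linux/types.h>

static struct rb_root my_ranges = RB_ROOT;

/* hypothetical helper: track an inclusive [start, last] range */
static void track_range(struct interval_tree_node *node,
			unsigned long start, unsigned long last)
{
	node->start = start;
	node->last = last;
	interval_tree_insert(node, &my_ranges);
}

/* hypothetical helper: does anything overlap [start, last]? */
static bool range_is_tracked(unsigned long start, unsigned long last)
{
	return interval_tree_iter_first(&my_ranges, start, last) != NULL;
}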

v2: Lots of help from Michel Lespinasse to only compile the code
    as required:
    - make INTERVAL_TREE a config option
    - make INTERVAL_TREE_TEST select the library functions
      and sanitize the filenames & Makefile
    - prepare interval_tree for being built as a module if required

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Michel Lespinasse <walken@google.com>
[Acked for inclusion via drm/i915 by Andrew Morton.]
---
 lib/Kconfig                   |  14 ++++++
 lib/Kconfig.debug             |   1 +
 lib/Makefile                  |   3 +-
 lib/interval_tree.c           |   6 +++
 lib/interval_tree_test.c      | 106 ++++++++++++++++++++++++++++++++++++++++++
 lib/interval_tree_test_main.c | 106 ------------------------------------------
 6 files changed, 128 insertions(+), 108 deletions(-)
 create mode 100644 lib/interval_tree_test.c
 delete mode 100644 lib/interval_tree_test_main.c

diff --git a/lib/Kconfig b/lib/Kconfig
index 991c98bc4a3f..04270aae4b60 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -322,6 +322,20 @@ config TEXTSEARCH_FSM
 config BTREE
 	boolean
 
+config INTERVAL_TREE
+	boolean
+	help
+	  Simple, embeddable, interval-tree. Can find the start of an
+	  overlapping range in log(n) time and then iterate over all
+	  overlapping nodes. The algorithm is implemented as an
+	  augmented rbtree.
+
+	  See:
+
+		Documentation/rbtree.txt
+
+	  for more information.
+
 config ASSOCIATIVE_ARRAY
 	bool
 	help
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a48abeac753f..135a6a0588c9 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1487,6 +1487,7 @@ config RBTREE_TEST
 config INTERVAL_TREE_TEST
 	tristate "Interval tree test"
 	depends on m && DEBUG_KERNEL
+	select INTERVAL_TREE
 	help
 	  A benchmark measuring the performance of the interval tree library
 
diff --git a/lib/Makefile b/lib/Makefile
index 48140e3ba73f..0969494f20d1 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -50,6 +50,7 @@ CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 
 obj-$(CONFIG_BTREE) += btree.o
+obj-$(CONFIG_INTERVAL_TREE) += interval_tree.o
 obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
@@ -155,8 +156,6 @@ lib-$(CONFIG_LIBFDT) += $(libfdt_files)
 obj-$(CONFIG_RBTREE_TEST) += rbtree_test.o
 obj-$(CONFIG_INTERVAL_TREE_TEST) += interval_tree_test.o
 
-interval_tree_test-objs := interval_tree_test_main.o interval_tree.o
-
 obj-$(CONFIG_PERCPU_TEST) += percpu_test.o
 
 obj-$(CONFIG_ASN1) += asn1_decoder.o
diff --git a/lib/interval_tree.c b/lib/interval_tree.c
index e6eb406f2d65..e4109f624e51 100644
--- a/lib/interval_tree.c
+++ b/lib/interval_tree.c
@@ -1,6 +1,7 @@
 #include <linux/init.h>
 #include <linux/interval_tree.h>
 #include <linux/interval_tree_generic.h>
+#include <linux/module.h>
 
 #define START(node) ((node)->start)
 #define LAST(node)  ((node)->last)
@@ -8,3 +9,8 @@
 INTERVAL_TREE_DEFINE(struct interval_tree_node, rb,
 		     unsigned long, __subtree_last,
 		     START, LAST,, interval_tree)
+
+EXPORT_SYMBOL(interval_tree_insert);
+EXPORT_SYMBOL(interval_tree_remove);
+EXPORT_SYMBOL(interval_tree_iter_first);
+EXPORT_SYMBOL(interval_tree_iter_next);
diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c
new file mode 100644
index 000000000000..245900b98c8e
--- /dev/null
+++ b/lib/interval_tree_test.c
@@ -0,0 +1,106 @@
+#include <linux/module.h>
+#include <linux/interval_tree.h>
+#include <linux/random.h>
+#include <asm/timex.h>
+
+#define NODES        100
+#define PERF_LOOPS   100000
+#define SEARCHES     100
+#define SEARCH_LOOPS 10000
+
+static struct rb_root root = RB_ROOT;
+static struct interval_tree_node nodes[NODES];
+static u32 queries[SEARCHES];
+
+static struct rnd_state rnd;
+
+static inline unsigned long
+search(unsigned long query, struct rb_root *root)
+{
+	struct interval_tree_node *node;
+	unsigned long results = 0;
+
+	for (node = interval_tree_iter_first(root, query, query); node;
+	     node = interval_tree_iter_next(node, query, query))
+		results++;
+	return results;
+}
+
+static void init(void)
+{
+	int i;
+	for (i = 0; i < NODES; i++) {
+		u32 a = prandom_u32_state(&rnd);
+		u32 b = prandom_u32_state(&rnd);
+		if (a <= b) {
+			nodes[i].start = a;
+			nodes[i].last = b;
+		} else {
+			nodes[i].start = b;
+			nodes[i].last = a;
+		}
+	}
+	for (i = 0; i < SEARCHES; i++)
+		queries[i] = prandom_u32_state(&rnd);
+}
+
+static int interval_tree_test_init(void)
+{
+	int i, j;
+	unsigned long results;
+	cycles_t time1, time2, time;
+
+	printk(KERN_ALERT "interval tree insert/remove");
+
+	prandom_seed_state(&rnd, 3141592653589793238ULL);
+	init();
+
+	time1 = get_cycles();
+
+	for (i = 0; i < PERF_LOOPS; i++) {
+		for (j = 0; j < NODES; j++)
+			interval_tree_insert(nodes + j, &root);
+		for (j = 0; j < NODES; j++)
+			interval_tree_remove(nodes + j, &root);
+	}
+
+	time2 = get_cycles();
+	time = time2 - time1;
+
+	time = div_u64(time, PERF_LOOPS);
+	printk(" -> %llu cycles\n", (unsigned long long)time);
+
+	printk(KERN_ALERT "interval tree search");
+
+	for (j = 0; j < NODES; j++)
+		interval_tree_insert(nodes + j, &root);
+
+	time1 = get_cycles();
+
+	results = 0;
+	for (i = 0; i < SEARCH_LOOPS; i++)
+		for (j = 0; j < SEARCHES; j++)
+			results += search(queries[j], &root);
+
+	time2 = get_cycles();
+	time = time2 - time1;
+
+	time = div_u64(time, SEARCH_LOOPS);
+	results = div_u64(results, SEARCH_LOOPS);
+	printk(" -> %llu cycles (%lu results)\n",
+	       (unsigned long long)time, results);
+
+	return -EAGAIN; /* Fail will directly unload the module */
+}
+
+static void interval_tree_test_exit(void)
+{
+	printk(KERN_ALERT "test exit\n");
+}
+
+module_init(interval_tree_test_init)
+module_exit(interval_tree_test_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michel Lespinasse");
+MODULE_DESCRIPTION("Interval Tree test");
diff --git a/lib/interval_tree_test_main.c b/lib/interval_tree_test_main.c
deleted file mode 100644
index 245900b98c8e..000000000000
--- a/lib/interval_tree_test_main.c
+++ /dev/null
@@ -1,106 +0,0 @@
-#include <linux/module.h>
-#include <linux/interval_tree.h>
-#include <linux/random.h>
-#include <asm/timex.h>
-
-#define NODES        100
-#define PERF_LOOPS   100000
-#define SEARCHES     100
-#define SEARCH_LOOPS 10000
-
-static struct rb_root root = RB_ROOT;
-static struct interval_tree_node nodes[NODES];
-static u32 queries[SEARCHES];
-
-static struct rnd_state rnd;
-
-static inline unsigned long
-search(unsigned long query, struct rb_root *root)
-{
-	struct interval_tree_node *node;
-	unsigned long results = 0;
-
-	for (node = interval_tree_iter_first(root, query, query); node;
-	     node = interval_tree_iter_next(node, query, query))
-		results++;
-	return results;
-}
-
-static void init(void)
-{
-	int i;
-	for (i = 0; i < NODES; i++) {
-		u32 a = prandom_u32_state(&rnd);
-		u32 b = prandom_u32_state(&rnd);
-		if (a <= b) {
-			nodes[i].start = a;
-			nodes[i].last = b;
-		} else {
-			nodes[i].start = b;
-			nodes[i].last = a;
-		}
-	}
-	for (i = 0; i < SEARCHES; i++)
-		queries[i] = prandom_u32_state(&rnd);
-}
-
-static int interval_tree_test_init(void)
-{
-	int i, j;
-	unsigned long results;
-	cycles_t time1, time2, time;
-
-	printk(KERN_ALERT "interval tree insert/remove");
-
-	prandom_seed_state(&rnd, 3141592653589793238ULL);
-	init();
-
-	time1 = get_cycles();
-
-	for (i = 0; i < PERF_LOOPS; i++) {
-		for (j = 0; j < NODES; j++)
-			interval_tree_insert(nodes + j, &root);
-		for (j = 0; j < NODES; j++)
-			interval_tree_remove(nodes + j, &root);
-	}
-
-	time2 = get_cycles();
-	time = time2 - time1;
-
-	time = div_u64(time, PERF_LOOPS);
-	printk(" -> %llu cycles\n", (unsigned long long)time);
-
-	printk(KERN_ALERT "interval tree search");
-
-	for (j = 0; j < NODES; j++)
-		interval_tree_insert(nodes + j, &root);
-
-	time1 = get_cycles();
-
-	results = 0;
-	for (i = 0; i < SEARCH_LOOPS; i++)
-		for (j = 0; j < SEARCHES; j++)
-			results += search(queries[j], &root);
-
-	time2 = get_cycles();
-	time = time2 - time1;
-
-	time = div_u64(time, SEARCH_LOOPS);
-	results = div_u64(results, SEARCH_LOOPS);
-	printk(" -> %llu cycles (%lu results)\n",
-	       (unsigned long long)time, results);
-
-	return -EAGAIN; /* Fail will directly unload the module */
-}
-
-static void interval_tree_test_exit(void)
-{
-	printk(KERN_ALERT "test exit\n");
-}
-
-module_init(interval_tree_test_init)
-module_exit(interval_tree_test_exit)
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Michel Lespinasse");
-MODULE_DESCRIPTION("Interval Tree test");
-- 
1.9.0

* [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering
  2014-03-17 12:21 [PATCH 1/3] lib: Export interval_tree Chris Wilson
@ 2014-03-17 12:21 ` Chris Wilson
  2014-03-28 22:58   ` Volkin, Bradley D
  2014-03-17 12:21 ` [PATCH 3/3] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl Chris Wilson
  2014-03-17 12:31 ` [PATCH 1/3] lib: Export interval_tree David Woodhouse
  2 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2014-03-17 12:21 UTC (permalink / raw)
  To: intel-gfx

A common issue we have is that retiring requests causes recursion
through GTT manipulation or page table manipulation which we can only
handle at very specific points. However, to maintain internal
consistency (enforced through our sanity checks on write_domain at
various points in the GEM object lifecycle) we do need to retire the
object prior to marking it with a new write_domain, and also clear the
write_domain for the implicit flush following a batch.

Note that this then allows the unbound objects to still be on the active
lists, and so care must be taken when removing objects from unbound lists
(similar to the caveats we face processing the bound lists).

v2: Fix i915_gem_shrink_all() to handle updated object lifetime rules,
by refactoring it to call into __i915_gem_shrink().

v3: Missed an object-retire prior to changing cache domains in
i915_gem_object_set_cache_level()

v4: Rebase

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Tested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c            | 112 ++++++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   3 +
 2 files changed, 66 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 58704ce62e3e..5cf4d80de867 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -44,6 +44,9 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o
 static __must_check int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly);
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj);
+
 static int i915_gem_phys_pwrite(struct drm_device *dev,
 				struct drm_i915_gem_object *obj,
 				struct drm_i915_gem_pwrite *args,
@@ -502,6 +505,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 		ret = i915_gem_object_wait_rendering(obj, true);
 		if (ret)
 			return ret;
+
+		i915_gem_object_retire(obj);
 	}
 
 	ret = i915_gem_object_get_pages(obj);
@@ -917,6 +922,8 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		ret = i915_gem_object_wait_rendering(obj, false);
 		if (ret)
 			return ret;
+
+		i915_gem_object_retire(obj);
 	}
 	/* Same trick applies to invalidate partially written cachelines read
 	 * before writing. */
@@ -1304,7 +1311,8 @@ static int
 i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
 				     struct intel_ring_buffer *ring)
 {
-	i915_gem_retire_requests_ring(ring);
+	if (!obj->active)
+		return 0;
 
 	/* Manually manage the write flush as we may have not yet
 	 * retired the buffer.
@@ -1314,7 +1322,6 @@ i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
 	 * we know we have passed the last write.
 	 */
 	obj->last_write_seqno = 0;
-	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 
 	return 0;
 }
@@ -1949,58 +1956,58 @@ static unsigned long
 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
 		  bool purgeable_only)
 {
-	struct list_head still_bound_list;
-	struct drm_i915_gem_object *obj, *next;
+	struct list_head still_in_list;
+	struct drm_i915_gem_object *obj;
 	unsigned long count = 0;
 
-	list_for_each_entry_safe(obj, next,
-				 &dev_priv->mm.unbound_list,
-				 global_list) {
-		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
-		    i915_gem_object_put_pages(obj) == 0) {
-			count += obj->base.size >> PAGE_SHIFT;
-			if (count >= target)
-				return count;
-		}
-	}
-
 	/*
-	 * As we may completely rewrite the bound list whilst unbinding
+	 * As we may completely rewrite the (un)bound list whilst unbinding
 	 * (due to retiring requests) we have to strictly process only
 	 * one element of the list at the time, and recheck the list
 	 * on every iteration.
+	 *
+	 * In particular, we must hold a reference whilst removing the
+	 * object as we may end up waiting for and/or retiring the objects.
+	 * This might release the final reference (held by the active list)
+	 * and result in the object being freed from under us. This is
+	 * similar to the precautions the eviction code must take whilst
+	 * removing objects.
+	 *
+	 * Also note that although these lists do not hold a reference to
+	 * the object we can safely grab one here: The final object
+	 * unreferencing and the bound_list are both protected by the
+	 * dev->struct_mutex and so we won't ever be able to observe an
+	 * object on the bound_list with a reference count equals 0.
 	 */
-	INIT_LIST_HEAD(&still_bound_list);
+	INIT_LIST_HEAD(&still_in_list);
+	while (count < target && !list_empty(&dev_priv->mm.unbound_list)) {
+		obj = list_first_entry(&dev_priv->mm.unbound_list,
+				       typeof(*obj), global_list);
+		list_move_tail(&obj->global_list, &still_in_list);
+
+		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
+			continue;
+
+		drm_gem_object_reference(&obj->base);
+
+		if (i915_gem_object_put_pages(obj) == 0)
+			count += obj->base.size >> PAGE_SHIFT;
+
+		drm_gem_object_unreference(&obj->base);
+	}
+	list_splice(&still_in_list, &dev_priv->mm.unbound_list);
+
+	INIT_LIST_HEAD(&still_in_list);
 	while (count < target && !list_empty(&dev_priv->mm.bound_list)) {
 		struct i915_vma *vma, *v;
 
 		obj = list_first_entry(&dev_priv->mm.bound_list,
 				       typeof(*obj), global_list);
-		list_move_tail(&obj->global_list, &still_bound_list);
+		list_move_tail(&obj->global_list, &still_in_list);
 
 		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
 			continue;
 
-		/*
-		 * Hold a reference whilst we unbind this object, as we may
-		 * end up waiting for and retiring requests. This might
-		 * release the final reference (held by the active list)
-		 * and result in the object being freed from under us.
-		 * in this object being freed.
-		 *
-		 * Note 1: Shrinking the bound list is special since only active
-		 * (and hence bound objects) can contain such limbo objects, so
-		 * we don't need special tricks for shrinking the unbound list.
-		 * The only other place where we have to be careful with active
-		 * objects suddenly disappearing due to retiring requests is the
-		 * eviction code.
-		 *
-		 * Note 2: Even though the bound list doesn't hold a reference
-		 * to the object we can safely grab one here: The final object
-		 * unreferencing and the bound_list are both protected by the
-		 * dev->struct_mutex and so we won't ever be able to observe an
-		 * object on the bound_list with a reference count equals 0.
-		 */
 		drm_gem_object_reference(&obj->base);
 
 		list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
@@ -2012,7 +2019,7 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
 
 		drm_gem_object_unreference(&obj->base);
 	}
-	list_splice(&still_bound_list, &dev_priv->mm.bound_list);
+	list_splice(&still_in_list, &dev_priv->mm.bound_list);
 
 	return count;
 }
@@ -2026,17 +2033,8 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target)
 static unsigned long
 i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_gem_object *obj, *next;
-	long freed = 0;
-
 	i915_gem_evict_everything(dev_priv->dev);
-
-	list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list,
-				 global_list) {
-		if (i915_gem_object_put_pages(obj) == 0)
-			freed += obj->base.size >> PAGE_SHIFT;
-	}
-	return freed;
+	return __i915_gem_shrink(dev_priv, LONG_MAX, false);
 }
 
 static int
@@ -2265,6 +2263,19 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 	WARN_ON(i915_verify_lists(dev));
 }
 
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj)
+{
+	struct intel_ring_buffer *ring = obj->ring;
+
+	if (ring == NULL)
+		return;
+
+	if (i915_seqno_passed(ring->get_seqno(ring, true),
+			      obj->last_read_seqno))
+		i915_gem_object_move_to_inactive(obj);
+}
+
 static int
 i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
 {
@@ -3618,6 +3629,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
+	i915_gem_object_retire(obj);
 	i915_gem_object_flush_cpu_write_domain(obj, false);
 
 	/* Serialise direct access to this object with the barriers for
@@ -3716,6 +3728,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		 * in obj->write_domain and have been skipping the clflushes.
 		 * Just set it to the CPU cache for now.
 		 */
+		i915_gem_object_retire(obj);
 		WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
 
 		old_read_domains = obj->base.read_domains;
@@ -3938,6 +3951,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
+	i915_gem_object_retire(obj);
 	i915_gem_object_flush_gtt_write_domain(obj);
 
 	old_write_domain = obj->base.write_domain;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3851a1b1dc88..6ec5d1d5c625 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -955,6 +955,9 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 			if (i915_gem_obj_ggtt_bound(obj) &&
 			    i915_gem_obj_to_ggtt(obj)->pin_count)
 				intel_mark_fb_busy(obj, ring);
+
+			/* update for the implicit flush after a batch */
+			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 		}
 
 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
-- 
1.9.0

* [PATCH 3/3] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-03-17 12:21 [PATCH 1/3] lib: Export interval_tree Chris Wilson
  2014-03-17 12:21 ` [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering Chris Wilson
@ 2014-03-17 12:21 ` Chris Wilson
  2014-04-17 22:18   ` Volkin, Bradley D
  2014-03-17 12:31 ` [PATCH 1/3] lib: Export interval_tree David Woodhouse
  2 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2014-03-17 12:21 UTC (permalink / raw)
  To: intel-gfx; +Cc: Akash Goel

By exporting the ability to map user addresses and insert PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications from
faster rendering of client-side software rasterisers (chromium),
mitigation of stalls due to read back (firefox) and to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).
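
For context, a minimal userspace sketch of the new ioctl (the struct and
ioctl number are those added to include/uapi/drm/i915_drm.h by this
patch; the helper name and the drm fd are placeholders, and ptr/size
must both be page aligned as required below):

#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int gem_userptr(int fd, void *ptr, uint64_t size, uint32_t *handle)
{
	struct drm_i915_gem_userptr arg;

	memset(&arg, 0, sizeof(arg));
	arg.user_ptr = (uintptr_t)ptr;
	arg.user_size = size;
	arg.flags = 0; /* READ_ONLY is rejected for now, see v19/v20 below */

	if (ioctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
		return -errno;

	/* arg.handle is an ordinary GEM handle, usable in execbuffer */
	*handle = arg.handle;
	return 0;
}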

v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquiring the mutex for a lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forward porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
    to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
     Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifer
     with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
     within the mmu_notifier range
v16: Fix a typo for iterating over multiple objects in the range and
     rearrange error path to destroy the mmu_notifier locklessly.
     Also close a race between invalidate_range and the get_pages_worker.
v17: Close a race between get_pages_worker/invalidate_range and fresh
     allocations of the same userptr range - and notice that
     struct_mutex was presumed to be held during creation when it wasn't.
v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
     for the struct sg_table and to clear it before reporting an error.
v19: Always error out on read-only userptr requests as we don't have the
     hardware infrastructure to support them at the moment.
v20: Refuse to implement read-only support until we have the required
     infrastructure - but reserve the bit in flags for future use.
v21: use_mm() is not required for get_user_pages(). It is only meant to
     be used to fix up the kernel thread's current->mm for use with
     copy_user().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig            |   1 +
 drivers/gpu/drm/i915/Makefile           |   1 +
 drivers/gpu/drm/i915/i915_dma.c         |   1 +
 drivers/gpu/drm/i915/i915_drv.h         |  24 +-
 drivers/gpu/drm/i915/i915_gem.c         |   4 +
 drivers/gpu/drm/i915/i915_gem_dmabuf.c  |   5 +
 drivers/gpu/drm/i915/i915_gem_userptr.c | 679 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c   |   2 +
 include/uapi/drm/i915_drm.h             |  16 +
 9 files changed, 732 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 73ed59eff139..9940baee10c2 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -5,6 +5,7 @@ config DRM_I915
 	depends on (AGP || AGP=n)
 	select INTEL_GTT
 	select AGP_INTEL if AGP
+	select INTERVAL_TREE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 2e02137a0d6e..1856494439e9 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -27,6 +27,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_gem.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_userptr.o \
 	  i915_gpu_error.o \
 	  i915_irq.o \
 	  i915_trace_points.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a98d66fb0302..4f7b93e2cb4f 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1979,6 +1979,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE2, i915_gem_create2_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2923cb364ae0..a8940c81eca5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
 #include <linux/i2c-algo-bit.h>
 #include <drm/intel-gtt.h>
 #include <linux/backlight.h>
+#include <linux/hashtable.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
@@ -171,6 +172,7 @@ enum hpd_pin {
 		if ((intel_connector)->base.encoder == (__encoder))
 
 struct drm_i915_private;
+struct i915_mmu_object;
 
 enum intel_dpll_id {
 	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
@@ -397,6 +399,7 @@ struct drm_i915_error_state {
 		u32 tiling:2;
 		u32 dirty:1;
 		u32 purgeable:1;
+		u32 userptr:1;
 		s32 ring:4;
 		u32 cache_level:3;
 	} **active_bo, **pinned_bo;
@@ -1546,6 +1549,9 @@ typedef struct drm_i915_private {
 	struct i915_gtt gtt; /* VMA representing the global address space */
 
 	struct i915_gem_mm mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+	DECLARE_HASHTABLE(mmu_notifiers, 7);
+#endif
 
 	/* Kernel Modesetting */
 
@@ -1678,6 +1684,7 @@ struct drm_i915_gem_object_ops {
 	 */
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
+	void (*release)(struct drm_i915_gem_object *);
 };
 
 struct drm_i915_gem_object {
@@ -1791,8 +1798,20 @@ struct drm_i915_gem_object {
 
 	/** for phy allocated objects */
 	struct drm_i915_gem_phys_object *phys_obj;
-};
 
+	union {
+		struct i915_gem_userptr {
+			uintptr_t ptr;
+			unsigned read_only :1;
+			unsigned active :4;
+#define I915_GEM_USERPTR_MAX_ACTIVE 15
+
+			struct mm_struct *mm;
+			struct i915_mmu_object *mn;
+			struct work_struct *work;
+		} userptr;
+	};
+};
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /**
@@ -2207,6 +2226,9 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
 int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
+int i915_gem_init_userptr(struct drm_device *dev);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
 int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5cf4d80de867..4869f0dce37b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4408,6 +4408,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->base.import_attach)
 		drm_prime_gem_destroy(&obj->base, NULL);
 
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
@@ -4672,6 +4675,7 @@ int i915_gem_init(struct drm_device *dev)
 			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
 	}
 
+	i915_gem_init_userptr(dev);
 	i915_gem_init_global_gtt(dev);
 
 	ret = i915_gem_context_init(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 9bb533e0d762..ac295ec8ee2e 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -233,6 +233,11 @@ static const struct dma_buf_ops i915_dmabuf_ops =  {
 struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 				      struct drm_gem_object *gem_obj, int flags)
 {
+	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
+
+	if (obj->userptr.mm && obj->userptr.mn == NULL)
+		return ERR_PTR(-EINVAL);
+
 	return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 000000000000..632028332d85
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -0,0 +1,679 @@
+/*
+ * Copyright © 2012-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_context.h>
+#include <linux/mmu_notifier.h>
+#include <linux/mempolicy.h>
+#include <linux/swap.h>
+
+#if defined(CONFIG_MMU_NOTIFIER)
+#include <linux/interval_tree.h>
+
+struct i915_mmu_notifier {
+	spinlock_t lock;
+	struct hlist_node node;
+	struct mmu_notifier mn;
+	struct rb_root objects;
+	struct drm_device *dev;
+	struct mm_struct *mm;
+	struct work_struct work;
+	unsigned long count;
+	unsigned long serial;
+};
+
+struct i915_mmu_object {
+	struct i915_mmu_notifier *mmu;
+	struct interval_tree_node it;
+	struct drm_i915_gem_object *obj;
+};
+
+static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
+						       struct mm_struct *mm,
+						       unsigned long start,
+						       unsigned long end)
+{
+	struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
+	struct interval_tree_node *it = NULL;
+	unsigned long serial = 0;
+
+	end--; /* interval ranges are inclusive, but invalidate range is exclusive */
+	while (start < end) {
+		struct drm_i915_gem_object *obj;
+
+		obj = NULL;
+		spin_lock(&mn->lock);
+		if (serial == mn->serial)
+			it = interval_tree_iter_next(it, start, end);
+		else
+			it = interval_tree_iter_first(&mn->objects, start, end);
+		if (it != NULL) {
+			obj = container_of(it, struct i915_mmu_object, it)->obj;
+			drm_gem_object_reference(&obj->base);
+			serial = mn->serial;
+		}
+		spin_unlock(&mn->lock);
+		if (obj == NULL)
+			return;
+
+		mutex_lock(&mn->dev->struct_mutex);
+		/* Cancel any active worker and force us to re-evaluate gup */
+		obj->userptr.work = NULL;
+
+		if (obj->pages != NULL) {
+			struct drm_i915_private *dev_priv = to_i915(mn->dev);
+			struct i915_vma *vma, *tmp;
+			bool was_interruptible;
+
+			was_interruptible = dev_priv->mm.interruptible;
+			dev_priv->mm.interruptible = false;
+
+			list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
+				int ret = i915_vma_unbind(vma);
+				WARN_ON(ret && ret != -EIO);
+			}
+			WARN_ON(i915_gem_object_put_pages(obj));
+
+			dev_priv->mm.interruptible = was_interruptible;
+		}
+
+		start = obj->userptr.ptr + obj->base.size;
+
+		drm_gem_object_unreference(&obj->base);
+		mutex_unlock(&mn->dev->struct_mutex);
+	}
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
+};
+
+static struct i915_mmu_notifier *
+__i915_mmu_notifier_lookup(struct drm_device *dev, struct mm_struct *mm)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_mmu_notifier *mmu;
+
+	/* Protected by dev->struct_mutex */
+	hash_for_each_possible(dev_priv->mmu_notifiers, mmu, node, (unsigned long)mm)
+		if (mmu->mm == mm)
+			return mmu;
+
+	return NULL;
+}
+
+static struct i915_mmu_notifier *
+i915_mmu_notifier_get(struct drm_device *dev, struct mm_struct *mm)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_mmu_notifier *mmu;
+	int ret;
+
+	lockdep_assert_held(&dev->struct_mutex);
+
+	mmu = __i915_mmu_notifier_lookup(dev, mm);
+	if (mmu)
+		return mmu;
+
+	mmu = kmalloc(sizeof(*mmu), GFP_KERNEL);
+	if (mmu == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&mmu->lock);
+	mmu->dev = dev;
+	mmu->mn.ops = &i915_gem_userptr_notifier;
+	mmu->mm = mm;
+	mmu->objects = RB_ROOT;
+	mmu->count = 0;
+	mmu->serial = 0;
+
+	/* Protected by mmap_sem (write-lock) */
+	ret = __mmu_notifier_register(&mmu->mn, mm);
+	if (ret) {
+		kfree(mmu);
+		return ERR_PTR(ret);
+	}
+
+	/* Protected by dev->struct_mutex */
+	hash_add(dev_priv->mmu_notifiers, &mmu->node, (unsigned long)mm);
+	return mmu;
+}
+
+static void
+__i915_mmu_notifier_destroy_worker(struct work_struct *work)
+{
+	struct i915_mmu_notifier *mmu = container_of(work, typeof(*mmu), work);
+	mmu_notifier_unregister(&mmu->mn, mmu->mm);
+	kfree(mmu);
+}
+
+static void
+__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
+{
+	lockdep_assert_held(&mmu->dev->struct_mutex);
+
+	/* Protected by dev->struct_mutex */
+	hash_del(&mmu->node);
+
+	/* Our lock ordering is: mmap_sem, mmu_notifier srcu, struct_mutex.
+	 * We enter the function holding struct_mutex, therefore we need
+	 * to drop our mutex prior to calling mmu_notifier_unregister in
+	 * order to prevent lock inversion (and system-wide deadlock)
+	 * between the mmap_sem and struct-mutex.
+	 */
+	INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
+	schedule_work(&mmu->work);
+}
+
+static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
+{
+	if (++mmu->serial == 0)
+		mmu->serial = 1;
+}
+
+static void
+i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
+		      struct i915_mmu_object *mn)
+{
+	lockdep_assert_held(&mmu->dev->struct_mutex);
+
+	spin_lock(&mmu->lock);
+	interval_tree_remove(&mn->it, &mmu->objects);
+	__i915_mmu_notifier_update_serial(mmu);
+	spin_unlock(&mmu->lock);
+
+	/* Protected against _add() by dev->struct_mutex */
+	if (--mmu->count == 0)
+		__i915_mmu_notifier_destroy(mmu);
+}
+
+static int
+i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
+		      struct i915_mmu_object *mn)
+{
+	struct interval_tree_node *it;
+	int ret;
+
+	/* Make sure we drop the final active reference (and thereby
+	 * remove the objects from the interval tree) before we do
+	 * the check for overlapping objects.
+	 */
+	ret = i915_mutex_lock_interruptible(mmu->dev);
+	if (ret)
+		return ret;
+
+	i915_gem_retire_requests(mmu->dev);
+
+	/* Disallow overlapping userptr objects */
+	spin_lock(&mmu->lock);
+	it = interval_tree_iter_first(&mmu->objects,
+				      mn->it.start, mn->it.last);
+	if (it) {
+		struct drm_i915_gem_object *obj;
+
+		/* We only need to check the first object as it either
+		 * is idle (and in use elsewhere) or we try again in order
+		 * to give time for the gup-worker to run and flush its
+		 * object references. Afterwards if we find another
+		 * object that is idle (and so referenced elsewhere)
+	 * we know that the overlap with a pinned object is
+		 * genuine.
+		 */
+		obj = container_of(it, struct i915_mmu_object, it)->obj;
+		ret = obj->userptr.active ? -EAGAIN : -EINVAL;
+	} else {
+		interval_tree_insert(&mn->it, &mmu->objects);
+		__i915_mmu_notifier_update_serial(mmu);
+		ret = 0;
+	}
+	spin_unlock(&mmu->lock);
+	mutex_unlock(&mmu->dev->struct_mutex);
+
+	return ret;
+}
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+	struct i915_mmu_object *mn;
+
+	mn = obj->userptr.mn;
+	if (mn == NULL)
+		return;
+
+	i915_mmu_notifier_del(mn->mmu, mn);
+	obj->userptr.mn = NULL;
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	struct i915_mmu_notifier *mmu;
+	struct i915_mmu_object *mn;
+	int ret;
+
+	if (flags & I915_USERPTR_UNSYNCHRONIZED)
+		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+
+	down_write(&obj->userptr.mm->mmap_sem);
+	ret = i915_mutex_lock_interruptible(obj->base.dev);
+	if (ret == 0) {
+		mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
+		if (!IS_ERR(mmu))
+			mmu->count++; /* preemptive add to act as a refcount */
+		else
+			ret = PTR_ERR(mmu);
+		mutex_unlock(&obj->base.dev->struct_mutex);
+	}
+	up_write(&obj->userptr.mm->mmap_sem);
+	if (ret)
+		return ret;
+
+	mn = kzalloc(sizeof(*mn), GFP_KERNEL);
+	if (mn == NULL) {
+		ret = -ENOMEM;
+		goto destroy_mmu;
+	}
+
+	mn->mmu = mmu;
+	mn->it.start = obj->userptr.ptr;
+	mn->it.last = mn->it.start + obj->base.size - 1;
+	mn->obj = obj;
+
+	ret = i915_mmu_notifier_add(mmu, mn);
+	if (ret)
+		goto free_mn;
+
+	obj->userptr.mn = mn;
+	return 0;
+
+free_mn:
+	kfree(mn);
+destroy_mmu:
+	mutex_lock(&obj->base.dev->struct_mutex);
+	if (--mmu->count == 0)
+		__i915_mmu_notifier_destroy(mmu);
+	mutex_unlock(&obj->base.dev->struct_mutex);
+	return ret;
+}
+
+#else
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
+		return -ENODEV;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return 0;
+}
+#endif
+
+struct get_pages_work {
+	struct work_struct work;
+	struct drm_i915_gem_object *obj;
+	struct task_struct *task;
+};
+
+static int
+st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
+{
+	struct scatterlist *sg;
+	int n;
+
+	*st = kmalloc(sizeof(**st), GFP_KERNEL);
+	if (*st == NULL || sg_alloc_table(*st, num_pages, GFP_KERNEL)) {
+		kfree(*st);
+		*st = NULL;
+		return -ENOMEM;
+	}
+
+	for_each_sg((*st)->sgl, sg, num_pages, n)
+		sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+
+	return 0;
+}
+
+static void
+__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
+{
+	struct get_pages_work *work = container_of(_work, typeof(*work), work);
+	struct drm_i915_gem_object *obj = work->obj;
+	struct drm_device *dev = obj->base.dev;
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	ret = -ENOMEM;
+	pinned = 0;
+
+	pvec = kmalloc(num_pages*sizeof(struct page *),
+		       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+	if (pvec == NULL)
+		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+	if (pvec != NULL) {
+		struct mm_struct *mm = obj->userptr.mm;
+
+		down_read(&mm->mmap_sem);
+		while (pinned < num_pages) {
+			ret = get_user_pages(work->task, mm,
+					     obj->userptr.ptr + pinned * PAGE_SIZE,
+					     num_pages - pinned,
+					     !obj->userptr.read_only, 0,
+					     pvec + pinned, NULL);
+			if (ret < 0)
+				break;
+
+			pinned += ret;
+		}
+		up_read(&mm->mmap_sem);
+	}
+
+	mutex_lock(&dev->struct_mutex);
+	if (obj->userptr.work != &work->work) {
+		ret = 0;
+	} else if (pinned == num_pages) {
+		ret = st_set_pages(&obj->pages, pvec, num_pages);
+		if (ret == 0) {
+			list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list);
+			pinned = 0;
+		}
+	}
+
+	obj->userptr.work = ERR_PTR(ret);
+	obj->userptr.active--;
+	drm_gem_object_unreference(&obj->base);
+	mutex_unlock(&dev->struct_mutex);
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+
+	put_task_struct(work->task);
+	kfree(work);
+}
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 *
+	 * Fortunately, we can hook into the mmu_notifier in order to
+	 * discard the page references prior to anything nasty happening
+	 * to the vma (discard or cloning) which should prevent the more
+	 * egregious cases from causing harm.
+	 */
+
+	pvec = NULL;
+	pinned = 0;
+	if (obj->userptr.mm == current->mm) {
+		pvec = kmalloc(num_pages*sizeof(struct page *),
+			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+		if (pvec == NULL) {
+			pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+			if (pvec == NULL)
+				return -ENOMEM;
+		}
+
+		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
+					       !obj->userptr.read_only, pvec);
+	}
+	if (pinned < num_pages) {
+		if (pinned < 0) {
+			ret = pinned;
+			pinned = 0;
+		} else {
+			/* Spawn a worker so that we can acquire the
+			 * user pages without holding our mutex. Access
+			 * to the user pages requires mmap_sem, and we have
+			 * a strict lock ordering of mmap_sem, struct_mutex -
+			 * we already hold struct_mutex here and so cannot
+			 * call gup without encountering a lock inversion.
+			 *
+			 * Userspace will keep on repeating the operation
+			 * (thanks to EAGAIN) until either we hit the fast
+			 * path or the worker completes. If the worker is
+			 * cancelled or superseded, the task is still run
+			 * but the results ignored. (This leads to
+			 * complications that we may have a stray object
+			 * refcount that we need to be wary of when
+			 * checking for existing objects during creation.)
+			 * If the worker encounters an error, it reports
+			 * that error back to this function through
+			 * obj->userptr.work = ERR_PTR.
+			 */
+			ret = -EAGAIN;
+			if (obj->userptr.work == NULL &&
+			    obj->userptr.active < I915_GEM_USERPTR_MAX_ACTIVE) {
+				struct get_pages_work *work;
+
+				work = kmalloc(sizeof(*work), GFP_KERNEL);
+				if (work != NULL) {
+					obj->userptr.work = &work->work;
+					obj->userptr.active++;
+
+					work->obj = obj;
+					drm_gem_object_reference(&obj->base);
+
+					work->task = current;
+					get_task_struct(work->task);
+
+					INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
+					schedule_work(&work->work);
+				} else
+					ret = -ENOMEM;
+			} else {
+				if (IS_ERR(obj->userptr.work)) {
+					ret = PTR_ERR(obj->userptr.work);
+					obj->userptr.work = NULL;
+				}
+			}
+		}
+	} else {
+		ret = st_set_pages(&obj->pages, pvec, num_pages);
+		if (ret == 0) {
+			obj->userptr.work = NULL;
+			pinned = 0;
+		}
+	}
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+	return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+	struct scatterlist *sg;
+	int i;
+
+	BUG_ON(obj->userptr.work != NULL);
+
+	if (obj->madv != I915_MADV_WILLNEED)
+		obj->dirty = 0;
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+		struct page *page = sg_page(sg);
+
+		if (obj->dirty)
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+	}
+	obj->dirty = 0;
+
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+	i915_gem_userptr_release__mmu_notifier(obj);
+
+	if (obj->userptr.mm) {
+		mmput(obj->userptr.mm);
+		obj->userptr.mm = NULL;
+	}
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.get_pages = i915_gem_userptr_get_pages,
+	.put_pages = i915_gem_userptr_put_pages,
+	.release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some normal memory from the process
+ * context - user memory.
+ *
+ * We impose several restrictions upon the memory being mapped
+ * into the GPU.
+ * 1. It must be page aligned (both start/end addresses, i.e ptr and size).
+ * 2. It cannot overlap any other userptr object in the same address space.
+ * 3. It must be normal system memory, not a pointer into another map of IO
+ *    space (e.g. it must not be a GTT mmapping of another object).
+ * 4. We only allow a bo as large as we could in theory map into the GTT,
+ *    that is we limit the size to the total size of the GTT.
+ * 5. The bo is marked as being snoopable. The backing pages are left
+ *    accessible directly by the CPU, but reads and writes by the GPU may
+ *    incur the cost of a snoop (unless you have an LLC architecture).
+ *
+ * Synchronisation between multiple users and the GPU is left to userspace
+ * through the normal set-domain-ioctl. The kernel will enforce that the
+ * GPU relinquishes the VMA before it is returned back to the system
+ * i.e. upon free(), munmap() or process termination. However, the userspace
+ * malloc() library may not immediately relinquish the VMA after free() and
+ * instead reuse it whilst the GPU is still reading and writing to the VMA.
+ * Caveat emptor.
+ *
+ * Also note, that the object created here is not currently a "first class"
+ * object, in that several ioctls are banned. These are the CPU access
+ * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
+ * direct access via your pointer rather than use those ioctls.
+ *
+ * If you think this is a good interface to use to pass GPU memory between
+ * drivers, please use dma-buf instead. In fact, wherever possible use
+ * dma-buf instead.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_userptr *args = data;
+	struct drm_i915_gem_object *obj;
+	int ret;
+	u32 handle;
+
+	if (args->flags & ~(I915_USERPTR_READ_ONLY |
+			    I915_USERPTR_UNSYNCHRONIZED))
+		return -EINVAL;
+
+	if (offset_in_page(args->user_ptr | args->user_size))
+		return -EINVAL;
+
+	if (args->user_size > dev_priv->gtt.base.total)
+		return -E2BIG;
+
+	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
+		return -EFAULT;
+
+	if (args->flags & I915_USERPTR_READ_ONLY) {
+		/* On almost all of the current hw, we cannot tell the GPU that a
+		 * page is readonly, so this is just a placeholder in the uAPI.
+		 */
+		return -ENODEV;
+	}
+
+	/* Allocate the new object */
+	obj = i915_gem_object_alloc(dev);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	drm_gem_private_object_init(dev, &obj->base, args->user_size);
+	i915_gem_object_init(obj, &i915_gem_userptr_ops);
+	obj->cache_level = I915_CACHE_LLC;
+	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+
+	obj->userptr.ptr = args->user_ptr;
+	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
+
+	/* And keep a pointer to the current->mm for resolving the user pages
+	 * at binding. This means that we need to hook into the mmu_notifier
+	 * in order to detect if the mmu is destroyed.
+	 */
+	ret = -ENOMEM;
+	if ((obj->userptr.mm = get_task_mm(current)))
+		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
+	if (ret == 0)
+		ret = drm_gem_handle_create(file, &obj->base, &handle);
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&obj->base);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
+
+int
+i915_gem_init_userptr(struct drm_device *dev)
+{
+#if defined(CONFIG_MMU_NOTIFIER)
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	hash_init(dev_priv->mmu_notifiers);
+#endif
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index baf1ca690dc5..60c6bf2c30c8 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -204,6 +204,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
+		err_puts(m, err->userptr ? " userptr" : "");
 		err_puts(m, err->ring != -1 ? " " : "");
 		err_puts(m, ring_str(err->ring));
 		err_puts(m, i915_cache_level_str(err->cache_level));
@@ -648,6 +649,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->tiling = obj->tiling_mode;
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
+	err->userptr = obj->userptr.mm != 0;
 	err->ring = obj->ring ? obj->ring->id : -1;
 	err->cache_level = obj->cache_level;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index f5897eefb604..ebe90b439140 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -224,6 +224,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_REG_READ		0x31
 #define DRM_I915_GET_RESET_STATS	0x32
 #define DRM_I915_GEM_CREATE2		0x33
+#define DRM_I915_GEM_USERPTR		0x34
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -275,6 +276,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
+#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1156,4 +1158,18 @@ struct drm_i915_reset_stats {
 	__u32 pad;
 };
 
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u64 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 #endif /* _UAPI_I915_DRM_H_ */
-- 
1.9.0

* Re: [PATCH 1/3] lib: Export interval_tree
  2014-03-17 12:21 [PATCH 1/3] lib: Export interval_tree Chris Wilson
  2014-03-17 12:21 ` [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering Chris Wilson
  2014-03-17 12:21 ` [PATCH 3/3] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl Chris Wilson
@ 2014-03-17 12:31 ` David Woodhouse
  2014-03-17 13:08   ` Michel Lespinasse
  2 siblings, 1 reply; 17+ messages in thread
From: David Woodhouse @ 2014-03-17 12:31 UTC (permalink / raw)
  To: Chris Wilson
  Cc: Andrea Arcangeli, Rik van Riel, Peter Zijlstra, intel-gfx,
	Andrew Morton, Michel Lespinasse


On Mon, 2014-03-17 at 12:21 +0000, Chris Wilson wrote:
> +EXPORT_SYMBOL(interval_tree_insert);
> +EXPORT_SYMBOL(interval_tree_remove);
> +EXPORT_SYMBOL(interval_tree_iter_first);
> +EXPORT_SYMBOL(interval_tree_iter_next);

I'd prefer to see EXPORT_SYMBOL_GPL for this.
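
(For reference, the suggested amendment would just re-spin the export
hunk along these lines:)

EXPORT_SYMBOL_GPL(interval_tree_insert);
EXPORT_SYMBOL_GPL(interval_tree_remove);
EXPORT_SYMBOL_GPL(interval_tree_iter_first);
EXPORT_SYMBOL_GPL(interval_tree_iter_next);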


-- 
dwmw2


* Re: [PATCH 1/3] lib: Export interval_tree
  2014-03-17 12:31 ` [PATCH 1/3] lib: Export interval_tree David Woodhouse
@ 2014-03-17 13:08   ` Michel Lespinasse
  2014-03-18  9:56     ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Michel Lespinasse @ 2014-03-17 13:08 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Andrea Arcangeli, Rik van Riel, Peter Zijlstra, intel-gfx, Andrew Morton

On Mon, Mar 17, 2014 at 5:31 AM, David Woodhouse <dwmw2@infradead.org> wrote:
> On Mon, 2014-03-17 at 12:21 +0000, Chris Wilson wrote:
>> +EXPORT_SYMBOL(interval_tree_insert);
>> +EXPORT_SYMBOL(interval_tree_remove);
>> +EXPORT_SYMBOL(interval_tree_iter_first);
>> +EXPORT_SYMBOL(interval_tree_iter_next);
>
> I'd prefer to see EXPORT_SYMBOL_GPL for this.

I'm fine with it either way. I think it would help if you stated the
reason for your preference though ?

-- 
Michel "Walken" Lespinasse
A program is never fully debugged until the last user dies.

* Re: [PATCH 1/3] lib: Export interval_tree
  2014-03-17 13:08   ` Michel Lespinasse
@ 2014-03-18  9:56     ` Chris Wilson
  2014-04-25 14:17       ` Daniel Vetter
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2014-03-18  9:56 UTC (permalink / raw)
  To: Michel Lespinasse
  Cc: Andrea Arcangeli, Rik van Riel, Peter Zijlstra, intel-gfx, Andrew Morton

On Mon, Mar 17, 2014 at 06:08:57AM -0700, Michel Lespinasse wrote:
> On Mon, Mar 17, 2014 at 5:31 AM, David Woodhouse <dwmw2@infradead.org> wrote:
> > On Mon, 2014-03-17 at 12:21 +0000, Chris Wilson wrote:
> >> +EXPORT_SYMBOL(interval_tree_insert);
> >> +EXPORT_SYMBOL(interval_tree_remove);
> >> +EXPORT_SYMBOL(interval_tree_iter_first);
> >> +EXPORT_SYMBOL(interval_tree_iter_next);
> >
> > I'd prefer to see EXPORT_SYMBOL_GPL for this.
> 
> I'm fine with it either way. I think it would help if you stated the
> reason for your preference though ?

Nobody objects?

How about

v3: Prefer EXPORT_SYMBOL_GPL for new library routines

as the reason in the changelog for amending the patch?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering
  2014-03-17 12:21 ` [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering Chris Wilson
@ 2014-03-28 22:58   ` Volkin, Bradley D
  2014-04-17 22:20     ` Volkin, Bradley D
  2014-04-24  9:22     ` Chris Wilson
  0 siblings, 2 replies; 17+ messages in thread
From: Volkin, Bradley D @ 2014-03-28 22:58 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Mon, Mar 17, 2014 at 05:21:55AM -0700, Chris Wilson wrote:
> A common issue we have is that retiring requests causes recursion
> through GTT manipulation or page table manipulation which we can only
> handle at very specific points. However, to maintain internal
> consistency (enforced through our sanity checks on write_domain at
> various points in the GEM object lifecycle) we do need to retire the
> object prior to marking it with a new write_domain, and also clear the
> write_domain for the implicit flush following a batch.
> 
> Note that this then allows the unbound objects to still be on the active
> lists, and so care must be taken when removing objects from unbound lists
> (similar to the caveats we face processing the bound lists).
> 
> v2: Fix i915_gem_shrink_all() to handle updated object lifetime rules,
> by refactoring it to call into __i915_gem_shrink().
> 
> v3: Missed an object-retire prior to changing cache domains in
> i915_gem_object_set_cache_level()
> 
> v4: Rebase
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Tested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c            | 112 ++++++++++++++++-------------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   3 +
>  2 files changed, 66 insertions(+), 49 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 58704ce62e3e..5cf4d80de867 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -44,6 +44,9 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o
>  static __must_check int
>  i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>  			       bool readonly);
> +static void
> +i915_gem_object_retire(struct drm_i915_gem_object *obj);
> +
>  static int i915_gem_phys_pwrite(struct drm_device *dev,
>  				struct drm_i915_gem_object *obj,
>  				struct drm_i915_gem_pwrite *args,
> @@ -502,6 +505,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
>  		ret = i915_gem_object_wait_rendering(obj, true);
>  		if (ret)
>  			return ret;
> +
> +		i915_gem_object_retire(obj);
>  	}
>  
>  	ret = i915_gem_object_get_pages(obj);
> @@ -917,6 +922,8 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  		ret = i915_gem_object_wait_rendering(obj, false);
>  		if (ret)
>  			return ret;
> +
> +		i915_gem_object_retire(obj);
>  	}
>  	/* Same trick applies to invalidate partially written cachelines read
>  	 * before writing. */
> @@ -1304,7 +1311,8 @@ static int
>  i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
>  				     struct intel_ring_buffer *ring)
>  {
> -	i915_gem_retire_requests_ring(ring);
> +	if (!obj->active)
> +		return 0;
>  
>  	/* Manually manage the write flush as we may have not yet
>  	 * retired the buffer.
> @@ -1314,7 +1322,6 @@ i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
>  	 * we know we have passed the last write.
>  	 */
>  	obj->last_write_seqno = 0;
> -	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
>  
>  	return 0;
>  }
> @@ -1949,58 +1956,58 @@ static unsigned long
>  __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
>  		  bool purgeable_only)
>  {
> -	struct list_head still_bound_list;
> -	struct drm_i915_gem_object *obj, *next;
> +	struct list_head still_in_list;
> +	struct drm_i915_gem_object *obj;
>  	unsigned long count = 0;
>  
> -	list_for_each_entry_safe(obj, next,
> -				 &dev_priv->mm.unbound_list,
> -				 global_list) {
> -		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
> -		    i915_gem_object_put_pages(obj) == 0) {
> -			count += obj->base.size >> PAGE_SHIFT;
> -			if (count >= target)
> -				return count;
> -		}
> -	}
> -
>  	/*
> -	 * As we may completely rewrite the bound list whilst unbinding
> +	 * As we may completely rewrite the (un)bound list whilst unbinding
>  	 * (due to retiring requests) we have to strictly process only
>  	 * one element of the list at the time, and recheck the list
>  	 * on every iteration.

Is it still true that we could retire requests on this path? I see that
currently we will retire requests via:
i915_vma_unbind -> i915_gem_object_finish_gpu -> i915_gem_object_wait_rendering.

But we've taken the explicit request retirement out of the wait_rendering path.
Have I missed somewhere that it could still happen?

Thanks,
Brad

> +	 *
> +	 * In particular, we must hold a reference whilst removing the
> +	 * object as we may end up waiting for and/or retiring the objects.
> +	 * This might release the final reference (held by the active list)
> +	 * and result in the object being freed from under us. This is
> +	 * similar to the precautions the eviction code must take whilst
> +	 * removing objects.
> +	 *
> +	 * Also note that although these lists do not hold a reference to
> +	 * the object we can safely grab one here: The final object
> +	 * unreferencing and the bound_list are both protected by the
> +	 * dev->struct_mutex and so we won't ever be able to observe an
> +	 * object on the bound_list with a reference count equals 0.
>  	 */
> -	INIT_LIST_HEAD(&still_bound_list);
> +	INIT_LIST_HEAD(&still_in_list);
> +	while (count < target && !list_empty(&dev_priv->mm.unbound_list)) {
> +		obj = list_first_entry(&dev_priv->mm.unbound_list,
> +				       typeof(*obj), global_list);
> +		list_move_tail(&obj->global_list, &still_in_list);
> +
> +		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
> +			continue;
> +
> +		drm_gem_object_reference(&obj->base);
> +
> +		if (i915_gem_object_put_pages(obj) == 0)
> +			count += obj->base.size >> PAGE_SHIFT;
> +
> +		drm_gem_object_unreference(&obj->base);
> +	}
> +	list_splice(&still_in_list, &dev_priv->mm.unbound_list);
> +
> +	INIT_LIST_HEAD(&still_in_list);
>  	while (count < target && !list_empty(&dev_priv->mm.bound_list)) {
>  		struct i915_vma *vma, *v;
>  
>  		obj = list_first_entry(&dev_priv->mm.bound_list,
>  				       typeof(*obj), global_list);
> -		list_move_tail(&obj->global_list, &still_bound_list);
> +		list_move_tail(&obj->global_list, &still_in_list);
>  
>  		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
>  			continue;
>  
> -		/*
> -		 * Hold a reference whilst we unbind this object, as we may
> -		 * end up waiting for and retiring requests. This might
> -		 * release the final reference (held by the active list)
> -		 * and result in the object being freed from under us.
> -		 * in this object being freed.
> -		 *
> -		 * Note 1: Shrinking the bound list is special since only active
> -		 * (and hence bound objects) can contain such limbo objects, so
> -		 * we don't need special tricks for shrinking the unbound list.
> -		 * The only other place where we have to be careful with active
> -		 * objects suddenly disappearing due to retiring requests is the
> -		 * eviction code.
> -		 *
> -		 * Note 2: Even though the bound list doesn't hold a reference
> -		 * to the object we can safely grab one here: The final object
> -		 * unreferencing and the bound_list are both protected by the
> -		 * dev->struct_mutex and so we won't ever be able to observe an
> -		 * object on the bound_list with a reference count equals 0.
> -		 */
>  		drm_gem_object_reference(&obj->base);
>  
>  		list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
> @@ -2012,7 +2019,7 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
>  
>  		drm_gem_object_unreference(&obj->base);
>  	}
> -	list_splice(&still_bound_list, &dev_priv->mm.bound_list);
> +	list_splice(&still_in_list, &dev_priv->mm.bound_list);
>  
>  	return count;
>  }
> @@ -2026,17 +2033,8 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target)
>  static unsigned long
>  i915_gem_shrink_all(struct drm_i915_private *dev_priv)
>  {
> -	struct drm_i915_gem_object *obj, *next;
> -	long freed = 0;
> -
>  	i915_gem_evict_everything(dev_priv->dev);
> -
> -	list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list,
> -				 global_list) {
> -		if (i915_gem_object_put_pages(obj) == 0)
> -			freed += obj->base.size >> PAGE_SHIFT;
> -	}
> -	return freed;
> +	return __i915_gem_shrink(dev_priv, LONG_MAX, false);
>  }
>  
>  static int
> @@ -2265,6 +2263,19 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
>  	WARN_ON(i915_verify_lists(dev));
>  }
>  
> +static void
> +i915_gem_object_retire(struct drm_i915_gem_object *obj)
> +{
> +	struct intel_ring_buffer *ring = obj->ring;
> +
> +	if (ring == NULL)
> +		return;
> +
> +	if (i915_seqno_passed(ring->get_seqno(ring, true),
> +			      obj->last_read_seqno))
> +		i915_gem_object_move_to_inactive(obj);
> +}
> +
>  static int
>  i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
>  {
> @@ -3618,6 +3629,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  	if (ret)
>  		return ret;
>  
> +	i915_gem_object_retire(obj);
>  	i915_gem_object_flush_cpu_write_domain(obj, false);
>  
>  	/* Serialise direct access to this object with the barriers for
> @@ -3716,6 +3728,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>  		 * in obj->write_domain and have been skipping the clflushes.
>  		 * Just set it to the CPU cache for now.
>  		 */
> +		i915_gem_object_retire(obj);
>  		WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
>  
>  		old_read_domains = obj->base.read_domains;
> @@ -3938,6 +3951,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
>  	if (ret)
>  		return ret;
>  
> +	i915_gem_object_retire(obj);
>  	i915_gem_object_flush_gtt_write_domain(obj);
>  
>  	old_write_domain = obj->base.write_domain;
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 3851a1b1dc88..6ec5d1d5c625 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -955,6 +955,9 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>  			if (i915_gem_obj_ggtt_bound(obj) &&
>  			    i915_gem_obj_to_ggtt(obj)->pin_count)
>  				intel_mark_fb_busy(obj, ring);
> +
> +			/* update for the implicit flush after a batch */
> +			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
>  		}
>  
>  		trace_i915_gem_object_change_domain(obj, old_read, old_write);
> -- 
> 1.9.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/3] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-03-17 12:21 ` [PATCH 3/3] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl Chris Wilson
@ 2014-04-17 22:18   ` Volkin, Bradley D
  2014-05-16 10:07     ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Volkin, Bradley D @ 2014-04-17 22:18 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Goel, Akash

On Mon, Mar 17, 2014 at 05:21:56AM -0700, Chris Wilson wrote:
> By exporting the ability to map user addresses and insert PTEs
> representing their backing pages into the GTT, we can exploit UMA in order
> to utilize normal application data as a texture source or even as a
> render target (depending upon the capabilities of the chipset). This has
> a number of uses, with zero-copy downloads to the GPU and efficient
> readback making the intermixed streaming of CPU and GPU operations
> fairly efficient. This ability has many widespread implications from
> faster rendering of client-side software rasterisers (chromium),
> mitigation of stalls due to read back (firefox) and to faster pipelining
> of texture data (such as pixel buffer objects in GL or data blobs in CL).
> 
> v2: Compile with CONFIG_MMU_NOTIFIER
> v3: We can sleep while performing invalidate-range, which we can utilise
> to drop our page references prior to the kernel manipulating the vma
> (for either discard or cloning) and so protect normal users.
> v4: Only run the invalidate notifier if the range intercepts the bo.
> v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
> v6: Recheck after reacquire mutex for lost mmu.
> v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
> v8: Fix rebasing error after forward-porting the back port.
> v9: Limit the userptr to page aligned entries. We now expect userspace
>     to handle all the offset-in-page adjustments itself.
> v10: Prevent vma from being copied across fork to avoid issues with cow.
> v11: Drop vma behaviour changes -- locking is nigh on impossible.
>      Use a worker to load user pages to avoid lock inversions.
> v12: Use get_task_mm()/mmput() for correct refcounting of mm.
> v13: Use a worker to release the mmu_notifier to avoid lock inversion
> v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifer
>      with its own locking and tree of objects for each mm/mmu_notifier.
> v15: Prevent overlapping userptr objects, and invalidate all objects
>      within the mmu_notifier range
> v16: Fix a typo for iterating over multiple objects in the range and
>      rearrange error path to destroy the mmu_notifier locklessly.
>      Also close a race between invalidate_range and the get_pages_worker.
> v17: Close a race between get_pages_worker/invalidate_range and fresh
>      allocations of the same userptr range - and notice that
>      struct_mutex was presumed to be held during creation when it wasn't.
> v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
>      for the struct sg_table and to clear it before reporting an error.
> v19: Always error out on read-only userptr requests as we don't have the
>      hardware infrastructure to support them at the moment.
> v20: Refuse to implement read-only support until we have the required
>      infrastructure - but reserve the bit in flags for future use.
> v21: use_mm() is not required for get_user_pages(). It is only meant to
>      be used to fix up the kernel thread's current->mm for use with
>      copy_user().
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
> Cc: Akash Goel <akash.goel@intel.com>
> Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/Kconfig            |   1 +
>  drivers/gpu/drm/i915/Makefile           |   1 +
>  drivers/gpu/drm/i915/i915_dma.c         |   1 +
>  drivers/gpu/drm/i915/i915_drv.h         |  24 +-
>  drivers/gpu/drm/i915/i915_gem.c         |   4 +
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c  |   5 +
>  drivers/gpu/drm/i915/i915_gem_userptr.c | 679 ++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gpu_error.c   |   2 +
>  include/uapi/drm/i915_drm.h             |  16 +
>  9 files changed, 732 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 73ed59eff139..9940baee10c2 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -5,6 +5,7 @@ config DRM_I915
>  	depends on (AGP || AGP=n)
>  	select INTEL_GTT
>  	select AGP_INTEL if AGP
> +	select INTERVAL_TREE
>  	# we need shmfs for the swappable backing store, and in particular
>  	# the shmem_readpage() which depends upon tmpfs
>  	select SHMEM
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 2e02137a0d6e..1856494439e9 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -27,6 +27,7 @@ i915-y += i915_cmd_parser.o \
>  	  i915_gem.o \
>  	  i915_gem_stolen.o \
>  	  i915_gem_tiling.o \
> +	  i915_gem_userptr.o \
>  	  i915_gpu_error.o \
>  	  i915_irq.o \
>  	  i915_trace_points.o \
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index a98d66fb0302..4f7b93e2cb4f 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1979,6 +1979,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
>  	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE2, i915_gem_create2_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  };
>  
>  int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2923cb364ae0..a8940c81eca5 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -40,6 +40,7 @@
>  #include <linux/i2c-algo-bit.h>
>  #include <drm/intel-gtt.h>
>  #include <linux/backlight.h>
> +#include <linux/hashtable.h>
>  #include <linux/intel-iommu.h>
>  #include <linux/kref.h>
>  #include <linux/pm_qos.h>
> @@ -171,6 +172,7 @@ enum hpd_pin {
>  		if ((intel_connector)->base.encoder == (__encoder))
>  
>  struct drm_i915_private;
> +struct i915_mmu_object;
>  
>  enum intel_dpll_id {
>  	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
> @@ -397,6 +399,7 @@ struct drm_i915_error_state {
>  		u32 tiling:2;
>  		u32 dirty:1;
>  		u32 purgeable:1;
> +		u32 userptr:1;
>  		s32 ring:4;
>  		u32 cache_level:3;
>  	} **active_bo, **pinned_bo;
> @@ -1546,6 +1549,9 @@ typedef struct drm_i915_private {
>  	struct i915_gtt gtt; /* VMA representing the global address space */
>  
>  	struct i915_gem_mm mm;
> +#if defined(CONFIG_MMU_NOTIFIER)
> +	DECLARE_HASHTABLE(mmu_notifiers, 7);
> +#endif
>  
>  	/* Kernel Modesetting */
>  
> @@ -1678,6 +1684,7 @@ struct drm_i915_gem_object_ops {
>  	 */
>  	int (*get_pages)(struct drm_i915_gem_object *);
>  	void (*put_pages)(struct drm_i915_gem_object *);
> +	void (*release)(struct drm_i915_gem_object *);
>  };
>  
>  struct drm_i915_gem_object {
> @@ -1791,8 +1798,20 @@ struct drm_i915_gem_object {
>  
>  	/** for phy allocated objects */
>  	struct drm_i915_gem_phys_object *phys_obj;
> -};
>  
> +	union {
> +		struct i915_gem_userptr {

Out of curiosity, what's the point of using both a union and a
struct here, given that everything is within the struct?

> +			uintptr_t ptr;
> +			unsigned read_only :1;
> +			unsigned active :4;
> +#define I915_GEM_USERPTR_MAX_ACTIVE 15

Maybe s/active/active_workers/ or just 'workers'

> +
> +			struct mm_struct *mm;
> +			struct i915_mmu_object *mn;
> +			struct work_struct *work;
> +		} userptr;
> +	};
> +};
>  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
>  
>  /**
> @@ -2207,6 +2226,9 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
>  			      struct drm_file *file_priv);
>  int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
>  			      struct drm_file *file_priv);
> +int i915_gem_init_userptr(struct drm_device *dev);
> +int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
> +			   struct drm_file *file);
>  int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  				struct drm_file *file_priv);
>  int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 5cf4d80de867..4869f0dce37b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4408,6 +4408,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	if (obj->base.import_attach)
>  		drm_prime_gem_destroy(&obj->base, NULL);
>  
> +	if (obj->ops->release)
> +		obj->ops->release(obj);
> +
>  	drm_gem_object_release(&obj->base);
>  	i915_gem_info_remove_obj(dev_priv, obj->base.size);
>  
> @@ -4672,6 +4675,7 @@ int i915_gem_init(struct drm_device *dev)
>  			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
>  	}
>  
> +	i915_gem_init_userptr(dev);
>  	i915_gem_init_global_gtt(dev);
>  
>  	ret = i915_gem_context_init(dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index 9bb533e0d762..ac295ec8ee2e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -233,6 +233,11 @@ static const struct dma_buf_ops i915_dmabuf_ops =  {
>  struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
>  				      struct drm_gem_object *gem_obj, int flags)
>  {
> +	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
> +
> +	if (obj->userptr.mm && obj->userptr.mn == NULL)
> +		return ERR_PTR(-EINVAL);
> +

So this is basically saying we won't export an unsynchronized
userptr? I don't think we've documented this or the other
restrictions on unsynchronized userptrs (e.g. root only).

>  	return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> new file mode 100644
> index 000000000000..632028332d85
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -0,0 +1,679 @@
> +/*
> + * Copyright © 2012-2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "drmP.h"
> +#include "i915_drm.h"
> +#include "i915_drv.h"
> +#include "i915_trace.h"
> +#include "intel_drv.h"
> +#include <linux/mmu_context.h>
> +#include <linux/mmu_notifier.h>
> +#include <linux/mempolicy.h>
> +#include <linux/swap.h>
> +
> +#if defined(CONFIG_MMU_NOTIFIER)
> +#include <linux/interval_tree.h>
> +
> +struct i915_mmu_notifier {
> +	spinlock_t lock;
> +	struct hlist_node node;
> +	struct mmu_notifier mn;
> +	struct rb_root objects;
> +	struct drm_device *dev;
> +	struct mm_struct *mm;
> +	struct work_struct work;
> +	unsigned long count;
> +	unsigned long serial;
> +};
> +
> +struct i915_mmu_object {
> +	struct i915_mmu_notifier *mmu;
> +	struct interval_tree_node it;
> +	struct drm_i915_gem_object *obj;
> +};
> +
> +static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> +						       struct mm_struct *mm,
> +						       unsigned long start,
> +						       unsigned long end)
> +{
> +	struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
> +	struct interval_tree_node *it = NULL;
> +	unsigned long serial = 0;
> +
> +	end--; /* interval ranges are inclusive, but invalidate range is exclusive */
> +	while (start < end) {
> +		struct drm_i915_gem_object *obj;
> +
> +		obj = NULL;
> +		spin_lock(&mn->lock);
> +		if (serial == mn->serial)
> +			it = interval_tree_iter_next(it, start, end);
> +		else
> +			it = interval_tree_iter_first(&mn->objects, start, end);

Is the issue here just that items being removed from the tree could make 'it'
invalid on the next call to interval_tree_iter_next()? Or are we also worried
about items being added to the tree? If items can be added, we've been advancing
'start', so I think there would be the potential for adding an item to the
portion of the invalidate range that we've already processed. Whether that
could happen given the locking or if the invalidation should apply to such an
object, I'm not sure.

> +		if (it != NULL) {
> +			obj = container_of(it, struct i915_mmu_object, it)->obj;
> +			drm_gem_object_reference(&obj->base);
> +			serial = mn->serial;
> +		}
> +		spin_unlock(&mn->lock);
> +		if (obj == NULL)
> +			return;
> +
> +		mutex_lock(&mn->dev->struct_mutex);
> +		/* Cancel any active worker and force us to re-evaluate gup */
> +		obj->userptr.work = NULL;
> +
> +		if (obj->pages != NULL) {
> +			struct drm_i915_private *dev_priv = to_i915(mn->dev);
> +			struct i915_vma *vma, *tmp;
> +			bool was_interruptible;
> +
> +			was_interruptible = dev_priv->mm.interruptible;
> +			dev_priv->mm.interruptible = false;
> +
> +			list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
> +				int ret = i915_vma_unbind(vma);
> +				WARN_ON(ret && ret != -EIO);
> +			}
> +			WARN_ON(i915_gem_object_put_pages(obj));
> +
> +			dev_priv->mm.interruptible = was_interruptible;
> +		}
> +
> +		start = obj->userptr.ptr + obj->base.size;
> +
> +		drm_gem_object_unreference(&obj->base);
> +		mutex_unlock(&mn->dev->struct_mutex);
> +	}
> +}
> +
> +static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
> +	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
> +};
> +
> +static struct i915_mmu_notifier *
> +__i915_mmu_notifier_lookup(struct drm_device *dev, struct mm_struct *mm)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct i915_mmu_notifier *mmu;
> +
> +	/* Protected by dev->struct_mutex */
> +	hash_for_each_possible(dev_priv->mmu_notifiers, mmu, node, (unsigned long)mm)
> +		if (mmu->mm == mm)
> +			return mmu;
> +
> +	return NULL;
> +}
> +
> +static struct i915_mmu_notifier *
> +i915_mmu_notifier_get(struct drm_device *dev, struct mm_struct *mm)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct i915_mmu_notifier *mmu;
> +	int ret;
> +
> +	lockdep_assert_held(&dev->struct_mutex);
> +
> +	mmu = __i915_mmu_notifier_lookup(dev, mm);
> +	if (mmu)
> +		return mmu;
> +
> +	mmu = kmalloc(sizeof(*mmu), GFP_KERNEL);
> +	if (mmu == NULL)
> +		return ERR_PTR(-ENOMEM);
> +
> +	spin_lock_init(&mmu->lock);
> +	mmu->dev = dev;
> +	mmu->mn.ops = &i915_gem_userptr_notifier;
> +	mmu->mm = mm;
> +	mmu->objects = RB_ROOT;
> +	mmu->count = 0;
> +	mmu->serial = 0;
> +
> +	/* Protected by mmap_sem (write-lock) */
> +	ret = __mmu_notifier_register(&mmu->mn, mm);
> +	if (ret) {
> +		kfree(mmu);
> +		return ERR_PTR(ret);
> +	}
> +
> +	/* Protected by dev->struct_mutex */
> +	hash_add(dev_priv->mmu_notifiers, &mmu->node, (unsigned long)mm);
> +	return mmu;
> +}
> +
> +static void
> +__i915_mmu_notifier_destroy_worker(struct work_struct *work)
> +{
> +	struct i915_mmu_notifier *mmu = container_of(work, typeof(*mmu), work);
> +	mmu_notifier_unregister(&mmu->mn, mmu->mm);
> +	kfree(mmu);
> +}
> +
> +static void
> +__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
> +{
> +	lockdep_assert_held(&mmu->dev->struct_mutex);
> +
> +	/* Protected by dev->struct_mutex */
> +	hash_del(&mmu->node);
> +
> +	/* Our lock ordering is: mmap_sem, mmu_notifier_srcu, struct_mutex.
> +	 * We enter the function holding struct_mutex, therefore we need
> +	 * to drop our mutex prior to calling mmu_notifier_unregister in
> +	 * order to prevent lock inversion (and system-wide deadlock)
> +	 * between the mmap_sem and struct_mutex.
> +	 */
> +	INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
> +	schedule_work(&mmu->work);
> +}
> +
> +static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
> +{
> +	if (++mmu->serial == 0)
> +		mmu->serial = 1;
> +}
> +
> +static void
> +i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
> +		      struct i915_mmu_object *mn)
> +{
> +	lockdep_assert_held(&mmu->dev->struct_mutex);
> +
> +	spin_lock(&mmu->lock);
> +	interval_tree_remove(&mn->it, &mmu->objects);
> +	__i915_mmu_notifier_update_serial(mmu);
> +	spin_unlock(&mmu->lock);
> +
> +	/* Protected against _add() by dev->struct_mutex */
> +	if (--mmu->count == 0)
> +		__i915_mmu_notifier_destroy(mmu);
> +}
> +
> +static int
> +i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
> +		      struct i915_mmu_object *mn)
> +{
> +	struct interval_tree_node *it;
> +	int ret;
> +
> +	/* Make sure we drop the final active reference (and thereby
> +	 * remove the objects from the interval tree) before we do
> +	 * the check for overlapping objects.
> +	 */

I assume this comment refers to the retire_requests call, in which
case I would move it down.

> +	ret = i915_mutex_lock_interruptible(mmu->dev);
> +	if (ret)
> +		return ret;
> +
> +	i915_gem_retire_requests(mmu->dev);
> +
> +	/* Disallow overlapping userptr objects */
> +	spin_lock(&mmu->lock);
> +	it = interval_tree_iter_first(&mmu->objects,
> +				      mn->it.start, mn->it.last);
> +	if (it) {
> +		struct drm_i915_gem_object *obj;
> +
> +		/* We only need to check the first object as it either
> +		 * is idle (and in use elsewhere) or we try again in order
> +		 * to give time for the gup-worker to run and flush its
> +		 * object references. Afterwards if we find another
> +		 * object that is idle (and so referenced elsewhere)
> +		 * we know that the overlap with a pinned object is
> +		 * genuine.
> +		 */

I found this a bit confusing because it refers to the object being idle when
it's really a question of whether the worker is executing. In my mind, the
object has the opposite idle/active state to the worker, e.g. if the worker is
active, the object is idle w.r.t. being used by the GPU. Hence the earlier
suggestion about renaming userptr.active.

> +		obj = container_of(it, struct i915_mmu_object, it)->obj;
> +		ret = obj->userptr.active ? -EAGAIN : -EINVAL;
> +	} else {
> +		interval_tree_insert(&mn->it, &mmu->objects);
> +		__i915_mmu_notifier_update_serial(mmu);
> +		ret = 0;
> +	}
> +	spin_unlock(&mmu->lock);
> +	mutex_unlock(&mmu->dev->struct_mutex);
> +
> +	return ret;
> +}
> +
> +static void
> +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_mmu_object *mn;
> +
> +	mn = obj->userptr.mn;
> +	if (mn == NULL)
> +		return;
> +
> +	i915_mmu_notifier_del(mn->mmu, mn);
> +	obj->userptr.mn = NULL;
> +}
> +
> +static int
> +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
> +				    unsigned flags)
> +{
> +	struct i915_mmu_notifier *mmu;
> +	struct i915_mmu_object *mn;
> +	int ret;
> +
> +	if (flags & I915_USERPTR_UNSYNCHRONIZED)
> +		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
> +
> +	down_write(&obj->userptr.mm->mmap_sem);
> +	ret = i915_mutex_lock_interruptible(obj->base.dev);
> +	if (ret == 0) {
> +		mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
> +		if (!IS_ERR(mmu))
> +			mmu->count++; /* preemptive add to act as a refcount */
> +		else
> +			ret = PTR_ERR(mmu);
> +		mutex_unlock(&obj->base.dev->struct_mutex);
> +	}
> +	up_write(&obj->userptr.mm->mmap_sem);
> +	if (ret)
> +		return ret;
> +
> +	mn = kzalloc(sizeof(*mn), GFP_KERNEL);
> +	if (mn == NULL) {
> +		ret = -ENOMEM;
> +		goto destroy_mmu;
> +	}
> +
> +	mn->mmu = mmu;
> +	mn->it.start = obj->userptr.ptr;
> +	mn->it.last = mn->it.start + obj->base.size - 1;
> +	mn->obj = obj;
> +
> +	ret = i915_mmu_notifier_add(mmu, mn);
> +	if (ret)
> +		goto free_mn;
> +
> +	obj->userptr.mn = mn;
> +	return 0;
> +
> +free_mn:
> +	kfree(mn);
> +destroy_mmu:
> +	mutex_lock(&obj->base.dev->struct_mutex);
> +	if (--mmu->count == 0)
> +		__i915_mmu_notifier_destroy(mmu);
> +	mutex_unlock(&obj->base.dev->struct_mutex);
> +	return ret;
> +}
> +
> +#else
> +
> +static void
> +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
> +{
> +}
> +
> +static int
> +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
> +				    unsigned flags)
> +{
> +	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
> +		return -ENODEV;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	return 0;
> +}
> +#endif
> +
> +struct get_pages_work {
> +	struct work_struct work;
> +	struct drm_i915_gem_object *obj;
> +	struct task_struct *task;
> +};
> +
> +static int
> +st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
> +{
> +	struct scatterlist *sg;
> +	int n;
> +
> +	*st = kmalloc(sizeof(**st), GFP_KERNEL);
> +	if (*st == NULL || sg_alloc_table(*st, num_pages, GFP_KERNEL)) {
> +		kfree(*st);
> +		*st = NULL;
> +		return -ENOMEM;
> +	}
> +
> +	for_each_sg((*st)->sgl, sg, num_pages, n)
> +		sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
> +
> +	return 0;
> +}
> +
> +static void
> +__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
> +{
> +	struct get_pages_work *work = container_of(_work, typeof(*work), work);
> +	struct drm_i915_gem_object *obj = work->obj;
> +	struct drm_device *dev = obj->base.dev;
> +	const int num_pages = obj->base.size >> PAGE_SHIFT;
> +	struct page **pvec;
> +	int pinned, ret;
> +
> +	ret = -ENOMEM;
> +	pinned = 0;
> +
> +	pvec = kmalloc(num_pages*sizeof(struct page *),
> +		       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
> +	if (pvec == NULL)
> +		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
> +	if (pvec != NULL) {
> +		struct mm_struct *mm = obj->userptr.mm;
> +
> +		down_read(&mm->mmap_sem);
> +		while (pinned < num_pages) {
> +			ret = get_user_pages(work->task, mm,
> +					     obj->userptr.ptr + pinned * PAGE_SIZE,
> +					     num_pages - pinned,
> +					     !obj->userptr.read_only, 0,
> +					     pvec + pinned, NULL);
> +			if (ret < 0)
> +				break;
> +
> +			pinned += ret;
> +		}
> +		up_read(&mm->mmap_sem);
> +	}
> +
> +	mutex_lock(&dev->struct_mutex);
> +	if (obj->userptr.work != &work->work) {
> +		ret = 0;
> +	} else if (pinned == num_pages) {
> +		ret = st_set_pages(&obj->pages, pvec, num_pages);
> +		if (ret == 0) {
> +			list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list);
> +			pinned = 0;
> +		}
> +	}
> +
> +	obj->userptr.work = ERR_PTR(ret);
> +	obj->userptr.active--;
> +	drm_gem_object_unreference(&obj->base);
> +	mutex_unlock(&dev->struct_mutex);
> +
> +	release_pages(pvec, pinned, 0);
> +	drm_free_large(pvec);
> +
> +	put_task_struct(work->task);
> +	kfree(work);
> +}
> +
> +static int
> +i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
> +{
> +	const int num_pages = obj->base.size >> PAGE_SHIFT;
> +	struct page **pvec;
> +	int pinned, ret;
> +
> +	/* If userspace should engineer that these pages are replaced in
> +	 * the vma between us binding this page into the GTT and completion
> +	 * of rendering... Their loss. If they change the mapping of their
> +	 * pages they need to create a new bo to point to the new vma.
> +	 *
> +	 * However, that still leaves open the possibility of the vma
> +	 * being copied upon fork. Which falls under the same userspace
> +	 * synchronisation issue as a regular bo, except that this time
> +	 * the process may not be expecting that a particular piece of
> +	 * memory is tied to the GPU.
> +	 *
> +	 * Fortunately, we can hook into the mmu_notifier in order to
> +	 * discard the page references prior to anything nasty happening
> +	 * to the vma (discard or cloning) which should prevent the more
> +	 * egregious cases from causing harm.
> +	 */
> +
> +	pvec = NULL;
> +	pinned = 0;
> +	if (obj->userptr.mm == current->mm) {
> +		pvec = kmalloc(num_pages*sizeof(struct page *),
> +			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
> +		if (pvec == NULL) {
> +			pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
> +			if (pvec == NULL)
> +				return -ENOMEM;
> +		}
> +
> +		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
> +					       !obj->userptr.read_only, pvec);
> +	}
> +	if (pinned < num_pages) {
> +		if (pinned < 0) {
> +			ret = pinned;
> +			pinned = 0;
> +		} else {
> +			/* Spawn a worker so that we can acquire the
> +			 * user pages without holding our mutex. Access
> +			 * to the user pages requires mmap_sem, and we have
> +			 * a strict lock ordering of mmap_sem, struct_mutex -
> +			 * we already hold struct_mutex here and so cannot
> +			 * call gup without encountering a lock inversion.
> +			 *
> +			 * Userspace will keep on repeating the operation
> +			 * (thanks to EAGAIN) until either we hit the fast
> +			 * path or the worker completes. If the worker is
> +			 * cancelled or superseded, the task is still run
> +			 * but the results ignored. (This leads to
> +			 * complications that we may have a stray object
> +			 * refcount that we need to be wary of when
> +			 * checking for existing objects during creation.)
> +			 * If the worker encounters an error, it reports
> +			 * that error back to this function through
> +			 * obj->userptr.work = ERR_PTR.
> +			 */
> +			ret = -EAGAIN;
> +			if (obj->userptr.work == NULL &&
> +			    obj->userptr.active < I915_GEM_USERPTR_MAX_ACTIVE) {
> +				struct get_pages_work *work;
> +
> +				work = kmalloc(sizeof(*work), GFP_KERNEL);
> +				if (work != NULL) {
> +					obj->userptr.work = &work->work;
> +					obj->userptr.active++;
> +
> +					work->obj = obj;
> +					drm_gem_object_reference(&obj->base);
> +
> +					work->task = current;
> +					get_task_struct(work->task);
> +
> +					INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
> +					schedule_work(&work->work);
> +				} else
> +					ret = -ENOMEM;
> +			} else {
> +				if (IS_ERR(obj->userptr.work)) {
> +					ret = PTR_ERR(obj->userptr.work);
> +					obj->userptr.work = NULL;
> +				}
> +			}
> +		}
> +	} else {
> +		ret = st_set_pages(&obj->pages, pvec, num_pages);
> +		if (ret == 0) {
> +			obj->userptr.work = NULL;
> +			pinned = 0;
> +		}
> +	}
> +
> +	release_pages(pvec, pinned, 0);
> +	drm_free_large(pvec);
> +	return ret;
> +}
> +
> +static void
> +i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct scatterlist *sg;
> +	int i;
> +
> +	BUG_ON(obj->userptr.work != NULL);
> +
> +	if (obj->madv != I915_MADV_WILLNEED)
> +		obj->dirty = 0;
> +
> +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> +		struct page *page = sg_page(sg);
> +
> +		if (obj->dirty)
> +			set_page_dirty(page);
> +
> +		mark_page_accessed(page);
> +		page_cache_release(page);
> +	}
> +	obj->dirty = 0;
> +
> +	sg_free_table(obj->pages);
> +	kfree(obj->pages);
> +}
> +
> +static void
> +i915_gem_userptr_release(struct drm_i915_gem_object *obj)
> +{
> +	i915_gem_userptr_release__mmu_notifier(obj);
> +
> +	if (obj->userptr.mm) {
> +		mmput(obj->userptr.mm);
> +		obj->userptr.mm = NULL;
> +	}
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
> +	.get_pages = i915_gem_userptr_get_pages,
> +	.put_pages = i915_gem_userptr_put_pages,
> +	.release = i915_gem_userptr_release,
> +};
> +
> +/**
> + * Creates a new mm object that wraps some normal memory from the process
> + * context - user memory.
> + *
> + * We impose several restrictions upon the memory being mapped
> + * into the GPU.
> + * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
> + * 2. It cannot overlap any other userptr object in the same address space.
> + * 3. It must be normal system memory, not a pointer into another map of IO
> + *    space (e.g. it must not be a GTT mmapping of another object).
> + * 4. We only allow a bo as large as we could in theory map into the GTT,
> + *    that is we limit the size to the total size of the GTT.
> + * 5. The bo is marked as being snoopable. The backing pages are left
> + *    accessible directly by the CPU, but reads and writes by the GPU may
> + *    incur the cost of a snoop (unless you have an LLC architecture).
> + *
> + * Synchronisation between multiple users and the GPU is left to userspace
> + * through the normal set-domain-ioctl. The kernel will enforce that the
> + * GPU relinquishes the VMA before it is returned back to the system
> + * i.e. upon free(), munmap() or process termination. However, the userspace
> + * malloc() library may not immediately relinquish the VMA after free() and
> + * instead reuse it whilst the GPU is still reading and writing to the VMA.
> + * Caveat emptor.
> + *
> + * Also note that the object created here is not currently a "first class"
> + * object, in that several ioctls are banned. These are the CPU access
> + * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
> + * direct access via your pointer rather than use those ioctls.
> + *
> + * If you think this is a good interface to use to pass GPU memory between
> + * drivers, please use dma-buf instead. In fact, wherever possible use
> + * dma-buf instead.
> + */
> +int
> +i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_gem_userptr *args = data;
> +	struct drm_i915_gem_object *obj;
> +	int ret;
> +	u32 handle;
> +
> +	if (args->flags & ~(I915_USERPTR_READ_ONLY |
> +			    I915_USERPTR_UNSYNCHRONIZED))
> +		return -EINVAL;
> +
> +	if (offset_in_page(args->user_ptr | args->user_size))
> +		return -EINVAL;
> +
> +	if (args->user_size > dev_priv->gtt.base.total)
> +		return -E2BIG;
> +
> +	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
> +		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
> +		return -EFAULT;
> +
> +	if (args->flags & I915_USERPTR_READ_ONLY) {
> +		/* On almost all of the current hw, we cannot tell the GPU that a
> +		 * page is readonly, so this is just a placeholder in the uAPI.
> +		 */
> +		return -ENODEV;
> +	}
> +
> +	/* Allocate the new object */
> +	obj = i915_gem_object_alloc(dev);
> +	if (obj == NULL)
> +		return -ENOMEM;
> +
> +	drm_gem_private_object_init(dev, &obj->base, args->user_size);
> +	i915_gem_object_init(obj, &i915_gem_userptr_ops);
> +	obj->cache_level = I915_CACHE_LLC;
> +	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> +
> +	obj->userptr.ptr = args->user_ptr;
> +	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
> +
> +	/* And keep a pointer to the current->mm for resolving the user pages
> +	 * at binding. This means that we need to hook into the mmu_notifier
> +	 * in order to detect if the mmu is destroyed.
> +	 */
> +	ret = -ENOMEM;
> +	if ((obj->userptr.mm = get_task_mm(current)))
> +		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
> +	if (ret == 0)
> +		ret = drm_gem_handle_create(file, &obj->base, &handle);
> +
> +	/* drop reference from allocate - handle holds it now */
> +	drm_gem_object_unreference_unlocked(&obj->base);
> +	if (ret)
> +		return ret;
> +
> +	args->handle = handle;
> +	return 0;
> +}
> +
> +int
> +i915_gem_init_userptr(struct drm_device *dev)
> +{
> +#if defined(CONFIG_MMU_NOTIFIER)
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	hash_init(dev_priv->mmu_notifiers);
> +#endif
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index baf1ca690dc5..60c6bf2c30c8 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -204,6 +204,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
>  		err_puts(m, tiling_flag(err->tiling));
>  		err_puts(m, dirty_flag(err->dirty));
>  		err_puts(m, purgeable_flag(err->purgeable));
> +		err_puts(m, err->userptr ? " userptr" : "");
>  		err_puts(m, err->ring != -1 ? " " : "");
>  		err_puts(m, ring_str(err->ring));
>  		err_puts(m, i915_cache_level_str(err->cache_level));
> @@ -648,6 +649,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>  	err->tiling = obj->tiling_mode;
>  	err->dirty = obj->dirty;
>  	err->purgeable = obj->madv != I915_MADV_WILLNEED;
> +	err->userptr = obj->userptr.mm != 0;
>  	err->ring = obj->ring ? obj->ring->id : -1;
>  	err->cache_level = obj->cache_level;
>  }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index f5897eefb604..ebe90b439140 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -224,6 +224,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_REG_READ		0x31
>  #define DRM_I915_GET_RESET_STATS	0x32
>  #define DRM_I915_GEM_CREATE2		0x33
> +#define DRM_I915_GEM_USERPTR		0x34
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -275,6 +276,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>  #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
>  #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
> +#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
>  
>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>   * on the security mechanisms provided by hardware.
> @@ -1156,4 +1158,18 @@ struct drm_i915_reset_stats {
>  	__u32 pad;
>  };
>  
> +struct drm_i915_gem_userptr {
> +	__u64 user_ptr;
> +	__u64 user_size;
> +	__u32 flags;
> +#define I915_USERPTR_READ_ONLY 0x1
> +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> +	/**
> +	 * Returned handle for the object.
> +	 *
> +	 * Object handles are nonzero.
> +	 */
> +	__u32 handle;
> +};
> +
>  #endif /* _UAPI_I915_DRM_H_ */
> -- 
> 1.9.0
> 
> 
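
To make the proposed uAPI concrete, here is a minimal userspace sketch of
wrapping malloc'ed memory in a GEM handle through the new ioctl. It assumes
the interface exactly as posted above (struct drm_i915_gem_userptr and
DRM_IOCTL_I915_GEM_USERPTR from the updated i915_drm.h) and an open i915 DRM
fd; the header include path and the stripped-down error handling are
simplifying assumptions for illustration, not part of the patch:

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include "i915_drm.h"	/* the updated uapi header from this patch */

  /* Wrap an existing, page-aligned allocation in a GEM handle. */
  static uint32_t userptr_create(int drm_fd, void *ptr, uint64_t size)
  {
  	struct drm_i915_gem_userptr arg = {
  		.user_ptr  = (uintptr_t)ptr,	/* must be page aligned */
  		.user_size = size,		/* ditto, and no larger than the GTT */
  		.flags     = 0,			/* 0 => tracked via the mmu_notifier */
  	};

  	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
  		return 0;	/* e.g. EINVAL on overlap/misalignment, E2BIG */

  	return arg.handle;	/* nonzero GEM handle on success */
  }

A caller would typically posix_memalign() the buffer, create the handle, and
then pass it to execbuffer like any other bo; per the comment above, the CPU
access ioctls (mmap, pwrite, pread) are refused for such objects and the
memory is accessed through the original pointer instead.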

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering
  2014-03-28 22:58   ` Volkin, Bradley D
@ 2014-04-17 22:20     ` Volkin, Bradley D
  2014-04-24  9:22     ` Chris Wilson
  1 sibling, 0 replies; 17+ messages in thread
From: Volkin, Bradley D @ 2014-04-17 22:20 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

Hi Chris, just want to bring this one back to your attention while
I'm going through the rest of the series.
Thanks,
Brad

On Fri, Mar 28, 2014 at 03:58:25PM -0700, Volkin, Bradley D wrote:
> On Mon, Mar 17, 2014 at 05:21:55AM -0700, Chris Wilson wrote:
> > A common issue we have is that retiring requests causes recursion
> > through GTT manipulation or page table manipulation which we can only
> > handle at very specific points. However, to maintain internal
> > consistency (enforced through our sanity checks on write_domain at
> > various points in the GEM object lifecycle) we do need to retire the
> > object prior to marking it with a new write_domain, and also clear the
> > write_domain for the implicit flush following a batch.
> > 
> > Note that this then allows the unbound objects to still be on the active
> > lists, and so care must be taken when removing objects from unbound lists
> > (similar to the caveats we face processing the bound lists).
> > 
> > v2: Fix i915_gem_shrink_all() to handle updated object lifetime rules,
> > by refactoring it to call into __i915_gem_shrink().
> > 
> > v3: Missed an object-retire prior to changing cache domains in
> > i915_gem_object_set_cache_level()
> > 
> > v4: Rebase
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Tested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c            | 112 ++++++++++++++++-------------
> >  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   3 +
> >  2 files changed, 66 insertions(+), 49 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 58704ce62e3e..5cf4d80de867 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -44,6 +44,9 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o
> >  static __must_check int
> >  i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
> >  			       bool readonly);
> > +static void
> > +i915_gem_object_retire(struct drm_i915_gem_object *obj);
> > +
> >  static int i915_gem_phys_pwrite(struct drm_device *dev,
> >  				struct drm_i915_gem_object *obj,
> >  				struct drm_i915_gem_pwrite *args,
> > @@ -502,6 +505,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
> >  		ret = i915_gem_object_wait_rendering(obj, true);
> >  		if (ret)
> >  			return ret;
> > +
> > +		i915_gem_object_retire(obj);
> >  	}
> >  
> >  	ret = i915_gem_object_get_pages(obj);
> > @@ -917,6 +922,8 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> >  		ret = i915_gem_object_wait_rendering(obj, false);
> >  		if (ret)
> >  			return ret;
> > +
> > +		i915_gem_object_retire(obj);
> >  	}
> >  	/* Same trick applies to invalidate partially written cachelines read
> >  	 * before writing. */
> > @@ -1304,7 +1311,8 @@ static int
> >  i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
> >  				     struct intel_ring_buffer *ring)
> >  {
> > -	i915_gem_retire_requests_ring(ring);
> > +	if (!obj->active)
> > +		return 0;
> >  
> >  	/* Manually manage the write flush as we may have not yet
> >  	 * retired the buffer.
> > @@ -1314,7 +1322,6 @@ i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
> >  	 * we know we have passed the last write.
> >  	 */
> >  	obj->last_write_seqno = 0;
> > -	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
> >  
> >  	return 0;
> >  }
> > @@ -1949,58 +1956,58 @@ static unsigned long
> >  __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
> >  		  bool purgeable_only)
> >  {
> > -	struct list_head still_bound_list;
> > -	struct drm_i915_gem_object *obj, *next;
> > +	struct list_head still_in_list;
> > +	struct drm_i915_gem_object *obj;
> >  	unsigned long count = 0;
> >  
> > -	list_for_each_entry_safe(obj, next,
> > -				 &dev_priv->mm.unbound_list,
> > -				 global_list) {
> > -		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
> > -		    i915_gem_object_put_pages(obj) == 0) {
> > -			count += obj->base.size >> PAGE_SHIFT;
> > -			if (count >= target)
> > -				return count;
> > -		}
> > -	}
> > -
> >  	/*
> > -	 * As we may completely rewrite the bound list whilst unbinding
> > +	 * As we may completely rewrite the (un)bound list whilst unbinding
> >  	 * (due to retiring requests) we have to strictly process only
> >  	 * one element of the list at the time, and recheck the list
> >  	 * on every iteration.
> 
> Is it still true that we could retire requests on this path? I see that
> currently we will retire requests via:
> i915_vma_unbind -> i915_gem_object_finish_gpu -> i915_gem_object_wait_rendering.
> 
> But we've taken the explicit request retirement out of the wait_rendering path.
> Have I missed somewhere that it could still happen?
> 
> Thanks,
> Brad
> 
> > +	 *
> > +	 * In particular, we must hold a reference whilst removing the
> > +	 * object as we may end up waiting for and/or retiring the objects.
> > +	 * This might release the final reference (held by the active list)
> > +	 * and result in the object being freed from under us. This is
> > +	 * similar to the precautions the eviction code must take whilst
> > +	 * removing objects.
> > +	 *
> > +	 * Also note that although these lists do not hold a reference to
> > +	 * the object we can safely grab one here: The final object
> > +	 * unreferencing and the bound_list are both protected by the
> > +	 * dev->struct_mutex and so we won't ever be able to observe an
> > +	 * object on the bound_list with a reference count equals 0.
> >  	 */
> > -	INIT_LIST_HEAD(&still_bound_list);
> > +	INIT_LIST_HEAD(&still_in_list);
> > +	while (count < target && !list_empty(&dev_priv->mm.unbound_list)) {
> > +		obj = list_first_entry(&dev_priv->mm.unbound_list,
> > +				       typeof(*obj), global_list);
> > +		list_move_tail(&obj->global_list, &still_in_list);
> > +
> > +		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
> > +			continue;
> > +
> > +		drm_gem_object_reference(&obj->base);
> > +
> > +		if (i915_gem_object_put_pages(obj) == 0)
> > +			count += obj->base.size >> PAGE_SHIFT;
> > +
> > +		drm_gem_object_unreference(&obj->base);
> > +	}
> > +	list_splice(&still_in_list, &dev_priv->mm.unbound_list);
> > +
> > +	INIT_LIST_HEAD(&still_in_list);
> >  	while (count < target && !list_empty(&dev_priv->mm.bound_list)) {
> >  		struct i915_vma *vma, *v;
> >  
> >  		obj = list_first_entry(&dev_priv->mm.bound_list,
> >  				       typeof(*obj), global_list);
> > -		list_move_tail(&obj->global_list, &still_bound_list);
> > +		list_move_tail(&obj->global_list, &still_in_list);
> >  
> >  		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
> >  			continue;
> >  
> > -		/*
> > -		 * Hold a reference whilst we unbind this object, as we may
> > -		 * end up waiting for and retiring requests. This might
> > -		 * release the final reference (held by the active list)
> > -		 * and result in the object being freed from under us.
> > -		 * in this object being freed.
> > -		 *
> > -		 * Note 1: Shrinking the bound list is special since only active
> > -		 * (and hence bound objects) can contain such limbo objects, so
> > -		 * we don't need special tricks for shrinking the unbound list.
> > -		 * The only other place where we have to be careful with active
> > -		 * objects suddenly disappearing due to retiring requests is the
> > -		 * eviction code.
> > -		 *
> > -		 * Note 2: Even though the bound list doesn't hold a reference
> > -		 * to the object we can safely grab one here: The final object
> > -		 * unreferencing and the bound_list are both protected by the
> > -		 * dev->struct_mutex and so we won't ever be able to observe an
> > -		 * object on the bound_list with a reference count equals 0.
> > -		 */
> >  		drm_gem_object_reference(&obj->base);
> >  
> >  		list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
> > @@ -2012,7 +2019,7 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
> >  
> >  		drm_gem_object_unreference(&obj->base);
> >  	}
> > -	list_splice(&still_bound_list, &dev_priv->mm.bound_list);
> > +	list_splice(&still_in_list, &dev_priv->mm.bound_list);
> >  
> >  	return count;
> >  }
> > @@ -2026,17 +2033,8 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target)
> >  static unsigned long
> >  i915_gem_shrink_all(struct drm_i915_private *dev_priv)
> >  {
> > -	struct drm_i915_gem_object *obj, *next;
> > -	long freed = 0;
> > -
> >  	i915_gem_evict_everything(dev_priv->dev);
> > -
> > -	list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list,
> > -				 global_list) {
> > -		if (i915_gem_object_put_pages(obj) == 0)
> > -			freed += obj->base.size >> PAGE_SHIFT;
> > -	}
> > -	return freed;
> > +	return __i915_gem_shrink(dev_priv, LONG_MAX, false);
> >  }
> >  
> >  static int
> > @@ -2265,6 +2263,19 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
> >  	WARN_ON(i915_verify_lists(dev));
> >  }
> >  
> > +static void
> > +i915_gem_object_retire(struct drm_i915_gem_object *obj)
> > +{
> > +	struct intel_ring_buffer *ring = obj->ring;
> > +
> > +	if (ring == NULL)
> > +		return;
> > +
> > +	if (i915_seqno_passed(ring->get_seqno(ring, true),
> > +			      obj->last_read_seqno))
> > +		i915_gem_object_move_to_inactive(obj);
> > +}
> > +
> >  static int
> >  i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
> >  {
> > @@ -3618,6 +3629,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
> >  	if (ret)
> >  		return ret;
> >  
> > +	i915_gem_object_retire(obj);
> >  	i915_gem_object_flush_cpu_write_domain(obj, false);
> >  
> >  	/* Serialise direct access to this object with the barriers for
> > @@ -3716,6 +3728,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> >  		 * in obj->write_domain and have been skipping the clflushes.
> >  		 * Just set it to the CPU cache for now.
> >  		 */
> > +		i915_gem_object_retire(obj);
> >  		WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
> >  
> >  		old_read_domains = obj->base.read_domains;
> > @@ -3938,6 +3951,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
> >  	if (ret)
> >  		return ret;
> >  
> > +	i915_gem_object_retire(obj);
> >  	i915_gem_object_flush_gtt_write_domain(obj);
> >  
> >  	old_write_domain = obj->base.write_domain;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > index 3851a1b1dc88..6ec5d1d5c625 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > @@ -955,6 +955,9 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
> >  			if (i915_gem_obj_ggtt_bound(obj) &&
> >  			    i915_gem_obj_to_ggtt(obj)->pin_count)
> >  				intel_mark_fb_busy(obj, ring);
> > +
> > +			/* update for the implicit flush after a batch */
> > +			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
> >  		}
> >  
> >  		trace_i915_gem_object_change_domain(obj, old_read, old_write);
> > -- 
> > 1.9.0
> > 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering
  2014-03-28 22:58   ` Volkin, Bradley D
  2014-04-17 22:20     ` Volkin, Bradley D
@ 2014-04-24  9:22     ` Chris Wilson
  2014-04-24 16:20       ` Volkin, Bradley D
  1 sibling, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2014-04-24  9:22 UTC (permalink / raw)
  To: Volkin, Bradley D; +Cc: intel-gfx

On Fri, Mar 28, 2014 at 03:58:25PM -0700, Volkin, Bradley D wrote:
> On Mon, Mar 17, 2014 at 05:21:55AM -0700, Chris Wilson wrote:
> > @@ -1949,58 +1956,58 @@ static unsigned long
> >  __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
> >  		  bool purgeable_only)
> >  {
> > -	struct list_head still_bound_list;
> > -	struct drm_i915_gem_object *obj, *next;
> > +	struct list_head still_in_list;
> > +	struct drm_i915_gem_object *obj;
> >  	unsigned long count = 0;
> >  
> > -	list_for_each_entry_safe(obj, next,
> > -				 &dev_priv->mm.unbound_list,
> > -				 global_list) {
> > -		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
> > -		    i915_gem_object_put_pages(obj) == 0) {
> > -			count += obj->base.size >> PAGE_SHIFT;
> > -			if (count >= target)
> > -				return count;
> > -		}
> > -	}
> > -
> >  	/*
> > -	 * As we may completely rewrite the bound list whilst unbinding
> > +	 * As we may completely rewrite the (un)bound list whilst unbinding
> >  	 * (due to retiring requests) we have to strictly process only
> >  	 * one element of the list at the time, and recheck the list
> >  	 * on every iteration.
> 
> Is it still true that we could retire requests on this path? I see that
> currently we will retire requests via:
> i915_vma_unbind -> i915_gem_object_finish_gpu -> i915_gem_object_wait_rendering.
> 
> But we've taken the explicit request retirement out of the wait_rendering path.
> Have I missed somewhere that it could still happen?

Yes, as wait_rendering doesn't retire all the requests, we may still have
a request associated with the bo. This will then cause us to call
i915_gem_object_retire() during i915_gem_object_put_pages() (through
i915_gem_object_set_to_cpu_domain), thereby discarding the last active
reference and destroying the object unless we take care.
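
To make that concrete, the unbound-list half of __i915_gem_shrink() after
this patch boils down to roughly the following (simplified, eliding the
purgeable_only filtering):

	INIT_LIST_HEAD(&still_in_list);
	while (count < target && !list_empty(&dev_priv->mm.unbound_list)) {
		obj = list_first_entry(&dev_priv->mm.unbound_list,
				       typeof(*obj), global_list);
		list_move_tail(&obj->global_list, &still_in_list);

		/* put_pages may wait for and retire requests, which can drop
		 * the last (active) reference, so pin the object across it */
		drm_gem_object_reference(&obj->base);
		if (i915_gem_object_put_pages(obj) == 0)
			count += obj->base.size >> PAGE_SHIFT;
		drm_gem_object_unreference(&obj->base);
	}
	list_splice(&still_in_list, &dev_priv->mm.unbound_list);
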
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering
  2014-04-24  9:22     ` Chris Wilson
@ 2014-04-24 16:20       ` Volkin, Bradley D
  2014-04-25 14:18         ` Daniel Vetter
  0 siblings, 1 reply; 17+ messages in thread
From: Volkin, Bradley D @ 2014-04-24 16:20 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Thu, Apr 24, 2014 at 02:22:39AM -0700, Chris Wilson wrote:
> On Fri, Mar 28, 2014 at 03:58:25PM -0700, Volkin, Bradley D wrote:
> > On Mon, Mar 17, 2014 at 05:21:55AM -0700, Chris Wilson wrote:
> > > @@ -1949,58 +1956,58 @@ static unsigned long
> > >  __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
> > >  		  bool purgeable_only)
> > >  {
> > > -	struct list_head still_bound_list;
> > > -	struct drm_i915_gem_object *obj, *next;
> > > +	struct list_head still_in_list;
> > > +	struct drm_i915_gem_object *obj;
> > >  	unsigned long count = 0;
> > >  
> > > -	list_for_each_entry_safe(obj, next,
> > > -				 &dev_priv->mm.unbound_list,
> > > -				 global_list) {
> > > -		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
> > > -		    i915_gem_object_put_pages(obj) == 0) {
> > > -			count += obj->base.size >> PAGE_SHIFT;
> > > -			if (count >= target)
> > > -				return count;
> > > -		}
> > > -	}
> > > -
> > >  	/*
> > > -	 * As we may completely rewrite the bound list whilst unbinding
> > > +	 * As we may completely rewrite the (un)bound list whilst unbinding
> > >  	 * (due to retiring requests) we have to strictly process only
> > >  	 * one element of the list at the time, and recheck the list
> > >  	 * on every iteration.
> > 
> > Is it still true that we could retire requests on this path? I see that
> > currently we will retire requests via:
> > i915_vma_unbind -> i915_gem_object_finish_gpu -> i915_gem_object_wait_rendering.
> > 
> > But we've taken the explicit request retirement out of the wait_rendering path.
> > Have I missed somewhere that it could still happen?
> 
> Yes, as wait_rendering doesn't retire all the requests, we may still have
> a request associated with the bo. This will then cause us to call
> i915_gem_object_retire() during i915_gem_object_put_pages() (through
> i915_gem_object_set_to_cpu_domain), thereby discarding the last active
> reference and destroying the object unless we take care.

Ok, I see it now. Thanks. This one is
Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com>

> -Chris
> 
> -- 
> Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] lib: Export interval_tree
  2014-03-18  9:56     ` Chris Wilson
@ 2014-04-25 14:17       ` Daniel Vetter
  0 siblings, 0 replies; 17+ messages in thread
From: Daniel Vetter @ 2014-04-25 14:17 UTC (permalink / raw)
  To: Chris Wilson, Michel Lespinasse, David Woodhouse, intel-gfx,
	Rik van Riel, Peter Zijlstra, Andrea Arcangeli, Andrew Morton

On Tue, Mar 18, 2014 at 09:56:12AM +0000, Chris Wilson wrote:
> On Mon, Mar 17, 2014 at 06:08:57AM -0700, Michel Lespinasse wrote:
> > On Mon, Mar 17, 2014 at 5:31 AM, David Woodhouse <dwmw2@infradead.org> wrote:
> > > On Mon, 2014-03-17 at 12:21 +0000, Chris Wilson wrote:
> > >> +EXPORT_SYMBOL(interval_tree_insert);
> > >> +EXPORT_SYMBOL(interval_tree_remove);
> > >> +EXPORT_SYMBOL(interval_tree_iter_first);
> > >> +EXPORT_SYMBOL(interval_tree_iter_next);
> > >
> > > I'd prefer to see EXPORT_SYMBOL_GPL for this.
> > 
> > I'm fine with it either way. I think it would help if you stated the
> > reason for your preference though ?
> 
> Nobody objects?
> 
> How about
> 
> v3: Prefer EXPORT_SYMBOL_GPL for new library routines
> 
> as the reason in the changelog for amending the patch?

Frobbed _GPL onto the exports while applying and pulled into my
drm-intel-next queue for 3.16. Thanks for the patch and reviews.
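
For reference, the four exports in lib/interval_tree.c should now read
something like:

	EXPORT_SYMBOL_GPL(interval_tree_insert);
	EXPORT_SYMBOL_GPL(interval_tree_remove);
	EXPORT_SYMBOL_GPL(interval_tree_iter_first);
	EXPORT_SYMBOL_GPL(interval_tree_iter_next);
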
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering
  2014-04-24 16:20       ` Volkin, Bradley D
@ 2014-04-25 14:18         ` Daniel Vetter
  0 siblings, 0 replies; 17+ messages in thread
From: Daniel Vetter @ 2014-04-25 14:18 UTC (permalink / raw)
  To: Volkin, Bradley D; +Cc: intel-gfx

On Thu, Apr 24, 2014 at 09:20:35AM -0700, Volkin, Bradley D wrote:
> On Thu, Apr 24, 2014 at 02:22:39AM -0700, Chris Wilson wrote:
> > On Fri, Mar 28, 2014 at 03:58:25PM -0700, Volkin, Bradley D wrote:
> > > On Mon, Mar 17, 2014 at 05:21:55AM -0700, Chris Wilson wrote:
> > > > @@ -1949,58 +1956,58 @@ static unsigned long
> > > >  __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
> > > >  		  bool purgeable_only)
> > > >  {
> > > > -	struct list_head still_bound_list;
> > > > -	struct drm_i915_gem_object *obj, *next;
> > > > +	struct list_head still_in_list;
> > > > +	struct drm_i915_gem_object *obj;
> > > >  	unsigned long count = 0;
> > > >  
> > > > -	list_for_each_entry_safe(obj, next,
> > > > -				 &dev_priv->mm.unbound_list,
> > > > -				 global_list) {
> > > > -		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
> > > > -		    i915_gem_object_put_pages(obj) == 0) {
> > > > -			count += obj->base.size >> PAGE_SHIFT;
> > > > -			if (count >= target)
> > > > -				return count;
> > > > -		}
> > > > -	}
> > > > -
> > > >  	/*
> > > > -	 * As we may completely rewrite the bound list whilst unbinding
> > > > +	 * As we may completely rewrite the (un)bound list whilst unbinding
> > > >  	 * (due to retiring requests) we have to strictly process only
> > > >  	 * one element of the list at the time, and recheck the list
> > > >  	 * on every iteration.
> > > 
> > > Is it still true that we could retire requests on this path? I see that
> > > currently we will retire requests via:
> > > i915_vma_unbind -> i915_gem_object_finish_gpu -> i915_gem_object_wait_rendering.
> > > 
> > > But we've taken the explicit request retirement out of the wait_rendering path.
> > > Have I missed somewhere that it could still happen?
> > 
> > Yes, as wait_rendering doesn't retire all the requests, we may still have
> > a request associated with the bo. This will then cause us to call
> > i915_gem_object_retire() during i915_gem_object_put_pages() (through
> > i915_gem_object_set_to_cpu_domain), thereby discarding the last active
> > reference and destroying the object unless we take care.
> 
> Ok, I see it now. Thanks. This one is
> Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com>

Queued for -next, thanks for the patch.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/3] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-04-17 22:18   ` Volkin, Bradley D
@ 2014-05-16 10:07     ` Chris Wilson
  0 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2014-05-16 10:07 UTC (permalink / raw)
  To: Volkin, Bradley D; +Cc: intel-gfx, Goel, Akash

On Thu, Apr 17, 2014 at 03:18:48PM -0700, Volkin, Bradley D wrote:
> > +	union {
> > +		struct i915_gem_userptr {
> 
> Out of curiosity, what's the point of using both a union and a
> struct here, given that everything is within the struct?

Because I stuff other things into this union in other patches, and
compacting our object using a union has been on the cards since
introducing subclasses of objects.
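
The rough shape I have in mind is (illustrative only, with most of the
common object state elided):

	struct drm_i915_gem_object {
		struct drm_gem_object base;
		/* ... common GEM object state ... */
		union {
			struct i915_gem_userptr {
				uintptr_t ptr;
				unsigned read_only :1;
				unsigned active :4;
			} userptr;
			/* other subclass state folded in by later patches */
		};
	};
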
 
> > +	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
> > +
> > +	if (obj->userptr.mm && obj->userptr.mn == NULL)
> > +		return ERR_PTR(-EINVAL);
> > +
> 
> So this is basically saying we won't export an unsynchronized
> userptr? I don't think we've documented this or the other
> restrictions on unsynchronized userptrs (e.g. root only).

Yes. We cannot control the endpoint of a dmabuf and so we do not know if
the client would be able to control access to the bo accordingly (it
should after all appear to be a regular bo, except for the caveats about
snooped access and lack of CPU mmap support atm etc).

In fact, I later changed this to an obj->ops->export_dmabuf() callback
so that we can demote unsync'ed userptr objects for exporting.

> > +static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> > +						       struct mm_struct *mm,
> > +						       unsigned long start,
> > +						       unsigned long end)
> > +{
> > +	struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
> > +	struct interval_tree_node *it = NULL;
> > +	unsigned long serial = 0;
> > +
> > +	end--; /* interval ranges are inclusive, but invalidate range is exclusive */
> > +	while (start < end) {
> > +		struct drm_i915_gem_object *obj;
> > +
> > +		obj = NULL;
> > +		spin_lock(&mn->lock);
> > +		if (serial == mn->serial)
> > +			it = interval_tree_iter_next(it, start, end);
> > +		else
> > +			it = interval_tree_iter_first(&mn->objects, start, end);
> 
> Is the issue here just that items being removed from the tree could make 'it'
> invalid on the next call to interval_tree_iter_next()? Or are we also worried
> about items being added to the tree? If items can be added, we've been advancing
> 'start', so I think there would be the potential for adding an item to the
> portion of the invalidate range that we've already processed. Whether that
> could happen given the locking or if the invalidation should apply to such an
> object, I'm not sure.

Both. We have to guard against modifications to the interval-tree, both
by ourselves (through freeing a no-longer-active bo) and by other threads.
Only if the interval-tree was untouched whilst unlocked can we jump
ahead. I presume that the mm will handle serialisation of invalidate
against new users, so that we don't have to continuously walk over the
same range dropping user pages. (i.e. someone won't be able to mmap the
same arena until the earlier munmap is complete)
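
In other words the walk only advances with iter_next() if nothing touched
the tree whilst we had dropped the lock, and restarts from iter_first()
otherwise. Schematically (simplified from the patch; taking the reference
and dropping the pages for the found object are reduced to comments):

	struct interval_tree_node *it = NULL;
	unsigned long serial = 0;

	while (start < end) {
		spin_lock(&mn->lock);
		if (serial == mn->serial)
			it = interval_tree_iter_next(it, start, end);
		else
			it = interval_tree_iter_first(&mn->objects, start, end);
		if (it) {
			/* reference the bo behind this node and record
			 * mn->serial before dropping the lock */
			serial = mn->serial;
		}
		spin_unlock(&mn->lock);

		if (it == NULL)
			break;

		/* release the bo's user pages (may sleep and may retire
		 * requests, i.e. modify the tree) and advance start past
		 * the invalidated range */
	}
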

> > +static int
> > +i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
> > +		      struct i915_mmu_object *mn)
> > +{
> > +	struct interval_tree_node *it;
> > +	int ret;
> > +
> > +	/* Make sure we drop the final active reference (and thereby
> > +	 * remove the objects from the interval tree) before we do
> > +	 * the check for overlapping objects.
> > +	 */
> 
> I assume this comment refers to the retire_requests call, in which
> case I would move it down.

Once upon a time it was the comment for the function.

> > +	ret = i915_mutex_lock_interruptible(mmu->dev);
> > +	if (ret)
> > +		return ret;
> > +
> > +	i915_gem_retire_requests(mmu->dev);
> > +
> > +	/* Disallow overlapping userptr objects */
> > +	spin_lock(&mmu->lock);
> > +	it = interval_tree_iter_first(&mmu->objects,
> > +				      mn->it.start, mn->it.last);
> > +	if (it) {
> > +		struct drm_i915_gem_object *obj;
> > +
> > +		/* We only need to check the first object as it either
> > +		 * is idle (and in use elsewhere) or we try again in order
> > +		 * to give time for the gup-worker to run and flush its
> > +		 * object references. Afterwards if we find another
> > +		 * object that is idle (and so referenced elsewhere)
> > +		 * we know that the overlap with an pinned object is
> > +		 * genuine.
> > +		 */
> 
> I found this a bit confusing because it refers to the object being idle when
> it's really a question of the worker executing or not. In my mind, the object
> has the opposite idle/active state as the worker e.g. if the worker is active,
> the object is idle w.r.t. being used by the GPU. Hence the earlier suggestion
> r.e. renaming userptr.active.

/* We only need to check the first object in the range as it either
 * has cancelled gup work queued and we need to return to the user
 * to give time for the gup-workers to flush their object references
 * upon which the object will be removed from the interval-tree, or
 * the range is still in use by another client and the overlap is
 * invalid.
 */

> > +			uintptr_t ptr;
> > +			unsigned read_only :1;
> > +			unsigned active :4;
> > +#define I915_GEM_USERPTR_MAX_ACTIVE 15
> 
> Maybe s/active/active_workers/ or just 'workers'

Having rewritten the comment, I would go with workers.
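
i.e. something like

	uintptr_t ptr;
	unsigned read_only :1;
	unsigned workers :4;
#define I915_GEM_USERPTR_MAX_ACTIVE 15

with the define possibly renamed to match.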
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering
  2014-02-21 18:45 Chris Wilson
@ 2014-02-21 18:45 ` Chris Wilson
  0 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2014-02-21 18:45 UTC (permalink / raw)
  To: intel-gfx

A common issue we have is that retiring requests causes recursion
through GTT manipulation or page table manipulation which we can only
handle at very specific points. However, to maintain internal
consistency (enforced through our sanity checks on write_domain at
various points in the GEM object lifecycle) we do need to retire the
object prior to marking it with a new write_domain, and also clear the
write_domain for the implicit flush following a batch.

Note that this then allows the unbound objects to still be on the active
lists, and so care must be taken when removing objects from unbound lists
(similar to the caveats we face processing the bound lists).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c            | 111 ++++++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   3 +
 2 files changed, 65 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 901f953a0f4c..3fdf74565356 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -44,6 +44,9 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o
 static __must_check int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly);
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj);
+
 static int i915_gem_phys_pwrite(struct drm_device *dev,
 				struct drm_i915_gem_object *obj,
 				struct drm_i915_gem_pwrite *args,
@@ -529,6 +532,8 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		ret = i915_gem_object_wait_rendering(obj, true);
 		if (ret)
 			return ret;
+
+		i915_gem_object_retire(obj);
 	}
 
 	ret = i915_gem_object_get_pages(obj);
@@ -843,6 +848,8 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		ret = i915_gem_object_wait_rendering(obj, false);
 		if (ret)
 			return ret;
+
+		i915_gem_object_retire(obj);
 	}
 	/* Same trick applies to invalidate partially written cachelines read
 	 * before writing. */
@@ -1233,7 +1240,8 @@ static int
 i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
 				     struct intel_ring_buffer *ring)
 {
-	i915_gem_retire_requests_ring(ring);
+	if (!obj->active)
+		return 0;
 
 	/* Manually manage the write flush as we may have not yet
 	 * retired the buffer.
@@ -1243,7 +1251,6 @@ i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
 	 * we know we have passed the last write.
 	 */
 	obj->last_write_seqno = 0;
-	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 
 	return 0;
 }
@@ -1885,58 +1892,58 @@ static unsigned long
 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
 		  bool purgeable_only)
 {
-	struct list_head still_bound_list;
-	struct drm_i915_gem_object *obj, *next;
+	struct list_head still_in_list;
+	struct drm_i915_gem_object *obj;
 	unsigned long count = 0;
 
-	list_for_each_entry_safe(obj, next,
-				 &dev_priv->mm.unbound_list,
-				 global_list) {
-		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
-		    i915_gem_object_put_pages(obj) == 0) {
-			count += obj->base.size >> PAGE_SHIFT;
-			if (count >= target)
-				return count;
-		}
-	}
-
 	/*
-	 * As we may completely rewrite the bound list whilst unbinding
+	 * As we may completely rewrite the (un)bound list whilst unbinding
 	 * (due to retiring requests) we have to strictly process only
 	 * one element of the list at the time, and recheck the list
 	 * on every iteration.
+	 *
+	 * In particular, we must hold a reference whilst removing the
+	 * object as we may end up waiting for and/or retiring the objects.
+	 * This might release the final reference (held by the active list)
+	 * and result in the object being freed from under us. This is
+	 * similar to the precautions the eviction code must take whilst
+	 * removing objects.
+	 *
+	 * Also note that although these lists do not hold a reference to
+	 * the object we can safely grab one here: The final object
+	 * unreferencing and the bound_list are both protected by the
+	 * dev->struct_mutex and so we won't ever be able to observe an
+	 * object on the bound_list with a reference count equals 0.
 	 */
-	INIT_LIST_HEAD(&still_bound_list);
+	INIT_LIST_HEAD(&still_in_list);
+	while (count < target && !list_empty(&dev_priv->mm.unbound_list)) {
+		obj = list_first_entry(&dev_priv->mm.unbound_list,
+				       typeof(*obj), global_list);
+		list_move_tail(&obj->global_list, &still_in_list);
+
+		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
+			continue;
+
+		drm_gem_object_reference(&obj->base);
+
+		if (i915_gem_object_put_pages(obj) == 0)
+			count += obj->base.size >> PAGE_SHIFT;
+
+		drm_gem_object_unreference(&obj->base);
+	}
+	list_splice(&still_in_list, &dev_priv->mm.unbound_list);
+
+	INIT_LIST_HEAD(&still_in_list);
 	while (count < target && !list_empty(&dev_priv->mm.bound_list)) {
 		struct i915_vma *vma, *v;
 
 		obj = list_first_entry(&dev_priv->mm.bound_list,
 				       typeof(*obj), global_list);
-		list_move_tail(&obj->global_list, &still_bound_list);
+		list_move_tail(&obj->global_list, &still_in_list);
 
 		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
 			continue;
 
-		/*
-		 * Hold a reference whilst we unbind this object, as we may
-		 * end up waiting for and retiring requests. This might
-		 * release the final reference (held by the active list)
-		 * and result in the object being freed from under us.
-		 * in this object being freed.
-		 *
-		 * Note 1: Shrinking the bound list is special since only active
-		 * (and hence bound objects) can contain such limbo objects, so
-		 * we don't need special tricks for shrinking the unbound list.
-		 * The only other place where we have to be careful with active
-		 * objects suddenly disappearing due to retiring requests is the
-		 * eviction code.
-		 *
-		 * Note 2: Even though the bound list doesn't hold a reference
-		 * to the object we can safely grab one here: The final object
-		 * unreferencing and the bound_list are both protected by the
-		 * dev->struct_mutex and so we won't ever be able to observe an
-		 * object on the bound_list with a reference count equals 0.
-		 */
 		drm_gem_object_reference(&obj->base);
 
 		list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
@@ -1948,7 +1955,7 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
 
 		drm_gem_object_unreference(&obj->base);
 	}
-	list_splice(&still_bound_list, &dev_priv->mm.bound_list);
+	list_splice(&still_in_list, &dev_priv->mm.bound_list);
 
 	return count;
 }
@@ -1962,17 +1969,8 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target)
 static unsigned long
 i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_gem_object *obj, *next;
-	long freed = 0;
-
 	i915_gem_evict_everything(dev_priv->dev);
-
-	list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list,
-				 global_list) {
-		if (i915_gem_object_put_pages(obj) == 0)
-			freed += obj->base.size >> PAGE_SHIFT;
-	}
-	return freed;
+	return __i915_gem_shrink(dev_priv, LONG_MAX, false);
 }
 
 static int
@@ -2189,6 +2187,19 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 	WARN_ON(i915_verify_lists(dev));
 }
 
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj)
+{
+	struct intel_ring_buffer *ring = obj->ring;
+
+	if (ring == NULL)
+		return;
+
+	if (i915_seqno_passed(ring->get_seqno(ring, true),
+			      obj->last_read_seqno))
+		i915_gem_object_move_to_inactive(obj);
+}
+
 static int
 i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
 {
@@ -3527,6 +3538,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
+	i915_gem_object_retire(obj);
 	i915_gem_object_flush_cpu_write_domain(obj, false);
 
 	/* Serialise direct access to this object with the barriers for
@@ -3849,6 +3861,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
+	i915_gem_object_retire(obj);
 	i915_gem_object_flush_gtt_write_domain(obj);
 
 	old_write_domain = obj->base.write_domain;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d7229ad2bd22..46caa091c428 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -955,6 +955,9 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 			if (i915_gem_obj_ggtt_bound(obj) &&
 			    i915_gem_obj_to_ggtt(obj)->pin_count)
 				intel_mark_fb_busy(obj, ring);
+
+			/* update for the implicit flush after a batch */
+			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 		}
 
 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering
  2014-01-28 10:34 New API for creating bo from user pages Chris Wilson
@ 2014-01-28 10:34 ` Chris Wilson
  0 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2014-01-28 10:34 UTC (permalink / raw)
  To: intel-gfx

A common issue we have is that retiring requests causes recursion
through GTT manipulation or page table manipulation which we can only
handle at very specific points. However, to maintain internal
consistency (enforced through our sanity checks on write_domain at
various points in the GEM object lifecycle) we do need to retire the
object prior to marking it with a new write_domain, and also clear the
write_domain for the implicit flush following a batch.

Note that this then allows the unbound objects to still be on the active
lists, and so care must be taken when removing objects from unbound lists
(similar to the caveats we face processing the bound lists).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c            | 100 ++++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   3 +
 2 files changed, 64 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8ccb46daa427..de79dffe15f1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -43,12 +43,15 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o
 static __must_check int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly);
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj);
 static __must_check int
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 			   struct i915_address_space *vm,
 			   unsigned alignment,
 			   bool map_and_fenceable,
 			   bool nonblocking);
+
 static int i915_gem_phys_pwrite(struct drm_device *dev,
 				struct drm_i915_gem_object *obj,
 				struct drm_i915_gem_pwrite *args,
@@ -535,6 +538,8 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		ret = i915_gem_object_wait_rendering(obj, true);
 		if (ret)
 			return ret;
+
+		i915_gem_object_retire(obj);
 	}
 
 	ret = i915_gem_object_get_pages(obj);
@@ -849,6 +854,8 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		ret = i915_gem_object_wait_rendering(obj, false);
 		if (ret)
 			return ret;
+
+		i915_gem_object_retire(obj);
 	}
 	/* Same trick applies to invalidate partially written cachelines read
 	 * before writing. */
@@ -1238,7 +1245,8 @@ static int
 i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
 				     struct intel_ring_buffer *ring)
 {
-	i915_gem_retire_requests_ring(ring);
+	if (!obj->active)
+		return 0;
 
 	/* Manually manage the write flush as we may have not yet
 	 * retired the buffer.
@@ -1248,7 +1256,6 @@ i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
 	 * we know we have passed the last write.
 	 */
 	obj->last_write_seqno = 0;
-	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 
 	return 0;
 }
@@ -1856,58 +1863,58 @@ static unsigned long
 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
 		  bool purgeable_only)
 {
-	struct list_head still_bound_list;
-	struct drm_i915_gem_object *obj, *next;
+	struct list_head still_in_list;
+	struct drm_i915_gem_object *obj;
 	unsigned long count = 0;
 
-	list_for_each_entry_safe(obj, next,
-				 &dev_priv->mm.unbound_list,
-				 global_list) {
-		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
-		    i915_gem_object_put_pages(obj) == 0) {
-			count += obj->base.size >> PAGE_SHIFT;
-			if (count >= target)
-				return count;
-		}
-	}
-
 	/*
-	 * As we may completely rewrite the bound list whilst unbinding
+	 * As we may completely rewrite the (un)bound list whilst unbinding
 	 * (due to retiring requests) we have to strictly process only
 	 * one element of the list at the time, and recheck the list
 	 * on every iteration.
+	 *
+	 * In particular, we must hold a reference whilst removing the
+	 * object as we may end up waiting for and/or retiring the objects.
+	 * This might release the final reference (held by the active list)
+	 * and result in the object being freed from under us. This is
+	 * similar to the precautions the eviction code must take whilst
+	 * removing objects.
+	 *
+	 * Also note that although these lists do not hold a reference to
+	 * the object we can safely grab one here: The final object
+	 * unreferencing and the bound_list are both protected by the
+	 * dev->struct_mutex and so we won't ever be able to observe an
+	 * object on the bound_list with a reference count equals 0.
 	 */
-	INIT_LIST_HEAD(&still_bound_list);
+	INIT_LIST_HEAD(&still_in_list);
+	while (count < target && !list_empty(&dev_priv->mm.unbound_list)) {
+		obj = list_first_entry(&dev_priv->mm.unbound_list,
+				       typeof(*obj), global_list);
+		list_move_tail(&obj->global_list, &still_in_list);
+
+		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
+			continue;
+
+		drm_gem_object_reference(&obj->base);
+
+		if (i915_gem_object_put_pages(obj) == 0)
+			count += obj->base.size >> PAGE_SHIFT;
+
+		drm_gem_object_unreference(&obj->base);
+	}
+	list_splice(&still_in_list, &dev_priv->mm.unbound_list);
+
+	INIT_LIST_HEAD(&still_in_list);
 	while (count < target && !list_empty(&dev_priv->mm.bound_list)) {
 		struct i915_vma *vma, *v;
 
 		obj = list_first_entry(&dev_priv->mm.bound_list,
 				       typeof(*obj), global_list);
-		list_move_tail(&obj->global_list, &still_bound_list);
+		list_move_tail(&obj->global_list, &still_in_list);
 
 		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
 			continue;
 
-		/*
-		 * Hold a reference whilst we unbind this object, as we may
-		 * end up waiting for and retiring requests. This might
-		 * release the final reference (held by the active list)
-		 * and result in the object being freed from under us.
-		 * in this object being freed.
-		 *
-		 * Note 1: Shrinking the bound list is special since only active
-		 * (and hence bound objects) can contain such limbo objects, so
-		 * we don't need special tricks for shrinking the unbound list.
-		 * The only other place where we have to be careful with active
-		 * objects suddenly disappearing due to retiring requests is the
-		 * eviction code.
-		 *
-		 * Note 2: Even though the bound list doesn't hold a reference
-		 * to the object we can safely grab one here: The final object
-		 * unreferencing and the bound_list are both protected by the
-		 * dev->struct_mutex and so we won't ever be able to observe an
-		 * object on the bound_list with a reference count equals 0.
-		 */
 		drm_gem_object_reference(&obj->base);
 
 		list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
@@ -1919,7 +1926,7 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
 
 		drm_gem_object_unreference(&obj->base);
 	}
-	list_splice(&still_bound_list, &dev_priv->mm.bound_list);
+	list_splice(&still_in_list, &dev_priv->mm.bound_list);
 
 	return count;
 }
@@ -2160,6 +2167,19 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 	WARN_ON(i915_verify_lists(dev));
 }
 
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj)
+{
+	struct intel_ring_buffer *ring = obj->ring;
+
+	if (ring == NULL)
+		return;
+
+	if (i915_seqno_passed(ring->get_seqno(ring, true),
+			      obj->last_read_seqno))
+		i915_gem_object_move_to_inactive(obj);
+}
+
 static int
 i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
 {
@@ -3581,6 +3601,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
+	i915_gem_object_retire(obj);
 	i915_gem_object_flush_cpu_write_domain(obj, false);
 
 	/* Serialise direct access to this object with the barriers for
@@ -3901,6 +3922,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
+	i915_gem_object_retire(obj);
 	i915_gem_object_flush_gtt_write_domain(obj);
 
 	old_write_domain = obj->base.write_domain;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 032def901f98..f67112001703 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -953,6 +953,9 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 			if (i915_gem_obj_ggtt_bound(obj) &&
 			    i915_gem_obj_to_ggtt(obj)->pin_count)
 				intel_mark_fb_busy(obj, ring);
+
+			/* update for the implicit flush after a batch */
+			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 		}
 
 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering
  2014-01-21 15:07 [PATCH 1/3] lib: Always build and export interval_tree Chris Wilson
@ 2014-01-21 15:07 ` Chris Wilson
  0 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2014-01-21 15:07 UTC (permalink / raw)
  To: intel-gfx; +Cc: Akash Goel

A common issue we have is that retiring requests causes recursion
through GTT manipulation or page table manipulation which we can only
handle at very specific points. However, to maintain internal
consistency (enforced through our sanity checks on write_domain at
various points in the GEM object lifecycle) we do need to retire the
object prior to marking it with a new write_domain, and also clear the
write_domain for the implicit flush following a batch.

Note that this then allows the unbound objects to still be on the active
lists, and so care must be taken when removing objects from unbound lists
(similar to the caveats we face processing the bound lists).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c            | 100 ++++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   3 +
 2 files changed, 64 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0e8c9bb6c3da..8912aaa7118a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -43,12 +43,15 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o
 static __must_check int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly);
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj);
 static __must_check int
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 			   struct i915_address_space *vm,
 			   unsigned alignment,
 			   bool map_and_fenceable,
 			   bool nonblocking);
+
 static int i915_gem_phys_pwrite(struct drm_device *dev,
 				struct drm_i915_gem_object *obj,
 				struct drm_i915_gem_pwrite *args,
@@ -535,6 +538,8 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		ret = i915_gem_object_wait_rendering(obj, true);
 		if (ret)
 			return ret;
+
+		i915_gem_object_retire(obj);
 	}
 
 	ret = i915_gem_object_get_pages(obj);
@@ -849,6 +854,8 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		ret = i915_gem_object_wait_rendering(obj, false);
 		if (ret)
 			return ret;
+
+		i915_gem_object_retire(obj);
 	}
 	/* Same trick applies to invalidate partially written cachelines read
 	 * before writing. */
@@ -1238,7 +1245,8 @@ static int
 i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
 				     struct intel_ring_buffer *ring)
 {
-	i915_gem_retire_requests_ring(ring);
+	if (!obj->active)
+		return 0;
 
 	/* Manually manage the write flush as we may have not yet
 	 * retired the buffer.
@@ -1248,7 +1256,6 @@ i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
 	 * we know we have passed the last write.
 	 */
 	obj->last_write_seqno = 0;
-	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 
 	return 0;
 }
@@ -1856,58 +1863,58 @@ static unsigned long
 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
 		  bool purgeable_only)
 {
-	struct list_head still_bound_list;
-	struct drm_i915_gem_object *obj, *next;
+	struct list_head still_in_list;
+	struct drm_i915_gem_object *obj;
 	unsigned long count = 0;
 
-	list_for_each_entry_safe(obj, next,
-				 &dev_priv->mm.unbound_list,
-				 global_list) {
-		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
-		    i915_gem_object_put_pages(obj) == 0) {
-			count += obj->base.size >> PAGE_SHIFT;
-			if (count >= target)
-				return count;
-		}
-	}
-
 	/*
-	 * As we may completely rewrite the bound list whilst unbinding
+	 * As we may completely rewrite the (un)bound list whilst unbinding
 	 * (due to retiring requests) we have to strictly process only
 	 * one element of the list at the time, and recheck the list
 	 * on every iteration.
+	 *
+	 * In particular, we must hold a reference whilst removing the
+	 * object as we may end up waiting for and/or retiring the objects.
+	 * This might release the final reference (held by the active list)
+	 * and result in the object being freed from under us. This is
+	 * similar to the precautions the eviction code must take whilst
+	 * removing objects.
+	 *
+	 * Also note that although these lists do not hold a reference to
+	 * the object we can safely grab one here: The final object
+	 * unreferencing and the bound_list are both protected by the
+	 * dev->struct_mutex and so we won't ever be able to observe an
+	 * object on the bound_list with a reference count equals 0.
 	 */
-	INIT_LIST_HEAD(&still_bound_list);
+	INIT_LIST_HEAD(&still_in_list);
+	while (count < target && !list_empty(&dev_priv->mm.unbound_list)) {
+		obj = list_first_entry(&dev_priv->mm.unbound_list,
+				       typeof(*obj), global_list);
+		list_move_tail(&obj->global_list, &still_in_list);
+
+		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
+			continue;
+
+		drm_gem_object_reference(&obj->base);
+
+		if (i915_gem_object_put_pages(obj) == 0)
+			count += obj->base.size >> PAGE_SHIFT;
+
+		drm_gem_object_unreference(&obj->base);
+	}
+	list_splice(&still_in_list, &dev_priv->mm.unbound_list);
+
+	INIT_LIST_HEAD(&still_in_list);
 	while (count < target && !list_empty(&dev_priv->mm.bound_list)) {
 		struct i915_vma *vma, *v;
 
 		obj = list_first_entry(&dev_priv->mm.bound_list,
 				       typeof(*obj), global_list);
-		list_move_tail(&obj->global_list, &still_bound_list);
+		list_move_tail(&obj->global_list, &still_in_list);
 
 		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
 			continue;
 
-		/*
-		 * Hold a reference whilst we unbind this object, as we may
-		 * end up waiting for and retiring requests. This might
-		 * release the final reference (held by the active list)
-		 * and result in the object being freed from under us.
-		 * in this object being freed.
-		 *
-		 * Note 1: Shrinking the bound list is special since only active
-		 * (and hence bound objects) can contain such limbo objects, so
-		 * we don't need special tricks for shrinking the unbound list.
-		 * The only other place where we have to be careful with active
-		 * objects suddenly disappearing due to retiring requests is the
-		 * eviction code.
-		 *
-		 * Note 2: Even though the bound list doesn't hold a reference
-		 * to the object we can safely grab one here: The final object
-		 * unreferencing and the bound_list are both protected by the
-		 * dev->struct_mutex and so we won't ever be able to observe an
-		 * object on the bound_list with a reference count equals 0.
-		 */
 		drm_gem_object_reference(&obj->base);
 
 		list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
@@ -1919,7 +1926,7 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
 
 		drm_gem_object_unreference(&obj->base);
 	}
-	list_splice(&still_bound_list, &dev_priv->mm.bound_list);
+	list_splice(&still_in_list, &dev_priv->mm.bound_list);
 
 	return count;
 }
@@ -2160,6 +2167,19 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 	WARN_ON(i915_verify_lists(dev));
 }
 
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj)
+{
+	struct intel_ring_buffer *ring = obj->ring;
+
+	if (ring == NULL)
+		return;
+
+	if (i915_seqno_passed(ring->get_seqno(ring, true),
+			      obj->last_read_seqno))
+		i915_gem_object_move_to_inactive(obj);
+}
+
 static int
 i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
 {
@@ -3581,6 +3601,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
+	i915_gem_object_retire(obj);
 	i915_gem_object_flush_cpu_write_domain(obj, false);
 
 	/* Serialise direct access to this object with the barriers for
@@ -3901,6 +3922,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
+	i915_gem_object_retire(obj);
 	i915_gem_object_flush_gtt_write_domain(obj);
 
 	old_write_domain = obj->base.write_domain;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0c6bcff740c2..ce0529d48864 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -953,6 +953,9 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 			if (i915_gem_obj_ggtt_bound(obj) &&
 			    i915_gem_obj_to_ggtt(obj)->pin_count)
 				intel_mark_fb_busy(obj, ring);
+
+			/* update for the implicit flush after a batch */
+			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 		}
 
 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2014-05-16 10:07 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-03-17 12:21 [PATCH 1/3] lib: Export interval_tree Chris Wilson
2014-03-17 12:21 ` [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering Chris Wilson
2014-03-28 22:58   ` Volkin, Bradley D
2014-04-17 22:20     ` Volkin, Bradley D
2014-04-24  9:22     ` Chris Wilson
2014-04-24 16:20       ` Volkin, Bradley D
2014-04-25 14:18         ` Daniel Vetter
2014-03-17 12:21 ` [PATCH 3/3] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl Chris Wilson
2014-04-17 22:18   ` Volkin, Bradley D
2014-05-16 10:07     ` Chris Wilson
2014-03-17 12:31 ` [PATCH 1/3] lib: Export interval_tree David Woodhouse
2014-03-17 13:08   ` Michel Lespinasse
2014-03-18  9:56     ` Chris Wilson
2014-04-25 14:17       ` Daniel Vetter
  -- strict thread matches above, loose matches on Subject: below --
2014-02-21 18:45 Chris Wilson
2014-02-21 18:45 ` [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering Chris Wilson
2014-01-28 10:34 New API for creating bo from user pages Chris Wilson
2014-01-28 10:34 ` [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering Chris Wilson
2014-01-21 15:07 [PATCH 1/3] lib: Always build and export interval_tree Chris Wilson
2014-01-21 15:07 ` [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering Chris Wilson
