* New API for creating bo from user pages
@ 2014-01-28 10:34 Chris Wilson
  2014-01-28 10:34 ` [PATCH 1/3] lib: Export interval_tree Chris Wilson
                   ` (2 more replies)
  0 siblings, 3 replies; 34+ messages in thread
From: Chris Wilson @ 2014-01-28 10:34 UTC (permalink / raw)
  To: intel-gfx

This is a fairly hairy piece of API - the implications of transforming
an ordinary portion of process memory into an asynchronous DMA arena for
the GPU open up a can of worms regarding memory protection and
inter-process sharing. On the other hand, it is a distinct advantage of
UMA that we have failed to exploit (since the beginning).
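
To make the shape of the API concrete, here is a rough userspace sketch
of wrapping an existing allocation as a bo, using the struct and ioctl
introduced in patch 3/3. The helper name, include path and error
handling are illustrative only and not part of the series:

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Wrap a page-aligned buffer whose size is a multiple of the page size
 * as a GEM object via the new ioctl. Returns the bo handle, or 0 on
 * error (object handles are nonzero).
 */
static uint32_t wrap_userptr(int drm_fd, void *ptr, uint64_t size)
{
	struct drm_i915_gem_userptr arg = {
		.user_ptr = (uintptr_t)ptr,
		.user_size = size,
		.flags = 0,
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
		return 0;

	return arg.handle;
}

In practice one would go through drmIoctl() so that interrupted calls
are transparently restarted.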

Many thanks to Tvrtko Ursulin for providing an i-g-t test case that
seems to have shaken out most of the bugs.
-Chris

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH 1/3] lib: Export interval_tree
  2014-01-28 10:34 New API for creating bo from user pages Chris Wilson
@ 2014-01-28 10:34 ` Chris Wilson
  2014-01-28 10:34 ` [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering Chris Wilson
  2014-01-28 10:34 ` [PATCH 3/3] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl Chris Wilson
  2 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2014-01-28 10:34 UTC (permalink / raw)
  To: intel-gfx
  Cc: Andrea Arcangeli, Rik van Riel, Peter Zijlstra, Andrew Morton,
	Michel Lespinasse

lib/interval_tree.c provides a simple interface for an interval-tree
(an augmented red-black tree) but is only built when testing the generic
macros for building interval-trees. For drivers with modest needs,
export the simple interval-tree library as is.
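
As a quick illustration (a sketch only; the names here are made up and
mirror the usage in the test module below), a driver embeds
struct interval_tree_node and calls the exported helpers directly:

#include <linux/interval_tree.h>

static struct rb_root my_ranges = RB_ROOT;

/* Track an inclusive [start, last] range. */
static void my_track_range(struct interval_tree_node *node,
			   unsigned long start, unsigned long last)
{
	node->start = start;
	node->last = last;
	interval_tree_insert(node, &my_ranges);
}

/* Visit every tracked range overlapping [start, last]. */
static void my_for_each_overlap(unsigned long start, unsigned long last,
				void (*fn)(struct interval_tree_node *))
{
	struct interval_tree_node *it;

	for (it = interval_tree_iter_first(&my_ranges, start, last); it;
	     it = interval_tree_iter_next(it, start, last))
		fn(it);
}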

v2: Lots of help from Michel Lespinasse to only compile the code
    as required:
    - make INTERVAL_TREE a config option
    - make INTERVAL_TREE_TEST select the library functions
      and sanitize the filenames & Makefile
    - prepare interval_tree for being built as a module if required

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Michel Lespinasse <walken@google.com>
[Acked for inclusion via drm/i915 by Andrew Morton.]
---
 lib/Kconfig                   |  14 ++++++
 lib/Kconfig.debug             |   1 +
 lib/Makefile                  |   3 +-
 lib/interval_tree.c           |   6 +++
 lib/interval_tree_test.c      | 106 ++++++++++++++++++++++++++++++++++++++++++
 lib/interval_tree_test_main.c | 106 ------------------------------------------
 6 files changed, 128 insertions(+), 108 deletions(-)
 create mode 100644 lib/interval_tree_test.c
 delete mode 100644 lib/interval_tree_test_main.c

diff --git a/lib/Kconfig b/lib/Kconfig
index 991c98bc4a3f..04270aae4b60 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -322,6 +322,20 @@ config TEXTSEARCH_FSM
 config BTREE
 	boolean
 
+config INTERVAL_TREE
+	boolean
+	help
+	  Simple, embeddable, interval-tree. Can find the start of an
+	  overlapping range in log(n) time and then iterate over all
+	  overlapping nodes. The algorithm is implemented as an
+	  augmented rbtree.
+
+	  See:
+
+		Documentation/rbtree.txt
+
+	  for more information.
+
 config ASSOCIATIVE_ARRAY
 	bool
 	help
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index db25707aa41b..a29e9b84f102 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1478,6 +1478,7 @@ config RBTREE_TEST
 config INTERVAL_TREE_TEST
 	tristate "Interval tree test"
 	depends on m && DEBUG_KERNEL
+	select INTERVAL_TREE
 	help
 	  A benchmark measuring the performance of the interval tree library
 
diff --git a/lib/Makefile b/lib/Makefile
index a459c31e8c6b..fc04948548ad 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -47,6 +47,7 @@ CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 
 obj-$(CONFIG_BTREE) += btree.o
+obj-$(CONFIG_INTERVAL_TREE) += interval_tree.o
 obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
@@ -152,8 +153,6 @@ lib-$(CONFIG_LIBFDT) += $(libfdt_files)
 obj-$(CONFIG_RBTREE_TEST) += rbtree_test.o
 obj-$(CONFIG_INTERVAL_TREE_TEST) += interval_tree_test.o
 
-interval_tree_test-objs := interval_tree_test_main.o interval_tree.o
-
 obj-$(CONFIG_PERCPU_TEST) += percpu_test.o
 
 obj-$(CONFIG_ASN1) += asn1_decoder.o
diff --git a/lib/interval_tree.c b/lib/interval_tree.c
index e6eb406f2d65..e4109f624e51 100644
--- a/lib/interval_tree.c
+++ b/lib/interval_tree.c
@@ -1,6 +1,7 @@
 #include <linux/init.h>
 #include <linux/interval_tree.h>
 #include <linux/interval_tree_generic.h>
+#include <linux/module.h>
 
 #define START(node) ((node)->start)
 #define LAST(node)  ((node)->last)
@@ -8,3 +9,8 @@
 INTERVAL_TREE_DEFINE(struct interval_tree_node, rb,
 		     unsigned long, __subtree_last,
 		     START, LAST,, interval_tree)
+
+EXPORT_SYMBOL(interval_tree_insert);
+EXPORT_SYMBOL(interval_tree_remove);
+EXPORT_SYMBOL(interval_tree_iter_first);
+EXPORT_SYMBOL(interval_tree_iter_next);
diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c
new file mode 100644
index 000000000000..245900b98c8e
--- /dev/null
+++ b/lib/interval_tree_test.c
@@ -0,0 +1,106 @@
+#include <linux/module.h>
+#include <linux/interval_tree.h>
+#include <linux/random.h>
+#include <asm/timex.h>
+
+#define NODES        100
+#define PERF_LOOPS   100000
+#define SEARCHES     100
+#define SEARCH_LOOPS 10000
+
+static struct rb_root root = RB_ROOT;
+static struct interval_tree_node nodes[NODES];
+static u32 queries[SEARCHES];
+
+static struct rnd_state rnd;
+
+static inline unsigned long
+search(unsigned long query, struct rb_root *root)
+{
+	struct interval_tree_node *node;
+	unsigned long results = 0;
+
+	for (node = interval_tree_iter_first(root, query, query); node;
+	     node = interval_tree_iter_next(node, query, query))
+		results++;
+	return results;
+}
+
+static void init(void)
+{
+	int i;
+	for (i = 0; i < NODES; i++) {
+		u32 a = prandom_u32_state(&rnd);
+		u32 b = prandom_u32_state(&rnd);
+		if (a <= b) {
+			nodes[i].start = a;
+			nodes[i].last = b;
+		} else {
+			nodes[i].start = b;
+			nodes[i].last = a;
+		}
+	}
+	for (i = 0; i < SEARCHES; i++)
+		queries[i] = prandom_u32_state(&rnd);
+}
+
+static int interval_tree_test_init(void)
+{
+	int i, j;
+	unsigned long results;
+	cycles_t time1, time2, time;
+
+	printk(KERN_ALERT "interval tree insert/remove");
+
+	prandom_seed_state(&rnd, 3141592653589793238ULL);
+	init();
+
+	time1 = get_cycles();
+
+	for (i = 0; i < PERF_LOOPS; i++) {
+		for (j = 0; j < NODES; j++)
+			interval_tree_insert(nodes + j, &root);
+		for (j = 0; j < NODES; j++)
+			interval_tree_remove(nodes + j, &root);
+	}
+
+	time2 = get_cycles();
+	time = time2 - time1;
+
+	time = div_u64(time, PERF_LOOPS);
+	printk(" -> %llu cycles\n", (unsigned long long)time);
+
+	printk(KERN_ALERT "interval tree search");
+
+	for (j = 0; j < NODES; j++)
+		interval_tree_insert(nodes + j, &root);
+
+	time1 = get_cycles();
+
+	results = 0;
+	for (i = 0; i < SEARCH_LOOPS; i++)
+		for (j = 0; j < SEARCHES; j++)
+			results += search(queries[j], &root);
+
+	time2 = get_cycles();
+	time = time2 - time1;
+
+	time = div_u64(time, SEARCH_LOOPS);
+	results = div_u64(results, SEARCH_LOOPS);
+	printk(" -> %llu cycles (%lu results)\n",
+	       (unsigned long long)time, results);
+
+	return -EAGAIN; /* Fail will directly unload the module */
+}
+
+static void interval_tree_test_exit(void)
+{
+	printk(KERN_ALERT "test exit\n");
+}
+
+module_init(interval_tree_test_init)
+module_exit(interval_tree_test_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michel Lespinasse");
+MODULE_DESCRIPTION("Interval Tree test");
diff --git a/lib/interval_tree_test_main.c b/lib/interval_tree_test_main.c
deleted file mode 100644
index 245900b98c8e..000000000000
--- a/lib/interval_tree_test_main.c
+++ /dev/null
@@ -1,106 +0,0 @@
-#include <linux/module.h>
-#include <linux/interval_tree.h>
-#include <linux/random.h>
-#include <asm/timex.h>
-
-#define NODES        100
-#define PERF_LOOPS   100000
-#define SEARCHES     100
-#define SEARCH_LOOPS 10000
-
-static struct rb_root root = RB_ROOT;
-static struct interval_tree_node nodes[NODES];
-static u32 queries[SEARCHES];
-
-static struct rnd_state rnd;
-
-static inline unsigned long
-search(unsigned long query, struct rb_root *root)
-{
-	struct interval_tree_node *node;
-	unsigned long results = 0;
-
-	for (node = interval_tree_iter_first(root, query, query); node;
-	     node = interval_tree_iter_next(node, query, query))
-		results++;
-	return results;
-}
-
-static void init(void)
-{
-	int i;
-	for (i = 0; i < NODES; i++) {
-		u32 a = prandom_u32_state(&rnd);
-		u32 b = prandom_u32_state(&rnd);
-		if (a <= b) {
-			nodes[i].start = a;
-			nodes[i].last = b;
-		} else {
-			nodes[i].start = b;
-			nodes[i].last = a;
-		}
-	}
-	for (i = 0; i < SEARCHES; i++)
-		queries[i] = prandom_u32_state(&rnd);
-}
-
-static int interval_tree_test_init(void)
-{
-	int i, j;
-	unsigned long results;
-	cycles_t time1, time2, time;
-
-	printk(KERN_ALERT "interval tree insert/remove");
-
-	prandom_seed_state(&rnd, 3141592653589793238ULL);
-	init();
-
-	time1 = get_cycles();
-
-	for (i = 0; i < PERF_LOOPS; i++) {
-		for (j = 0; j < NODES; j++)
-			interval_tree_insert(nodes + j, &root);
-		for (j = 0; j < NODES; j++)
-			interval_tree_remove(nodes + j, &root);
-	}
-
-	time2 = get_cycles();
-	time = time2 - time1;
-
-	time = div_u64(time, PERF_LOOPS);
-	printk(" -> %llu cycles\n", (unsigned long long)time);
-
-	printk(KERN_ALERT "interval tree search");
-
-	for (j = 0; j < NODES; j++)
-		interval_tree_insert(nodes + j, &root);
-
-	time1 = get_cycles();
-
-	results = 0;
-	for (i = 0; i < SEARCH_LOOPS; i++)
-		for (j = 0; j < SEARCHES; j++)
-			results += search(queries[j], &root);
-
-	time2 = get_cycles();
-	time = time2 - time1;
-
-	time = div_u64(time, SEARCH_LOOPS);
-	results = div_u64(results, SEARCH_LOOPS);
-	printk(" -> %llu cycles (%lu results)\n",
-	       (unsigned long long)time, results);
-
-	return -EAGAIN; /* Fail will directly unload the module */
-}
-
-static void interval_tree_test_exit(void)
-{
-	printk(KERN_ALERT "test exit\n");
-}
-
-module_init(interval_tree_test_init)
-module_exit(interval_tree_test_exit)
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Michel Lespinasse");
-MODULE_DESCRIPTION("Interval Tree test");
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering
  2014-01-28 10:34 New API for creating bo from user pages Chris Wilson
  2014-01-28 10:34 ` [PATCH 1/3] lib: Export interval_tree Chris Wilson
@ 2014-01-28 10:34 ` Chris Wilson
  2014-01-28 10:34 ` [PATCH 3/3] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl Chris Wilson
  2 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2014-01-28 10:34 UTC (permalink / raw)
  To: intel-gfx

A common issue we have is that retiring requests causes recursion
through GTT or page-table manipulation, which we can only handle at
very specific points. However, to maintain internal consistency
(enforced through our sanity checks on write_domain at various points
in the GEM object lifecycle) we do need to retire the object prior to
marking it with a new write_domain, and we also need to clear the
write_domain for the implicit flush following a batch.

Note that this then allows unbound objects to remain on the active
lists, so care must be taken when removing objects from the unbound
list (similar to the caveats we face when processing the bound list).
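
Condensed, the set-to-domain paths end up with the following ordering
(a sketch of the pattern in the hunks below; the wait call is shown
only for context and its arguments are approximate):

	ret = i915_gem_object_wait_rendering(obj, !write);
	if (ret)
		return ret;

	/* Explicitly retire the now-idle object instead of relying on
	 * the recursive retire_requests, then flush the old domain. */
	i915_gem_object_retire(obj);
	i915_gem_object_flush_cpu_write_domain(obj, false);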

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c            | 100 ++++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   3 +
 2 files changed, 64 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8ccb46daa427..de79dffe15f1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -43,12 +43,15 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o
 static __must_check int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly);
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj);
 static __must_check int
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 			   struct i915_address_space *vm,
 			   unsigned alignment,
 			   bool map_and_fenceable,
 			   bool nonblocking);
+
 static int i915_gem_phys_pwrite(struct drm_device *dev,
 				struct drm_i915_gem_object *obj,
 				struct drm_i915_gem_pwrite *args,
@@ -535,6 +538,8 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		ret = i915_gem_object_wait_rendering(obj, true);
 		if (ret)
 			return ret;
+
+		i915_gem_object_retire(obj);
 	}
 
 	ret = i915_gem_object_get_pages(obj);
@@ -849,6 +854,8 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		ret = i915_gem_object_wait_rendering(obj, false);
 		if (ret)
 			return ret;
+
+		i915_gem_object_retire(obj);
 	}
 	/* Same trick applies to invalidate partially written cachelines read
 	 * before writing. */
@@ -1238,7 +1245,8 @@ static int
 i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
 				     struct intel_ring_buffer *ring)
 {
-	i915_gem_retire_requests_ring(ring);
+	if (!obj->active)
+		return 0;
 
 	/* Manually manage the write flush as we may have not yet
 	 * retired the buffer.
@@ -1248,7 +1256,6 @@ i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
 	 * we know we have passed the last write.
 	 */
 	obj->last_write_seqno = 0;
-	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 
 	return 0;
 }
@@ -1856,58 +1863,58 @@ static unsigned long
 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
 		  bool purgeable_only)
 {
-	struct list_head still_bound_list;
-	struct drm_i915_gem_object *obj, *next;
+	struct list_head still_in_list;
+	struct drm_i915_gem_object *obj;
 	unsigned long count = 0;
 
-	list_for_each_entry_safe(obj, next,
-				 &dev_priv->mm.unbound_list,
-				 global_list) {
-		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
-		    i915_gem_object_put_pages(obj) == 0) {
-			count += obj->base.size >> PAGE_SHIFT;
-			if (count >= target)
-				return count;
-		}
-	}
-
 	/*
-	 * As we may completely rewrite the bound list whilst unbinding
+	 * As we may completely rewrite the (un)bound list whilst unbinding
 	 * (due to retiring requests) we have to strictly process only
 	 * one element of the list at the time, and recheck the list
 	 * on every iteration.
+	 *
+	 * In particular, we must hold a reference whilst removing the
+	 * object as we may end up waiting for and/or retiring the objects.
+	 * This might release the final reference (held by the active list)
+	 * and result in the object being freed from under us. This is
+	 * similar to the precautions the eviction code must take whilst
+	 * removing objects.
+	 *
+	 * Also note that although these lists do not hold a reference to
+	 * the object we can safely grab one here: The final object
+	 * unreferencing and the bound_list are both protected by the
+	 * dev->struct_mutex and so we won't ever be able to observe an
+	 * object on the bound_list with a reference count equals 0.
 	 */
-	INIT_LIST_HEAD(&still_bound_list);
+	INIT_LIST_HEAD(&still_in_list);
+	while (count < target && !list_empty(&dev_priv->mm.unbound_list)) {
+		obj = list_first_entry(&dev_priv->mm.unbound_list,
+				       typeof(*obj), global_list);
+		list_move_tail(&obj->global_list, &still_in_list);
+
+		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
+			continue;
+
+		drm_gem_object_reference(&obj->base);
+
+		if (i915_gem_object_put_pages(obj) == 0)
+			count += obj->base.size >> PAGE_SHIFT;
+
+		drm_gem_object_unreference(&obj->base);
+	}
+	list_splice(&still_in_list, &dev_priv->mm.unbound_list);
+
+	INIT_LIST_HEAD(&still_in_list);
 	while (count < target && !list_empty(&dev_priv->mm.bound_list)) {
 		struct i915_vma *vma, *v;
 
 		obj = list_first_entry(&dev_priv->mm.bound_list,
 				       typeof(*obj), global_list);
-		list_move_tail(&obj->global_list, &still_bound_list);
+		list_move_tail(&obj->global_list, &still_in_list);
 
 		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
 			continue;
 
-		/*
-		 * Hold a reference whilst we unbind this object, as we may
-		 * end up waiting for and retiring requests. This might
-		 * release the final reference (held by the active list)
-		 * and result in the object being freed from under us.
-		 * in this object being freed.
-		 *
-		 * Note 1: Shrinking the bound list is special since only active
-		 * (and hence bound objects) can contain such limbo objects, so
-		 * we don't need special tricks for shrinking the unbound list.
-		 * The only other place where we have to be careful with active
-		 * objects suddenly disappearing due to retiring requests is the
-		 * eviction code.
-		 *
-		 * Note 2: Even though the bound list doesn't hold a reference
-		 * to the object we can safely grab one here: The final object
-		 * unreferencing and the bound_list are both protected by the
-		 * dev->struct_mutex and so we won't ever be able to observe an
-		 * object on the bound_list with a reference count equals 0.
-		 */
 		drm_gem_object_reference(&obj->base);
 
 		list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
@@ -1919,7 +1926,7 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
 
 		drm_gem_object_unreference(&obj->base);
 	}
-	list_splice(&still_bound_list, &dev_priv->mm.bound_list);
+	list_splice(&still_in_list, &dev_priv->mm.bound_list);
 
 	return count;
 }
@@ -2160,6 +2167,19 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 	WARN_ON(i915_verify_lists(dev));
 }
 
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj)
+{
+	struct intel_ring_buffer *ring = obj->ring;
+
+	if (ring == NULL)
+		return;
+
+	if (i915_seqno_passed(ring->get_seqno(ring, true),
+			      obj->last_read_seqno))
+		i915_gem_object_move_to_inactive(obj);
+}
+
 static int
 i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
 {
@@ -3581,6 +3601,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
+	i915_gem_object_retire(obj);
 	i915_gem_object_flush_cpu_write_domain(obj, false);
 
 	/* Serialise direct access to this object with the barriers for
@@ -3901,6 +3922,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
+	i915_gem_object_retire(obj);
 	i915_gem_object_flush_gtt_write_domain(obj);
 
 	old_write_domain = obj->base.write_domain;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 032def901f98..f67112001703 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -953,6 +953,9 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 			if (i915_gem_obj_ggtt_bound(obj) &&
 			    i915_gem_obj_to_ggtt(obj)->pin_count)
 				intel_mark_fb_busy(obj, ring);
+
+			/* update for the implicit flush after a batch */
+			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 		}
 
 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 3/3] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-01-28 10:34 New API for creating bo from user pages Chris Wilson
  2014-01-28 10:34 ` [PATCH 1/3] lib: Export interval_tree Chris Wilson
  2014-01-28 10:34 ` [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering Chris Wilson
@ 2014-01-28 10:34 ` Chris Wilson
  2014-01-28 13:16   ` [PATCH] " Chris Wilson
  2 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2014-01-28 10:34 UTC (permalink / raw)
  To: intel-gfx; +Cc: Akash Goel

By exporting the ability to map user addresses and insert PTEs
representing their backing pages into the GTT, we can exploit UMA in
order to utilize normal application data as a texture source or even as
a render target (depending upon the capabilities of the chipset). This
has a number of uses, with zero-copy downloads to the GPU and cheap
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has widespread implications, from faster
rendering of client-side software rasterisers (chromium) and mitigation
of stalls due to readback (firefox), to faster pipelining of texture
data (such as pixel buffer objects in GL or data blobs in CL).

v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck for a lost mmu after reacquiring the mutex.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forward porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
    to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
     Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifier
     with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
     within the mmu_notifier range
v16: Fix a typo for iterating over multiple objects in the range and
     rearrange error path to destroy the mmu_notifier locklessly.
     Also close a race between invalidate_range and the get_pages_worker.
v17: Close a race between get_pages_worker/invalidate_range and fresh
     allocations of the same userptr range - and notice that
     struct_mutex was presumed to be held during creation when it wasn't.
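
Since v9 the ioctl rejects anything that is not page aligned, so
userspace is expected to allocate the backing store accordingly; a
hypothetical sketch (the helper below is illustrative and not part of
this series):

#include <stdlib.h>
#include <unistd.h>

/* Round the request up to a whole number of pages and allocate
 * page-aligned memory, as required by the userptr ioctl.
 */
static void *alloc_userptr_backing(size_t *size)
{
	const size_t page = (size_t)sysconf(_SC_PAGESIZE);
	void *ptr;

	*size = (*size + page - 1) & ~(page - 1);
	if (posix_memalign(&ptr, page, *size))
		return NULL;

	return ptr;
}

The resulting pointer/size pair is what gets passed as user_ptr and
user_size in struct drm_i915_gem_userptr.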

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
---
 drivers/gpu/drm/i915/Kconfig            |   1 +
 drivers/gpu/drm/i915/Makefile           |   1 +
 drivers/gpu/drm/i915/i915_dma.c         |   1 +
 drivers/gpu/drm/i915/i915_drv.h         |  26 +-
 drivers/gpu/drm/i915/i915_gem.c         |   4 +
 drivers/gpu/drm/i915/i915_gem_userptr.c | 673 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c   |   2 +
 include/uapi/drm/i915_drm.h             |  16 +
 8 files changed, 723 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 73ed59eff139..9940baee10c2 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -5,6 +5,7 @@ config DRM_I915
 	depends on (AGP || AGP=n)
 	select INTEL_GTT
 	select AGP_INTEL if AGP
+	select INTERVAL_TREE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 06d27423af78..3c3c69313518 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -15,6 +15,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
 	  i915_gem_gtt.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_userptr.o \
 	  i915_params.o \
 	  i915_sysfs.o \
 	  i915_trace_points.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0eef7f067a58..1ff4b065491e 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1918,6 +1918,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE2, i915_gem_create2_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c05c5acf2718..44cf7be40751 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
 #include <linux/i2c-algo-bit.h>
 #include <drm/intel-gtt.h>
 #include <linux/backlight.h>
+#include <linux/hashtable.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
@@ -163,6 +164,7 @@ enum hpd_pin {
 		if ((intel_encoder)->base.crtc == (__crtc))
 
 struct drm_i915_private;
+struct i915_mmu_notifier;
 
 enum intel_dpll_id {
 	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
@@ -356,6 +358,7 @@ struct drm_i915_error_state {
 		u32 tiling:2;
 		u32 dirty:1;
 		u32 purgeable:1;
+		u32 userptr:1;
 		s32 ring:4;
 		u32 cache_level:3;
 	} **active_bo, **pinned_bo;
@@ -1461,6 +1464,9 @@ typedef struct drm_i915_private {
 	struct i915_gtt gtt; /* VMA representing the global address space */
 
 	struct i915_gem_mm mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+	DECLARE_HASHTABLE(mmu_notifiers, 7);
+#endif
 
 	/* Kernel Modesetting */
 
@@ -1592,6 +1598,7 @@ struct drm_i915_gem_object_ops {
 	 */
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
+	void (*release)(struct drm_i915_gem_object *);
 };
 
 struct drm_i915_gem_object {
@@ -1705,9 +1712,23 @@ struct drm_i915_gem_object {
 
 	/** for phy allocated objects */
 	struct drm_i915_gem_phys_object *phys_obj;
+
+	union {
+		struct i915_gem_userptr {
+			uintptr_t ptr;
+			unsigned read_only :1;
+			unsigned active :4;
+#define I915_GEM_USERPTR_MAX_ACTIVE 15
+
+			struct mm_struct *mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+			struct i915_mmu_object *mn;
+#endif
+			struct work_struct *work;
+		} userptr;
+	};
 };
 #define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base)
-
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /**
@@ -2017,6 +2038,9 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
 int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
+int i915_gem_init_userptr(struct drm_device *dev);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
 int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index de79dffe15f1..8720d4d98ef1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4380,6 +4380,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->base.import_attach)
 		drm_prime_gem_destroy(&obj->base, NULL);
 
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
@@ -4644,6 +4647,7 @@ int i915_gem_init(struct drm_device *dev)
 			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
 	}
 
+	i915_gem_init_userptr(dev);
 	i915_gem_init_global_gtt(dev);
 
 	ret = i915_gem_context_init(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 000000000000..ae66f40271cd
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -0,0 +1,673 @@
+/*
+ * Copyright © 2012-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_context.h>
+#include <linux/mmu_notifier.h>
+#include <linux/mempolicy.h>
+#include <linux/swap.h>
+
+#if defined(CONFIG_MMU_NOTIFIER)
+#include <linux/interval_tree.h>
+
+struct i915_mmu_notifier {
+	spinlock_t lock;
+	struct hlist_node node;
+	struct mmu_notifier mn;
+	struct rb_root objects;
+	struct drm_device *dev;
+	struct mm_struct *mm;
+	struct work_struct work;
+	unsigned long count;
+	unsigned long serial;
+};
+
+struct i915_mmu_object {
+	struct i915_mmu_notifier *mmu;
+	struct interval_tree_node it;
+	struct drm_i915_gem_object *obj;
+};
+
+static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
+						       struct mm_struct *mm,
+						       unsigned long start,
+						       unsigned long end)
+{
+	struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
+	struct interval_tree_node *it = NULL;
+	unsigned long serial = 0;
+
+	end--; /* interval ranges are inclusive, but invalidate range is exclusive */
+	while (start < end) {
+		struct drm_i915_gem_object *obj;
+
+		obj = NULL;
+		spin_lock(&mn->lock);
+		if (serial == mn->serial)
+			it = interval_tree_iter_next(it, start, end);
+		else
+			it = interval_tree_iter_first(&mn->objects, start, end);
+		if (it != NULL) {
+			obj = container_of(it, struct i915_mmu_object, it)->obj;
+			drm_gem_object_reference(&obj->base);
+			serial = mn->serial;
+		}
+		spin_unlock(&mn->lock);
+		if (obj == NULL)
+			return;
+
+		mutex_lock(&mn->dev->struct_mutex);
+		/* Cancel any active worker and force us to re-evaluate gup */
+		obj->userptr.work = NULL;
+
+		if (obj->pages != NULL) {
+			struct drm_i915_private *dev_priv = to_i915(mn->dev);
+			struct i915_vma *vma, *tmp;
+			bool was_interruptible;
+
+			was_interruptible = dev_priv->mm.interruptible;
+			dev_priv->mm.interruptible = false;
+
+			list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
+				int ret = i915_vma_unbind(vma);
+				WARN_ON(ret && ret != -EIO);
+			}
+			WARN_ON(i915_gem_object_put_pages(obj));
+
+			dev_priv->mm.interruptible = was_interruptible;
+		}
+
+		start = obj->userptr.ptr + obj->base.size;
+
+		drm_gem_object_unreference(&obj->base);
+		mutex_unlock(&mn->dev->struct_mutex);
+	}
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
+};
+
+static struct i915_mmu_notifier *
+__i915_mmu_notifier_lookup(struct drm_device *dev, struct mm_struct *mm)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_mmu_notifier *mmu;
+
+	/* Protected by dev->struct_mutex */
+	hash_for_each_possible(dev_priv->mmu_notifiers, mmu, node, (unsigned long)mm)
+		if (mmu->mm == mm)
+			return mmu;
+
+	return NULL;
+}
+
+static struct i915_mmu_notifier *
+i915_mmu_notifier_get(struct drm_device *dev, struct mm_struct *mm)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_mmu_notifier *mmu;
+	int ret;
+
+	lockdep_assert_held(&dev->struct_mutex);
+
+	mmu = __i915_mmu_notifier_lookup(dev, mm);
+	if (mmu)
+		return mmu;
+
+	mmu = kmalloc(sizeof(*mmu), GFP_KERNEL);
+	if (mmu == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&mmu->lock);
+	mmu->dev = dev;
+	mmu->mn.ops = &i915_gem_userptr_notifier;
+	mmu->mm = mm;
+	mmu->objects = RB_ROOT;
+	mmu->count = 0;
+	mmu->serial = 0;
+
+	/* Protected by mmap_sem (write-lock) */
+	ret = __mmu_notifier_register(&mmu->mn, mm);
+	if (ret) {
+		kfree(mmu);
+		return ERR_PTR(ret);
+	}
+
+	/* Protected by dev->struct_mutex */
+	hash_add(dev_priv->mmu_notifiers, &mmu->node, (unsigned long)mm);
+	return mmu;
+}
+
+static void
+__i915_mmu_notifier_destroy_worker(struct work_struct *work)
+{
+	struct i915_mmu_notifier *mmu = container_of(work, typeof(*mmu), work);
+	mmu_notifier_unregister(&mmu->mn, mmu->mm);
+	kfree(mmu);
+}
+
+static void
+__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
+{
+	lockdep_assert_held(&mmu->dev->struct_mutex);
+
+	/* Protected by dev->struct_mutex */
+	hash_del(&mmu->node);
+
+	/* Our lock ordering is: mmap_sem, mmu_notifier_srcu, struct_mutex.
+	 * We enter the function holding struct_mutex, therefore we need
+	 * to drop our mutex prior to calling mmu_notifier_unregister in
+	 * order to prevent lock inversion (and system-wide deadlock)
+	 * between the mmap_sem and struct_mutex.
+	 */
+	INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
+	schedule_work(&mmu->work);
+}
+
+static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
+{
+	if (++mmu->serial == 0)
+		mmu->serial = 1;
+}
+
+static void
+i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
+		      struct i915_mmu_object *mn)
+{
+	lockdep_assert_held(&mmu->dev->struct_mutex);
+
+	spin_lock(&mmu->lock);
+	interval_tree_remove(&mn->it, &mmu->objects);
+	__i915_mmu_notifier_update_serial(mmu);
+	spin_unlock(&mmu->lock);
+
+	/* Protected against _add() by dev->struct_mutex */
+	if (--mmu->count == 0)
+		__i915_mmu_notifier_destroy(mmu);
+}
+
+static int
+i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
+		      struct i915_mmu_object *mn)
+{
+	struct interval_tree_node *it;
+	int ret;
+
+	/* Make sure we drop the final active reference (and thereby
+	 * remove the objects from the interval tree) before we do
+	 * the check for overlapping objects.
+	 */
+	ret = i915_mutex_lock_interruptible(mmu->dev);
+	if (ret)
+		return ret;
+
+	i915_gem_retire_requests(mmu->dev);
+
+	/* Disallow overlapping userptr objects */
+	spin_lock(&mmu->lock);
+	it = interval_tree_iter_first(&mmu->objects,
+				      mn->it.start, mn->it.last);
+	if (it) {
+		struct drm_i915_gem_object *obj;
+
+		/* We only need to check the first object as it either
+		 * is idle (and in use elsewhere) or we try again in order
+		 * to give time for the gup-worker to run and flush its
+		 * object references. Afterwards if we find another
+		 * object that is idle (and so referenced elsewhere)
+		 * we know that the overlap with a pinned object is
+		 * genuine.
+		 */
+		obj = container_of(it, struct i915_mmu_object, it)->obj;
+		ret = obj->userptr.active ? -EAGAIN : -EINVAL;
+	} else {
+		interval_tree_insert(&mn->it, &mmu->objects);
+		__i915_mmu_notifier_update_serial(mmu);
+		ret = 0;
+	}
+	spin_unlock(&mmu->lock);
+	mutex_unlock(&mmu->dev->struct_mutex);
+
+	return ret;
+}
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+	struct i915_mmu_object *mn;
+
+	mn = obj->userptr.mn;
+	if (mn == NULL)
+		return;
+
+	i915_mmu_notifier_del(mn->mmu, mn);
+	obj->userptr.mn = NULL;
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	struct i915_mmu_notifier *mmu;
+	struct i915_mmu_object *mn;
+	int ret;
+
+	if (flags & I915_USERPTR_UNSYNCHRONIZED)
+		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+
+	down_write(&obj->userptr.mm->mmap_sem);
+	ret = i915_mutex_lock_interruptible(obj->base.dev);
+	if (ret == 0) {
+		mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
+		if (!IS_ERR(mmu))
+			mmu->count++; /* preemptive add to act as a refcount */
+		else
+			ret = PTR_ERR(mmu);
+		mutex_unlock(&obj->base.dev->struct_mutex);
+	}
+	up_write(&obj->userptr.mm->mmap_sem);
+	if (ret)
+		return ret;
+
+	mn = kzalloc(sizeof(*mn), GFP_KERNEL);
+	if (mn == NULL) {
+		ret = -ENOMEM;
+		goto destroy_mmu;
+	}
+
+	mn->mmu = mmu;
+	mn->it.start = obj->userptr.ptr;
+	mn->it.last = mn->it.start + obj->base.size - 1;
+	mn->obj = obj;
+
+	ret = i915_mmu_notifier_add(mmu, mn);
+	if (ret)
+		goto free_mn;
+
+	obj->userptr.mn = mn;
+	return 0;
+
+free_mn:
+	kfree(mn);
+destroy_mmu:
+	mutex_lock(&obj->base.dev->struct_mutex);
+	if (--mmu->count == 0)
+		__i915_mmu_notifier_destroy(mmu);
+	mutex_unlock(&obj->base.dev->struct_mutex);
+	return ret;
+}
+
+#else
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
+		return -ENODEV;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return 0;
+}
+#endif
+
+struct get_pages_work {
+	struct work_struct work;
+	struct drm_i915_gem_object *obj;
+	struct task_struct *task;
+};
+
+static int
+st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
+{
+	struct scatterlist *sg;
+	int n;
+
+	*st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (*st == NULL || sg_alloc_table(*st, num_pages, GFP_KERNEL)) {
+		kfree(*st);
+		return -ENOMEM;
+	}
+
+	for_each_sg((*st)->sgl, sg, num_pages, n)
+		sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+
+	return 0;
+}
+
+static void
+__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
+{
+	struct get_pages_work *work = container_of(_work, typeof(*work), work);
+	struct drm_i915_gem_object *obj = work->obj;
+	struct drm_device *dev = obj->base.dev;
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	ret = -ENOMEM;
+	pinned = 0;
+
+	pvec = kmalloc(num_pages*sizeof(struct page *),
+		       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+	if (pvec == NULL)
+		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+	if (pvec != NULL) {
+		struct mm_struct *mm = obj->userptr.mm;
+
+		use_mm(mm);
+		down_read(&mm->mmap_sem);
+		while (pinned < num_pages) {
+			ret = get_user_pages(work->task, mm,
+					     obj->userptr.ptr + pinned * PAGE_SIZE,
+					     num_pages - pinned,
+					     !obj->userptr.read_only, 0,
+					     pvec + pinned, NULL);
+			if (ret < 0)
+				break;
+
+			pinned += ret;
+		}
+		up_read(&mm->mmap_sem);
+		unuse_mm(mm);
+	}
+
+	mutex_lock(&dev->struct_mutex);
+	if (obj->userptr.work != &work->work) {
+		ret = 0;
+	} else if (pinned == num_pages) {
+		ret = st_set_pages(&obj->pages, pvec, num_pages);
+		if (ret == 0) {
+			list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list);
+			pinned = 0;
+		}
+	}
+
+	obj->userptr.work = ERR_PTR(ret);
+	obj->userptr.active--;
+	drm_gem_object_unreference(&obj->base);
+	mutex_unlock(&dev->struct_mutex);
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+
+	put_task_struct(work->task);
+	kfree(work);
+}
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 *
+	 * Fortunately, we can hook into the mmu_notifier in order to
+	 * discard the page references prior to anything nasty happening
+	 * to the vma (discard or cloning) which should prevent the more
+	 * egregious cases from causing harm.
+	 */
+
+	pvec = NULL;
+	pinned = 0;
+	if (obj->userptr.mm == current->mm) {
+		pvec = kmalloc(num_pages*sizeof(struct page *),
+			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+		if (pvec == NULL) {
+			pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+			if (pvec == NULL)
+				return -ENOMEM;
+		}
+
+		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
+					       !obj->userptr.read_only, pvec);
+	}
+	if (pinned < num_pages) {
+		if (pinned < 0) {
+			ret = pinned;
+			pinned = 0;
+		} else {
+			/* Spawn a worker so that we can acquire the
+			 * user pages without holding our mutex. Access
+			 * to the user pages requires mmap_sem, and we have
+			 * a strict lock ordering of mmap_sem, struct_mutex -
+			 * we already hold struct_mutex here and so cannot
+			 * call gup without encountering a lock inversion.
+			 *
+			 * Userspace will keep on repeating the operation
+			 * (thanks to EAGAIN) until either we hit the fast
+			 * path or the worker completes. If the worker is
+			 * cancelled or superseded, the task is still run
+			 * but the results ignored. (This leads to
+			 * complications that we may have a stray object
+			 * refcount that we need to be wary of when
+			 * checking for existing objects during creation.)
+			 * If the worker encounters an error, it reports
+			 * that error back to this function through
+			 * obj->userptr.work = ERR_PTR.
+			 */
+			ret = -EAGAIN;
+			if (obj->userptr.work == NULL &&
+			    obj->userptr.active < I915_GEM_USERPTR_MAX_ACTIVE) {
+				struct get_pages_work *work;
+
+				work = kmalloc(sizeof(*work), GFP_KERNEL);
+				if (work != NULL) {
+					obj->userptr.work = &work->work;
+					obj->userptr.active++;
+
+					work->obj = obj;
+					drm_gem_object_reference(&obj->base);
+
+					work->task = current;
+					get_task_struct(work->task);
+
+					INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
+					schedule_work(&work->work);
+				} else
+					ret = -ENOMEM;
+			} else {
+				if (IS_ERR(obj->userptr.work)) {
+					ret = PTR_ERR(obj->userptr.work);
+					obj->userptr.work = NULL;
+				}
+			}
+		}
+	} else {
+		ret = st_set_pages(&obj->pages, pvec, num_pages);
+		if (ret == 0) {
+			obj->userptr.work = NULL;
+			pinned = 0;
+		}
+	}
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+	return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+	struct scatterlist *sg;
+	int i;
+
+	BUG_ON(obj->userptr.work != NULL);
+
+	if (obj->madv != I915_MADV_WILLNEED)
+		obj->dirty = 0;
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+		struct page *page = sg_page(sg);
+
+		if (obj->dirty)
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+	}
+	obj->dirty = 0;
+
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+	i915_gem_userptr_release__mmu_notifier(obj);
+
+	if (obj->userptr.mm) {
+		mmput(obj->userptr.mm);
+		obj->userptr.mm = NULL;
+	}
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.get_pages = i915_gem_userptr_get_pages,
+	.put_pages = i915_gem_userptr_put_pages,
+	.release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some normal memory from the process
+ * context - user memory.
+ *
+ * We impose several restrictions upon the memory being mapped
+ * into the GPU.
+ * 1. It must be page aligned (both start/end addresses, i.e ptr and size).
+ * 2. It cannot overlap any other userptr object in the same address space.
+ * 3. It must be normal system memory, not a pointer into another map of IO
+ *    space (e.g. it must not be a GTT mmapping of another object).
+ * 4. We only allow a bo as large as we could in theory map into the GTT,
+ *    that is we limit the size to the total size of the GTT.
+ * 5. The bo is marked as being snoopable. The backing pages are left
+ *    accessible directly by the CPU, but reads and writes by the GPU may
+ *    incur the cost of a snoop (unless you have an LLC architecture).
+ *
+ * Synchronisation between multiple users and the GPU is left to userspace
+ * through the normal set-domain-ioctl. The kernel will enforce that the
+ * GPU relinquishes the VMA before it is returned back to the system
+ * i.e. upon free(), munmap() or process termination. However, the userspace
+ * malloc() library may not immediately relinquish the VMA after free() and
+ * instead reuse it whilst the GPU is still reading and writing to the VMA.
+ * Caveat emptor.
+ *
+ * Also note, that the object created here is not currently a "first class"
+ * object, in that several ioctls are banned. These are the CPU access
+ * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
+ * direct access via your pointer rather than use those ioctls.
+ *
+ * If you think this is a good interface to use to pass GPU memory between
+ * drivers, please use dma-buf instead. In fact, wherever possible use
+ * dma-buf instead.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_userptr *args = data;
+	struct drm_i915_gem_object *obj;
+	int ret;
+	u32 handle;
+
+	if (args->flags & ~(I915_USERPTR_READ_ONLY |
+			    I915_USERPTR_UNSYNCHRONIZED))
+		return -EINVAL;
+
+	if (offset_in_page(args->user_ptr | args->user_size))
+		return -EINVAL;
+
+	if (args->user_size > dev_priv->gtt.base.total)
+		return -E2BIG;
+
+	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
+		return -EFAULT;
+
+	/* Allocate the new object */
+	obj = i915_gem_object_alloc(dev);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	drm_gem_private_object_init(dev, &obj->base, args->user_size);
+	i915_gem_object_init(obj, &i915_gem_userptr_ops);
+	obj->cache_level = I915_CACHE_LLC;
+	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+
+	obj->userptr.ptr = args->user_ptr;
+	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
+
+	/* And keep a pointer to the current->mm for resolving the user pages
+	 * at binding. This means that we need to hook into the mmu_notifier
+	 * in order to detect if the mmu is destroyed.
+	 */
+	ret = -ENOMEM;
+	if ((obj->userptr.mm = get_task_mm(current)))
+		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
+	if (ret == 0)
+		ret = drm_gem_handle_create(file, &obj->base, &handle);
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&obj->base);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
+
+int
+i915_gem_init_userptr(struct drm_device *dev)
+{
+#if defined(CONFIG_MMU_NOTIFIER)
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	hash_init(dev_priv->mmu_notifiers);
+#endif
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 4cc916213362..4fc16a12b9dc 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -201,6 +201,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
+		err_puts(m, err->userptr ? " userptr" : "");
 		err_puts(m, err->ring != -1 ? " " : "");
 		err_puts(m, ring_str(err->ring));
 		err_puts(m, i915_cache_level_str(err->cache_level));
@@ -603,6 +604,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->tiling = obj->tiling_mode;
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
+	err->userptr = obj->userptr.mm != 0;
 	err->ring = obj->ring ? obj->ring->id : -1;
 	err->cache_level = obj->cache_level;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 37c8073a8246..6c145a0be250 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -224,6 +224,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_REG_READ		0x31
 #define DRM_I915_GET_RESET_STATS	0x32
 #define DRM_I915_GEM_CREATE2		0x33
+#define DRM_I915_GEM_USERPTR		0x34
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -275,6 +276,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
+#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1129,4 +1131,18 @@ struct drm_i915_reset_stats {
 	__u32 pad;
 };
 
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u64 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 #endif /* _UAPI_I915_DRM_H_ */
-- 
1.8.5.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-01-28 10:34 ` [PATCH 3/3] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl Chris Wilson
@ 2014-01-28 13:16   ` Chris Wilson
  2014-01-29 20:25     ` Daniel Vetter
  2014-01-29 20:34     ` Daniel Vetter
  0 siblings, 2 replies; 34+ messages in thread
From: Chris Wilson @ 2014-01-28 13:16 UTC (permalink / raw)
  To: intel-gfx; +Cc: Akash Goel

By exporting the ability to map user addresses and insert PTEs
representing their backing pages into the GTT, we can exploit UMA in
order to utilize normal application data as a texture source or even as
a render target (depending upon the capabilities of the chipset). This
has a number of uses, with zero-copy downloads to the GPU and cheap
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has widespread implications, from faster
rendering of client-side software rasterisers (chromium) and mitigation
of stalls due to readback (firefox), to faster pipelining of texture
data (such as pixel buffer objects in GL or data blobs in CL).

v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck for a lost mmu after reacquiring the mutex.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forward porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
    to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
     Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifier
     with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
     within the mmu_notifier range
v16: Fix a typo for iterating over multiple objects in the range and
     rearrange error path to destroy the mmu_notifier locklessly.
     Also close a race between invalidate_range and the get_pages_worker.
v17: Close a race between get_pages_worker/invalidate_range and fresh
     allocations of the same userptr range - and notice that
     struct_mutex was presumed to be held during creation when it wasn't.
v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
     for the struct sg_table and to clear it before reporting an error.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>

userptr
---
 drivers/gpu/drm/i915/Kconfig            |   1 +
 drivers/gpu/drm/i915/Makefile           |   1 +
 drivers/gpu/drm/i915/i915_dma.c         |   1 +
 drivers/gpu/drm/i915/i915_drv.h         |  26 +-
 drivers/gpu/drm/i915/i915_gem.c         |   4 +
 drivers/gpu/drm/i915/i915_gem_userptr.c | 674 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c   |   2 +
 include/uapi/drm/i915_drm.h             |  16 +
 8 files changed, 724 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 73ed59eff139..9940baee10c2 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -5,6 +5,7 @@ config DRM_I915
 	depends on (AGP || AGP=n)
 	select INTEL_GTT
 	select AGP_INTEL if AGP
+	select INTERVAL_TREE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 06d27423af78..3c3c69313518 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -15,6 +15,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
 	  i915_gem_gtt.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_userptr.o \
 	  i915_params.o \
 	  i915_sysfs.o \
 	  i915_trace_points.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0eef7f067a58..1ff4b065491e 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1918,6 +1918,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE2, i915_gem_create2_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c05c5acf2718..44cf7be40751 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
 #include <linux/i2c-algo-bit.h>
 #include <drm/intel-gtt.h>
 #include <linux/backlight.h>
+#include <linux/hashtable.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
@@ -163,6 +164,7 @@ enum hpd_pin {
 		if ((intel_encoder)->base.crtc == (__crtc))
 
 struct drm_i915_private;
+struct i915_mmu_notifier;
 
 enum intel_dpll_id {
 	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
@@ -356,6 +358,7 @@ struct drm_i915_error_state {
 		u32 tiling:2;
 		u32 dirty:1;
 		u32 purgeable:1;
+		u32 userptr:1;
 		s32 ring:4;
 		u32 cache_level:3;
 	} **active_bo, **pinned_bo;
@@ -1461,6 +1464,9 @@ typedef struct drm_i915_private {
 	struct i915_gtt gtt; /* VMA representing the global address space */
 
 	struct i915_gem_mm mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+	DECLARE_HASHTABLE(mmu_notifiers, 7);
+#endif
 
 	/* Kernel Modesetting */
 
@@ -1592,6 +1598,7 @@ struct drm_i915_gem_object_ops {
 	 */
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
+	void (*release)(struct drm_i915_gem_object *);
 };
 
 struct drm_i915_gem_object {
@@ -1705,9 +1712,23 @@ struct drm_i915_gem_object {
 
 	/** for phy allocated objects */
 	struct drm_i915_gem_phys_object *phys_obj;
+
+	union {
+		struct i915_gem_userptr {
+			uintptr_t ptr;
+			unsigned read_only :1;
+			unsigned active :4;
+#define I915_GEM_USERPTR_MAX_ACTIVE 15
+
+			struct mm_struct *mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+			struct i915_mmu_object *mn;
+#endif
+			struct work_struct *work;
+		} userptr;
+	};
 };
 #define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base)
-
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /**
@@ -2017,6 +2038,9 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
 int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
+int i915_gem_init_userptr(struct drm_device *dev);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
 int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index de79dffe15f1..8720d4d98ef1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4380,6 +4380,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->base.import_attach)
 		drm_prime_gem_destroy(&obj->base, NULL);
 
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
@@ -4644,6 +4647,7 @@ int i915_gem_init(struct drm_device *dev)
 			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
 	}
 
+	i915_gem_init_userptr(dev);
 	i915_gem_init_global_gtt(dev);
 
 	ret = i915_gem_context_init(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 000000000000..32f327e34311
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -0,0 +1,674 @@
+/*
+ * Copyright © 2012-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_context.h>
+#include <linux/mmu_notifier.h>
+#include <linux/mempolicy.h>
+#include <linux/swap.h>
+
+#if defined(CONFIG_MMU_NOTIFIER)
+#include <linux/interval_tree.h>
+
+struct i915_mmu_notifier {
+	spinlock_t lock;
+	struct hlist_node node;
+	struct mmu_notifier mn;
+	struct rb_root objects;
+	struct drm_device *dev;
+	struct mm_struct *mm;
+	struct work_struct work;
+	unsigned long count;
+	unsigned long serial;
+};
+
+struct i915_mmu_object {
+	struct i915_mmu_notifier *mmu;
+	struct interval_tree_node it;
+	struct drm_i915_gem_object *obj;
+};
+
+static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
+						       struct mm_struct *mm,
+						       unsigned long start,
+						       unsigned long end)
+{
+	struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
+	struct interval_tree_node *it = NULL;
+	unsigned long serial = 0;
+
+	end--; /* interval ranges are inclusive, but invalidate range is exclusive */
+	while (start < end) {
+		struct drm_i915_gem_object *obj;
+
+		obj = NULL;
+		spin_lock(&mn->lock);
+		if (serial == mn->serial)
+			it = interval_tree_iter_next(it, start, end);
+		else
+			it = interval_tree_iter_first(&mn->objects, start, end);
+		if (it != NULL) {
+			obj = container_of(it, struct i915_mmu_object, it)->obj;
+			drm_gem_object_reference(&obj->base);
+			serial = mn->serial;
+		}
+		spin_unlock(&mn->lock);
+		if (obj == NULL)
+			return;
+
+		mutex_lock(&mn->dev->struct_mutex);
+		/* Cancel any active worker and force us to re-evaluate gup */
+		obj->userptr.work = NULL;
+
+		if (obj->pages != NULL) {
+			struct drm_i915_private *dev_priv = to_i915(mn->dev);
+			struct i915_vma *vma, *tmp;
+			bool was_interruptible;
+
+			was_interruptible = dev_priv->mm.interruptible;
+			dev_priv->mm.interruptible = false;
+
+			list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
+				int ret = i915_vma_unbind(vma);
+				WARN_ON(ret && ret != -EIO);
+			}
+			WARN_ON(i915_gem_object_put_pages(obj));
+
+			dev_priv->mm.interruptible = was_interruptible;
+		}
+
+		start = obj->userptr.ptr + obj->base.size;
+
+		drm_gem_object_unreference(&obj->base);
+		mutex_unlock(&mn->dev->struct_mutex);
+	}
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
+};
+
+static struct i915_mmu_notifier *
+__i915_mmu_notifier_lookup(struct drm_device *dev, struct mm_struct *mm)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_mmu_notifier *mmu;
+
+	/* Protected by dev->struct_mutex */
+	hash_for_each_possible(dev_priv->mmu_notifiers, mmu, node, (unsigned long)mm)
+		if (mmu->mm == mm)
+			return mmu;
+
+	return NULL;
+}
+
+static struct i915_mmu_notifier *
+i915_mmu_notifier_get(struct drm_device *dev, struct mm_struct *mm)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_mmu_notifier *mmu;
+	int ret;
+
+	lockdep_assert_held(&dev->struct_mutex);
+
+	mmu = __i915_mmu_notifier_lookup(dev, mm);
+	if (mmu)
+		return mmu;
+
+	mmu = kmalloc(sizeof(*mmu), GFP_KERNEL);
+	if (mmu == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&mmu->lock);
+	mmu->dev = dev;
+	mmu->mn.ops = &i915_gem_userptr_notifier;
+	mmu->mm = mm;
+	mmu->objects = RB_ROOT;
+	mmu->count = 0;
+	mmu->serial = 0;
+
+	/* Protected by mmap_sem (write-lock) */
+	ret = __mmu_notifier_register(&mmu->mn, mm);
+	if (ret) {
+		kfree(mmu);
+		return ERR_PTR(ret);
+	}
+
+	/* Protected by dev->struct_mutex */
+	hash_add(dev_priv->mmu_notifiers, &mmu->node, (unsigned long)mm);
+	return mmu;
+}
+
+static void
+__i915_mmu_notifier_destroy_worker(struct work_struct *work)
+{
+	struct i915_mmu_notifier *mmu = container_of(work, typeof(*mmu), work);
+	mmu_notifier_unregister(&mmu->mn, mmu->mm);
+	kfree(mmu);
+}
+
+static void
+__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
+{
+	lockdep_assert_held(&mmu->dev->struct_mutex);
+
+	/* Protected by dev->struct_mutex */
+	hash_del(&mmu->node);
+
+	/* Our lock ordering is: mmap_sem, mmu_notifier_srcu, struct_mutex.
+	 * We enter the function holding struct_mutex, therefore we need
+	 * to drop our mutex prior to calling mmu_notifier_unregister in
+	 * order to prevent lock inversion (and system-wide deadlock)
+	 * between the mmap_sem and struct-mutex.
+	 */
+	INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
+	schedule_work(&mmu->work);
+}
+
+static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
+{
+	if (++mmu->serial == 0)
+		mmu->serial = 1;
+}
+
+static void
+i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
+		      struct i915_mmu_object *mn)
+{
+	lockdep_assert_held(&mmu->dev->struct_mutex);
+
+	spin_lock(&mmu->lock);
+	interval_tree_remove(&mn->it, &mmu->objects);
+	__i915_mmu_notifier_update_serial(mmu);
+	spin_unlock(&mmu->lock);
+
+	/* Protected against _add() by dev->struct_mutex */
+	if (--mmu->count == 0)
+		__i915_mmu_notifier_destroy(mmu);
+}
+
+static int
+i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
+		      struct i915_mmu_object *mn)
+{
+	struct interval_tree_node *it;
+	int ret;
+
+	/* Make sure we drop the final active reference (and thereby
+	 * remove the objects from the interval tree) before we do
+	 * the check for overlapping objects.
+	 */
+	ret = i915_mutex_lock_interruptible(mmu->dev);
+	if (ret)
+		return ret;
+
+	i915_gem_retire_requests(mmu->dev);
+
+	/* Disallow overlapping userptr objects */
+	spin_lock(&mmu->lock);
+	it = interval_tree_iter_first(&mmu->objects,
+				      mn->it.start, mn->it.last);
+	if (it) {
+		struct drm_i915_gem_object *obj;
+
+		/* We only need to check the first object as it either
+		 * is idle (and in use elsewhere) or we try again in order
+		 * to give time for the gup-worker to run and flush its
+		 * object references. Afterwards if we find another
+		 * object that is idle (and so referenced elsewhere)
+		 * we know that the overlap with a pinned object is
+		 * genuine.
+		 */
+		obj = container_of(it, struct i915_mmu_object, it)->obj;
+		ret = obj->userptr.active ? -EAGAIN : -EINVAL;
+	} else {
+		interval_tree_insert(&mn->it, &mmu->objects);
+		__i915_mmu_notifier_update_serial(mmu);
+		ret = 0;
+	}
+	spin_unlock(&mmu->lock);
+	mutex_unlock(&mmu->dev->struct_mutex);
+
+	return ret;
+}
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+	struct i915_mmu_object *mn;
+
+	mn = obj->userptr.mn;
+	if (mn == NULL)
+		return;
+
+	i915_mmu_notifier_del(mn->mmu, mn);
+	obj->userptr.mn = NULL;
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	struct i915_mmu_notifier *mmu;
+	struct i915_mmu_object *mn;
+	int ret;
+
+	if (flags & I915_USERPTR_UNSYNCHRONIZED)
+		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+
+	down_write(&obj->userptr.mm->mmap_sem);
+	ret = i915_mutex_lock_interruptible(obj->base.dev);
+	if (ret == 0) {
+		mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
+		if (!IS_ERR(mmu))
+			mmu->count++; /* preemptive add to act as a refcount */
+		else
+			ret = PTR_ERR(mmu);
+		mutex_unlock(&obj->base.dev->struct_mutex);
+	}
+	up_write(&obj->userptr.mm->mmap_sem);
+	if (ret)
+		return ret;
+
+	mn = kzalloc(sizeof(*mn), GFP_KERNEL);
+	if (mn == NULL) {
+		ret = -ENOMEM;
+		goto destroy_mmu;
+	}
+
+	mn->mmu = mmu;
+	mn->it.start = obj->userptr.ptr;
+	mn->it.last = mn->it.start + obj->base.size - 1;
+	mn->obj = obj;
+
+	ret = i915_mmu_notifier_add(mmu, mn);
+	if (ret)
+		goto free_mn;
+
+	obj->userptr.mn = mn;
+	return 0;
+
+free_mn:
+	kfree(mn);
+destroy_mmu:
+	mutex_lock(&obj->base.dev->struct_mutex);
+	if (--mmu->count == 0)
+		__i915_mmu_notifier_destroy(mmu);
+	mutex_unlock(&obj->base.dev->struct_mutex);
+	return ret;
+}
+
+#else
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
+		return -ENODEV;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return 0;
+}
+#endif
+
+struct get_pages_work {
+	struct work_struct work;
+	struct drm_i915_gem_object *obj;
+	struct task_struct *task;
+};
+
+static int
+st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
+{
+	struct scatterlist *sg;
+	int n;
+
+	*st = kmalloc(sizeof(**st), GFP_KERNEL);
+	if (*st == NULL || sg_alloc_table(*st, num_pages, GFP_KERNEL)) {
+		kfree(*st);
+		*st = NULL;
+		return -ENOMEM;
+	}
+
+	for_each_sg((*st)->sgl, sg, num_pages, n)
+		sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+
+	return 0;
+}
+
+static void
+__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
+{
+	struct get_pages_work *work = container_of(_work, typeof(*work), work);
+	struct drm_i915_gem_object *obj = work->obj;
+	struct drm_device *dev = obj->base.dev;
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	ret = -ENOMEM;
+	pinned = 0;
+
+	pvec = kmalloc(num_pages*sizeof(struct page *),
+		       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+	if (pvec == NULL)
+		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+	if (pvec != NULL) {
+		struct mm_struct *mm = obj->userptr.mm;
+
+		use_mm(mm);
+		down_read(&mm->mmap_sem);
+		while (pinned < num_pages) {
+			ret = get_user_pages(work->task, mm,
+					     obj->userptr.ptr + pinned * PAGE_SIZE,
+					     num_pages - pinned,
+					     !obj->userptr.read_only, 0,
+					     pvec + pinned, NULL);
+			if (ret < 0)
+				break;
+
+			pinned += ret;
+		}
+		up_read(&mm->mmap_sem);
+		unuse_mm(mm);
+	}
+
+	mutex_lock(&dev->struct_mutex);
+	if (obj->userptr.work != &work->work) {
+		ret = 0;
+	} else if (pinned == num_pages) {
+		ret = st_set_pages(&obj->pages, pvec, num_pages);
+		if (ret == 0) {
+			list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list);
+			pinned = 0;
+		}
+	}
+
+	obj->userptr.work = ERR_PTR(ret);
+	obj->userptr.active--;
+	drm_gem_object_unreference(&obj->base);
+	mutex_unlock(&dev->struct_mutex);
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+
+	put_task_struct(work->task);
+	kfree(work);
+}
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 *
+	 * Fortunately, we can hook into the mmu_notifier in order to
+	 * discard the page references prior to anything nasty happening
+	 * to the vma (discard or cloning) which should prevent the more
+	 * egregious cases from causing harm.
+	 */
+
+	pvec = NULL;
+	pinned = 0;
+	if (obj->userptr.mm == current->mm) {
+		pvec = kmalloc(num_pages*sizeof(struct page *),
+			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+		if (pvec == NULL) {
+			pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+			if (pvec == NULL)
+				return -ENOMEM;
+		}
+
+		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
+					       !obj->userptr.read_only, pvec);
+	}
+	if (pinned < num_pages) {
+		if (pinned < 0) {
+			ret = pinned;
+			pinned = 0;
+		} else {
+			/* Spawn a worker so that we can acquire the
+			 * user pages without holding our mutex. Access
+			 * to the user pages requires mmap_sem, and we have
+			 * a strict lock ordering of mmap_sem, struct_mutex -
+			 * we already hold struct_mutex here and so cannot
+			 * call gup without encountering a lock inversion.
+			 *
+			 * Userspace will keep on repeating the operation
+			 * (thanks to EAGAIN) until either we hit the fast
+			 * path or the worker completes. If the worker is
+			 * cancelled or superseded, the task is still run
+			 * but the results ignored. (This leads to
+			 * complications that we may have a stray object
+			 * refcount that we need to be wary of when
+			 * checking for existing objects during creation.)
+			 * If the worker encounters an error, it reports
+			 * that error back to this function through
+			 * obj->userptr.work = ERR_PTR.
+			 */
+			ret = -EAGAIN;
+			if (obj->userptr.work == NULL &&
+			    obj->userptr.active < I915_GEM_USERPTR_MAX_ACTIVE) {
+				struct get_pages_work *work;
+
+				work = kmalloc(sizeof(*work), GFP_KERNEL);
+				if (work != NULL) {
+					obj->userptr.work = &work->work;
+					obj->userptr.active++;
+
+					work->obj = obj;
+					drm_gem_object_reference(&obj->base);
+
+					work->task = current;
+					get_task_struct(work->task);
+
+					INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
+					schedule_work(&work->work);
+				} else
+					ret = -ENOMEM;
+			} else {
+				if (IS_ERR(obj->userptr.work)) {
+					ret = PTR_ERR(obj->userptr.work);
+					obj->userptr.work = NULL;
+				}
+			}
+		}
+	} else {
+		ret = st_set_pages(&obj->pages, pvec, num_pages);
+		if (ret == 0) {
+			obj->userptr.work = NULL;
+			pinned = 0;
+		}
+	}
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+	return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+	struct scatterlist *sg;
+	int i;
+
+	BUG_ON(obj->userptr.work != NULL);
+
+	if (obj->madv != I915_MADV_WILLNEED)
+		obj->dirty = 0;
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+		struct page *page = sg_page(sg);
+
+		if (obj->dirty)
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+	}
+	obj->dirty = 0;
+
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+	i915_gem_userptr_release__mmu_notifier(obj);
+
+	if (obj->userptr.mm) {
+		mmput(obj->userptr.mm);
+		obj->userptr.mm = NULL;
+	}
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.get_pages = i915_gem_userptr_get_pages,
+	.put_pages = i915_gem_userptr_put_pages,
+	.release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some normal memory from the process
+ * context - user memory.
+ *
+ * We impose several restrictions upon the memory being mapped
+ * into the GPU.
+ * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
+ * 2. It cannot overlap any other userptr object in the same address space.
+ * 3. It must be normal system memory, not a pointer into another map of IO
+ *    space (e.g. it must not be a GTT mmapping of another object).
+ * 4. We only allow a bo as large as we could in theory map into the GTT,
+ *    that is we limit the size to the total size of the GTT.
+ * 5. The bo is marked as being snoopable. The backing pages are left
+ *    accessible directly by the CPU, but reads and writes by the GPU may
+ *    incur the cost of a snoop (unless you have an LLC architecture).
+ *
+ * Synchronisation between multiple users and the GPU is left to userspace
+ * through the normal set-domain-ioctl. The kernel will enforce that the
+ * GPU relinquishes the VMA before it is returned back to the system
+ * i.e. upon free(), munmap() or process termination. However, the userspace
+ * malloc() library may not immediately relinquish the VMA after free() and
+ * instead reuse it whilst the GPU is still reading and writing to the VMA.
+ * Caveat emptor.
+ *
+ * Also note, that the object created here is not currently a "first class"
+ * object, in that several ioctls are banned. These are the CPU access
+ * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
+ * direct access via your pointer rather than use those ioctls.
+ *
+ * If you think this is a good interface to use to pass GPU memory between
+ * drivers, please use dma-buf instead. In fact, wherever possible use
+ * dma-buf instead.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_userptr *args = data;
+	struct drm_i915_gem_object *obj;
+	int ret;
+	u32 handle;
+
+	if (args->flags & ~(I915_USERPTR_READ_ONLY |
+			    I915_USERPTR_UNSYNCHRONIZED))
+		return -EINVAL;
+
+	if (offset_in_page(args->user_ptr | args->user_size))
+		return -EINVAL;
+
+	if (args->user_size > dev_priv->gtt.base.total)
+		return -E2BIG;
+
+	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
+		return -EFAULT;
+
+	/* Allocate the new object */
+	obj = i915_gem_object_alloc(dev);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	drm_gem_private_object_init(dev, &obj->base, args->user_size);
+	i915_gem_object_init(obj, &i915_gem_userptr_ops);
+	obj->cache_level = I915_CACHE_LLC;
+	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+
+	obj->userptr.ptr = args->user_ptr;
+	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
+
+	/* And keep a pointer to the current->mm for resolving the user pages
+	 * at binding. This means that we need to hook into the mmu_notifier
+	 * in order to detect if the mmu is destroyed.
+	 */
+	ret = -ENOMEM;
+	if ((obj->userptr.mm = get_task_mm(current)))
+		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
+	if (ret == 0)
+		ret = drm_gem_handle_create(file, &obj->base, &handle);
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&obj->base);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
+
+int
+i915_gem_init_userptr(struct drm_device *dev)
+{
+#if defined(CONFIG_MMU_NOTIFIER)
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	hash_init(dev_priv->mmu_notifiers);
+#endif
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 4cc916213362..4fc16a12b9dc 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -201,6 +201,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
+		err_puts(m, err->userptr ? " userptr" : "");
 		err_puts(m, err->ring != -1 ? " " : "");
 		err_puts(m, ring_str(err->ring));
 		err_puts(m, i915_cache_level_str(err->cache_level));
@@ -603,6 +604,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->tiling = obj->tiling_mode;
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
+	err->userptr = obj->userptr.mm != 0;
 	err->ring = obj->ring ? obj->ring->id : -1;
 	err->cache_level = obj->cache_level;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 37c8073a8246..6c145a0be250 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -224,6 +224,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_REG_READ		0x31
 #define DRM_I915_GET_RESET_STATS	0x32
 #define DRM_I915_GEM_CREATE2		0x33
+#define DRM_I915_GEM_USERPTR		0x34
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -275,6 +276,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
+#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1129,4 +1131,18 @@ struct drm_i915_reset_stats {
 	__u32 pad;
 };
 
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u64 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 #endif /* _UAPI_I915_DRM_H_ */
-- 
1.8.5.3
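
As a rough illustration of the uapi added above, here is a minimal userspace
sketch of creating a bo from malloc'ed memory with the new ioctl. It is not
part of the patch: it assumes the patched i915_drm.h is installed, that fd is
an already-open i915 DRM fd, and the page size, buffer size and error handling
are purely illustrative.

/* Minimal sketch (not part of the patch): wrap a page-aligned, malloc'ed
 * buffer in a GEM handle via the new userptr ioctl.
 */
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int userptr_create(int fd, void *ptr, size_t size, uint32_t *handle)
{
	struct drm_i915_gem_userptr arg;

	memset(&arg, 0, sizeof(arg));
	arg.user_ptr = (uintptr_t)ptr;	/* must be page aligned */
	arg.user_size = size;		/* must be a multiple of the page size */
	arg.flags = 0;			/* synchronized (mmu_notifier) mode */

	if (ioctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
		return -errno;

	*handle = arg.handle;		/* nonzero GEM handle on success */
	return 0;
}

static int example(int fd, uint32_t *handle)
{
	void *ptr;
	const size_t sz = 16 * 4096;	/* example: 16 pages of 4KiB */

	/* posix_memalign() provides the page alignment the ioctl insists on
	 * for both the start address and the size. */
	if (posix_memalign(&ptr, 4096, sz))
		return -ENOMEM;

	return userptr_create(fd, ptr, sz, handle);
}

The resulting handle can then be used with execbuffer like any other bo,
bearing in mind that the CPU-access ioctls (pread, pwrite, GTT mmap) are
intentionally refused for userptr objects.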

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-01-28 13:16   ` [PATCH] " Chris Wilson
@ 2014-01-29 20:25     ` Daniel Vetter
  2014-01-29 21:53       ` Chris Wilson
  2014-01-29 20:34     ` Daniel Vetter
  1 sibling, 1 reply; 34+ messages in thread
From: Daniel Vetter @ 2014-01-29 20:25 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Akash Goel

On Tue, Jan 28, 2014 at 01:16:46PM +0000, Chris Wilson wrote:
> By exporting the ability to map user address and inserting PTEs
> representing their backing pages into the GTT, we can exploit UMA in order
> to utilize normal application data as a texture source or even as a
> render target (depending upon the capabilities of the chipset). This has
> a number of uses, with zero-copy downloads to the GPU and efficient
> readback making the intermixed streaming of CPU and GPU operations
> fairly efficient. This ability has many widespread implications from
> faster rendering of client-side software rasterisers (chromium),
> mitigation of stalls due to read back (firefox) and to faster pipelining
> of texture data (such as pixel buffer objects in GL or data blobs in CL).
> 
> v2: Compile with CONFIG_MMU_NOTIFIER
> v3: We can sleep while performing invalidate-range, which we can utilise
> to drop our page references prior to the kernel manipulating the vma
> (for either discard or cloning) and so protect normal users.
> v4: Only run the invalidate notifier if the range intercepts the bo.
> v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
> v6: Recheck after reacquire mutex for lost mmu.
> v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
> v8: Fix rebasing error after forwarding porting the back port.
> v9: Limit the userptr to page aligned entries. We now expect userspace
>     to handle all the offset-in-page adjustments itself.
> v10: Prevent vma from being copied across fork to avoid issues with cow.
> v11: Drop vma behaviour changes -- locking is nigh on impossible.
>      Use a worker to load user pages to avoid lock inversions.
> v12: Use get_task_mm()/mmput() for correct refcounting of mm.
> v13: Use a worker to release the mmu_notifier to avoid lock inversion
> v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifer
>      with its own locking and tree of objects for each mm/mmu_notifier.
> v15: Prevent overlapping userptr objects, and invalidate all objects
>      within the mmu_notifier range
> v16: Fix a typo for iterating over multiple objects in the range and
>      rearrange error path to destroy the mmu_notifier locklessly.
>      Also close a race between invalidate_range and the get_pages_worker.
> v17: Close a race between get_pages_worker/invalidate_range and fresh
>      allocations of the same userptr range - and notice that
>      struct_mutex was presumed to be held when during creation it wasn't.
> v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
>      for the struct sg_table and to clear it before reporting an error.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
> Cc: Akash Goel <akash.goel@intel.com>
> Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>

[snip]

> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 37c8073a8246..6c145a0be250 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -224,6 +224,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_REG_READ		0x31
>  #define DRM_I915_GET_RESET_STATS	0x32
>  #define DRM_I915_GEM_CREATE2		0x33
> +#define DRM_I915_GEM_USERPTR		0x34
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -275,6 +276,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>  #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
>  #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
> +#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
>  
>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>   * on the security mechanisms provided by hardware.
> @@ -1129,4 +1131,18 @@ struct drm_i915_reset_stats {
>  	__u32 pad;
>  };
>  
> +struct drm_i915_gem_userptr {
> +	__u64 user_ptr;
> +	__u64 user_size;
> +	__u32 flags;
> +#define I915_USERPTR_READ_ONLY 0x1
> +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000

So originally I thought we needed this due to the massive overhead of the
mmu notifier. But now, with the nice shared mmu notifiers, I think that
overhead is gone, so I'd prefer to also ditch this option.

Same goes for the MMU_NOTIFIER conditional code: imo we should simply
select it - most distros will have it anyway, and users will be really
surprised if they lose userspace driver features for seemingly irrelevant
reasons.

Beside this I think I've run out of stuff to complain about ;-)

Cheers, Daniel

> +	/**
> +	 * Returned handle for the object.
> +	 *
> +	 * Object handles are nonzero.
> +	 */
> +	__u32 handle;
> +};
> +
>  #endif /* _UAPI_I915_DRM_H_ */
> -- 
> 1.8.5.3
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
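
To make the trade-off concrete, here is a sketch of what keeping the flag
implies for userspace (based on the patch above, not code from the series):
the safe, mmu_notifier-backed mode is tried first, and the CAP_SYS_ADMIN-only
UNSYNCHRONIZED mode is only a fallback for kernels built without
CONFIG_MMU_NOTIFIER, which report -ENODEV.

/* Sketch only: same includes and assumptions as the earlier userspace
 * example (patched i915_drm.h, open i915 DRM fd).
 */
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int userptr_create_with_fallback(int fd, void *ptr, size_t size,
					uint32_t *handle)
{
	struct drm_i915_gem_userptr arg;

	memset(&arg, 0, sizeof(arg));
	arg.user_ptr = (uintptr_t)ptr;
	arg.user_size = size;
	arg.flags = 0;				/* synchronized mode */

	if (ioctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg) == 0)
		goto out;
	if (errno != ENODEV)
		return -errno;			/* genuine failure */

	/* Kernel built without CONFIG_MMU_NOTIFIER: privileged,
	 * unsynchronized fallback (fails with -EPERM without
	 * CAP_SYS_ADMIN). */
	arg.flags = I915_USERPTR_UNSYNCHRONIZED;
	if (ioctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
		return -errno;
out:
	*handle = arg.handle;
	return 0;
}

Selecting MMU_NOTIFIER from Kconfig, as suggested here, would make the
fallback path dead code.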

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-01-28 13:16   ` [PATCH] " Chris Wilson
  2014-01-29 20:25     ` Daniel Vetter
@ 2014-01-29 20:34     ` Daniel Vetter
  2014-01-29 21:52       ` Chris Wilson
  2014-02-03 15:28       ` Tvrtko Ursulin
  1 sibling, 2 replies; 34+ messages in thread
From: Daniel Vetter @ 2014-01-29 20:34 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Akash Goel

Actually I've found something else to complain about:

On Tue, Jan 28, 2014 at 2:16 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> +#define I915_USERPTR_READ_ONLY 0x1

This smells like an insta-root-exploit:
1. mmap /lib/ld-linux.so as read-only
2. userptr bind that mmap'ed area as READ_ONLY
3. blit exploit code over it
4. profit

I also don't see a way we could fix this, at least without the
hardware providing read-only modes in the ptes. Which also requires us
to actually trust it to follow them, even when they exist ...
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-01-29 20:34     ` Daniel Vetter
@ 2014-01-29 21:52       ` Chris Wilson
  2014-02-03 15:28       ` Tvrtko Ursulin
  1 sibling, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2014-01-29 21:52 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, Akash Goel

On Wed, Jan 29, 2014 at 09:34:42PM +0100, Daniel Vetter wrote:
> Actually I've found something else to complain about:
> 
> On Tue, Jan 28, 2014 at 2:16 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > +#define I915_USERPTR_READ_ONLY 0x1
> 
> This smells like an insta-root-exploit:
> 1. mmap /lib/ld-linux.so as read-only
> 2. userptr bind that mmap'ed area as READ_ONLY
> 3. blit exploit code over it
> 4. profit
> 
> I also don't see a way we could fix this, at least without the
> hardware providing read-only modes in the ptes. Which also requires us
> to actually trust it to follow them, even when they exists ...

Allow it for root only code then, unless we can expose it on supported
hw ;-)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
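
A sketch of the root-only gating suggested here (illustrative only, not from
the posted series; the has_read_only_ggtt() helper is hypothetical). The
series as eventually merged takes the even more conservative route of
rejecting the flag outright while reserving the bit - see the v19/v20
changelog entries further down the thread.

/* Illustrative kernel-side fragment, imagined as living in
 * i915_gem_userptr.c: only honour I915_USERPTR_READ_ONLY if the hardware
 * can enforce read-only GTT PTEs; otherwise restrict it to CAP_SYS_ADMIN.
 * has_read_only_ggtt() is a hypothetical capability check.
 */
static int check_read_only(struct drm_i915_private *dev_priv,
			   unsigned int flags)
{
	if ((flags & I915_USERPTR_READ_ONLY) == 0)
		return 0;

	if (has_read_only_ggtt(dev_priv))	/* hypothetical */
		return 0;

	/* Without hardware enforcement, a "read-only" userptr is still
	 * writable by the GPU - the exploit outlined above - so only let
	 * root keep both pieces. */
	return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
}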

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-01-29 20:25     ` Daniel Vetter
@ 2014-01-29 21:53       ` Chris Wilson
  2014-01-29 21:58         ` Daniel Vetter
  0 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2014-01-29 21:53 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, Akash Goel

On Wed, Jan 29, 2014 at 09:25:51PM +0100, Daniel Vetter wrote:
> So originally I've thought we need this due to the massive overhead of the
> mmu notifier. But now with the nice shared mmu notifiers I've thought that
> overhead is gone I prefer to also ditch this option.
> 
> Same goes about the MMU_NOTIFIER conditional code, imo we simply should
> select this - most distros will have it anyway and users will be really
> suprised if they lose userspace driver features for seemingly irrelevant
> reasons.

Seriously? You think the overhead is magically gone?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-01-29 21:53       ` Chris Wilson
@ 2014-01-29 21:58         ` Daniel Vetter
  2014-01-30 11:06           ` Chris Wilson
  0 siblings, 1 reply; 34+ messages in thread
From: Daniel Vetter @ 2014-01-29 21:58 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx, Akash Goel

On Wed, Jan 29, 2014 at 10:53 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Wed, Jan 29, 2014 at 09:25:51PM +0100, Daniel Vetter wrote:
>> So originally I've thought we need this due to the massive overhead of the
>> mmu notifier. But now with the nice shared mmu notifiers I've thought that
>> overhead is gone I prefer to also ditch this option.
>>
>> Same goes about the MMU_NOTIFIER conditional code, imo we simply should
>> select this - most distros will have it anyway and users will be really
>> suprised if they lose userspace driver features for seemingly irrelevant
>> reasons.
>
> Seriously? You think the overhead is magically gone?

Well the once-per-process overhead is still there, and imo it's ok to
eat that. But the complaints I've heard concerned the per-object
overhead, so I wonder how much of that is still relevant.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-01-29 21:58         ` Daniel Vetter
@ 2014-01-30 11:06           ` Chris Wilson
  2014-02-03 15:13             ` Tvrtko Ursulin
  0 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2014-01-30 11:06 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, Akash Goel

On Wed, Jan 29, 2014 at 10:58:48PM +0100, Daniel Vetter wrote:
> On Wed, Jan 29, 2014 at 10:53 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > On Wed, Jan 29, 2014 at 09:25:51PM +0100, Daniel Vetter wrote:
> >> So originally I've thought we need this due to the massive overhead of the
> >> mmu notifier. But now with the nice shared mmu notifiers I've thought that
> >> overhead is gone I prefer to also ditch this option.
> >>
> >> Same goes about the MMU_NOTIFIER conditional code, imo we simply should
> >> select this - most distros will have it anyway and users will be really
> >> suprised if they lose userspace driver features for seemingly irrelevant
> >> reasons.
> >
> > Seriously? You think the overhead is magically gone?
> 
> Well the once-per-process overhead is still there, and imo it's ok to
> eat that. But the complaints I've heard concerned the per-object
> overhead, so I wonder how much of that is still relevant.

I am still annoyed by the thought of having to enable an extra feature
in my kernels, and the extra code that is then run on every mm
operation. (Mixing mmu_notifiers + mm debugging was an especially
unpleasant experience that I don't wish to ever do again.)

Numbers talk though; if we can't demonstrate a significant difference
between the two, it can die. Keeping a debug mode to turn off
mmu_notifiers would still be good so that we can keep track of any
impact over time.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-01-30 11:06           ` Chris Wilson
@ 2014-02-03 15:13             ` Tvrtko Ursulin
  0 siblings, 0 replies; 34+ messages in thread
From: Tvrtko Ursulin @ 2014-02-03 15:13 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx, Akash Goel

On 01/30/2014 11:06 AM, Chris Wilson wrote:
> On Wed, Jan 29, 2014 at 10:58:48PM +0100, Daniel Vetter wrote:
>> On Wed, Jan 29, 2014 at 10:53 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>>> On Wed, Jan 29, 2014 at 09:25:51PM +0100, Daniel Vetter wrote:
>>>> So originally I've thought we need this due to the massive overhead of the
>>>> mmu notifier. But now with the nice shared mmu notifiers I've thought that
>>>> overhead is gone I prefer to also ditch this option.
>>>>
>>>> Same goes about the MMU_NOTIFIER conditional code, imo we simply should
>>>> select this - most distros will have it anyway and users will be really
>>>> suprised if they lose userspace driver features for seemingly irrelevant
>>>> reasons.
>>>
>>> Seriously? You think the overhead is magically gone?
>>
>> Well the once-per-process overhead is still there, and imo it's ok to
>> eat that. But the complaints I've heard concerned the per-object
>> overhead, so I wonder how much of that is still relevant.
>
> I am still annoyed by the thought of having to enable an extra feature
> in my kernels, and the extra code that is then run on every mm
> operation. (Mixing mmu_notifiers + mm debuging was an especially
> unpleasant experience that I don't wish to ever do again.)
>
> Numbers talk though, if we can't demonstrate a significant difference
> between the two, it can die. Keeping a debug mode to turn off
> mmu_notifiers would still be good so that we can keep track of any
> impact over time.

Writing a benchmark for this is next on my userptr to-do list, after
completing the i-g-t test case.

Btw, I did not notice sooner that you were discussing this, since I got
dropped from Cc. Only when Rafael mentioned he had seen some discussion
about a potential exploit did I go looking.

Tvrtko
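
For reference, the per-object overhead being argued about here could be
measured with something as simple as timing a create/close loop. A rough
sketch (not the i-g-t test; loop count, timer choice and the lack of any
warm-up are arbitrary):

/* Rough micro-benchmark sketch: time N userptr create/close cycles
 * against the same page-aligned buffer to expose the per-object
 * mmu_notifier cost. fd is an open i915 DRM fd.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <sys/ioctl.h>
#include <drm/drm.h>
#include <drm/i915_drm.h>

static int bench_userptr(int fd, void *ptr, size_t size, int loops)
{
	struct timespec t0, t1;
	int i;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (i = 0; i < loops; i++) {
		struct drm_i915_gem_userptr arg;
		struct drm_gem_close close_arg;

		memset(&arg, 0, sizeof(arg));
		arg.user_ptr = (uintptr_t)ptr;
		arg.user_size = size;
		if (ioctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
			return -errno;

		memset(&close_arg, 0, sizeof(close_arg));
		close_arg.handle = arg.handle;
		if (ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close_arg))
			return -errno;
	}
	clock_gettime(CLOCK_MONOTONIC, &t1);

	printf("%d create/close cycles in %.3f ms\n", loops,
	       (t1.tv_sec - t0.tv_sec) * 1e3 +
	       (t1.tv_nsec - t0.tv_nsec) * 1e-6);
	return 0;
}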

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-01-29 20:34     ` Daniel Vetter
  2014-01-29 21:52       ` Chris Wilson
@ 2014-02-03 15:28       ` Tvrtko Ursulin
  2014-02-04 10:56         ` Daniel Vetter
  1 sibling, 1 reply; 34+ messages in thread
From: Tvrtko Ursulin @ 2014-02-03 15:28 UTC (permalink / raw)
  To: Daniel Vetter, Chris Wilson; +Cc: intel-gfx, Akash Goel


On 01/29/2014 08:34 PM, Daniel Vetter wrote:
> Actually I've found something else to complain about:
>
> On Tue, Jan 28, 2014 at 2:16 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> +#define I915_USERPTR_READ_ONLY 0x1
>
> This smells like an insta-root-exploit:
> 1. mmap /lib/ld-linux.so as read-only
> 2. userptr bind that mmap'ed area as READ_ONLY
> 3. blit exploit code over it
> 4. profit
>
> I also don't see a way we could fix this, at least without the
> hardware providing read-only modes in the ptes. Which also requires us
> to actually trust it to follow them, even when they exists ...

Would disallowing mapping of shared pages help and be acceptable 
considering intended use cases?

Tvrtko
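
What is being suggested here might look roughly like the check below (a
sketch, not something from the series, and exactly the kind of thing that,
as argued next, would still need a proper vm audit): refuse to wrap ranges
containing shared or file-backed mappings.

/* Kernel-side fragment, sketch only: reject userptr ranges that contain
 * shared or file-backed VMAs. The caller would need to hold mmap_sem for
 * read; holes in the range are ignored for brevity, and whether such a
 * check is actually sufficient is the open question.
 */
static int userptr_check_vmas_private(struct mm_struct *mm,
				      unsigned long start,
				      unsigned long length)
{
	const unsigned long end = start + length;
	struct vm_area_struct *vma;

	for (vma = find_vma(mm, start);
	     vma && vma->vm_start < end;
	     vma = vma->vm_next) {
		if ((vma->vm_flags & VM_SHARED) || vma->vm_file)
			return -EPERM;
	}

	return 0;
}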

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-02-03 15:28       ` Tvrtko Ursulin
@ 2014-02-04 10:56         ` Daniel Vetter
  2014-02-05 15:55           ` Jesse Barnes
  0 siblings, 1 reply; 34+ messages in thread
From: Daniel Vetter @ 2014-02-04 10:56 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, Akash Goel

On Mon, Feb 03, 2014 at 03:28:37PM +0000, Tvrtko Ursulin wrote:
> 
> On 01/29/2014 08:34 PM, Daniel Vetter wrote:
> >Actually I've found something else to complain about:
> >
> >On Tue, Jan 28, 2014 at 2:16 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >>+#define I915_USERPTR_READ_ONLY 0x1
> >
> >This smells like an insta-root-exploit:
> >1. mmap /lib/ld-linux.so as read-only
> >2. userptr bind that mmap'ed area as READ_ONLY
> >3. blit exploit code over it
> >4. profit
> >
> >I also don't see a way we could fix this, at least without the
> >hardware providing read-only modes in the ptes. Which also requires us
> >to actually trust it to follow them, even when they exists ...
> 
> Would disallowing mapping of shared pages help and be acceptable
> considering intended use cases?

The above exploit is the simplest one I could come up with, but I expect
the vm in general won't be too happy if we write to pages it never expects
to be written to. We could do fun stuff like corrupting the pagecache or
swap cache, which in conjunction with stable kernel pages (which some I/O
paths need) is rather likely to result in havoc.

Essentially I'm no vm expert, and this definitely needs a full vm audit
even before considering it at all. So I'd like to drop support for it in
the initial version ...
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-02-04 10:56         ` Daniel Vetter
@ 2014-02-05 15:55           ` Jesse Barnes
  0 siblings, 0 replies; 34+ messages in thread
From: Jesse Barnes @ 2014-02-05 15:55 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, Akash Goel

On Tue, 4 Feb 2014 11:56:47 +0100
Daniel Vetter <daniel@ffwll.ch> wrote:

> On Mon, Feb 03, 2014 at 03:28:37PM +0000, Tvrtko Ursulin wrote:
> > 
> > On 01/29/2014 08:34 PM, Daniel Vetter wrote:
> > >Actually I've found something else to complain about:
> > >
> > >On Tue, Jan 28, 2014 at 2:16 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > >>+#define I915_USERPTR_READ_ONLY 0x1
> > >
> > >This smells like an insta-root-exploit:
> > >1. mmap /lib/ld-linux.so as read-only
> > >2. userptr bind that mmap'ed area as READ_ONLY
> > >3. blit exploit code over it
> > >4. profit
> > >
> > >I also don't see a way we could fix this, at least without the
> > >hardware providing read-only modes in the ptes. Which also requires us
> > >to actually trust it to follow them, even when they exists ...
> > 
> > Would disallowing mapping of shared pages help and be acceptable
> > considering intended use cases?
> 
> The above exploit is the simplest one I could come up with, but I expect
> the vm in general won't be too happy if we write to pages it never expects
> are written to. We could do fun stuff like corrupt pagecache or swap
> cache. Which in conjunction with stable kernel pages (which some I/O paths
> needed) is rather likely to result in havoc.
> 
> Essentially I'm no vm expert, and this definitely needs a full vm audit
> even before considering it at all. So I'd like to drop support for it in
> the initial version ...

Yeah I think we'd need to only allow this usage for root (i.e. you get
to keep both pieces) or for platforms where we actually have RW[X] GTT
control (e.g. BDW).

A shared mapping restriction *might* be sufficient, but like Daniel
said, the real fix is to properly handle the PROT_* bits...

Seems like it could be kind of a cool feature though, so we should try
to enable it on BDW+.

Jesse

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-05-16 15:34 ` Volkin, Bradley D
@ 2014-05-16 16:39   ` Daniel Vetter
  0 siblings, 0 replies; 34+ messages in thread
From: Daniel Vetter @ 2014-05-16 16:39 UTC (permalink / raw)
  To: Volkin, Bradley D; +Cc: intel-gfx, Akash Goel

On Fri, May 16, 2014 at 08:34:52AM -0700, Volkin, Bradley D wrote:
> Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com> 
> 
> On Fri, May 16, 2014 at 02:22:37PM +0100, Chris Wilson wrote:
> > By exporting the ability to map user address and inserting PTEs
> > representing their backing pages into the GTT, we can exploit UMA in order
> > to utilize normal application data as a texture source or even as a
> > render target (depending upon the capabilities of the chipset). This has
> > a number of uses, with zero-copy downloads to the GPU and efficient
> > readback making the intermixed streaming of CPU and GPU operations
> > fairly efficient. This ability has many widespread implications from
> > faster rendering of client-side software rasterisers (chromium),
> > mitigation of stalls due to read back (firefox) and to faster pipelining
> > of texture data (such as pixel buffer objects in GL or data blobs in CL).
> > 
> > v2: Compile with CONFIG_MMU_NOTIFIER
> > v3: We can sleep while performing invalidate-range, which we can utilise
> > to drop our page references prior to the kernel manipulating the vma
> > (for either discard or cloning) and so protect normal users.
> > v4: Only run the invalidate notifier if the range intercepts the bo.
> > v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
> > v6: Recheck after reacquire mutex for lost mmu.
> > v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
> > v8: Fix rebasing error after forwarding porting the back port.
> > v9: Limit the userptr to page aligned entries. We now expect userspace
> >     to handle all the offset-in-page adjustments itself.
> > v10: Prevent vma from being copied across fork to avoid issues with cow.
> > v11: Drop vma behaviour changes -- locking is nigh on impossible.
> >      Use a worker to load user pages to avoid lock inversions.
> > v12: Use get_task_mm()/mmput() for correct refcounting of mm.
> > v13: Use a worker to release the mmu_notifier to avoid lock inversion
> > v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifer
> >      with its own locking and tree of objects for each mm/mmu_notifier.
> > v15: Prevent overlapping userptr objects, and invalidate all objects
> >      within the mmu_notifier range
> > v16: Fix a typo for iterating over multiple objects in the range and
> >      rearrange error path to destroy the mmu_notifier locklessly.
> >      Also close a race between invalidate_range and the get_pages_worker.
> > v17: Close a race between get_pages_worker/invalidate_range and fresh
> >      allocations of the same userptr range - and notice that
> >      struct_mutex was presumed to be held when during creation it wasn't.
> > v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
> >      for the struct sg_table and to clear it before reporting an error.
> > v19: Always error out on read-only userptr requests as we don't have the
> >      hardware infrastructure to support them at the moment.
> > v20: Refuse to implement read-only support until we have the required
> >      infrastructure - but reserve the bit in flags for future use.
> > v21: use_mm() is not required for get_user_pages(). It is only meant to
> >      be used to fix up the kernel thread's current->mm for use with
> >      copy_user().
> > v22: Use sg_alloc_table_from_pages for that chunky feeling
> > v23: Export a function for sanity checking dma-buf rather than encode
> >      userptr details elsewhere, and clean up comments based on
> >      suggestions by Bradley.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> > Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
> > Cc: Akash Goel <akash.goel@intel.com>
> > Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
> > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>

Bring on the champagne!

Queued for -next, thanks for the patch.
-Daniel

> > ---
> >  drivers/gpu/drm/i915/Kconfig            |   1 +
> >  drivers/gpu/drm/i915/Makefile           |   1 +
> >  drivers/gpu/drm/i915/i915_dma.c         |   1 +
> >  drivers/gpu/drm/i915/i915_drv.h         |  25 +-
> >  drivers/gpu/drm/i915/i915_gem.c         |   4 +
> >  drivers/gpu/drm/i915/i915_gem_dmabuf.c  |   8 +
> >  drivers/gpu/drm/i915/i915_gem_userptr.c | 711 ++++++++++++++++++++++++++++++++
> >  drivers/gpu/drm/i915/i915_gpu_error.c   |   2 +
> >  include/uapi/drm/i915_drm.h             |  16 +
> >  9 files changed, 768 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c
> > 
> > diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> > index e4e3c01b8cbc..437e1824d0bf 100644
> > --- a/drivers/gpu/drm/i915/Kconfig
> > +++ b/drivers/gpu/drm/i915/Kconfig
> > @@ -5,6 +5,7 @@ config DRM_I915
> >  	depends on (AGP || AGP=n)
> >  	select INTEL_GTT
> >  	select AGP_INTEL if AGP
> > +	select INTERVAL_TREE
> >  	# we need shmfs for the swappable backing store, and in particular
> >  	# the shmem_readpage() which depends upon tmpfs
> >  	select SHMEM
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > index b6ce5640d592..fa9e806259ba 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -28,6 +28,7 @@ i915-y += i915_cmd_parser.o \
> >  	  i915_gem.o \
> >  	  i915_gem_stolen.o \
> >  	  i915_gem_tiling.o \
> > +	  i915_gem_userptr.o \
> >  	  i915_gpu_error.o \
> >  	  i915_irq.o \
> >  	  i915_trace_points.o \
> > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> > index 967c9376856b..307dc2635a9a 100644
> > --- a/drivers/gpu/drm/i915/i915_dma.c
> > +++ b/drivers/gpu/drm/i915/i915_dma.c
> > @@ -1982,6 +1982,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
> >  	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> >  	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> >  	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE2, i915_gem_create2_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> > +	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> >  };
> >  
> >  int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 52249cfb1e25..87cecd36c176 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -41,6 +41,7 @@
> >  #include <linux/i2c-algo-bit.h>
> >  #include <drm/intel-gtt.h>
> >  #include <linux/backlight.h>
> > +#include <linux/hashtable.h>
> >  #include <linux/intel-iommu.h>
> >  #include <linux/kref.h>
> >  #include <linux/pm_qos.h>
> > @@ -178,6 +179,7 @@ enum hpd_pin {
> >  		if ((intel_connector)->base.encoder == (__encoder))
> >  
> >  struct drm_i915_private;
> > +struct i915_mmu_object;
> >  
> >  enum intel_dpll_id {
> >  	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
> > @@ -403,6 +405,7 @@ struct drm_i915_error_state {
> >  		u32 tiling:2;
> >  		u32 dirty:1;
> >  		u32 purgeable:1;
> > +		u32 userptr:1;
> >  		s32 ring:4;
> >  		u32 cache_level:3;
> >  	} **active_bo, **pinned_bo;
> > @@ -1447,6 +1450,9 @@ struct drm_i915_private {
> >  	struct i915_gtt gtt; /* VM representing the global address space */
> >  
> >  	struct i915_gem_mm mm;
> > +#if defined(CONFIG_MMU_NOTIFIER)
> > +	DECLARE_HASHTABLE(mmu_notifiers, 7);
> > +#endif
> >  
> >  	/* Kernel Modesetting */
> >  
> > @@ -1580,6 +1586,8 @@ struct drm_i915_gem_object_ops {
> >  	 */
> >  	int (*get_pages)(struct drm_i915_gem_object *);
> >  	void (*put_pages)(struct drm_i915_gem_object *);
> > +	int (*dmabuf_export)(struct drm_i915_gem_object *);
> > +	void (*release)(struct drm_i915_gem_object *);
> >  };
> >  
> >  struct drm_i915_gem_object {
> > @@ -1693,8 +1701,20 @@ struct drm_i915_gem_object {
> >  
> >  	/** for phy allocated objects */
> >  	struct drm_i915_gem_phys_object *phys_obj;
> > -};
> >  
> > +	union {
> > +		struct i915_gem_userptr {
> > +			uintptr_t ptr;
> > +			unsigned read_only :1;
> > +			unsigned workers :4;
> > +#define I915_GEM_USERPTR_MAX_WORKERS 15
> > +
> > +			struct mm_struct *mm;
> > +			struct i915_mmu_object *mn;
> > +			struct work_struct *work;
> > +		} userptr;
> > +	};
> > +};
> >  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
> >  
> >  /**
> > @@ -2124,6 +2144,9 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
> >  			      struct drm_file *file_priv);
> >  int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
> >  			      struct drm_file *file_priv);
> > +int i915_gem_init_userptr(struct drm_device *dev);
> > +int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
> > +			   struct drm_file *file);
> >  int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
> >  				struct drm_file *file_priv);
> >  int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index c84a0101c1c4..704e470bc3be 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -4460,6 +4460,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
> >  	if (obj->base.import_attach)
> >  		drm_prime_gem_destroy(&obj->base, NULL);
> >  
> > +	if (obj->ops->release)
> > +		obj->ops->release(obj);
> > +
> >  	drm_gem_object_release(&obj->base);
> >  	i915_gem_info_remove_obj(dev_priv, obj->base.size);
> >  
> > @@ -4739,6 +4742,7 @@ int i915_gem_init(struct drm_device *dev)
> >  			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
> >  	}
> >  
> > +	i915_gem_init_userptr(dev);
> >  	i915_gem_init_global_gtt(dev);
> >  
> >  	ret = i915_gem_context_init(dev);
> > diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> > index 321102a8374b..580aa42443ed 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> > @@ -229,6 +229,14 @@ static const struct dma_buf_ops i915_dmabuf_ops =  {
> >  struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
> >  				      struct drm_gem_object *gem_obj, int flags)
> >  {
> > +	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
> > +
> > +	if (obj->ops->dmabuf_export) {
> > +		int ret = obj->ops->dmabuf_export(obj);
> > +		if (ret)
> > +			return ERR_PTR(ret);
> > +	}
> > +
> >  	return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags);
> >  }
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> > new file mode 100644
> > index 000000000000..21ea92886a56
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> > @@ -0,0 +1,711 @@
> > +/*
> > + * Copyright © 2012-2014 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a
> > + * copy of this software and associated documentation files (the "Software"),
> > + * to deal in the Software without restriction, including without limitation
> > + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the next
> > + * paragraph) shall be included in all copies or substantial portions of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> > + * IN THE SOFTWARE.
> > + *
> > + */
> > +
> > +#include "drmP.h"
> > +#include "i915_drm.h"
> > +#include "i915_drv.h"
> > +#include "i915_trace.h"
> > +#include "intel_drv.h"
> > +#include <linux/mmu_context.h>
> > +#include <linux/mmu_notifier.h>
> > +#include <linux/mempolicy.h>
> > +#include <linux/swap.h>
> > +
> > +#if defined(CONFIG_MMU_NOTIFIER)
> > +#include <linux/interval_tree.h>
> > +
> > +struct i915_mmu_notifier {
> > +	spinlock_t lock;
> > +	struct hlist_node node;
> > +	struct mmu_notifier mn;
> > +	struct rb_root objects;
> > +	struct drm_device *dev;
> > +	struct mm_struct *mm;
> > +	struct work_struct work;
> > +	unsigned long count;
> > +	unsigned long serial;
> > +};
> > +
> > +struct i915_mmu_object {
> > +	struct i915_mmu_notifier *mmu;
> > +	struct interval_tree_node it;
> > +	struct drm_i915_gem_object *obj;
> > +};
> > +
> > +static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> > +						       struct mm_struct *mm,
> > +						       unsigned long start,
> > +						       unsigned long end)
> > +{
> > +	struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
> > +	struct interval_tree_node *it = NULL;
> > +	unsigned long serial = 0;
> > +
> > +	end--; /* interval ranges are inclusive, but invalidate range is exclusive */
> > +	while (start < end) {
> > +		struct drm_i915_gem_object *obj;
> > +
> > +		obj = NULL;
> > +		spin_lock(&mn->lock);
> > +		if (serial == mn->serial)
> > +			it = interval_tree_iter_next(it, start, end);
> > +		else
> > +			it = interval_tree_iter_first(&mn->objects, start, end);
> > +		if (it != NULL) {
> > +			obj = container_of(it, struct i915_mmu_object, it)->obj;
> > +			drm_gem_object_reference(&obj->base);
> > +			serial = mn->serial;
> > +		}
> > +		spin_unlock(&mn->lock);
> > +		if (obj == NULL)
> > +			return;
> > +
> > +		mutex_lock(&mn->dev->struct_mutex);
> > +		/* Cancel any active worker and force us to re-evaluate gup */
> > +		obj->userptr.work = NULL;
> > +
> > +		if (obj->pages != NULL) {
> > +			struct drm_i915_private *dev_priv = to_i915(mn->dev);
> > +			struct i915_vma *vma, *tmp;
> > +			bool was_interruptible;
> > +
> > +			was_interruptible = dev_priv->mm.interruptible;
> > +			dev_priv->mm.interruptible = false;
> > +
> > +			list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
> > +				int ret = i915_vma_unbind(vma);
> > +				WARN_ON(ret && ret != -EIO);
> > +			}
> > +			WARN_ON(i915_gem_object_put_pages(obj));
> > +
> > +			dev_priv->mm.interruptible = was_interruptible;
> > +		}
> > +
> > +		start = obj->userptr.ptr + obj->base.size;
> > +
> > +		drm_gem_object_unreference(&obj->base);
> > +		mutex_unlock(&mn->dev->struct_mutex);
> > +	}
> > +}
> > +
> > +static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
> > +	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
> > +};
> > +
> > +static struct i915_mmu_notifier *
> > +__i915_mmu_notifier_lookup(struct drm_device *dev, struct mm_struct *mm)
> > +{
> > +	struct drm_i915_private *dev_priv = to_i915(dev);
> > +	struct i915_mmu_notifier *mmu;
> > +
> > +	/* Protected by dev->struct_mutex */
> > +	hash_for_each_possible(dev_priv->mmu_notifiers, mmu, node, (unsigned long)mm)
> > +		if (mmu->mm == mm)
> > +			return mmu;
> > +
> > +	return NULL;
> > +}
> > +
> > +static struct i915_mmu_notifier *
> > +i915_mmu_notifier_get(struct drm_device *dev, struct mm_struct *mm)
> > +{
> > +	struct drm_i915_private *dev_priv = to_i915(dev);
> > +	struct i915_mmu_notifier *mmu;
> > +	int ret;
> > +
> > +	lockdep_assert_held(&dev->struct_mutex);
> > +
> > +	mmu = __i915_mmu_notifier_lookup(dev, mm);
> > +	if (mmu)
> > +		return mmu;
> > +
> > +	mmu = kmalloc(sizeof(*mmu), GFP_KERNEL);
> > +	if (mmu == NULL)
> > +		return ERR_PTR(-ENOMEM);
> > +
> > +	spin_lock_init(&mmu->lock);
> > +	mmu->dev = dev;
> > +	mmu->mn.ops = &i915_gem_userptr_notifier;
> > +	mmu->mm = mm;
> > +	mmu->objects = RB_ROOT;
> > +	mmu->count = 0;
> > +	mmu->serial = 0;
> > +
> > +	/* Protected by mmap_sem (write-lock) */
> > +	ret = __mmu_notifier_register(&mmu->mn, mm);
> > +	if (ret) {
> > +		kfree(mmu);
> > +		return ERR_PTR(ret);
> > +	}
> > +
> > +	/* Protected by dev->struct_mutex */
> > +	hash_add(dev_priv->mmu_notifiers, &mmu->node, (unsigned long)mm);
> > +	return mmu;
> > +}
> > +
> > +static void
> > +__i915_mmu_notifier_destroy_worker(struct work_struct *work)
> > +{
> > +	struct i915_mmu_notifier *mmu = container_of(work, typeof(*mmu), work);
> > +	mmu_notifier_unregister(&mmu->mn, mmu->mm);
> > +	kfree(mmu);
> > +}
> > +
> > +static void
> > +__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
> > +{
> > +	lockdep_assert_held(&mmu->dev->struct_mutex);
> > +
> > +	/* Protected by dev->struct_mutex */
> > +	hash_del(&mmu->node);
> > +
> > +	/* Our lock ordering is: mmap_sem, mmu_notifier_srcu, struct_mutex.
> > +	 * We enter the function holding struct_mutex, therefore we need
> > +	 * to drop our mutex prior to calling mmu_notifier_unregister in
> > +	 * order to prevent lock inversion (and system-wide deadlock)
> > +	 * between the mmap_sem and struct_mutex. Hence we defer the
> > +	 * unregistration to a workqueue where we hold no locks.
> > +	 */
> > +	INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
> > +	schedule_work(&mmu->work);
> > +}
> > +
> > +static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
> > +{
> > +	if (++mmu->serial == 0)
> > +		mmu->serial = 1;
> > +}
> > +
> > +static void
> > +i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
> > +		      struct i915_mmu_object *mn)
> > +{
> > +	lockdep_assert_held(&mmu->dev->struct_mutex);
> > +
> > +	spin_lock(&mmu->lock);
> > +	interval_tree_remove(&mn->it, &mmu->objects);
> > +	__i915_mmu_notifier_update_serial(mmu);
> > +	spin_unlock(&mmu->lock);
> > +
> > +	/* Protected against _add() by dev->struct_mutex */
> > +	if (--mmu->count == 0)
> > +		__i915_mmu_notifier_destroy(mmu);
> > +}
> > +
> > +static int
> > +i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
> > +		      struct i915_mmu_object *mn)
> > +{
> > +	struct interval_tree_node *it;
> > +	int ret;
> > +
> > +	ret = i915_mutex_lock_interruptible(mmu->dev);
> > +	if (ret)
> > +		return ret;
> > +
> > +	/* Make sure we drop the final active reference (and thereby
> > +	 * remove the objects from the interval tree) before we do
> > +	 * the check for overlapping objects.
> > +	 */
> > +	i915_gem_retire_requests(mmu->dev);
> > +
> > +	/* Disallow overlapping userptr objects */
> > +	spin_lock(&mmu->lock);
> > +	it = interval_tree_iter_first(&mmu->objects,
> > +				      mn->it.start, mn->it.last);
> > +	if (it) {
> > +		struct drm_i915_gem_object *obj;
> > +
> > +		/* We only need to check the first object in the range as it
> > +		 * either has cancelled gup work queued and we need to
> > +		 * return back to the user to give time for the gup-workers
> > +		 * to flush their object references upon which the object will
> > +		 * be removed from the interval-tree, or the range is
> > +		 * still in use by another client and the overlap is invalid.
> > +		 */
> > +
> > +		obj = container_of(it, struct i915_mmu_object, it)->obj;
> > +		ret = obj->userptr.workers ? -EAGAIN : -EINVAL;
> > +	} else {
> > +		interval_tree_insert(&mn->it, &mmu->objects);
> > +		__i915_mmu_notifier_update_serial(mmu);
> > +		ret = 0;
> > +	}
> > +	spin_unlock(&mmu->lock);
> > +	mutex_unlock(&mmu->dev->struct_mutex);
> > +
> > +	return ret;
> > +}
> > +
> > +static void
> > +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
> > +{
> > +	struct i915_mmu_object *mn;
> > +
> > +	mn = obj->userptr.mn;
> > +	if (mn == NULL)
> > +		return;
> > +
> > +	i915_mmu_notifier_del(mn->mmu, mn);
> > +	obj->userptr.mn = NULL;
> > +}
> > +
> > +static int
> > +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
> > +				    unsigned flags)
> > +{
> > +	struct i915_mmu_notifier *mmu;
> > +	struct i915_mmu_object *mn;
> > +	int ret;
> > +
> > +	if (flags & I915_USERPTR_UNSYNCHRONIZED)
> > +		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
> > +
> > +	down_write(&obj->userptr.mm->mmap_sem);
> > +	ret = i915_mutex_lock_interruptible(obj->base.dev);
> > +	if (ret == 0) {
> > +		mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
> > +		if (!IS_ERR(mmu))
> > +			mmu->count++; /* preemptive add to act as a refcount */
> > +		else
> > +			ret = PTR_ERR(mmu);
> > +		mutex_unlock(&obj->base.dev->struct_mutex);
> > +	}
> > +	up_write(&obj->userptr.mm->mmap_sem);
> > +	if (ret)
> > +		return ret;
> > +
> > +	mn = kzalloc(sizeof(*mn), GFP_KERNEL);
> > +	if (mn == NULL) {
> > +		ret = -ENOMEM;
> > +		goto destroy_mmu;
> > +	}
> > +
> > +	mn->mmu = mmu;
> > +	mn->it.start = obj->userptr.ptr;
> > +	mn->it.last = mn->it.start + obj->base.size - 1;
> > +	mn->obj = obj;
> > +
> > +	ret = i915_mmu_notifier_add(mmu, mn);
> > +	if (ret)
> > +		goto free_mn;
> > +
> > +	obj->userptr.mn = mn;
> > +	return 0;
> > +
> > +free_mn:
> > +	kfree(mn);
> > +destroy_mmu:
> > +	mutex_lock(&obj->base.dev->struct_mutex);
> > +	if (--mmu->count == 0)
> > +		__i915_mmu_notifier_destroy(mmu);
> > +	mutex_unlock(&obj->base.dev->struct_mutex);
> > +	return ret;
> > +}
> > +
> > +#else
> > +
> > +static void
> > +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
> > +{
> > +}
> > +
> > +static int
> > +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
> > +				    unsigned flags)
> > +{
> > +	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
> > +		return -ENODEV;
> > +
> > +	if (!capable(CAP_SYS_ADMIN))
> > +		return -EPERM;
> > +
> > +	return 0;
> > +}
> > +#endif
> > +
> > +struct get_pages_work {
> > +	struct work_struct work;
> > +	struct drm_i915_gem_object *obj;
> > +	struct task_struct *task;
> > +};
> > +
> > +
> > +#if IS_ENABLED(CONFIG_SWIOTLB)
> > +#define swiotlb_active() swiotlb_nr_tbl()
> > +#else
> > +#define swiotlb_active() 0
> > +#endif
> > +
> > +static int
> > +st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
> > +{
> > +	struct scatterlist *sg;
> > +	int ret, n;
> > +
> > +	*st = kmalloc(sizeof(**st), GFP_KERNEL);
> > +	if (*st == NULL)
> > +		return -ENOMEM;
> > +
> > +	if (swiotlb_active()) {
> > +		ret = sg_alloc_table(*st, num_pages, GFP_KERNEL);
> > +		if (ret)
> > +			goto err;
> > +
> > +		for_each_sg((*st)->sgl, sg, num_pages, n)
> > +			sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
> > +	} else {
> > +		ret = sg_alloc_table_from_pages(*st, pvec, num_pages,
> > +						0, num_pages << PAGE_SHIFT,
> > +						GFP_KERNEL);
> > +		if (ret)
> > +			goto err;
> > +	}
> > +
> > +	return 0;
> > +
> > +err:
> > +	kfree(*st);
> > +	*st = NULL;
> > +	return ret;
> > +}
> > +
> > +static void
> > +__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
> > +{
> > +	struct get_pages_work *work = container_of(_work, typeof(*work), work);
> > +	struct drm_i915_gem_object *obj = work->obj;
> > +	struct drm_device *dev = obj->base.dev;
> > +	const int num_pages = obj->base.size >> PAGE_SHIFT;
> > +	struct page **pvec;
> > +	int pinned, ret;
> > +
> > +	ret = -ENOMEM;
> > +	pinned = 0;
> > +
> > +	pvec = kmalloc(num_pages*sizeof(struct page *),
> > +		       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
> > +	if (pvec == NULL)
> > +		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
> > +	if (pvec != NULL) {
> > +		struct mm_struct *mm = obj->userptr.mm;
> > +
> > +		down_read(&mm->mmap_sem);
> > +		while (pinned < num_pages) {
> > +			ret = get_user_pages(work->task, mm,
> > +					     obj->userptr.ptr + pinned * PAGE_SIZE,
> > +					     num_pages - pinned,
> > +					     !obj->userptr.read_only, 0,
> > +					     pvec + pinned, NULL);
> > +			if (ret < 0)
> > +				break;
> > +
> > +			pinned += ret;
> > +		}
> > +		up_read(&mm->mmap_sem);
> > +	}
> > +
> > +	mutex_lock(&dev->struct_mutex);
> > +	if (obj->userptr.work != &work->work) {
> > +		ret = 0;
> > +	} else if (pinned == num_pages) {
> > +		ret = st_set_pages(&obj->pages, pvec, num_pages);
> > +		if (ret == 0) {
> > +			list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list);
> > +			pinned = 0;
> > +		}
> > +	}
> > +
> > +	obj->userptr.work = ERR_PTR(ret);
> > +	obj->userptr.workers--;
> > +	drm_gem_object_unreference(&obj->base);
> > +	mutex_unlock(&dev->struct_mutex);
> > +
> > +	release_pages(pvec, pinned, 0);
> > +	drm_free_large(pvec);
> > +
> > +	put_task_struct(work->task);
> > +	kfree(work);
> > +}
> > +
> > +static int
> > +i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
> > +{
> > +	const int num_pages = obj->base.size >> PAGE_SHIFT;
> > +	struct page **pvec;
> > +	int pinned, ret;
> > +
> > +	/* If userspace should engineer that these pages are replaced in
> > +	 * the vma between us binding this page into the GTT and completion
> > +	 * of rendering... Their loss. If they change the mapping of their
> > +	 * pages they need to create a new bo to point to the new vma.
> > +	 *
> > +	 * However, that still leaves open the possibility of the vma
> > +	 * being copied upon fork. Which falls under the same userspace
> > +	 * synchronisation issue as a regular bo, except that this time
> > +	 * the process may not be expecting that a particular piece of
> > +	 * memory is tied to the GPU.
> > +	 *
> > +	 * Fortunately, we can hook into the mmu_notifier in order to
> > +	 * discard the page references prior to anything nasty happening
> > +	 * to the vma (discard or cloning) which should prevent the more
> > +	 * egregious cases from causing harm.
> > +	 */
> > +
> > +	pvec = NULL;
> > +	pinned = 0;
> > +	if (obj->userptr.mm == current->mm) {
> > +		pvec = kmalloc(num_pages*sizeof(struct page *),
> > +			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
> > +		if (pvec == NULL) {
> > +			pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
> > +			if (pvec == NULL)
> > +				return -ENOMEM;
> > +		}
> > +
> > +		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
> > +					       !obj->userptr.read_only, pvec);
> > +	}
> > +	if (pinned < num_pages) {
> > +		if (pinned < 0) {
> > +			ret = pinned;
> > +			pinned = 0;
> > +		} else {
> > +			/* Spawn a worker so that we can acquire the
> > +			 * user pages without holding our mutex. Access
> > +			 * to the user pages requires mmap_sem, and we have
> > +			 * a strict lock ordering of mmap_sem, struct_mutex -
> > +			 * we already hold struct_mutex here and so cannot
> > +			 * call gup without encountering a lock inversion.
> > +			 *
> > +			 * Userspace will keep on repeating the operation
> > +			 * (thanks to EAGAIN) until either we hit the fast
> > +			 * path or the worker completes. If the worker is
> > +			 * cancelled or superseded, the task is still run
> > +			 * but the results ignored. (This leads to
> > +			 * complications that we may have a stray object
> > +			 * refcount that we need to be wary of when
> > +			 * checking for existing objects during creation.)
> > +			 * If the worker encounters an error, it reports
> > +			 * that error back to this function through
> > +			 * obj->userptr.work = ERR_PTR.
> > +			 */
> > +			ret = -EAGAIN;
> > +			if (obj->userptr.work == NULL &&
> > +			    obj->userptr.workers < I915_GEM_USERPTR_MAX_WORKERS) {
> > +				struct get_pages_work *work;
> > +
> > +				work = kmalloc(sizeof(*work), GFP_KERNEL);
> > +				if (work != NULL) {
> > +					obj->userptr.work = &work->work;
> > +					obj->userptr.workers++;
> > +
> > +					work->obj = obj;
> > +					drm_gem_object_reference(&obj->base);
> > +
> > +					work->task = current;
> > +					get_task_struct(work->task);
> > +
> > +					INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
> > +					schedule_work(&work->work);
> > +				} else
> > +					ret = -ENOMEM;
> > +			} else {
> > +				if (IS_ERR(obj->userptr.work)) {
> > +					ret = PTR_ERR(obj->userptr.work);
> > +					obj->userptr.work = NULL;
> > +				}
> > +			}
> > +		}
> > +	} else {
> > +		ret = st_set_pages(&obj->pages, pvec, num_pages);
> > +		if (ret == 0) {
> > +			obj->userptr.work = NULL;
> > +			pinned = 0;
> > +		}
> > +	}
> > +
> > +	release_pages(pvec, pinned, 0);
> > +	drm_free_large(pvec);
> > +	return ret;
> > +}
> > +
> > +static void
> > +i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
> > +{
> > +	struct scatterlist *sg;
> > +	int i;
> > +
> > +	BUG_ON(obj->userptr.work != NULL);
> > +
> > +	if (obj->madv != I915_MADV_WILLNEED)
> > +		obj->dirty = 0;
> > +
> > +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> > +		struct page *page = sg_page(sg);
> > +
> > +		if (obj->dirty)
> > +			set_page_dirty(page);
> > +
> > +		mark_page_accessed(page);
> > +		page_cache_release(page);
> > +	}
> > +	obj->dirty = 0;
> > +
> > +	sg_free_table(obj->pages);
> > +	kfree(obj->pages);
> > +}
> > +
> > +static void
> > +i915_gem_userptr_release(struct drm_i915_gem_object *obj)
> > +{
> > +	i915_gem_userptr_release__mmu_notifier(obj);
> > +
> > +	if (obj->userptr.mm) {
> > +		mmput(obj->userptr.mm);
> > +		obj->userptr.mm = NULL;
> > +	}
> > +}
> > +
> > +static int
> > +i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
> > +{
> > +	if (obj->userptr.mn)
> > +		return 0;
> > +
> > +	return i915_gem_userptr_init__mmu_notifier(obj, 0);
> > +}
> > +
> > +static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
> > +	.dmabuf_export = i915_gem_userptr_dmabuf_export,
> > +	.get_pages = i915_gem_userptr_get_pages,
> > +	.put_pages = i915_gem_userptr_put_pages,
> > +	.release = i915_gem_userptr_release,
> > +};
> > +
> > +/**
> > + * Creates a new mm object that wraps some normal memory from the process
> > + * context - user memory.
> > + *
> > + * We impose several restrictions upon the memory being mapped
> > + * into the GPU.
> > + * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
> > + * 2. It cannot overlap any other userptr object in the same address space.
> > + * 3. It must be normal system memory, not a pointer into another map of IO
> > + *    space (e.g. it must not be a GTT mmapping of another object).
> > + * 4. We only allow a bo as large as we could in theory map into the GTT,
> > + *    that is we limit the size to the total size of the GTT.
> > + * 5. The bo is marked as being snoopable. The backing pages are left
> > + *    accessible directly by the CPU, but reads and writes by the GPU may
> > + *    incur the cost of a snoop (unless you have an LLC architecture).
> > + *
> > + * Synchronisation between multiple users and the GPU is left to userspace
> > + * through the normal set-domain-ioctl. The kernel will enforce that the
> > + * GPU relinquishes the VMA before it is returned back to the system
> > + * i.e. upon free(), munmap() or process termination. However, the userspace
> > + * malloc() library may not immediately relinquish the VMA after free() and
> > + * instead reuse it whilst the GPU is still reading and writing to the VMA.
> > + * Caveat emptor.
> > + *
> > + * Also note that the object created here is not currently a "first class"
> > + * object, in that several ioctls are banned. These are the CPU access
> > + * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
> > + * direct access via your pointer rather than use those ioctls.
> > + *
> > + * If you think this is a good interface to use to pass GPU memory between
> > + * drivers, please use dma-buf instead. In fact, wherever possible use
> > + * dma-buf instead.
> > + */
> > +int
> > +i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > +	struct drm_i915_gem_userptr *args = data;
> > +	struct drm_i915_gem_object *obj;
> > +	int ret;
> > +	u32 handle;
> > +
> > +	if (args->flags & ~(I915_USERPTR_READ_ONLY |
> > +			    I915_USERPTR_UNSYNCHRONIZED))
> > +		return -EINVAL;
> > +
> > +	if (offset_in_page(args->user_ptr | args->user_size))
> > +		return -EINVAL;
> > +
> > +	if (args->user_size > dev_priv->gtt.base.total)
> > +		return -E2BIG;
> > +
> > +	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
> > +		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
> > +		return -EFAULT;
> > +
> > +	if (args->flags & I915_USERPTR_READ_ONLY) {
> > +		/* On almost all of the current hw, we cannot tell the GPU that a
> > +		 * page is readonly, so this is just a placeholder in the uAPI.
> > +		 */
> > +		return -ENODEV;
> > +	}
> > +
> > +	/* Allocate the new object */
> > +	obj = i915_gem_object_alloc(dev);
> > +	if (obj == NULL)
> > +		return -ENOMEM;
> > +
> > +	drm_gem_private_object_init(dev, &obj->base, args->user_size);
> > +	i915_gem_object_init(obj, &i915_gem_userptr_ops);
> > +	obj->cache_level = I915_CACHE_LLC;
> > +	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> > +	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> > +
> > +	obj->userptr.ptr = args->user_ptr;
> > +	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
> > +
> > +	/* And keep a pointer to the current->mm for resolving the user pages
> > +	 * at binding. This means that we need to hook into the mmu_notifier
> > +	 * in order to detect if the mmu is destroyed.
> > +	 */
> > +	ret = -ENOMEM;
> > +	if ((obj->userptr.mm = get_task_mm(current)))
> > +		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
> > +	if (ret == 0)
> > +		ret = drm_gem_handle_create(file, &obj->base, &handle);
> > +
> > +	/* drop reference from allocate - handle holds it now */
> > +	drm_gem_object_unreference_unlocked(&obj->base);
> > +	if (ret)
> > +		return ret;
> > +
> > +	args->handle = handle;
> > +	return 0;
> > +}
> > +
> > +int
> > +i915_gem_init_userptr(struct drm_device *dev)
> > +{
> > +#if defined(CONFIG_MMU_NOTIFIER)
> > +	struct drm_i915_private *dev_priv = to_i915(dev);
> > +	hash_init(dev_priv->mmu_notifiers);
> > +#endif
> > +	return 0;
> > +}
> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> > index 2d819858c19b..dde1b0c2ff31 100644
> > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> > @@ -205,6 +205,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
> >  		err_puts(m, tiling_flag(err->tiling));
> >  		err_puts(m, dirty_flag(err->dirty));
> >  		err_puts(m, purgeable_flag(err->purgeable));
> > +		err_puts(m, err->userptr ? " userptr" : "");
> >  		err_puts(m, err->ring != -1 ? " " : "");
> >  		err_puts(m, ring_str(err->ring));
> >  		err_puts(m, i915_cache_level_str(err->cache_level));
> > @@ -641,6 +642,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
> >  	err->tiling = obj->tiling_mode;
> >  	err->dirty = obj->dirty;
> >  	err->purgeable = obj->madv != I915_MADV_WILLNEED;
> > +	err->userptr = obj->userptr.mm != 0;
> >  	err->ring = obj->ring ? obj->ring->id : -1;
> >  	err->cache_level = obj->cache_level;
> >  }
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 4e0711e72cf8..7df2558d84c9 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -224,6 +224,7 @@ typedef struct _drm_i915_sarea {
> >  #define DRM_I915_REG_READ		0x31
> >  #define DRM_I915_GET_RESET_STATS	0x32
> >  #define DRM_I915_GEM_CREATE2		0x33
> > +#define DRM_I915_GEM_USERPTR		0x34
> >  
> >  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
> >  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> > @@ -275,6 +276,7 @@ typedef struct _drm_i915_sarea {
> >  #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
> >  #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
> >  #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
> > +#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
> >  
> >  /* Allow drivers to submit batchbuffers directly to hardware, relying
> >   * on the security mechanisms provided by hardware.
> > @@ -1158,4 +1160,18 @@ struct drm_i915_reset_stats {
> >  	__u32 pad;
> >  };
> >  
> > +struct drm_i915_gem_userptr {
> > +	__u64 user_ptr;
> > +	__u64 user_size;
> > +	__u32 flags;
> > +#define I915_USERPTR_READ_ONLY 0x1
> > +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> > +	/**
> > +	 * Returned handle for the object.
> > +	 *
> > +	 * Object handles are nonzero.
> > +	 */
> > +	__u32 handle;
> > +};
> > +
> >  #endif /* _UAPI_I915_DRM_H_ */
> > -- 
> > 2.0.0.rc2
> > 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 34+ messages in thread
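
As a rough illustration of how the new interface is meant to be driven from
userspace, the sketch below creates a userptr bo around a page-aligned
allocation and retries on EAGAIN while a previous overlapping object is still
being torn down by its gup worker. This is a hypothetical example, not part of
the patch: only struct drm_i915_gem_userptr, its flags and
DRM_IOCTL_I915_GEM_USERPTR come from the uAPI additions quoted above; the
file-descriptor handling and retry policy are assumptions.

/*
 * Hypothetical userspace sketch, not part of the patch. Only the struct,
 * flags and ioctl number come from the uAPI above; everything else
 * (fd handling, retry policy) is an assumption.
 */
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>	/* uapi header with this patch applied */

static int create_userptr_bo(int drm_fd, void *ptr, uint64_t size,
			     uint32_t *handle)
{
	struct drm_i915_gem_userptr arg;
	int ret;

	memset(&arg, 0, sizeof(arg));
	arg.user_ptr = (uintptr_t)ptr;	/* must be page aligned */
	arg.user_size = size;		/* must be page aligned */
	arg.flags = 0;			/* synchronised via the mmu_notifier */

	/* EAGAIN is returned while a previous overlapping object is still
	 * being flushed by its gup worker; simply repeat the call. */
	do {
		ret = ioctl(drm_fd, DRM_IOCTL_I915_GEM_USERPTR, &arg);
	} while (ret == -1 && (errno == EAGAIN || errno == EINTR));

	if (ret == -1)
		return -errno;

	*handle = arg.handle;	/* object handles are nonzero */
	return 0;
}

A caller would typically obtain the buffer via posix_memalign() with the
system page size, create the bo once, and synchronise CPU and GPU access
through the usual set-domain ioctl, since mmap, pread and pwrite on such a
handle are rejected and CPU access goes through the original pointer.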

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-05-16 13:22 Chris Wilson
@ 2014-05-16 15:34 ` Volkin, Bradley D
  2014-05-16 16:39   ` Daniel Vetter
  0 siblings, 1 reply; 34+ messages in thread
From: Volkin, Bradley D @ 2014-05-16 15:34 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Akash Goel

Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com> 

On Fri, May 16, 2014 at 02:22:37PM +0100, Chris Wilson wrote:
> By exporting the ability to map user address and inserting PTEs
> representing their backing pages into the GTT, we can exploit UMA in order
> to utilize normal application data as a texture source or even as a
> render target (depending upon the capabilities of the chipset). This has
> a number of uses, with zero-copy downloads to the GPU and efficient
> readback making the intermixed streaming of CPU and GPU operations
> fairly efficient. This ability has many widespread implications from
> faster rendering of client-side software rasterisers (chromium),
> mitigation of stalls due to read back (firefox) and to faster pipelining
> of texture data (such as pixel buffer objects in GL or data blobs in CL).
> 
> v2: Compile with CONFIG_MMU_NOTIFIER
> v3: We can sleep while performing invalidate-range, which we can utilise
> to drop our page references prior to the kernel manipulating the vma
> (for either discard or cloning) and so protect normal users.
> v4: Only run the invalidate notifier if the range intercepts the bo.
> v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
> v6: Recheck after reacquire mutex for lost mmu.
> v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
> v8: Fix rebasing error after forward porting the back port.
> v9: Limit the userptr to page aligned entries. We now expect userspace
>     to handle all the offset-in-page adjustments itself.
> v10: Prevent vma from being copied across fork to avoid issues with cow.
> v11: Drop vma behaviour changes -- locking is nigh on impossible.
>      Use a worker to load user pages to avoid lock inversions.
> v12: Use get_task_mm()/mmput() for correct refcounting of mm.
> v13: Use a worker to release the mmu_notifier to avoid lock inversion
> v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifer
>      with its own locking and tree of objects for each mm/mmu_notifier.
> v15: Prevent overlapping userptr objects, and invalidate all objects
>      within the mmu_notifier range
> v16: Fix a typo for iterating over multiple objects in the range and
>      rearrange error path to destroy the mmu_notifier locklessly.
>      Also close a race between invalidate_range and the get_pages_worker.
> v17: Close a race between get_pages_worker/invalidate_range and fresh
>      allocations of the same userptr range - and notice that
>      struct_mutex was presumed to be held during creation when it wasn't.
> v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
>      for the struct sg_table and to clear it before reporting an error.
> v19: Always error out on read-only userptr requests as we don't have the
>      hardware infrastructure to support them at the moment.
> v20: Refuse to implement read-only support until we have the required
>      infrastructure - but reserve the bit in flags for future use.
> v21: use_mm() is not required for get_user_pages(). It is only meant to
>      be used to fix up the kernel thread's current->mm for use with
>      copy_user().
> v22: Use sg_alloc_table_from_pages for that chunky feeling
> v23: Export a function for sanity checking dma-buf rather than encode
>      userptr details elsewhere, and clean up comments based on
>      suggestions by Bradley.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
> Cc: Akash Goel <akash.goel@intel.com>
> Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/Kconfig            |   1 +
>  drivers/gpu/drm/i915/Makefile           |   1 +
>  drivers/gpu/drm/i915/i915_dma.c         |   1 +
>  drivers/gpu/drm/i915/i915_drv.h         |  25 +-
>  drivers/gpu/drm/i915/i915_gem.c         |   4 +
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c  |   8 +
>  drivers/gpu/drm/i915/i915_gem_userptr.c | 711 ++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gpu_error.c   |   2 +
>  include/uapi/drm/i915_drm.h             |  16 +
>  9 files changed, 768 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index e4e3c01b8cbc..437e1824d0bf 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -5,6 +5,7 @@ config DRM_I915
>  	depends on (AGP || AGP=n)
>  	select INTEL_GTT
>  	select AGP_INTEL if AGP
> +	select INTERVAL_TREE
>  	# we need shmfs for the swappable backing store, and in particular
>  	# the shmem_readpage() which depends upon tmpfs
>  	select SHMEM
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index b6ce5640d592..fa9e806259ba 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -28,6 +28,7 @@ i915-y += i915_cmd_parser.o \
>  	  i915_gem.o \
>  	  i915_gem_stolen.o \
>  	  i915_gem_tiling.o \
> +	  i915_gem_userptr.o \
>  	  i915_gpu_error.o \
>  	  i915_irq.o \
>  	  i915_trace_points.o \
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 967c9376856b..307dc2635a9a 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1982,6 +1982,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
>  	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE2, i915_gem_create2_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  };
>  
>  int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 52249cfb1e25..87cecd36c176 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -41,6 +41,7 @@
>  #include <linux/i2c-algo-bit.h>
>  #include <drm/intel-gtt.h>
>  #include <linux/backlight.h>
> +#include <linux/hashtable.h>
>  #include <linux/intel-iommu.h>
>  #include <linux/kref.h>
>  #include <linux/pm_qos.h>
> @@ -178,6 +179,7 @@ enum hpd_pin {
>  		if ((intel_connector)->base.encoder == (__encoder))
>  
>  struct drm_i915_private;
> +struct i915_mmu_object;
>  
>  enum intel_dpll_id {
>  	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
> @@ -403,6 +405,7 @@ struct drm_i915_error_state {
>  		u32 tiling:2;
>  		u32 dirty:1;
>  		u32 purgeable:1;
> +		u32 userptr:1;
>  		s32 ring:4;
>  		u32 cache_level:3;
>  	} **active_bo, **pinned_bo;
> @@ -1447,6 +1450,9 @@ struct drm_i915_private {
>  	struct i915_gtt gtt; /* VM representing the global address space */
>  
>  	struct i915_gem_mm mm;
> +#if defined(CONFIG_MMU_NOTIFIER)
> +	DECLARE_HASHTABLE(mmu_notifiers, 7);
> +#endif
>  
>  	/* Kernel Modesetting */
>  
> @@ -1580,6 +1586,8 @@ struct drm_i915_gem_object_ops {
>  	 */
>  	int (*get_pages)(struct drm_i915_gem_object *);
>  	void (*put_pages)(struct drm_i915_gem_object *);
> +	int (*dmabuf_export)(struct drm_i915_gem_object *);
> +	void (*release)(struct drm_i915_gem_object *);
>  };
>  
>  struct drm_i915_gem_object {
> @@ -1693,8 +1701,20 @@ struct drm_i915_gem_object {
>  
>  	/** for phy allocated objects */
>  	struct drm_i915_gem_phys_object *phys_obj;
> -};
>  
> +	union {
> +		struct i915_gem_userptr {
> +			uintptr_t ptr;
> +			unsigned read_only :1;
> +			unsigned workers :4;
> +#define I915_GEM_USERPTR_MAX_WORKERS 15
> +
> +			struct mm_struct *mm;
> +			struct i915_mmu_object *mn;
> +			struct work_struct *work;
> +		} userptr;
> +	};
> +};
>  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
>  
>  /**
> @@ -2124,6 +2144,9 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
>  			      struct drm_file *file_priv);
>  int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
>  			      struct drm_file *file_priv);
> +int i915_gem_init_userptr(struct drm_device *dev);
> +int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
> +			   struct drm_file *file);
>  int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  				struct drm_file *file_priv);
>  int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index c84a0101c1c4..704e470bc3be 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4460,6 +4460,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	if (obj->base.import_attach)
>  		drm_prime_gem_destroy(&obj->base, NULL);
>  
> +	if (obj->ops->release)
> +		obj->ops->release(obj);
> +
>  	drm_gem_object_release(&obj->base);
>  	i915_gem_info_remove_obj(dev_priv, obj->base.size);
>  
> @@ -4739,6 +4742,7 @@ int i915_gem_init(struct drm_device *dev)
>  			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
>  	}
>  
> +	i915_gem_init_userptr(dev);
>  	i915_gem_init_global_gtt(dev);
>  
>  	ret = i915_gem_context_init(dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index 321102a8374b..580aa42443ed 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -229,6 +229,14 @@ static const struct dma_buf_ops i915_dmabuf_ops =  {
>  struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
>  				      struct drm_gem_object *gem_obj, int flags)
>  {
> +	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
> +
> +	if (obj->ops->dmabuf_export) {
> +		int ret = obj->ops->dmabuf_export(obj);
> +		if (ret)
> +			return ERR_PTR(ret);
> +	}
> +
>  	return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> new file mode 100644
> index 000000000000..21ea92886a56
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -0,0 +1,711 @@
> +/*
> + * Copyright © 2012-2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "drmP.h"
> +#include "i915_drm.h"
> +#include "i915_drv.h"
> +#include "i915_trace.h"
> +#include "intel_drv.h"
> +#include <linux/mmu_context.h>
> +#include <linux/mmu_notifier.h>
> +#include <linux/mempolicy.h>
> +#include <linux/swap.h>
> +
> +#if defined(CONFIG_MMU_NOTIFIER)
> +#include <linux/interval_tree.h>
> +
> +struct i915_mmu_notifier {
> +	spinlock_t lock;
> +	struct hlist_node node;
> +	struct mmu_notifier mn;
> +	struct rb_root objects;
> +	struct drm_device *dev;
> +	struct mm_struct *mm;
> +	struct work_struct work;
> +	unsigned long count;
> +	unsigned long serial;
> +};
> +
> +struct i915_mmu_object {
> +	struct i915_mmu_notifier *mmu;
> +	struct interval_tree_node it;
> +	struct drm_i915_gem_object *obj;
> +};
> +
> +static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> +						       struct mm_struct *mm,
> +						       unsigned long start,
> +						       unsigned long end)
> +{
> +	struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
> +	struct interval_tree_node *it = NULL;
> +	unsigned long serial = 0;
> +
> +	end--; /* interval ranges are inclusive, but invalidate range is exclusive */
> +	while (start < end) {
> +		struct drm_i915_gem_object *obj;
> +
> +		obj = NULL;
> +		spin_lock(&mn->lock);
> +		if (serial == mn->serial)
> +			it = interval_tree_iter_next(it, start, end);
> +		else
> +			it = interval_tree_iter_first(&mn->objects, start, end);
> +		if (it != NULL) {
> +			obj = container_of(it, struct i915_mmu_object, it)->obj;
> +			drm_gem_object_reference(&obj->base);
> +			serial = mn->serial;
> +		}
> +		spin_unlock(&mn->lock);
> +		if (obj == NULL)
> +			return;
> +
> +		mutex_lock(&mn->dev->struct_mutex);
> +		/* Cancel any active worker and force us to re-evaluate gup */
> +		obj->userptr.work = NULL;
> +
> +		if (obj->pages != NULL) {
> +			struct drm_i915_private *dev_priv = to_i915(mn->dev);
> +			struct i915_vma *vma, *tmp;
> +			bool was_interruptible;
> +
> +			was_interruptible = dev_priv->mm.interruptible;
> +			dev_priv->mm.interruptible = false;
> +
> +			list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
> +				int ret = i915_vma_unbind(vma);
> +				WARN_ON(ret && ret != -EIO);
> +			}
> +			WARN_ON(i915_gem_object_put_pages(obj));
> +
> +			dev_priv->mm.interruptible = was_interruptible;
> +		}
> +
> +		start = obj->userptr.ptr + obj->base.size;
> +
> +		drm_gem_object_unreference(&obj->base);
> +		mutex_unlock(&mn->dev->struct_mutex);
> +	}
> +}
> +
> +static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
> +	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
> +};
> +
> +static struct i915_mmu_notifier *
> +__i915_mmu_notifier_lookup(struct drm_device *dev, struct mm_struct *mm)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct i915_mmu_notifier *mmu;
> +
> +	/* Protected by dev->struct_mutex */
> +	hash_for_each_possible(dev_priv->mmu_notifiers, mmu, node, (unsigned long)mm)
> +		if (mmu->mm == mm)
> +			return mmu;
> +
> +	return NULL;
> +}
> +
> +static struct i915_mmu_notifier *
> +i915_mmu_notifier_get(struct drm_device *dev, struct mm_struct *mm)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct i915_mmu_notifier *mmu;
> +	int ret;
> +
> +	lockdep_assert_held(&dev->struct_mutex);
> +
> +	mmu = __i915_mmu_notifier_lookup(dev, mm);
> +	if (mmu)
> +		return mmu;
> +
> +	mmu = kmalloc(sizeof(*mmu), GFP_KERNEL);
> +	if (mmu == NULL)
> +		return ERR_PTR(-ENOMEM);
> +
> +	spin_lock_init(&mmu->lock);
> +	mmu->dev = dev;
> +	mmu->mn.ops = &i915_gem_userptr_notifier;
> +	mmu->mm = mm;
> +	mmu->objects = RB_ROOT;
> +	mmu->count = 0;
> +	mmu->serial = 0;
> +
> +	/* Protected by mmap_sem (write-lock) */
> +	ret = __mmu_notifier_register(&mmu->mn, mm);
> +	if (ret) {
> +		kfree(mmu);
> +		return ERR_PTR(ret);
> +	}
> +
> +	/* Protected by dev->struct_mutex */
> +	hash_add(dev_priv->mmu_notifiers, &mmu->node, (unsigned long)mm);
> +	return mmu;
> +}
> +
> +static void
> +__i915_mmu_notifier_destroy_worker(struct work_struct *work)
> +{
> +	struct i915_mmu_notifier *mmu = container_of(work, typeof(*mmu), work);
> +	mmu_notifier_unregister(&mmu->mn, mmu->mm);
> +	kfree(mmu);
> +}
> +
> +static void
> +__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
> +{
> +	lockdep_assert_held(&mmu->dev->struct_mutex);
> +
> +	/* Protected by dev->struct_mutex */
> +	hash_del(&mmu->node);
> +
> +	/* Our lock ordering is: mmap_sem, mmu_notifier_srcu, struct_mutex.
> +	 * We enter the function holding struct_mutex, therefore we need
> +	 * to drop our mutex prior to calling mmu_notifier_unregister in
> +	 * order to prevent lock inversion (and system-wide deadlock)
> +	 * between the mmap_sem and struct_mutex. Hence we defer the
> +	 * unregistration to a workqueue where we hold no locks.
> +	 */
> +	INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
> +	schedule_work(&mmu->work);
> +}
> +
> +static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
> +{
> +	if (++mmu->serial == 0)
> +		mmu->serial = 1;
> +}
> +
> +static void
> +i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
> +		      struct i915_mmu_object *mn)
> +{
> +	lockdep_assert_held(&mmu->dev->struct_mutex);
> +
> +	spin_lock(&mmu->lock);
> +	interval_tree_remove(&mn->it, &mmu->objects);
> +	__i915_mmu_notifier_update_serial(mmu);
> +	spin_unlock(&mmu->lock);
> +
> +	/* Protected against _add() by dev->struct_mutex */
> +	if (--mmu->count == 0)
> +		__i915_mmu_notifier_destroy(mmu);
> +}
> +
> +static int
> +i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
> +		      struct i915_mmu_object *mn)
> +{
> +	struct interval_tree_node *it;
> +	int ret;
> +
> +	ret = i915_mutex_lock_interruptible(mmu->dev);
> +	if (ret)
> +		return ret;
> +
> +	/* Make sure we drop the final active reference (and thereby
> +	 * remove the objects from the interval tree) before we do
> +	 * the check for overlapping objects.
> +	 */
> +	i915_gem_retire_requests(mmu->dev);
> +
> +	/* Disallow overlapping userptr objects */
> +	spin_lock(&mmu->lock);
> +	it = interval_tree_iter_first(&mmu->objects,
> +				      mn->it.start, mn->it.last);
> +	if (it) {
> +		struct drm_i915_gem_object *obj;
> +
> +		/* We only need to check the first object in the range as it
> +		 * either has cancelled gup work queued and we need to
> +		 * return back to the user to give time for the gup-workers
> +		 * to flush their object references upon which the object will
> +		 * be removed from the interval-tree, or the range is
> +		 * still in use by another client and the overlap is invalid.
> +		 */
> +
> +		obj = container_of(it, struct i915_mmu_object, it)->obj;
> +		ret = obj->userptr.workers ? -EAGAIN : -EINVAL;
> +	} else {
> +		interval_tree_insert(&mn->it, &mmu->objects);
> +		__i915_mmu_notifier_update_serial(mmu);
> +		ret = 0;
> +	}
> +	spin_unlock(&mmu->lock);
> +	mutex_unlock(&mmu->dev->struct_mutex);
> +
> +	return ret;
> +}
> +
> +static void
> +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_mmu_object *mn;
> +
> +	mn = obj->userptr.mn;
> +	if (mn == NULL)
> +		return;
> +
> +	i915_mmu_notifier_del(mn->mmu, mn);
> +	obj->userptr.mn = NULL;
> +}
> +
> +static int
> +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
> +				    unsigned flags)
> +{
> +	struct i915_mmu_notifier *mmu;
> +	struct i915_mmu_object *mn;
> +	int ret;
> +
> +	if (flags & I915_USERPTR_UNSYNCHRONIZED)
> +		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
> +
> +	down_write(&obj->userptr.mm->mmap_sem);
> +	ret = i915_mutex_lock_interruptible(obj->base.dev);
> +	if (ret == 0) {
> +		mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
> +		if (!IS_ERR(mmu))
> +			mmu->count++; /* preemptive add to act as a refcount */
> +		else
> +			ret = PTR_ERR(mmu);
> +		mutex_unlock(&obj->base.dev->struct_mutex);
> +	}
> +	up_write(&obj->userptr.mm->mmap_sem);
> +	if (ret)
> +		return ret;
> +
> +	mn = kzalloc(sizeof(*mn), GFP_KERNEL);
> +	if (mn == NULL) {
> +		ret = -ENOMEM;
> +		goto destroy_mmu;
> +	}
> +
> +	mn->mmu = mmu;
> +	mn->it.start = obj->userptr.ptr;
> +	mn->it.last = mn->it.start + obj->base.size - 1;
> +	mn->obj = obj;
> +
> +	ret = i915_mmu_notifier_add(mmu, mn);
> +	if (ret)
> +		goto free_mn;
> +
> +	obj->userptr.mn = mn;
> +	return 0;
> +
> +free_mn:
> +	kfree(mn);
> +destroy_mmu:
> +	mutex_lock(&obj->base.dev->struct_mutex);
> +	if (--mmu->count == 0)
> +		__i915_mmu_notifier_destroy(mmu);
> +	mutex_unlock(&obj->base.dev->struct_mutex);
> +	return ret;
> +}
> +
> +#else
> +
> +static void
> +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
> +{
> +}
> +
> +static int
> +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
> +				    unsigned flags)
> +{
> +	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
> +		return -ENODEV;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	return 0;
> +}
> +#endif
> +
> +struct get_pages_work {
> +	struct work_struct work;
> +	struct drm_i915_gem_object *obj;
> +	struct task_struct *task;
> +};
> +
> +
> +#if IS_ENABLED(CONFIG_SWIOTLB)
> +#define swiotlb_active() swiotlb_nr_tbl()
> +#else
> +#define swiotlb_active() 0
> +#endif
> +
> +static int
> +st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
> +{
> +	struct scatterlist *sg;
> +	int ret, n;
> +
> +	*st = kmalloc(sizeof(**st), GFP_KERNEL);
> +	if (*st == NULL)
> +		return -ENOMEM;
> +
> +	if (swiotlb_active()) {
> +		ret = sg_alloc_table(*st, num_pages, GFP_KERNEL);
> +		if (ret)
> +			goto err;
> +
> +		for_each_sg((*st)->sgl, sg, num_pages, n)
> +			sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
> +	} else {
> +		ret = sg_alloc_table_from_pages(*st, pvec, num_pages,
> +						0, num_pages << PAGE_SHIFT,
> +						GFP_KERNEL);
> +		if (ret)
> +			goto err;
> +	}
> +
> +	return 0;
> +
> +err:
> +	kfree(*st);
> +	*st = NULL;
> +	return ret;
> +}
> +
> +static void
> +__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
> +{
> +	struct get_pages_work *work = container_of(_work, typeof(*work), work);
> +	struct drm_i915_gem_object *obj = work->obj;
> +	struct drm_device *dev = obj->base.dev;
> +	const int num_pages = obj->base.size >> PAGE_SHIFT;
> +	struct page **pvec;
> +	int pinned, ret;
> +
> +	ret = -ENOMEM;
> +	pinned = 0;
> +
> +	pvec = kmalloc(num_pages*sizeof(struct page *),
> +		       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
> +	if (pvec == NULL)
> +		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
> +	if (pvec != NULL) {
> +		struct mm_struct *mm = obj->userptr.mm;
> +
> +		down_read(&mm->mmap_sem);
> +		while (pinned < num_pages) {
> +			ret = get_user_pages(work->task, mm,
> +					     obj->userptr.ptr + pinned * PAGE_SIZE,
> +					     num_pages - pinned,
> +					     !obj->userptr.read_only, 0,
> +					     pvec + pinned, NULL);
> +			if (ret < 0)
> +				break;
> +
> +			pinned += ret;
> +		}
> +		up_read(&mm->mmap_sem);
> +	}
> +
> +	mutex_lock(&dev->struct_mutex);
> +	if (obj->userptr.work != &work->work) {
> +		ret = 0;
> +	} else if (pinned == num_pages) {
> +		ret = st_set_pages(&obj->pages, pvec, num_pages);
> +		if (ret == 0) {
> +			list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list);
> +			pinned = 0;
> +		}
> +	}
> +
> +	obj->userptr.work = ERR_PTR(ret);
> +	obj->userptr.workers--;
> +	drm_gem_object_unreference(&obj->base);
> +	mutex_unlock(&dev->struct_mutex);
> +
> +	release_pages(pvec, pinned, 0);
> +	drm_free_large(pvec);
> +
> +	put_task_struct(work->task);
> +	kfree(work);
> +}
> +
> +static int
> +i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
> +{
> +	const int num_pages = obj->base.size >> PAGE_SHIFT;
> +	struct page **pvec;
> +	int pinned, ret;
> +
> +	/* If userspace should engineer that these pages are replaced in
> +	 * the vma between us binding this page into the GTT and completion
> +	 * of rendering... Their loss. If they change the mapping of their
> +	 * pages they need to create a new bo to point to the new vma.
> +	 *
> +	 * However, that still leaves open the possibility of the vma
> +	 * being copied upon fork. Which falls under the same userspace
> +	 * synchronisation issue as a regular bo, except that this time
> +	 * the process may not be expecting that a particular piece of
> +	 * memory is tied to the GPU.
> +	 *
> +	 * Fortunately, we can hook into the mmu_notifier in order to
> +	 * discard the page references prior to anything nasty happening
> +	 * to the vma (discard or cloning) which should prevent the more
> +	 * egregious cases from causing harm.
> +	 */
> +
> +	pvec = NULL;
> +	pinned = 0;
> +	if (obj->userptr.mm == current->mm) {
> +		pvec = kmalloc(num_pages*sizeof(struct page *),
> +			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
> +		if (pvec == NULL) {
> +			pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
> +			if (pvec == NULL)
> +				return -ENOMEM;
> +		}
> +
> +		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
> +					       !obj->userptr.read_only, pvec);
> +	}
> +	if (pinned < num_pages) {
> +		if (pinned < 0) {
> +			ret = pinned;
> +			pinned = 0;
> +		} else {
> +			/* Spawn a worker so that we can acquire the
> +			 * user pages without holding our mutex. Access
> +			 * to the user pages requires mmap_sem, and we have
> +			 * a strict lock ordering of mmap_sem, struct_mutex -
> +			 * we already hold struct_mutex here and so cannot
> +			 * call gup without encountering a lock inversion.
> +			 *
> +			 * Userspace will keep on repeating the operation
> +			 * (thanks to EAGAIN) until either we hit the fast
> +			 * path or the worker completes. If the worker is
> +			 * cancelled or superseded, the task is still run
> +			 * but the results ignored. (This leads to
> +			 * complications that we may have a stray object
> +			 * refcount that we need to be wary of when
> +			 * checking for existing objects during creation.)
> +			 * If the worker encounters an error, it reports
> +			 * that error back to this function through
> +			 * obj->userptr.work = ERR_PTR.
> +			 */
> +			ret = -EAGAIN;
> +			if (obj->userptr.work == NULL &&
> +			    obj->userptr.workers < I915_GEM_USERPTR_MAX_WORKERS) {
> +				struct get_pages_work *work;
> +
> +				work = kmalloc(sizeof(*work), GFP_KERNEL);
> +				if (work != NULL) {
> +					obj->userptr.work = &work->work;
> +					obj->userptr.workers++;
> +
> +					work->obj = obj;
> +					drm_gem_object_reference(&obj->base);
> +
> +					work->task = current;
> +					get_task_struct(work->task);
> +
> +					INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
> +					schedule_work(&work->work);
> +				} else
> +					ret = -ENOMEM;
> +			} else {
> +				if (IS_ERR(obj->userptr.work)) {
> +					ret = PTR_ERR(obj->userptr.work);
> +					obj->userptr.work = NULL;
> +				}
> +			}
> +		}
> +	} else {
> +		ret = st_set_pages(&obj->pages, pvec, num_pages);
> +		if (ret == 0) {
> +			obj->userptr.work = NULL;
> +			pinned = 0;
> +		}
> +	}
> +
> +	release_pages(pvec, pinned, 0);
> +	drm_free_large(pvec);
> +	return ret;
> +}
> +
> +static void
> +i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct scatterlist *sg;
> +	int i;
> +
> +	BUG_ON(obj->userptr.work != NULL);
> +
> +	if (obj->madv != I915_MADV_WILLNEED)
> +		obj->dirty = 0;
> +
> +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> +		struct page *page = sg_page(sg);
> +
> +		if (obj->dirty)
> +			set_page_dirty(page);
> +
> +		mark_page_accessed(page);
> +		page_cache_release(page);
> +	}
> +	obj->dirty = 0;
> +
> +	sg_free_table(obj->pages);
> +	kfree(obj->pages);
> +}
> +
> +static void
> +i915_gem_userptr_release(struct drm_i915_gem_object *obj)
> +{
> +	i915_gem_userptr_release__mmu_notifier(obj);
> +
> +	if (obj->userptr.mm) {
> +		mmput(obj->userptr.mm);
> +		obj->userptr.mm = NULL;
> +	}
> +}
> +
> +static int
> +i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
> +{
> +	if (obj->userptr.mn)
> +		return 0;
> +
> +	return i915_gem_userptr_init__mmu_notifier(obj, 0);
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
> +	.dmabuf_export = i915_gem_userptr_dmabuf_export,
> +	.get_pages = i915_gem_userptr_get_pages,
> +	.put_pages = i915_gem_userptr_put_pages,
> +	.release = i915_gem_userptr_release,
> +};
> +
> +/**
> + * Creates a new mm object that wraps some normal memory from the process
> + * context - user memory.
> + *
> + * We impose several restrictions upon the memory being mapped
> + * into the GPU.
> + * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
> + * 2. It cannot overlap any other userptr object in the same address space.
> + * 3. It must be normal system memory, not a pointer into another map of IO
> + *    space (e.g. it must not be a GTT mmapping of another object).
> + * 4. We only allow a bo as large as we could in theory map into the GTT,
> + *    that is we limit the size to the total size of the GTT.
> + * 5. The bo is marked as being snoopable. The backing pages are left
> + *    accessible directly by the CPU, but reads and writes by the GPU may
> + *    incur the cost of a snoop (unless you have an LLC architecture).
> + *
> + * Synchronisation between multiple users and the GPU is left to userspace
> + * through the normal set-domain-ioctl. The kernel will enforce that the
> + * GPU relinquishes the VMA before it is returned back to the system
> + * i.e. upon free(), munmap() or process termination. However, the userspace
> + * malloc() library may not immediately relinquish the VMA after free() and
> + * instead reuse it whilst the GPU is still reading and writing to the VMA.
> + * Caveat emptor.
> + *
> + * Also note that the object created here is not currently a "first class"
> + * object, in that several ioctls are banned. These are the CPU access
> + * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
> + * direct access via your pointer rather than use those ioctls.
> + *
> + * If you think this is a good interface to use to pass GPU memory between
> + * drivers, please use dma-buf instead. In fact, wherever possible use
> + * dma-buf instead.
> + */
> +int
> +i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_gem_userptr *args = data;
> +	struct drm_i915_gem_object *obj;
> +	int ret;
> +	u32 handle;
> +
> +	if (args->flags & ~(I915_USERPTR_READ_ONLY |
> +			    I915_USERPTR_UNSYNCHRONIZED))
> +		return -EINVAL;
> +
> +	if (offset_in_page(args->user_ptr | args->user_size))
> +		return -EINVAL;
> +
> +	if (args->user_size > dev_priv->gtt.base.total)
> +		return -E2BIG;
> +
> +	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
> +		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
> +		return -EFAULT;
> +
> +	if (args->flags & I915_USERPTR_READ_ONLY) {
> +		/* On almost all of the current hw, we cannot tell the GPU that a
> +		 * page is readonly, so this is just a placeholder in the uAPI.
> +		 */
> +		return -ENODEV;
> +	}
> +
> +	/* Allocate the new object */
> +	obj = i915_gem_object_alloc(dev);
> +	if (obj == NULL)
> +		return -ENOMEM;
> +
> +	drm_gem_private_object_init(dev, &obj->base, args->user_size);
> +	i915_gem_object_init(obj, &i915_gem_userptr_ops);
> +	obj->cache_level = I915_CACHE_LLC;
> +	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> +
> +	obj->userptr.ptr = args->user_ptr;
> +	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
> +
> +	/* And keep a pointer to the current->mm for resolving the user pages
> +	 * at binding. This means that we need to hook into the mmu_notifier
> +	 * in order to detect if the mmu is destroyed.
> +	 */
> +	ret = -ENOMEM;
> +	if ((obj->userptr.mm = get_task_mm(current)))
> +		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
> +	if (ret == 0)
> +		ret = drm_gem_handle_create(file, &obj->base, &handle);
> +
> +	/* drop reference from allocate - handle holds it now */
> +	drm_gem_object_unreference_unlocked(&obj->base);
> +	if (ret)
> +		return ret;
> +
> +	args->handle = handle;
> +	return 0;
> +}
> +
> +int
> +i915_gem_init_userptr(struct drm_device *dev)
> +{
> +#if defined(CONFIG_MMU_NOTIFIER)
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	hash_init(dev_priv->mmu_notifiers);
> +#endif
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 2d819858c19b..dde1b0c2ff31 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -205,6 +205,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
>  		err_puts(m, tiling_flag(err->tiling));
>  		err_puts(m, dirty_flag(err->dirty));
>  		err_puts(m, purgeable_flag(err->purgeable));
> +		err_puts(m, err->userptr ? " userptr" : "");
>  		err_puts(m, err->ring != -1 ? " " : "");
>  		err_puts(m, ring_str(err->ring));
>  		err_puts(m, i915_cache_level_str(err->cache_level));
> @@ -641,6 +642,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>  	err->tiling = obj->tiling_mode;
>  	err->dirty = obj->dirty;
>  	err->purgeable = obj->madv != I915_MADV_WILLNEED;
> +	err->userptr = obj->userptr.mm != 0;
>  	err->ring = obj->ring ? obj->ring->id : -1;
>  	err->cache_level = obj->cache_level;
>  }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 4e0711e72cf8..7df2558d84c9 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -224,6 +224,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_REG_READ		0x31
>  #define DRM_I915_GET_RESET_STATS	0x32
>  #define DRM_I915_GEM_CREATE2		0x33
> +#define DRM_I915_GEM_USERPTR		0x34
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -275,6 +276,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>  #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
>  #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
> +#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
>  
>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>   * on the security mechanisms provided by hardware.
> @@ -1158,4 +1160,18 @@ struct drm_i915_reset_stats {
>  	__u32 pad;
>  };
>  
> +struct drm_i915_gem_userptr {
> +	__u64 user_ptr;
> +	__u64 user_size;
> +	__u32 flags;
> +#define I915_USERPTR_READ_ONLY 0x1
> +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> +	/**
> +	 * Returned handle for the object.
> +	 *
> +	 * Object handles are nonzero.
> +	 */
> +	__u32 handle;
> +};
> +
>  #endif /* _UAPI_I915_DRM_H_ */
> -- 
> 2.0.0.rc2
> 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
@ 2014-05-16 13:22 Chris Wilson
  2014-05-16 15:34 ` Volkin, Bradley D
  0 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2014-05-16 13:22 UTC (permalink / raw)
  To: intel-gfx; +Cc: Akash Goel

By exporting the ability to map user address and inserting PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications from
faster rendering of client-side software rasterisers (chromium),
mitigation of stalls due to read back (firefox) and to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).
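
As a rough illustration of the intended usage (not part of the patch
itself), userspace wraps an existing page-aligned allocation along these
lines; the struct layout and ioctl number match the uAPI addition below,
while the drm fd and the exact include path are assumptions about the
caller's environment:

/* Hypothetical userspace sketch: turn a page-aligned chunk of anonymous
 * memory (whose size is a page multiple) into a GEM handle via the new
 * ioctl. Error handling is trimmed; handles are nonzero on success.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static uint32_t gem_userptr(int fd, void *ptr, uint64_t size)
{
	struct drm_i915_gem_userptr arg = {
		.user_ptr = (uintptr_t)ptr,	/* must be page aligned */
		.user_size = size,		/* must be page aligned */
		.flags = 0,			/* use the mmu_notifier path */
	};

	if (ioctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
		return 0;

	return arg.handle;
}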

v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquire mutex for lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forward porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
    to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
     Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifier
     with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
     within the mmu_notifier range
v16: Fix a typo for iterating over multiple objects in the range and
     rearrange error path to destroy the mmu_notifier locklessly.
     Also close a race between invalidate_range and the get_pages_worker.
v17: Close a race between get_pages_worker/invalidate_range and fresh
     allocations of the same userptr range - and notice that
     struct_mutex was presumed to be held during creation when it wasn't.
v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
     for the struct sg_table and to clear it before reporting an error.
v19: Always error out on read-only userptr requests as we don't have the
     hardware infrastructure to support them at the moment.
v20: Refuse to implement read-only support until we have the required
     infrastructure - but reserve the bit in flags for future use.
v21: use_mm() is not required for get_user_pages(). It is only meant to
     be used to fix up the kernel thread's current->mm for use with
     copy_user().
v22: Use sg_alloc_table_from_pages for that chunky feeling
v23: Export a function for sanity checking dma-buf rather than encode
     userptr details elsewhere, and clean up comments based on
     suggestions by Bradley.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig            |   1 +
 drivers/gpu/drm/i915/Makefile           |   1 +
 drivers/gpu/drm/i915/i915_dma.c         |   1 +
 drivers/gpu/drm/i915/i915_drv.h         |  25 +-
 drivers/gpu/drm/i915/i915_gem.c         |   4 +
 drivers/gpu/drm/i915/i915_gem_dmabuf.c  |   8 +
 drivers/gpu/drm/i915/i915_gem_userptr.c | 711 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c   |   2 +
 include/uapi/drm/i915_drm.h             |  16 +
 9 files changed, 768 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index e4e3c01b8cbc..437e1824d0bf 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -5,6 +5,7 @@ config DRM_I915
 	depends on (AGP || AGP=n)
 	select INTEL_GTT
 	select AGP_INTEL if AGP
+	select INTERVAL_TREE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index b6ce5640d592..fa9e806259ba 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -28,6 +28,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_gem.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_userptr.o \
 	  i915_gpu_error.o \
 	  i915_irq.o \
 	  i915_trace_points.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 967c9376856b..307dc2635a9a 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1982,6 +1982,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE2, i915_gem_create2_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 52249cfb1e25..87cecd36c176 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -41,6 +41,7 @@
 #include <linux/i2c-algo-bit.h>
 #include <drm/intel-gtt.h>
 #include <linux/backlight.h>
+#include <linux/hashtable.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
@@ -178,6 +179,7 @@ enum hpd_pin {
 		if ((intel_connector)->base.encoder == (__encoder))
 
 struct drm_i915_private;
+struct i915_mmu_object;
 
 enum intel_dpll_id {
 	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
@@ -403,6 +405,7 @@ struct drm_i915_error_state {
 		u32 tiling:2;
 		u32 dirty:1;
 		u32 purgeable:1;
+		u32 userptr:1;
 		s32 ring:4;
 		u32 cache_level:3;
 	} **active_bo, **pinned_bo;
@@ -1447,6 +1450,9 @@ struct drm_i915_private {
 	struct i915_gtt gtt; /* VM representing the global address space */
 
 	struct i915_gem_mm mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+	DECLARE_HASHTABLE(mmu_notifiers, 7);
+#endif
 
 	/* Kernel Modesetting */
 
@@ -1580,6 +1586,8 @@ struct drm_i915_gem_object_ops {
 	 */
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
+	int (*dmabuf_export)(struct drm_i915_gem_object *);
+	void (*release)(struct drm_i915_gem_object *);
 };
 
 struct drm_i915_gem_object {
@@ -1693,8 +1701,20 @@ struct drm_i915_gem_object {
 
 	/** for phy allocated objects */
 	struct drm_i915_gem_phys_object *phys_obj;
-};
 
+	union {
+		struct i915_gem_userptr {
+			uintptr_t ptr;
+			unsigned read_only :1;
+			unsigned workers :4;
+#define I915_GEM_USERPTR_MAX_WORKERS 15
+
+			struct mm_struct *mm;
+			struct i915_mmu_object *mn;
+			struct work_struct *work;
+		} userptr;
+	};
+};
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /**
@@ -2124,6 +2144,9 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
 int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
+int i915_gem_init_userptr(struct drm_device *dev);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
 int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c84a0101c1c4..704e470bc3be 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4460,6 +4460,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->base.import_attach)
 		drm_prime_gem_destroy(&obj->base, NULL);
 
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
@@ -4739,6 +4742,7 @@ int i915_gem_init(struct drm_device *dev)
 			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
 	}
 
+	i915_gem_init_userptr(dev);
 	i915_gem_init_global_gtt(dev);
 
 	ret = i915_gem_context_init(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 321102a8374b..580aa42443ed 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -229,6 +229,14 @@ static const struct dma_buf_ops i915_dmabuf_ops =  {
 struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 				      struct drm_gem_object *gem_obj, int flags)
 {
+	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
+
+	if (obj->ops->dmabuf_export) {
+		int ret = obj->ops->dmabuf_export(obj);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
 	return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 000000000000..21ea92886a56
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -0,0 +1,711 @@
+/*
+ * Copyright © 2012-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_context.h>
+#include <linux/mmu_notifier.h>
+#include <linux/mempolicy.h>
+#include <linux/swap.h>
+
+#if defined(CONFIG_MMU_NOTIFIER)
+#include <linux/interval_tree.h>
+
+struct i915_mmu_notifier {
+	spinlock_t lock;
+	struct hlist_node node;
+	struct mmu_notifier mn;
+	struct rb_root objects;
+	struct drm_device *dev;
+	struct mm_struct *mm;
+	struct work_struct work;
+	unsigned long count;
+	unsigned long serial;
+};
+
+struct i915_mmu_object {
+	struct i915_mmu_notifier *mmu;
+	struct interval_tree_node it;
+	struct drm_i915_gem_object *obj;
+};
+
+static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
+						       struct mm_struct *mm,
+						       unsigned long start,
+						       unsigned long end)
+{
+	struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
+	struct interval_tree_node *it = NULL;
+	unsigned long serial = 0;
+
+	end--; /* interval ranges are inclusive, but invalidate range is exclusive */
+	while (start < end) {
+		struct drm_i915_gem_object *obj;
+
+		obj = NULL;
+		spin_lock(&mn->lock);
+		if (serial == mn->serial)
+			it = interval_tree_iter_next(it, start, end);
+		else
+			it = interval_tree_iter_first(&mn->objects, start, end);
+		if (it != NULL) {
+			obj = container_of(it, struct i915_mmu_object, it)->obj;
+			drm_gem_object_reference(&obj->base);
+			serial = mn->serial;
+		}
+		spin_unlock(&mn->lock);
+		if (obj == NULL)
+			return;
+
+		mutex_lock(&mn->dev->struct_mutex);
+		/* Cancel any active worker and force us to re-evaluate gup */
+		obj->userptr.work = NULL;
+
+		if (obj->pages != NULL) {
+			struct drm_i915_private *dev_priv = to_i915(mn->dev);
+			struct i915_vma *vma, *tmp;
+			bool was_interruptible;
+
+			was_interruptible = dev_priv->mm.interruptible;
+			dev_priv->mm.interruptible = false;
+
+			list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
+				int ret = i915_vma_unbind(vma);
+				WARN_ON(ret && ret != -EIO);
+			}
+			WARN_ON(i915_gem_object_put_pages(obj));
+
+			dev_priv->mm.interruptible = was_interruptible;
+		}
+
+		start = obj->userptr.ptr + obj->base.size;
+
+		drm_gem_object_unreference(&obj->base);
+		mutex_unlock(&mn->dev->struct_mutex);
+	}
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
+};
+
+static struct i915_mmu_notifier *
+__i915_mmu_notifier_lookup(struct drm_device *dev, struct mm_struct *mm)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_mmu_notifier *mmu;
+
+	/* Protected by dev->struct_mutex */
+	hash_for_each_possible(dev_priv->mmu_notifiers, mmu, node, (unsigned long)mm)
+		if (mmu->mm == mm)
+			return mmu;
+
+	return NULL;
+}
+
+static struct i915_mmu_notifier *
+i915_mmu_notifier_get(struct drm_device *dev, struct mm_struct *mm)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_mmu_notifier *mmu;
+	int ret;
+
+	lockdep_assert_held(&dev->struct_mutex);
+
+	mmu = __i915_mmu_notifier_lookup(dev, mm);
+	if (mmu)
+		return mmu;
+
+	mmu = kmalloc(sizeof(*mmu), GFP_KERNEL);
+	if (mmu == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&mmu->lock);
+	mmu->dev = dev;
+	mmu->mn.ops = &i915_gem_userptr_notifier;
+	mmu->mm = mm;
+	mmu->objects = RB_ROOT;
+	mmu->count = 0;
+	mmu->serial = 0;
+
+	/* Protected by mmap_sem (write-lock) */
+	ret = __mmu_notifier_register(&mmu->mn, mm);
+	if (ret) {
+		kfree(mmu);
+		return ERR_PTR(ret);
+	}
+
+	/* Protected by dev->struct_mutex */
+	hash_add(dev_priv->mmu_notifiers, &mmu->node, (unsigned long)mm);
+	return mmu;
+}
+
+static void
+__i915_mmu_notifier_destroy_worker(struct work_struct *work)
+{
+	struct i915_mmu_notifier *mmu = container_of(work, typeof(*mmu), work);
+	mmu_notifier_unregister(&mmu->mn, mmu->mm);
+	kfree(mmu);
+}
+
+static void
+__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
+{
+	lockdep_assert_held(&mmu->dev->struct_mutex);
+
+	/* Protected by dev->struct_mutex */
+	hash_del(&mmu->node);
+
+	/* Our lock ordering is: mmap_sem, mmu_notifier_srcu, struct_mutex.
+	 * We enter the function holding struct_mutex, therefore we need
+	 * to drop our mutex prior to calling mmu_notifier_unregister in
+	 * order to prevent lock inversion (and system-wide deadlock)
+	 * between the mmap_sem and struct-mutex. Hence we defer the
+	 * unregistration to a workqueue where we hold no locks.
+	 */
+	INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
+	schedule_work(&mmu->work);
+}
+
+static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
+{
+	if (++mmu->serial == 0)
+		mmu->serial = 1;
+}
+
+static void
+i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
+		      struct i915_mmu_object *mn)
+{
+	lockdep_assert_held(&mmu->dev->struct_mutex);
+
+	spin_lock(&mmu->lock);
+	interval_tree_remove(&mn->it, &mmu->objects);
+	__i915_mmu_notifier_update_serial(mmu);
+	spin_unlock(&mmu->lock);
+
+	/* Protected against _add() by dev->struct_mutex */
+	if (--mmu->count == 0)
+		__i915_mmu_notifier_destroy(mmu);
+}
+
+static int
+i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
+		      struct i915_mmu_object *mn)
+{
+	struct interval_tree_node *it;
+	int ret;
+
+	ret = i915_mutex_lock_interruptible(mmu->dev);
+	if (ret)
+		return ret;
+
+	/* Make sure we drop the final active reference (and thereby
+	 * remove the objects from the interval tree) before we do
+	 * the check for overlapping objects.
+	 */
+	i915_gem_retire_requests(mmu->dev);
+
+	/* Disallow overlapping userptr objects */
+	spin_lock(&mmu->lock);
+	it = interval_tree_iter_first(&mmu->objects,
+				      mn->it.start, mn->it.last);
+	if (it) {
+		struct drm_i915_gem_object *obj;
+
+		/* We only need to check the first object in the range as it
+		 * either has cancelled gup work queued and we need to
+		 * return back to the user to give time for the gup-workers
+		 * to flush their object references upon which the object will
+		 * be removed from the interval-tree, or the range is
+		 * still in use by another client and the overlap is invalid.
+		 */
+
+		obj = container_of(it, struct i915_mmu_object, it)->obj;
+		ret = obj->userptr.workers ? -EAGAIN : -EINVAL;
+	} else {
+		interval_tree_insert(&mn->it, &mmu->objects);
+		__i915_mmu_notifier_update_serial(mmu);
+		ret = 0;
+	}
+	spin_unlock(&mmu->lock);
+	mutex_unlock(&mmu->dev->struct_mutex);
+
+	return ret;
+}
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+	struct i915_mmu_object *mn;
+
+	mn = obj->userptr.mn;
+	if (mn == NULL)
+		return;
+
+	i915_mmu_notifier_del(mn->mmu, mn);
+	obj->userptr.mn = NULL;
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	struct i915_mmu_notifier *mmu;
+	struct i915_mmu_object *mn;
+	int ret;
+
+	if (flags & I915_USERPTR_UNSYNCHRONIZED)
+		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+
+	down_write(&obj->userptr.mm->mmap_sem);
+	ret = i915_mutex_lock_interruptible(obj->base.dev);
+	if (ret == 0) {
+		mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
+		if (!IS_ERR(mmu))
+			mmu->count++; /* preemptive add to act as a refcount */
+		else
+			ret = PTR_ERR(mmu);
+		mutex_unlock(&obj->base.dev->struct_mutex);
+	}
+	up_write(&obj->userptr.mm->mmap_sem);
+	if (ret)
+		return ret;
+
+	mn = kzalloc(sizeof(*mn), GFP_KERNEL);
+	if (mn == NULL) {
+		ret = -ENOMEM;
+		goto destroy_mmu;
+	}
+
+	mn->mmu = mmu;
+	mn->it.start = obj->userptr.ptr;
+	mn->it.last = mn->it.start + obj->base.size - 1;
+	mn->obj = obj;
+
+	ret = i915_mmu_notifier_add(mmu, mn);
+	if (ret)
+		goto free_mn;
+
+	obj->userptr.mn = mn;
+	return 0;
+
+free_mn:
+	kfree(mn);
+destroy_mmu:
+	mutex_lock(&obj->base.dev->struct_mutex);
+	if (--mmu->count == 0)
+		__i915_mmu_notifier_destroy(mmu);
+	mutex_unlock(&obj->base.dev->struct_mutex);
+	return ret;
+}
+
+#else
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
+		return -ENODEV;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return 0;
+}
+#endif
+
+struct get_pages_work {
+	struct work_struct work;
+	struct drm_i915_gem_object *obj;
+	struct task_struct *task;
+};
+
+
+#if IS_ENABLED(CONFIG_SWIOTLB)
+#define swiotlb_active() swiotlb_nr_tbl()
+#else
+#define swiotlb_active() 0
+#endif
+
+static int
+st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
+{
+	struct scatterlist *sg;
+	int ret, n;
+
+	*st = kmalloc(sizeof(**st), GFP_KERNEL);
+	if (*st == NULL)
+		return -ENOMEM;
+
+	if (swiotlb_active()) {
+		ret = sg_alloc_table(*st, num_pages, GFP_KERNEL);
+		if (ret)
+			goto err;
+
+		for_each_sg((*st)->sgl, sg, num_pages, n)
+			sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+	} else {
+		ret = sg_alloc_table_from_pages(*st, pvec, num_pages,
+						0, num_pages << PAGE_SHIFT,
+						GFP_KERNEL);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	kfree(*st);
+	*st = NULL;
+	return ret;
+}
+
+static void
+__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
+{
+	struct get_pages_work *work = container_of(_work, typeof(*work), work);
+	struct drm_i915_gem_object *obj = work->obj;
+	struct drm_device *dev = obj->base.dev;
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	ret = -ENOMEM;
+	pinned = 0;
+
+	pvec = kmalloc(num_pages*sizeof(struct page *),
+		       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+	if (pvec == NULL)
+		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+	if (pvec != NULL) {
+		struct mm_struct *mm = obj->userptr.mm;
+
+		down_read(&mm->mmap_sem);
+		while (pinned < num_pages) {
+			ret = get_user_pages(work->task, mm,
+					     obj->userptr.ptr + pinned * PAGE_SIZE,
+					     num_pages - pinned,
+					     !obj->userptr.read_only, 0,
+					     pvec + pinned, NULL);
+			if (ret < 0)
+				break;
+
+			pinned += ret;
+		}
+		up_read(&mm->mmap_sem);
+	}
+
+	mutex_lock(&dev->struct_mutex);
+	if (obj->userptr.work != &work->work) {
+		ret = 0;
+	} else if (pinned == num_pages) {
+		ret = st_set_pages(&obj->pages, pvec, num_pages);
+		if (ret == 0) {
+			list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list);
+			pinned = 0;
+		}
+	}
+
+	obj->userptr.work = ERR_PTR(ret);
+	obj->userptr.workers--;
+	drm_gem_object_unreference(&obj->base);
+	mutex_unlock(&dev->struct_mutex);
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+
+	put_task_struct(work->task);
+	kfree(work);
+}
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 *
+	 * Fortunately, we can hook into the mmu_notifier in order to
+	 * discard the page references prior to anything nasty happening
+	 * to the vma (discard or cloning) which should prevent the more
+	 * egregious cases from causing harm.
+	 */
+
+	pvec = NULL;
+	pinned = 0;
+	if (obj->userptr.mm == current->mm) {
+		pvec = kmalloc(num_pages*sizeof(struct page *),
+			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+		if (pvec == NULL) {
+			pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+			if (pvec == NULL)
+				return -ENOMEM;
+		}
+
+		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
+					       !obj->userptr.read_only, pvec);
+	}
+	if (pinned < num_pages) {
+		if (pinned < 0) {
+			ret = pinned;
+			pinned = 0;
+		} else {
+			/* Spawn a worker so that we can acquire the
+			 * user pages without holding our mutex. Access
+			 * to the user pages requires mmap_sem, and we have
+			 * a strict lock ordering of mmap_sem, struct_mutex -
+			 * we already hold struct_mutex here and so cannot
+			 * call gup without encountering a lock inversion.
+			 *
+			 * Userspace will keep on repeating the operation
+			 * (thanks to EAGAIN) until either we hit the fast
+			 * path or the worker completes. If the worker is
+			 * cancelled or superseded, the task is still run
+			 * but the results ignored. (This leads to
+			 * complications that we may have a stray object
+			 * refcount that we need to be wary of when
+			 * checking for existing objects during creation.)
+			 * If the worker encounters an error, it reports
+			 * that error back to this function through
+			 * obj->userptr.work = ERR_PTR.
+			 */
+			ret = -EAGAIN;
+			if (obj->userptr.work == NULL &&
+			    obj->userptr.workers < I915_GEM_USERPTR_MAX_WORKERS) {
+				struct get_pages_work *work;
+
+				work = kmalloc(sizeof(*work), GFP_KERNEL);
+				if (work != NULL) {
+					obj->userptr.work = &work->work;
+					obj->userptr.workers++;
+
+					work->obj = obj;
+					drm_gem_object_reference(&obj->base);
+
+					work->task = current;
+					get_task_struct(work->task);
+
+					INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
+					schedule_work(&work->work);
+				} else
+					ret = -ENOMEM;
+			} else {
+				if (IS_ERR(obj->userptr.work)) {
+					ret = PTR_ERR(obj->userptr.work);
+					obj->userptr.work = NULL;
+				}
+			}
+		}
+	} else {
+		ret = st_set_pages(&obj->pages, pvec, num_pages);
+		if (ret == 0) {
+			obj->userptr.work = NULL;
+			pinned = 0;
+		}
+	}
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+	return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+	struct scatterlist *sg;
+	int i;
+
+	BUG_ON(obj->userptr.work != NULL);
+
+	if (obj->madv != I915_MADV_WILLNEED)
+		obj->dirty = 0;
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+		struct page *page = sg_page(sg);
+
+		if (obj->dirty)
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+	}
+	obj->dirty = 0;
+
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+	i915_gem_userptr_release__mmu_notifier(obj);
+
+	if (obj->userptr.mm) {
+		mmput(obj->userptr.mm);
+		obj->userptr.mm = NULL;
+	}
+}
+
+static int
+i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
+{
+	if (obj->userptr.mn)
+		return 0;
+
+	return i915_gem_userptr_init__mmu_notifier(obj, 0);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.dmabuf_export = i915_gem_userptr_dmabuf_export,
+	.get_pages = i915_gem_userptr_get_pages,
+	.put_pages = i915_gem_userptr_put_pages,
+	.release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some normal memory from the process
+ * context - user memory.
+ *
+ * We impose several restrictions upon the memory being mapped
+ * into the GPU.
+ * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
+ * 2. It cannot overlap any other userptr object in the same address space.
+ * 3. It must be normal system memory, not a pointer into another map of IO
+ *    space (e.g. it must not be a GTT mmapping of another object).
+ * 4. We only allow a bo as large as we could in theory map into the GTT,
+ *    that is we limit the size to the total size of the GTT.
+ * 5. The bo is marked as being snoopable. The backing pages are left
+ *    accessible directly by the CPU, but reads and writes by the GPU may
+ *    incur the cost of a snoop (unless you have an LLC architecture).
+ *
+ * Synchronisation between multiple users and the GPU is left to userspace
+ * through the normal set-domain-ioctl. The kernel will enforce that the
+ * GPU relinquishes the VMA before it is returned back to the system
+ * i.e. upon free(), munmap() or process termination. However, the userspace
+ * malloc() library may not immediately relinquish the VMA after free() and
+ * instead reuse it whilst the GPU is still reading and writing to the VMA.
+ * Caveat emptor.
+ *
+ * Also note, that the object created here is not currently a "first class"
+ * object, in that several ioctls are banned. These are the CPU access
+ * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
+ * direct access via your pointer rather than use those ioctls.
+ *
+ * If you think this is a good interface to use to pass GPU memory between
+ * drivers, please use dma-buf instead. In fact, wherever possible use
+ * dma-buf instead.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_userptr *args = data;
+	struct drm_i915_gem_object *obj;
+	int ret;
+	u32 handle;
+
+	if (args->flags & ~(I915_USERPTR_READ_ONLY |
+			    I915_USERPTR_UNSYNCHRONIZED))
+		return -EINVAL;
+
+	if (offset_in_page(args->user_ptr | args->user_size))
+		return -EINVAL;
+
+	if (args->user_size > dev_priv->gtt.base.total)
+		return -E2BIG;
+
+	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
+		return -EFAULT;
+
+	if (args->flags & I915_USERPTR_READ_ONLY) {
+		/* On almost all of the current hw, we cannot tell the GPU that a
+		 * page is readonly, so this is just a placeholder in the uAPI.
+		 */
+		return -ENODEV;
+	}
+
+	/* Allocate the new object */
+	obj = i915_gem_object_alloc(dev);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	drm_gem_private_object_init(dev, &obj->base, args->user_size);
+	i915_gem_object_init(obj, &i915_gem_userptr_ops);
+	obj->cache_level = I915_CACHE_LLC;
+	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+
+	obj->userptr.ptr = args->user_ptr;
+	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
+
+	/* And keep a pointer to the current->mm for resolving the user pages
+	 * at binding. This means that we need to hook into the mmu_notifier
+	 * in order to detect if the mmu is destroyed.
+	 */
+	ret = -ENOMEM;
+	if ((obj->userptr.mm = get_task_mm(current)))
+		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
+	if (ret == 0)
+		ret = drm_gem_handle_create(file, &obj->base, &handle);
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&obj->base);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
+
+int
+i915_gem_init_userptr(struct drm_device *dev)
+{
+#if defined(CONFIG_MMU_NOTIFIER)
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	hash_init(dev_priv->mmu_notifiers);
+#endif
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 2d819858c19b..dde1b0c2ff31 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -205,6 +205,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
+		err_puts(m, err->userptr ? " userptr" : "");
 		err_puts(m, err->ring != -1 ? " " : "");
 		err_puts(m, ring_str(err->ring));
 		err_puts(m, i915_cache_level_str(err->cache_level));
@@ -641,6 +642,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->tiling = obj->tiling_mode;
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
+	err->userptr = obj->userptr.mm != 0;
 	err->ring = obj->ring ? obj->ring->id : -1;
 	err->cache_level = obj->cache_level;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 4e0711e72cf8..7df2558d84c9 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -224,6 +224,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_REG_READ		0x31
 #define DRM_I915_GET_RESET_STATS	0x32
 #define DRM_I915_GEM_CREATE2		0x33
+#define DRM_I915_GEM_USERPTR		0x34
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -275,6 +276,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
+#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1158,4 +1160,18 @@ struct drm_i915_reset_stats {
 	__u32 pad;
 };
 
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u64 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 #endif /* _UAPI_I915_DRM_H_ */
-- 
2.0.0.rc2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-01-27 17:56     ` Volkin, Bradley D
@ 2014-01-27 18:09       ` Chris Wilson
  0 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2014-01-27 18:09 UTC (permalink / raw)
  To: Volkin, Bradley D; +Cc: intel-gfx, Goel, Akash

On Mon, Jan 27, 2014 at 09:56:12AM -0800, Volkin, Bradley D wrote:
> > +static void
> > +i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
> > +		      struct i915_mmu_object *mn)
> > +{
> > +	bool destroy;
> > +
> > +	spin_lock(&mmu->lock);
> > +	interval_tree_remove(&mn->it, &mmu->objects);
> > +	destroy = --mmu->count == 0;
> > +	__i915_mmu_notifier_update_serial(mmu);
> > +	spin_unlock(&mmu->lock);
> > +
> > +	if (destroy) /* protected against _add() by struct_mutex */
> > +		__i915_mmu_notifier_destroy(mmu);
> 
> I see that we should hold struct_mutex when this function is called,
> but I don't see that we try to get the mutex anywhere on the _add() path.
> Have I missed something?

No, it's fixed in a later patch. I assumed I had the lock taken in the
outermost ioctl routine.

> > +free_mn:
> > +	kfree(mn);
> > +destroy_mmu:
> > +	if (mmu->count == 0)
> > +		__i915_mmu_notifier_destroy(mmu);
> 
> Other accesses to mmu->count are protected by mmu->lock. Again, I may have missed
> something but don't immediately see why that's not required.

mmu->count is protected by struct_mutex. See above.
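
For reference, the later (v23) revision of the patch makes that rule
explicit with lockdep instead of leaving it to the caller; a minimal
sketch of the pattern, with the body elided:

/* mmu->count and the notifier hash are serialised by dev->struct_mutex,
 * not mmu->lock, so each helper asserts the requirement up front and
 * lockdep flags any caller - such as the _add() path queried above -
 * that forgot to take it.
 */
static void i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
				  struct i915_mmu_object *mn)
{
	lockdep_assert_held(&mmu->dev->struct_mutex);

	/* ... interval_tree_remove() and the refcount drop as before ... */
}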

> > +	if (pinned < num_pages) {
> > +		if (pinned < 0) {
> > +			ret = pinned;
> > +			pinned = 0;
> > +		} else {
> > +			/* Spawn a worker so that we can acquire the
> > +			 * user pages without holding our mutex.
> > +			 */
> > +			ret = -EAGAIN;
> > +			if (obj->userptr.work == NULL) {
> > +				struct get_pages_work *work;
> > +
> > +				work = kmalloc(sizeof(*work), GFP_KERNEL);
> > +				if (work != NULL) {
> > +					obj->userptr.work = &work->work;
> > +
> > +					work->obj = obj;
> > +					drm_gem_object_reference(&obj->base);
> > +
> > +					work->task = current;
> > +					get_task_struct(work->task);
> > +
> > +					INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
> > +					schedule_work(&work->work);
> 
> Any reason to use the system wq instead of the driver wq here?
> It doesn't look like it's the usual "takes modeset locks" justification.

Performance. Using the driver workqueue would serialise the clients, but
using the system workqueue we can do the pagefaulting in parallel.
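
A sketch of the trade-off, assuming dev_priv->wq names the driver's
ordered workqueue; only the queueing call differs:

INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);

/* driver wq: gup workers from all clients would run one at a time */
/* queue_work(dev_priv->wq, &work->work); */

/* system wq: workers for independent objects fault their pages in
 * parallel, which is what the patch does.
 */
schedule_work(&work->work);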
> 
> > +				} else
> > +					ret = -ENOMEM;
> > +			} else {
> > +				if (IS_ERR(obj->userptr.work)) {
> 
> } else if (...) { ?

No, I think it is clearer as is.
 
> > +					ret = PTR_ERR(obj->userptr.work);
> > +					obj->userptr.work = NULL;
> > +				}
> > +			}
> > +		}
> > +	} else {
> > +		struct sg_table *st;
> > +
> > +		st = kmalloc(sizeof(*st), GFP_KERNEL);
> > +		if (st == NULL || sg_alloc_table(st, num_pages, GFP_KERNEL)) {
> > +			kfree(st);
> > +			ret = -ENOMEM;
> > +		} else {
> > +			struct scatterlist *sg;
> > +			int n;
> > +
> > +			for_each_sg(st->sgl, sg, num_pages, n)
> > +				sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
> > +
> > +			obj->pages = st;
> > +			obj->userptr.work = NULL;
> > +
> > +			pinned = 0;
> > +			ret = 0;
> > +		}
> 
> This block is almost identical to code in the worker. Would it be worth extracting
> the common parts into a helper?

Almost, but subtly and importantly different. Only the loop was the
same, at which point I didn't feel the saving was significant. It is
now even less similar.
 
> > +	}
> > +
> > +	release_pages(pvec, pinned, 0);
> > +	drm_free_large(pvec);
> > +	return ret;
> > +}
> > +
> > +static void
> > +i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
> > +{
> > +	struct scatterlist *sg;
> > +	int i;
> > +
> > +	if (obj->madv != I915_MADV_WILLNEED)
> > +		obj->dirty = 0;
> 
> This is subtly different than similar code in the standard put_pages() in that
> it sets dirty=0 for both DONTNEED and PURGED instead of just DONTNEED (w/ BUG_ON(PURGED)).
> I don't think we will ever actually truncate userptr objects, so is there any reason for
> this to be different?

No, there's no reason for the difference. Semantically it is the same, of
course.
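
The contrast being discussed, sketched from the description above (the
shmem-backed path here is paraphrased, not quoted from i915_gem.c):

/* Standard put_pages: a purged object reaching this point is a bug,
 * and only an explicit DONTNEED clears the dirty tracking.
 */
BUG_ON(obj->madv == __I915_MADV_PURGED);
if (obj->madv == I915_MADV_DONTNEED)
	obj->dirty = 0;

/* Userptr variant quoted above: both cases fold together, which is
 * equivalent so long as userptr objects are never truncated/purged.
 */
if (obj->madv != I915_MADV_WILLNEED)
	obj->dirty = 0;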

> > +/**
> > + * Creates a new mm object that wraps some normal memory from the process
> > + * context - user memory.
> > + *
> > + * We impose several restrictions upon the memory being mapped
> > + * into the GPU.
> > + * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
> > + * 2. We only allow a bo as large as we could in theory map into the GTT,
> > + *    that is we limit the size to the total size of the GTT.
> > + * 3. The bo is marked as being snoopable. The backing pages are left
> > + *    accessible directly by the CPU, but reads by the GPU may incur the cost
> > + *    of a snoop (unless you have an LLC architecture).
> 
> No overlapping ranges

Yes, that is an important addition.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-01-24  9:00   ` Chris Wilson
@ 2014-01-27 17:56     ` Volkin, Bradley D
  2014-01-27 18:09       ` Chris Wilson
  0 siblings, 1 reply; 34+ messages in thread
From: Volkin, Bradley D @ 2014-01-27 17:56 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Goel, Akash

Hi Chris,

A few questions/comments throughout. I may be off the mark on some. Please
bear with me as I try to get more familiar with the gem code.

Thanks,
Brad

[ snip ]

On Fri, Jan 24, 2014 at 01:00:19AM -0800, Chris Wilson wrote:
> +static void
> +__i915_mmu_notifier_destroy_worker(struct work_struct *work)
> +{
> +	struct i915_mmu_notifier *mmu = container_of(work, typeof(*mmu), work);
> +	mmu_notifier_unregister(&mmu->mn, mmu->mm);
> +	kfree(mmu);
> +}
> +
> +static void
> +__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
> +{
> +	hash_del(&mmu->node);
> +	INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
> +	schedule_work(&mmu->work);

The commit message mentions a potential lock inversion as the reason for using a wq.
A comment with the details might be helpful.

> +}
> +
> +static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
> +{
> +	if (++mmu->serial == 0)
> +		mmu->serial = 1;
> +}
> +
> +static void
> +i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
> +		      struct i915_mmu_object *mn)
> +{
> +	bool destroy;
> +
> +	spin_lock(&mmu->lock);
> +	interval_tree_remove(&mn->it, &mmu->objects);
> +	destroy = --mmu->count == 0;
> +	__i915_mmu_notifier_update_serial(mmu);
> +	spin_unlock(&mmu->lock);
> +
> +	if (destroy) /* protected against _add() by struct_mutex */
> +		__i915_mmu_notifier_destroy(mmu);

I see that we should hold struct_mutex when this function is called,
but I don't see that we try to get the mutex anywhere on the _add() path.
Have I missed something?

> +}
> +
> +static int
> +i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
> +		      struct i915_mmu_object *mn)
> +{
> +	int ret = -EINVAL;
> +
> +	spin_lock(&mmu->lock);
> +	/* Disallow overlapping userptr objects */
> +	if (!interval_tree_iter_first(&mmu->objects,
> +				      mn->it.start, mn->it.last)) {
> +		interval_tree_insert(&mn->it, &mmu->objects);
> +		mmu->count++;
> +		__i915_mmu_notifier_update_serial(mmu);
> +		ret = 0;
> +	}
> +	spin_unlock(&mmu->lock);
> +
> +	return ret;
> +}
> +
> +static void
> +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_mmu_object *mn;
> +
> +	mn = obj->userptr.mn;
> +	if (mn == NULL)
> +		return;
> +
> +	i915_mmu_notifier_del(mn->mmu, mn);
> +	obj->userptr.mn = NULL;
> +}
> +
> +static int
> +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
> +				    unsigned flags)
> +{
> +	struct i915_mmu_notifier *mmu;
> +	struct i915_mmu_object *mn;
> +	int ret;
> +
> +	if (flags & I915_USERPTR_UNSYNCHRONIZED)
> +		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
> +
> +	mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
> +	if (IS_ERR(mmu))
> +		return PTR_ERR(mmu);
> +
> +	mn = kzalloc(sizeof(*mn), GFP_KERNEL);
> +	if (mn == NULL) {
> +		ret = -ENOMEM;
> +		goto destroy_mmu;
> +	}
> +
> +	mn->mmu = mmu;
> +	mn->it.start = obj->userptr.ptr;
> +	mn->it.last = mn->it.start + obj->base.size - 1;
> +	mn->obj = obj;
> +
> +	ret = i915_mmu_notifier_add(mmu, mn);
> +	if (ret)
> +		goto free_mn;
> +
> +	obj->userptr.mn = mn;
> +	return 0;
> +
> +free_mn:
> +	kfree(mn);
> +destroy_mmu:
> +	if (mmu->count == 0)
> +		__i915_mmu_notifier_destroy(mmu);

Other accesses to mmu->count are protected by mmu->lock. Again, I may have missed
something but don't immediately see why that's not required.

> +	return ret;
> +}
> +
> +#else
> +
> +static void
> +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
> +{
> +}
> +
> +static int
> +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
> +				    unsigned flags)
> +{
> +	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
> +		return -ENODEV;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	return 0;
> +}
> +#endif
> +
> +struct get_pages_work {
> +	struct work_struct work;
> +	struct drm_i915_gem_object *obj;
> +	struct task_struct *task;
> +};
> +
> +static void
> +__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
> +{
> +	struct get_pages_work *work = container_of(_work, typeof(*work), work);
> +	struct drm_i915_gem_object *obj = work->obj;
> +	struct drm_device *dev = obj->base.dev;
> +	const int num_pages = obj->base.size >> PAGE_SHIFT;
> +	struct page **pvec;
> +	int pinned, ret;
> +
> +	ret = -ENOMEM;
> +	pinned = 0;
> +
> +	pvec = kmalloc(num_pages*sizeof(struct page *),
> +		       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
> +	if (pvec == NULL)
> +		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
> +	if (pvec != NULL) {
> +		struct mm_struct *mm = obj->userptr.mm;
> +
> +		use_mm(mm);
> +		down_read(&mm->mmap_sem);
> +		while (pinned < num_pages) {
> +			ret = get_user_pages(work->task, mm,
> +					     obj->userptr.ptr + pinned * PAGE_SIZE,
> +					     num_pages - pinned,
> +					     !obj->userptr.read_only, 0,
> +					     pvec + pinned, NULL);
> +			if (ret < 0)
> +				break;
> +
> +			pinned += ret;
> +		}
> +		up_read(&mm->mmap_sem);
> +		unuse_mm(mm);
> +	}
> +
> +	mutex_lock(&dev->struct_mutex);
> +	if (obj->userptr.work != &work->work) {
> +		ret = 0;
> +	} else if (pinned == num_pages) {
> +		struct sg_table *st;
> +
> +		st = kmalloc(sizeof(*st), GFP_KERNEL);
> +		if (st == NULL || sg_alloc_table(st, num_pages, GFP_KERNEL)) {
> +			kfree(st);
> +			ret = -ENOMEM;
> +		} else {
> +			struct scatterlist *sg;
> +			int n;
> +
> +			for_each_sg(st->sgl, sg, num_pages, n)
> +				sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
> +
> +			obj->pages = st;
> +			list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list);
> +			pinned = 0;
> +			ret = 0;
> +		}
> +	}
> +
> +	obj->userptr.work = ERR_PTR(ret);
> +	drm_gem_object_unreference(&obj->base);
> +	mutex_unlock(&dev->struct_mutex);
> +
> +	release_pages(pvec, pinned, 0);
> +	drm_free_large(pvec);
> +
> +	put_task_struct(work->task);
> +	kfree(work);
> +}
> +
> +static int
> +i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
> +{
> +	const int num_pages = obj->base.size >> PAGE_SHIFT;
> +	struct page **pvec;
> +	int pinned, ret;
> +
> +	/* If userspace should engineer that these pages are replaced in
> +	 * the vma between us binding this page into the GTT and completion
> +	 * of rendering... Their loss. If they change the mapping of their
> +	 * pages they need to create a new bo to point to the new vma.
> +	 *
> +	 * However, that still leaves open the possibility of the vma
> +	 * being copied upon fork. Which falls under the same userspace
> +	 * synchronisation issue as a regular bo, except that this time
> +	 * the process may not be expecting that a particular piece of
> +	 * memory is tied to the GPU.
> +	 *
> +	 * Fortunately, we can hook into the mmu_notifier in order to
> +	 * discard the page references prior to anything nasty happening
> +	 * to the vma (discard or cloning) which should prevent the more
> +	 * egregious cases from causing harm.
> +	 */
> +
> +	pvec = NULL;
> +	pinned = 0;
> +	if (obj->userptr.mm == current->mm) {
> +		pvec = kmalloc(num_pages*sizeof(struct page *),
> +			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
> +		if (pvec == NULL) {
> +			pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
> +			if (pvec == NULL)
> +				return -ENOMEM;
> +		}
> +
> +		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
> +					       !obj->userptr.read_only, pvec);
> +	}
> +	if (pinned < num_pages) {
> +		if (pinned < 0) {
> +			ret = pinned;
> +			pinned = 0;
> +		} else {
> +			/* Spawn a worker so that we can acquire the
> +			 * user pages without holding our mutex.
> +			 */
> +			ret = -EAGAIN;
> +			if (obj->userptr.work == NULL) {
> +				struct get_pages_work *work;
> +
> +				work = kmalloc(sizeof(*work), GFP_KERNEL);
> +				if (work != NULL) {
> +					obj->userptr.work = &work->work;
> +
> +					work->obj = obj;
> +					drm_gem_object_reference(&obj->base);
> +
> +					work->task = current;
> +					get_task_struct(work->task);
> +
> +					INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
> +					schedule_work(&work->work);

Any reason to use the system wq instead of the driver wq here?
It doesn't look like it's the usual "takes modeset locks" justification.

> +				} else
> +					ret = -ENOMEM;
> +			} else {
> +				if (IS_ERR(obj->userptr.work)) {

} else if (...) { ?

> +					ret = PTR_ERR(obj->userptr.work);
> +					obj->userptr.work = NULL;
> +				}
> +			}
> +		}
> +	} else {
> +		struct sg_table *st;
> +
> +		st = kmalloc(sizeof(*st), GFP_KERNEL);
> +		if (st == NULL || sg_alloc_table(st, num_pages, GFP_KERNEL)) {
> +			kfree(st);
> +			ret = -ENOMEM;
> +		} else {
> +			struct scatterlist *sg;
> +			int n;
> +
> +			for_each_sg(st->sgl, sg, num_pages, n)
> +				sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
> +
> +			obj->pages = st;
> +			obj->userptr.work = NULL;
> +
> +			pinned = 0;
> +			ret = 0;
> +		}

This block is almost identical to code in the worker. Would it be worth extracting
the common parts into a helper?

> +	}
> +
> +	release_pages(pvec, pinned, 0);
> +	drm_free_large(pvec);
> +	return ret;
> +}
> +
> +static void
> +i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct scatterlist *sg;
> +	int i;
> +
> +	if (obj->madv != I915_MADV_WILLNEED)
> +		obj->dirty = 0;

This is subtly different than similar code in the standard put_pages() in that
it sets dirty=0 for both DONTNEED and PURGED instead of just DONTNEED (w/ BUG_ON(PURGED)).
I don't think we will ever actually truncate userptr objects, so is there any reason for
this to be different?

> +
> +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> +		struct page *page = sg_page(sg);
> +
> +		if (obj->dirty)
> +			set_page_dirty(page);
> +
> +		mark_page_accessed(page);
> +		page_cache_release(page);
> +	}
> +	obj->dirty = 0;
> +
> +	sg_free_table(obj->pages);
> +	kfree(obj->pages);
> +
> +	BUG_ON(obj->userptr.work != NULL);
> +}
> +
> +static void
> +i915_gem_userptr_release(struct drm_i915_gem_object *obj)
> +{
> +	i915_gem_userptr_release__mmu_notifier(obj);
> +
> +	if (obj->userptr.mm) {
> +		mmput(obj->userptr.mm);
> +		obj->userptr.mm = NULL;
> +	}
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
> +	.get_pages = i915_gem_userptr_get_pages,
> +	.put_pages = i915_gem_userptr_put_pages,
> +	.release = i915_gem_userptr_release,
> +};
> +
> +/**
> + * Creates a new mm object that wraps some normal memory from the process
> + * context - user memory.
> + *
> + * We impose several restrictions upon the memory being mapped
> + * into the GPU.
> > + * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
> + * 2. We only allow a bo as large as we could in theory map into the GTT,
> + *    that is we limit the size to the total size of the GTT.
> + * 3. The bo is marked as being snoopable. The backing pages are left
> + *    accessible directly by the CPU, but reads by the GPU may incur the cost
> + *    of a snoop (unless you have an LLC architecture).

No overlapping ranges

- Brad

> + *
> + * Synchronisation between multiple users and the GPU is left to userspace
> + * through the normal set-domain-ioctl. The kernel will enforce that the
> + * GPU relinquishes the VMA before it is returned back to the system
> + * i.e. upon free(), munmap() or process termination. However, the userspace
> + * malloc() library may not immediately relinquish the VMA after free() and
> + * instead reuse it whilst the GPU is still reading and writing to the VMA.
> + * Caveat emptor.
> + *
> + * Also note, that the object created here is not currently a "first class"
> + * object, in that several ioctls are banned. These are the CPU access
> + * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
> + * direct access via your pointer rather than use those ioctls.
> + *
> + * If you think this is a good interface to use to pass GPU memory between
> + * drivers, please use dma-buf instead. In fact, wherever possible use
> + * dma-buf instead.
> + */
> +int
> +i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_gem_userptr *args = data;
> +	struct drm_i915_gem_object *obj;
> +	int ret;
> +	u32 handle;
> +
> +	if (args->flags & ~(I915_USERPTR_READ_ONLY |
> +			    I915_USERPTR_UNSYNCHRONIZED))
> +		return -EINVAL;
> +
> +	if (offset_in_page(args->user_ptr | args->user_size))
> +		return -EINVAL;
> +
> +	if (args->user_size > dev_priv->gtt.base.total)
> +		return -E2BIG;
> +
> +	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
> +		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
> +		return -EFAULT;
> +
> +	/* Allocate the new object */
> +	obj = i915_gem_object_alloc(dev);
> +	if (obj == NULL)
> +		return -ENOMEM;
> +
> +	drm_gem_private_object_init(dev, &obj->base, args->user_size);
> +	i915_gem_object_init(obj, &i915_gem_userptr_ops);
> +	obj->cache_level = I915_CACHE_LLC;
> +	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> +
> +	obj->userptr.ptr = args->user_ptr;
> +	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
> +
> +	/* And keep a pointer to the current->mm for resolving the user pages
> +	 * at binding. This means that we need to hook into the mmu_notifier
> +	 * in order to detect if the mmu is destroyed.
> +	 */
> +	ret = -ENOMEM;
> +	if ((obj->userptr.mm = get_task_mm(current)))
> +		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
> +	if (ret == 0)
> +		ret = drm_gem_handle_create(file, &obj->base, &handle);
> +
> +	/* drop reference from allocate - handle holds it now */
> +	drm_gem_object_unreference_unlocked(&obj->base);
> +	if (ret)
> +		return ret;
> +
> +	args->handle = handle;
> +	return 0;
> +}
> +
> +int
> +i915_gem_init_userptr(struct drm_device *dev)
> +{
> +#if defined(CONFIG_MMU_NOTIFIER)
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	hash_init(dev_priv->mmu_notifiers);
> +#endif
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index ae8cf61b8ce3..cce9f559e3d7 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -201,6 +201,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
>  		err_puts(m, tiling_flag(err->tiling));
>  		err_puts(m, dirty_flag(err->dirty));
>  		err_puts(m, purgeable_flag(err->purgeable));
> +		err_puts(m, err->userptr ? " userptr" : "");
>  		err_puts(m, err->ring != -1 ? " " : "");
>  		err_puts(m, ring_str(err->ring));
>  		err_puts(m, i915_cache_level_str(err->cache_level));
> @@ -584,6 +585,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>  	err->tiling = obj->tiling_mode;
>  	err->dirty = obj->dirty;
>  	err->purgeable = obj->madv != I915_MADV_WILLNEED;
> +	err->userptr = obj->userptr.mm != 0;
>  	err->ring = obj->ring ? obj->ring->id : -1;
>  	err->cache_level = obj->cache_level;
>  }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 37c8073a8246..6c145a0be250 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -224,6 +224,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_REG_READ		0x31
>  #define DRM_I915_GET_RESET_STATS	0x32
>  #define DRM_I915_GEM_CREATE2		0x33
> +#define DRM_I915_GEM_USERPTR		0x34
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -275,6 +276,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>  #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
>  #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
> +#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
>  
>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>   * on the security mechanisms provided by hardware.
> @@ -1129,4 +1131,18 @@ struct drm_i915_reset_stats {
>  	__u32 pad;
>  };
>  
> +struct drm_i915_gem_userptr {
> +	__u64 user_ptr;
> +	__u64 user_size;
> +	__u32 flags;
> +#define I915_USERPTR_READ_ONLY 0x1
> +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> +	/**
> +	 * Returned handle for the object.
> +	 *
> +	 * Object handles are nonzero.
> +	 */
> +	__u32 handle;
> +};
> +
>  #endif /* _UAPI_I915_DRM_H_ */
> -- 
> 1.8.5.3
> 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-01-22  9:46 ` [PATCH] " Chris Wilson
@ 2014-01-24  9:00   ` Chris Wilson
  2014-01-27 17:56     ` Volkin, Bradley D
  0 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2014-01-24  9:00 UTC (permalink / raw)
  To: intel-gfx; +Cc: Akash Goel

By exporting the ability to map user addresses and insert PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications, from
faster rendering of client-side software rasterisers (chromium) and
mitigation of stalls due to readback (firefox), to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).
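
For readers unfamiliar with the new uapi, here is a minimal illustrative
userspace sketch (not part of the patch) of wrapping an ordinary
allocation. It assumes the updated include/uapi/drm/i915_drm.h from this
series is on the include path and that a render node exists at the usual
/dev/dri path; real code would use drmIoctl() to restart on EINTR and
check errors more carefully:

  #include <fcntl.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/ioctl.h>
  #include <drm/i915_drm.h>

  /* Wrap 'size' bytes of page-aligned process memory as a GEM bo. */
  static uint32_t wrap_userptr(int fd, void *ptr, uint64_t size)
  {
          struct drm_i915_gem_userptr arg;

          memset(&arg, 0, sizeof(arg));
          arg.user_ptr = (uintptr_t)ptr; /* must be page aligned */
          arg.user_size = size;          /* must be a multiple of the page size */
          arg.flags = 0;                 /* or I915_USERPTR_READ_ONLY */

          if (ioctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
                  return 0; /* object handles are nonzero, so 0 means failure */
          return arg.handle;
  }

  int main(void)
  {
          const size_t sz = 1 << 20;
          int fd = open("/dev/dri/renderD128", O_RDWR); /* node name is an assumption */
          void *buf;

          if (fd < 0 || posix_memalign(&buf, sysconf(_SC_PAGESIZE), sz))
                  return 1;

          printf("userptr handle %u\n", (unsigned)wrap_userptr(fd, buf, sz));
          return 0;
  }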

v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquire mutex for lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forward porting the backport.
v9: Limit the userptr to page aligned entries. We now expect userspace
    to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
     Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifier
     with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
     within the mmu_notifier range (see the interval sketch after this
     changelog)
v16: Fix a typo for iterating over multiple objects in the range and
     rearrange error path to destroy the mmu_notifier locklessly.
     Also close a race between invalidate_range and the get_pages_worker.
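
A brief aside on the interval handling referenced in v15/v16 above: the
per-mm tree stores inclusive ranges, which is why an object's node
records last = start + size - 1 and why the notifier now decrements the
exclusive end it is given before walking the tree. A self-contained
illustration in plain C (the helper is mine and merely stands in for the
augmented rbtree used by the patch):

  #include <assert.h>
  #include <stdint.h>

  /* Inclusive intervals [s, l], mirroring how userptr objects are stored. */
  static int ranges_overlap(uint64_t s1, uint64_t l1, uint64_t s2, uint64_t l2)
  {
          return s1 <= l2 && s2 <= l1;
  }

  int main(void)
  {
          uint64_t start = 0x1000, size = 0x2000;
          uint64_t last = start + size - 1; /* 0x2fff, inclusive */

          /* An invalidate of [0x3000, 0x4000) misses the object... */
          assert(!ranges_overlap(start, last, 0x3000, 0x4000 - 1));
          /* ...while [0x2000, 0x3000) hits it and must unbind/put its pages. */
          assert(ranges_overlap(start, last, 0x2000, 0x3000 - 1));
          return 0;
  }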

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/Makefile           |   1 +
 drivers/gpu/drm/i915/i915_dma.c         |   1 +
 drivers/gpu/drm/i915/i915_drv.h         |  24 +-
 drivers/gpu/drm/i915/i915_gem.c         |   4 +
 drivers/gpu/drm/i915/i915_gem_userptr.c | 608 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c   |   2 +
 include/uapi/drm/i915_drm.h             |  16 +
 7 files changed, 655 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 45071d18c730..1dc14e51f3bd 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -15,6 +15,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
 	  i915_gem_gtt.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_userptr.o \
 	  i915_sysfs.o \
 	  i915_trace_points.o \
 	  i915_ums.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index f053d1099d3b..c34f2e7ae6d9 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1919,6 +1919,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE2, i915_gem_create2_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 73cff1119247..dac8aeecfa3e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
 #include <linux/i2c-algo-bit.h>
 #include <drm/intel-gtt.h>
 #include <linux/backlight.h>
+#include <linux/hashtable.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
@@ -163,6 +164,7 @@ enum hpd_pin {
 		if ((intel_encoder)->base.crtc == (__crtc))
 
 struct drm_i915_private;
+struct i915_mmu_notifier;
 
 enum intel_dpll_id {
 	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
@@ -354,6 +356,7 @@ struct drm_i915_error_state {
 		u32 tiling:2;
 		u32 dirty:1;
 		u32 purgeable:1;
+		u32 userptr:1;
 		s32 ring:4;
 		u32 cache_level:3;
 	} **active_bo, **pinned_bo;
@@ -1463,6 +1466,9 @@ typedef struct drm_i915_private {
 	struct i915_gtt gtt; /* VMA representing the global address space */
 
 	struct i915_gem_mm mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+	DECLARE_HASHTABLE(mmu_notifiers, 7);
+#endif
 
 	/* Kernel Modesetting */
 
@@ -1594,6 +1600,7 @@ struct drm_i915_gem_object_ops {
 	 */
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
+	void (*release)(struct drm_i915_gem_object *);
 };
 
 struct drm_i915_gem_object {
@@ -1707,9 +1714,21 @@ struct drm_i915_gem_object {
 
 	/** for phy allocated objects */
 	struct drm_i915_gem_phys_object *phys_obj;
+
+	union {
+		struct i915_gem_userptr {
+			uintptr_t ptr;
+			unsigned read_only :1;
+
+			struct mm_struct *mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+			struct i915_mmu_object *mn;
+#endif
+			struct work_struct *work;
+		} userptr;
+	};
 };
 #define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base)
-
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /**
@@ -2010,6 +2029,9 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
 int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
+int i915_gem_init_userptr(struct drm_device *dev);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
 int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8912aaa7118a..0d20af63b876 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4380,6 +4380,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->base.import_attach)
 		drm_prime_gem_destroy(&obj->base, NULL);
 
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
@@ -4638,6 +4641,7 @@ int i915_gem_init(struct drm_device *dev)
 			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
 	}
 
+	i915_gem_init_userptr(dev);
 	i915_gem_init_global_gtt(dev);
 
 	ret = i915_gem_context_init(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 000000000000..82c33d1bcb50
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -0,0 +1,608 @@
+/*
+ * Copyright © 2012-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_context.h>
+#include <linux/mmu_notifier.h>
+#include <linux/mempolicy.h>
+#include <linux/swap.h>
+
+#if defined(CONFIG_MMU_NOTIFIER)
+#include <linux/interval_tree.h>
+
+struct i915_mmu_notifier {
+	spinlock_t lock;
+	struct hlist_node node;
+	struct mmu_notifier mn;
+	struct rb_root objects;
+	struct drm_device *dev;
+	struct mm_struct *mm;
+	struct work_struct work;
+	unsigned long count;
+	unsigned long serial;
+};
+
+struct i915_mmu_object {
+	struct i915_mmu_notifier *mmu;
+	struct interval_tree_node it;
+	struct drm_i915_gem_object *obj;
+};
+
+static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
+						       struct mm_struct *mm,
+						       unsigned long start,
+						       unsigned long end)
+{
+	struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
+	struct interval_tree_node *it = NULL;
+	unsigned long serial = 0;
+
+	end--; /* interval ranges are inclusive, but invalidate range is exclusive */
+	while (start < end) {
+		struct drm_i915_gem_object *obj;
+
+		obj = NULL;
+		spin_lock(&mn->lock);
+		if (serial == mn->serial)
+			it = interval_tree_iter_next(it, start, end);
+		else
+			it = interval_tree_iter_first(&mn->objects, start, end);
+		if (it != NULL) {
+			obj = container_of(it, struct i915_mmu_object, it)->obj;
+			drm_gem_object_reference(&obj->base);
+			serial = mn->serial;
+		}
+		spin_unlock(&mn->lock);
+		if (obj == NULL)
+			return;
+
+		mutex_lock(&mn->dev->struct_mutex);
+		/* Cancel any active worker and force us to re-evaluate gup */
+		obj->userptr.work = NULL;
+
+		if (obj->pages != NULL) {
+			struct drm_i915_private *dev_priv = to_i915(mn->dev);
+			struct i915_vma *vma, *tmp;
+			bool was_interruptible;
+
+			was_interruptible = dev_priv->mm.interruptible;
+			dev_priv->mm.interruptible = false;
+
+			list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
+				int ret = i915_vma_unbind(vma);
+				WARN_ON(ret && ret != -EIO);
+			}
+			WARN_ON(i915_gem_object_put_pages(obj));
+
+			dev_priv->mm.interruptible = was_interruptible;
+		}
+
+		start = obj->userptr.ptr + obj->base.size;
+
+		drm_gem_object_unreference(&obj->base);
+		mutex_unlock(&mn->dev->struct_mutex);
+	}
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
+};
+
+static struct i915_mmu_notifier *
+__i915_mmu_notifier_lookup(struct drm_device *dev, struct mm_struct *mm)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_mmu_notifier *mmu;
+
+	hash_for_each_possible(dev_priv->mmu_notifiers, mmu, node, (unsigned long)mm)
+		if (mmu->mm == mm)
+			return mmu;
+
+	return NULL;
+}
+
+static struct i915_mmu_notifier *
+i915_mmu_notifier_get(struct drm_device *dev, struct mm_struct *mm)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_mmu_notifier *mmu;
+	int ret;
+
+	mmu = __i915_mmu_notifier_lookup(dev, mm);
+	if (mmu)
+		return mmu;
+
+	mmu = kmalloc(sizeof(*mmu), GFP_KERNEL);
+	if (mmu == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&mmu->lock);
+	mmu->dev = dev;
+	mmu->mn.ops = &i915_gem_userptr_notifier;
+	mmu->mm = mm;
+	mmu->objects = RB_ROOT;
+	mmu->count = 0;
+	mmu->serial = 0;
+
+	ret = mmu_notifier_register(&mmu->mn, mm);
+	if (ret) {
+		kfree(mmu);
+		return ERR_PTR(ret);
+	}
+
+	hash_add(dev_priv->mmu_notifiers, &mmu->node, (unsigned long)mm);
+	return mmu;
+}
+
+static void
+__i915_mmu_notifier_destroy_worker(struct work_struct *work)
+{
+	struct i915_mmu_notifier *mmu = container_of(work, typeof(*mmu), work);
+	mmu_notifier_unregister(&mmu->mn, mmu->mm);
+	kfree(mmu);
+}
+
+static void
+__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
+{
+	hash_del(&mmu->node);
+	INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
+	schedule_work(&mmu->work);
+}
+
+static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
+{
+	if (++mmu->serial == 0)
+		mmu->serial = 1;
+}
+
+static void
+i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
+		      struct i915_mmu_object *mn)
+{
+	bool destroy;
+
+	spin_lock(&mmu->lock);
+	interval_tree_remove(&mn->it, &mmu->objects);
+	destroy = --mmu->count == 0;
+	__i915_mmu_notifier_update_serial(mmu);
+	spin_unlock(&mmu->lock);
+
+	if (destroy) /* protected against _add() by struct_mutex */
+		__i915_mmu_notifier_destroy(mmu);
+}
+
+static int
+i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
+		      struct i915_mmu_object *mn)
+{
+	int ret = -EINVAL;
+
+	spin_lock(&mmu->lock);
+	/* Disallow overlapping userptr objects */
+	if (!interval_tree_iter_first(&mmu->objects,
+				      mn->it.start, mn->it.last)) {
+		interval_tree_insert(&mn->it, &mmu->objects);
+		mmu->count++;
+		__i915_mmu_notifier_update_serial(mmu);
+		ret = 0;
+	}
+	spin_unlock(&mmu->lock);
+
+	return ret;
+}
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+	struct i915_mmu_object *mn;
+
+	mn = obj->userptr.mn;
+	if (mn == NULL)
+		return;
+
+	i915_mmu_notifier_del(mn->mmu, mn);
+	obj->userptr.mn = NULL;
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	struct i915_mmu_notifier *mmu;
+	struct i915_mmu_object *mn;
+	int ret;
+
+	if (flags & I915_USERPTR_UNSYNCHRONIZED)
+		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+
+	mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
+	if (IS_ERR(mmu))
+		return PTR_ERR(mmu);
+
+	mn = kzalloc(sizeof(*mn), GFP_KERNEL);
+	if (mn == NULL) {
+		ret = -ENOMEM;
+		goto destroy_mmu;
+	}
+
+	mn->mmu = mmu;
+	mn->it.start = obj->userptr.ptr;
+	mn->it.last = mn->it.start + obj->base.size - 1;
+	mn->obj = obj;
+
+	ret = i915_mmu_notifier_add(mmu, mn);
+	if (ret)
+		goto free_mn;
+
+	obj->userptr.mn = mn;
+	return 0;
+
+free_mn:
+	kfree(mn);
+destroy_mmu:
+	if (mmu->count == 0)
+		__i915_mmu_notifier_destroy(mmu);
+	return ret;
+}
+
+#else
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
+		return -ENODEV;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return 0;
+}
+#endif
+
+struct get_pages_work {
+	struct work_struct work;
+	struct drm_i915_gem_object *obj;
+	struct task_struct *task;
+};
+
+static void
+__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
+{
+	struct get_pages_work *work = container_of(_work, typeof(*work), work);
+	struct drm_i915_gem_object *obj = work->obj;
+	struct drm_device *dev = obj->base.dev;
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	ret = -ENOMEM;
+	pinned = 0;
+
+	pvec = kmalloc(num_pages*sizeof(struct page *),
+		       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+	if (pvec == NULL)
+		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+	if (pvec != NULL) {
+		struct mm_struct *mm = obj->userptr.mm;
+
+		use_mm(mm);
+		down_read(&mm->mmap_sem);
+		while (pinned < num_pages) {
+			ret = get_user_pages(work->task, mm,
+					     obj->userptr.ptr + pinned * PAGE_SIZE,
+					     num_pages - pinned,
+					     !obj->userptr.read_only, 0,
+					     pvec + pinned, NULL);
+			if (ret < 0)
+				break;
+
+			pinned += ret;
+		}
+		up_read(&mm->mmap_sem);
+		unuse_mm(mm);
+	}
+
+	mutex_lock(&dev->struct_mutex);
+	if (obj->userptr.work != &work->work) {
+		ret = 0;
+	} else if (pinned == num_pages) {
+		struct sg_table *st;
+
+		st = kmalloc(sizeof(*st), GFP_KERNEL);
+		if (st == NULL || sg_alloc_table(st, num_pages, GFP_KERNEL)) {
+			kfree(st);
+			ret = -ENOMEM;
+		} else {
+			struct scatterlist *sg;
+			int n;
+
+			for_each_sg(st->sgl, sg, num_pages, n)
+				sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+
+			obj->pages = st;
+			list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list);
+			pinned = 0;
+			ret = 0;
+		}
+	}
+
+	obj->userptr.work = ERR_PTR(ret);
+	drm_gem_object_unreference(&obj->base);
+	mutex_unlock(&dev->struct_mutex);
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+
+	put_task_struct(work->task);
+	kfree(work);
+}
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 *
+	 * Fortunately, we can hook into the mmu_notifier in order to
+	 * discard the page references prior to anything nasty happening
+	 * to the vma (discard or cloning) which should prevent the more
+	 * egregious cases from causing harm.
+	 */
+
+	pvec = NULL;
+	pinned = 0;
+	if (obj->userptr.mm == current->mm) {
+		pvec = kmalloc(num_pages*sizeof(struct page *),
+			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+		if (pvec == NULL) {
+			pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+			if (pvec == NULL)
+				return -ENOMEM;
+		}
+
+		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
+					       !obj->userptr.read_only, pvec);
+	}
+	if (pinned < num_pages) {
+		if (pinned < 0) {
+			ret = pinned;
+			pinned = 0;
+		} else {
+			/* Spawn a worker so that we can acquire the
+			 * user pages without holding our mutex.
+			 */
+			ret = -EAGAIN;
+			if (obj->userptr.work == NULL) {
+				struct get_pages_work *work;
+
+				work = kmalloc(sizeof(*work), GFP_KERNEL);
+				if (work != NULL) {
+					obj->userptr.work = &work->work;
+
+					work->obj = obj;
+					drm_gem_object_reference(&obj->base);
+
+					work->task = current;
+					get_task_struct(work->task);
+
+					INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
+					schedule_work(&work->work);
+				} else
+					ret = -ENOMEM;
+			} else {
+				if (IS_ERR(obj->userptr.work)) {
+					ret = PTR_ERR(obj->userptr.work);
+					obj->userptr.work = NULL;
+				}
+			}
+		}
+	} else {
+		struct sg_table *st;
+
+		st = kmalloc(sizeof(*st), GFP_KERNEL);
+		if (st == NULL || sg_alloc_table(st, num_pages, GFP_KERNEL)) {
+			kfree(st);
+			ret = -ENOMEM;
+		} else {
+			struct scatterlist *sg;
+			int n;
+
+			for_each_sg(st->sgl, sg, num_pages, n)
+				sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+
+			obj->pages = st;
+			obj->userptr.work = NULL;
+
+			pinned = 0;
+			ret = 0;
+		}
+	}
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+	return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+	struct scatterlist *sg;
+	int i;
+
+	if (obj->madv != I915_MADV_WILLNEED)
+		obj->dirty = 0;
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+		struct page *page = sg_page(sg);
+
+		if (obj->dirty)
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+	}
+	obj->dirty = 0;
+
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+
+	BUG_ON(obj->userptr.work != NULL);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+	i915_gem_userptr_release__mmu_notifier(obj);
+
+	if (obj->userptr.mm) {
+		mmput(obj->userptr.mm);
+		obj->userptr.mm = NULL;
+	}
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.get_pages = i915_gem_userptr_get_pages,
+	.put_pages = i915_gem_userptr_put_pages,
+	.release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some normal memory from the process
+ * context - user memory.
+ *
+ * We impose several restrictions upon the memory being mapped
+ * into the GPU.
+ * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
+ * 2. We only allow a bo as large as we could in theory map into the GTT,
+ *    that is we limit the size to the total size of the GTT.
+ * 3. The bo is marked as being snoopable. The backing pages are left
+ *    accessible directly by the CPU, but reads by the GPU may incur the cost
+ *    of a snoop (unless you have an LLC architecture).
+ *
+ * Synchronisation between multiple users and the GPU is left to userspace
+ * through the normal set-domain-ioctl. The kernel will enforce that the
+ * GPU relinquishes the VMA before it is returned back to the system
+ * i.e. upon free(), munmap() or process termination. However, the userspace
+ * malloc() library may not immediately relinquish the VMA after free() and
+ * instead reuse it whilst the GPU is still reading and writing to the VMA.
+ * Caveat emptor.
+ *
+ * Also note that the object created here is not currently a "first class"
+ * object, in that several ioctls are banned. These are the CPU access
+ * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
+ * direct access via your pointer rather than use those ioctls.
+ *
+ * If you think this is a good interface to use to pass GPU memory between
+ * drivers, please use dma-buf instead. In fact, wherever possible use
+ * dma-buf instead.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_userptr *args = data;
+	struct drm_i915_gem_object *obj;
+	int ret;
+	u32 handle;
+
+	if (args->flags & ~(I915_USERPTR_READ_ONLY |
+			    I915_USERPTR_UNSYNCHRONIZED))
+		return -EINVAL;
+
+	if (offset_in_page(args->user_ptr | args->user_size))
+		return -EINVAL;
+
+	if (args->user_size > dev_priv->gtt.base.total)
+		return -E2BIG;
+
+	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
+		return -EFAULT;
+
+	/* Allocate the new object */
+	obj = i915_gem_object_alloc(dev);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	drm_gem_private_object_init(dev, &obj->base, args->user_size);
+	i915_gem_object_init(obj, &i915_gem_userptr_ops);
+	obj->cache_level = I915_CACHE_LLC;
+	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+
+	obj->userptr.ptr = args->user_ptr;
+	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
+
+	/* And keep a pointer to the current->mm for resolving the user pages
+	 * at binding. This means that we need to hook into the mmu_notifier
+	 * in order to detect if the mmu is destroyed.
+	 */
+	ret = -ENOMEM;
+	if ((obj->userptr.mm = get_task_mm(current)))
+		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
+	if (ret == 0)
+		ret = drm_gem_handle_create(file, &obj->base, &handle);
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&obj->base);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
+
+int
+i915_gem_init_userptr(struct drm_device *dev)
+{
+#if defined(CONFIG_MMU_NOTIFIER)
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	hash_init(dev_priv->mmu_notifiers);
+#endif
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index ae8cf61b8ce3..cce9f559e3d7 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -201,6 +201,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
+		err_puts(m, err->userptr ? " userptr" : "");
 		err_puts(m, err->ring != -1 ? " " : "");
 		err_puts(m, ring_str(err->ring));
 		err_puts(m, i915_cache_level_str(err->cache_level));
@@ -584,6 +585,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->tiling = obj->tiling_mode;
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
+	err->userptr = obj->userptr.mm != 0;
 	err->ring = obj->ring ? obj->ring->id : -1;
 	err->cache_level = obj->cache_level;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 37c8073a8246..6c145a0be250 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -224,6 +224,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_REG_READ		0x31
 #define DRM_I915_GET_RESET_STATS	0x32
 #define DRM_I915_GEM_CREATE2		0x33
+#define DRM_I915_GEM_USERPTR		0x34
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -275,6 +276,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
+#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1129,4 +1131,18 @@ struct drm_i915_reset_stats {
 	__u32 pad;
 };
 
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u64 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 #endif /* _UAPI_I915_DRM_H_ */
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2014-01-21 15:07 [PATCH 3/3] " Chris Wilson
@ 2014-01-22  9:46 ` Chris Wilson
  2014-01-24  9:00   ` Chris Wilson
  0 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2014-01-22  9:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: Akash Goel

By exporting the ability to map user addresses and insert PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications, from
faster rendering of client-side software rasterisers (chromium) and
mitigation of stalls due to readback (firefox), to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).
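
Since the kernel-doc added below leaves CPU/GPU synchronisation to the
existing set-domain ioctl, an illustrative helper for the readback side
might look like the sketch here. It is not part of the patch;
DRM_IOCTL_I915_GEM_SET_DOMAIN and struct drm_i915_gem_set_domain are
pre-existing uapi, and fd/handle are assumed to come from a userptr
creation as shown elsewhere in this thread:

  #include <stdint.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <drm/i915_drm.h>

  /* Pull the userptr bo into the CPU read domain (waiting for the GPU and
   * flushing caches as needed) before dereferencing the backing pointer.
   */
  int sync_userptr_for_cpu_read(int fd, uint32_t handle)
  {
          struct drm_i915_gem_set_domain arg;

          memset(&arg, 0, sizeof(arg));
          arg.handle = handle;
          arg.read_domains = I915_GEM_DOMAIN_CPU;
          arg.write_domain = 0;

          return ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
  }

The same call with write_domain = I915_GEM_DOMAIN_CPU would be the
natural fence before overwriting the buffer from the CPU.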

v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquire mutex for lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forward porting the backport.
v9: Limit the userptr to page aligned entries. We now expect userspace
    to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
     Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifier
     with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
     within the mmu_notifier range

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/Makefile           |   1 +
 drivers/gpu/drm/i915/i915_dma.c         |   1 +
 drivers/gpu/drm/i915/i915_drv.h         |  24 +-
 drivers/gpu/drm/i915/i915_gem.c         |   4 +
 drivers/gpu/drm/i915/i915_gem_userptr.c | 599 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c   |   2 +
 include/uapi/drm/i915_drm.h             |  16 +
 7 files changed, 646 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 45071d18c730..1dc14e51f3bd 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -15,6 +15,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
 	  i915_gem_gtt.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_userptr.o \
 	  i915_sysfs.o \
 	  i915_trace_points.o \
 	  i915_ums.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index f053d1099d3b..c34f2e7ae6d9 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1919,6 +1919,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE2, i915_gem_create2_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 35a2e339c298..fe46be1c48cc 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
 #include <linux/i2c-algo-bit.h>
 #include <drm/intel-gtt.h>
 #include <linux/backlight.h>
+#include <linux/hashtable.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
@@ -163,6 +164,7 @@ enum hpd_pin {
 		if ((intel_encoder)->base.crtc == (__crtc))
 
 struct drm_i915_private;
+struct i915_mmu_notifier;
 
 enum intel_dpll_id {
 	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
@@ -354,6 +356,7 @@ struct drm_i915_error_state {
 		u32 tiling:2;
 		u32 dirty:1;
 		u32 purgeable:1;
+		u32 userptr:1;
 		s32 ring:4;
 		u32 cache_level:3;
 	} **active_bo, **pinned_bo;
@@ -1465,6 +1468,9 @@ typedef struct drm_i915_private {
 	struct i915_gtt gtt; /* VMA representing the global address space */
 
 	struct i915_gem_mm mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+	DECLARE_HASHTABLE(mmu_notifiers, 7);
+#endif
 
 	/* Kernel Modesetting */
 
@@ -1596,6 +1602,7 @@ struct drm_i915_gem_object_ops {
 	 */
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
+	void (*release)(struct drm_i915_gem_object *);
 };
 
 struct drm_i915_gem_object {
@@ -1709,9 +1716,21 @@ struct drm_i915_gem_object {
 
 	/** for phy allocated objects */
 	struct drm_i915_gem_phys_object *phys_obj;
+
+	union {
+		struct i915_gem_userptr {
+			uintptr_t ptr;
+			unsigned read_only :1;
+
+			struct mm_struct *mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+			struct i915_mmu_object *mn;
+#endif
+			struct work_struct *work;
+		} userptr;
+	};
 };
 #define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base)
-
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /**
@@ -2012,6 +2031,9 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
 int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
+int i915_gem_init_userptr(struct drm_device *dev);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
 int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8912aaa7118a..0d20af63b876 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4380,6 +4380,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->base.import_attach)
 		drm_prime_gem_destroy(&obj->base, NULL);
 
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
@@ -4638,6 +4641,7 @@ int i915_gem_init(struct drm_device *dev)
 			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
 	}
 
+	i915_gem_init_userptr(dev);
 	i915_gem_init_global_gtt(dev);
 
 	ret = i915_gem_context_init(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 000000000000..79d07166c1aa
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -0,0 +1,599 @@
+/*
+ * Copyright © 2012-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_context.h>
+#include <linux/mmu_notifier.h>
+#include <linux/mempolicy.h>
+#include <linux/swap.h>
+
+#if defined(CONFIG_MMU_NOTIFIER)
+#include <linux/interval_tree.h>
+
+struct i915_mmu_notifier {
+	spinlock_t lock;
+	struct hlist_node node;
+	struct mmu_notifier mn;
+	struct rb_root objects;
+	struct drm_device *dev;
+	struct mm_struct *mm;
+	struct work_struct work;
+	unsigned long count;
+	unsigned long serial;
+};
+
+struct i915_mmu_object {
+	struct i915_mmu_notifier *mmu;
+	struct interval_tree_node it;
+	struct drm_i915_gem_object *obj;
+};
+
+static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
+						       struct mm_struct *mm,
+						       unsigned long start,
+						       unsigned long end)
+{
+	struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
+	struct interval_tree_node *it = NULL;
+	unsigned long serial = 0;
+
+	while (start < end) {
+		struct drm_i915_gem_object *obj;
+
+		obj = NULL;
+		spin_lock(&mn->lock);
+		if (serial == mn->serial)
+			interval_tree_iter_next(it, start, end);
+		else
+			it = interval_tree_iter_first(&mn->objects, start, end);
+		if (it != NULL) {
+			obj = container_of(it, struct i915_mmu_object, it)->obj;
+			drm_gem_object_reference(&obj->base);
+			serial = mn->serial;
+		}
+		spin_unlock(&mn->lock);
+		if (obj == NULL)
+			return;
+
+		mutex_lock(&mn->dev->struct_mutex);
+		if (obj->pages != NULL) {
+			struct drm_i915_private *dev_priv = to_i915(mn->dev);
+			struct i915_vma *vma, *tmp;
+			bool was_interruptible;
+
+			was_interruptible = dev_priv->mm.interruptible;
+			dev_priv->mm.interruptible = false;
+
+			list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
+				int ret = i915_vma_unbind(vma);
+				WARN_ON(ret && ret != -EIO);
+			}
+			WARN_ON(i915_gem_object_put_pages(obj));
+
+			dev_priv->mm.interruptible = was_interruptible;
+		}
+		drm_gem_object_unreference(&obj->base);
+		mutex_unlock(&mn->dev->struct_mutex);
+
+		start = obj->userptr.ptr + obj->base.size;
+	}
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
+};
+
+static struct i915_mmu_notifier *
+__i915_mmu_notifier_lookup(struct drm_device *dev, struct mm_struct *mm)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_mmu_notifier *mmu;
+
+	hash_for_each_possible(dev_priv->mmu_notifiers, mmu, node, (unsigned long)mm)
+		if (mmu->mm == mm)
+			return mmu;
+
+	return NULL;
+}
+
+static struct i915_mmu_notifier *
+i915_mmu_notifier_get(struct drm_device *dev, struct mm_struct *mm)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_mmu_notifier *mmu;
+	int ret;
+
+	mmu = __i915_mmu_notifier_lookup(dev, mm);
+	if (mmu)
+		return mmu;
+
+	mmu = kmalloc(sizeof(*mmu), GFP_KERNEL);
+	if (mmu == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&mmu->lock);
+	mmu->dev = dev;
+	mmu->mn.ops = &i915_gem_userptr_notifier;
+	mmu->mm = mm;
+	mmu->objects = RB_ROOT;
+	mmu->count = 0;
+	mmu->serial = 0;
+
+	ret = mmu_notifier_register(&mmu->mn, mm);
+	if (ret) {
+		kfree(mmu);
+		return ERR_PTR(ret);
+	}
+
+	hash_add(dev_priv->mmu_notifiers, &mmu->node, (unsigned long)mm);
+	return mmu;
+}
+
+static void
+__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
+{
+	mmu_notifier_unregister(&mmu->mn, mmu->mm);
+	kfree(mmu);
+}
+
+static void
+__i915_mmu_notifier_destroy_worker(struct work_struct *work)
+{
+	__i915_mmu_notifier_destroy(container_of(work, typeof(struct i915_mmu_notifier), work));
+}
+
+static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
+{
+	if (++mmu->serial == 0)
+		mmu->serial = 1;
+}
+
+static void
+i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
+		      struct i915_mmu_object *mn)
+{
+	bool destroy;
+
+	spin_lock(&mmu->lock);
+	interval_tree_remove(&mn->it, &mmu->objects);
+	destroy = --mmu->count == 0;
+	__i915_mmu_notifier_update_serial(mmu);
+	spin_unlock(&mmu->lock);
+
+	if (destroy) { /* protected against _add() by struct_mutex */
+		hash_del(&mmu->node);
+		INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
+		schedule_work(&mmu->work);
+	}
+}
+
+static int
+i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
+		      struct i915_mmu_object *mn)
+{
+	int ret = -EINVAL;
+
+	spin_lock(&mmu->lock);
+	/* Disallow overlapping userptr objects */
+	if (!interval_tree_iter_first(&mmu->objects,
+				      mn->it.start, mn->it.last)) {
+		interval_tree_insert(&mn->it, &mmu->objects);
+		mmu->count++;
+		__i915_mmu_notifier_update_serial(mmu);
+		ret = 0;
+	}
+	spin_unlock(&mmu->lock);
+
+	return ret;
+}
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+	struct i915_mmu_object *mn;
+
+	mn = obj->userptr.mn;
+	if (mn == NULL)
+		return;
+
+	i915_mmu_notifier_del(mn->mmu, mn);
+	obj->userptr.mn = NULL;
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	struct i915_mmu_notifier *mmu;
+	struct i915_mmu_object *mn;
+	int ret;
+
+	if (flags & I915_USERPTR_UNSYNCHRONIZED)
+		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+
+	mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
+	if (IS_ERR(mmu))
+		return PTR_ERR(mmu);
+
+	mn = kzalloc(sizeof(*mn), GFP_KERNEL);
+	if (mn == NULL) {
+		ret = -ENOMEM;
+		goto put_mmu;
+	}
+
+	mn->mmu = mmu;
+	mn->it.start = obj->userptr.ptr;
+	mn->it.last = mn->it.start + obj->base.size - 1;
+	mn->obj = obj;
+
+	ret = i915_mmu_notifier_add(mmu, mn);
+	if (ret)
+		goto free_mn;
+
+	obj->userptr.mn = mn;
+	return 0;
+
+free_mn:
+	kfree(mn);
+put_mmu:
+	if (mmu->count == 0)
+		__i915_mmu_notifier_destroy(mmu);
+	return ret;
+}
+
+#else
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
+		return -ENODEV;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return 0;
+}
+#endif
+
+struct get_pages_work {
+	struct work_struct work;
+	struct drm_i915_gem_object *obj;
+	struct task_struct *task;
+};
+
+static void
+__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
+{
+	struct get_pages_work *work = container_of(_work, typeof(*work), work);
+	struct drm_i915_gem_object *obj = work->obj;
+	struct drm_device *dev = obj->base.dev;
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	ret = -ENOMEM;
+	pinned = 0;
+
+	pvec = kmalloc(num_pages*sizeof(struct page *),
+		       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+	if (pvec == NULL)
+		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+	if (pvec != NULL) {
+		struct mm_struct *mm = obj->userptr.mm;
+
+		use_mm(mm);
+		down_read(&mm->mmap_sem);
+		while (pinned < num_pages) {
+			ret = get_user_pages(work->task, mm,
+					     obj->userptr.ptr + pinned * PAGE_SIZE,
+					     num_pages - pinned,
+					     !obj->userptr.read_only, 0,
+					     pvec + pinned, NULL);
+			if (ret < 0)
+				break;
+
+			pinned += ret;
+		}
+		up_read(&mm->mmap_sem);
+		unuse_mm(mm);
+	}
+
+	mutex_lock(&dev->struct_mutex);
+	if (obj->pages) {
+		ret = 0;
+	} else if (pinned == num_pages) {
+		struct sg_table *st;
+
+		st = kmalloc(sizeof(*st), GFP_KERNEL);
+		if (st == NULL || sg_alloc_table(st, num_pages, GFP_KERNEL)) {
+			kfree(st);
+			ret = -ENOMEM;
+		} else {
+			struct scatterlist *sg;
+			int n;
+
+			for_each_sg(st->sgl, sg, num_pages, n)
+				sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+
+			obj->pages = st;
+			list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list);
+			pinned = 0;
+			ret = 0;
+		}
+	}
+
+	obj->userptr.work = ERR_PTR(ret);
+	drm_gem_object_unreference(&obj->base);
+	mutex_unlock(&dev->struct_mutex);
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+
+	put_task_struct(work->task);
+	kfree(work);
+}
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 *
+	 * Fortunately, we can hook into the mmu_notifier in order to
+	 * discard the page references prior to anything nasty happening
+	 * to the vma (discard or cloning) which should prevent the more
+	 * egregious cases from causing harm.
+	 */
+
+	pvec = NULL;
+	pinned = 0;
+	if (obj->userptr.mm == current->mm) {
+		pvec = kmalloc(num_pages*sizeof(struct page *),
+			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+		if (pvec == NULL) {
+			pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+			if (pvec == NULL)
+				return -ENOMEM;
+		}
+
+		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
+					       !obj->userptr.read_only, pvec);
+	}
+	if (pinned < num_pages) {
+		if (pinned < 0) {
+			ret = pinned;
+			pinned = 0;
+		} else {
+			/* Spawn a worker so that we can acquire the
+			 * user pages without holding our mutex.
+			 */
+			ret = -EAGAIN;
+			if (obj->userptr.work == NULL) {
+				struct get_pages_work *work;
+
+				work = kmalloc(sizeof(*work), GFP_KERNEL);
+				if (work != NULL) {
+					obj->userptr.work = &work->work;
+
+					work->obj = obj;
+					drm_gem_object_reference(&obj->base);
+
+					work->task = current;
+					get_task_struct(work->task);
+
+					INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
+					schedule_work(&work->work);
+				} else
+					ret = -ENOMEM;
+			} else {
+				if (IS_ERR(obj->userptr.work)) {
+					ret = PTR_ERR(obj->userptr.work);
+					obj->userptr.work = NULL;
+				}
+			}
+		}
+	} else {
+		struct sg_table *st;
+
+		st = kmalloc(sizeof(*st), GFP_KERNEL);
+		if (st == NULL || sg_alloc_table(st, num_pages, GFP_KERNEL)) {
+			kfree(st);
+			ret = -ENOMEM;
+		} else {
+			struct scatterlist *sg;
+			int n;
+
+			for_each_sg(st->sgl, sg, num_pages, n)
+				sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+
+			obj->pages = st;
+			pinned = 0;
+			ret = 0;
+		}
+	}
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+	return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+	struct scatterlist *sg;
+	int i;
+
+	if (obj->madv != I915_MADV_WILLNEED)
+		obj->dirty = 0;
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+		struct page *page = sg_page(sg);
+
+		if (obj->dirty)
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+	}
+	obj->dirty = 0;
+
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+	i915_gem_userptr_release__mmu_notifier(obj);
+
+	if (obj->userptr.mm) {
+		mmput(obj->userptr.mm);
+		obj->userptr.mm = NULL;
+	}
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.get_pages = i915_gem_userptr_get_pages,
+	.put_pages = i915_gem_userptr_put_pages,
+	.release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some normal memory from the process
+ * context - user memory.
+ *
+ * We impose several restrictions upon the memory being mapped
+ * into the GPU.
+ * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
+ * 2. We only allow a bo as large as we could in theory map into the GTT,
+ *    that is we limit the size to the total size of the GTT.
+ * 3. The bo is marked as being snoopable. The backing pages are left
+ *    accessible directly by the CPU, but reads by the GPU may incur the cost
+ *    of a snoop (unless you have an LLC architecture).
+ *
+ * Synchronisation between multiple users and the GPU is left to userspace
+ * through the normal set-domain-ioctl. The kernel will enforce that the
+ * GPU relinquishes the VMA before it is returned back to the system
+ * i.e. upon free(), munmap() or process termination. However, the userspace
+ * malloc() library may not immediately relinquish the VMA after free() and
+ * instead reuse it whilst the GPU is still reading and writing to the VMA.
+ * Caveat emptor.
+ *
+ * Also note that the object created here is not currently a "first class"
+ * object, in that several ioctls are banned. These are the CPU access
+ * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
+ * direct access via your pointer rather than use those ioctls.
+ *
+ * If you think this is a good interface to use to pass GPU memory between
+ * drivers, please use dma-buf instead. In fact, wherever possible use
+ * dma-buf instead.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_userptr *args = data;
+	struct drm_i915_gem_object *obj;
+	int ret;
+	u32 handle;
+
+	if (args->flags & ~(I915_USERPTR_READ_ONLY |
+			    I915_USERPTR_UNSYNCHRONIZED))
+		return -EINVAL;
+
+	if (offset_in_page(args->user_ptr | args->user_size))
+		return -EINVAL;
+
+	if (args->user_size > dev_priv->gtt.base.total)
+		return -E2BIG;
+
+	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
+		return -EFAULT;
+
+	/* Allocate the new object */
+	obj = i915_gem_object_alloc(dev);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	drm_gem_private_object_init(dev, &obj->base, args->user_size);
+	i915_gem_object_init(obj, &i915_gem_userptr_ops);
+	obj->cache_level = I915_CACHE_LLC;
+	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+
+	obj->userptr.ptr = args->user_ptr;
+	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
+
+	/* And keep a pointer to the current->mm for resolving the user pages
+	 * at binding. This means that we need to hook into the mmu_notifier
+	 * in order to detect if the mmu is destroyed.
+	 */
+	ret = -ENOMEM;
+	if ((obj->userptr.mm = get_task_mm(current)))
+		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
+	if (ret == 0)
+		ret = drm_gem_handle_create(file, &obj->base, &handle);
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&obj->base);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
+
+int
+i915_gem_init_userptr(struct drm_device *dev)
+{
+#if defined(CONFIG_MMU_NOTIFIER)
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	hash_init(dev_priv->mmu_notifiers);
+#endif
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index ae8cf61b8ce3..cce9f559e3d7 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -201,6 +201,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
+		err_puts(m, err->userptr ? " userptr" : "");
 		err_puts(m, err->ring != -1 ? " " : "");
 		err_puts(m, ring_str(err->ring));
 		err_puts(m, i915_cache_level_str(err->cache_level));
@@ -584,6 +585,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->tiling = obj->tiling_mode;
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
+	err->userptr = obj->userptr.mm != 0;
 	err->ring = obj->ring ? obj->ring->id : -1;
 	err->cache_level = obj->cache_level;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 37c8073a8246..6c145a0be250 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -224,6 +224,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_REG_READ		0x31
 #define DRM_I915_GET_RESET_STATS	0x32
 #define DRM_I915_GEM_CREATE2		0x33
+#define DRM_I915_GEM_USERPTR		0x34
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -275,6 +276,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
+#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1129,4 +1131,18 @@ struct drm_i915_reset_stats {
 	__u32 pad;
 };
 
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u64 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 #endif /* _UAPI_I915_DRM_H_ */
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
@ 2014-01-15 11:10 Chris Wilson
  0 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2014-01-15 11:10 UTC (permalink / raw)
  To: intel-gfx

By exporting the ability to map user addresses and insert PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications, from
faster rendering of client-side software rasterisers (chromium) and
mitigation of stalls due to readback (firefox), to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).
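
Note that v9 below pushes all offset-in-page handling out to userspace,
so a caller wrapping an arbitrary pointer has to widen it to a
page-aligned range itself before issuing the ioctl. A small illustrative
helper (the struct and names are mine, not part of the patch):

  #include <stdint.h>
  #include <unistd.h>

  struct userptr_range {
          uint64_t aligned_ptr;  /* page-aligned address to pass to the ioctl */
          uint64_t aligned_size; /* page-aligned length covering the data */
          uint64_t offset;       /* where the original bytes sit inside the bo */
  };

  /* Round an arbitrary pointer/length out to page granularity, remembering
   * the offset so the caller can still locate its data within the bo.
   */
  static struct userptr_range userptr_align(const void *ptr, uint64_t len)
  {
          const uint64_t page = sysconf(_SC_PAGESIZE);
          const uint64_t addr = (uintptr_t)ptr;
          struct userptr_range r;

          r.aligned_ptr = addr & ~(page - 1);
          r.offset = addr - r.aligned_ptr;
          r.aligned_size = (r.offset + len + page - 1) & ~(page - 1);
          return r;
  }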

v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquiring the mutex for a lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forward porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
    to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
     Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.

NB: We still have performance concerns over the use of the linear lists
and unfiltered notifies in mmu_notifier which do not scale to our use
case, where we may have many thousands of objects being tracked.
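
For reference, the intended usage from userspace is just a thin wrapper
around the new ioctl. A minimal sketch (not part of this patch; it
assumes libdrm's drmIoctl() helper, that the uapi header added by this
series is on the include path, and that the buffer and size are already
page aligned):

	#include <stdint.h>
	#include <xf86drm.h>
	#include <i915_drm.h>

	static uint32_t gem_userptr(int fd, void *ptr, uint64_t size)
	{
		struct drm_i915_gem_userptr arg = {
			.user_ptr = (uintptr_t)ptr,	/* page aligned */
			.user_size = size,		/* page aligned */
			.flags = 0,	/* synchronised via mmu_notifier */
		};

		/* returns 0 on failure; valid handles are nonzero */
		if (drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
			return 0;

		return arg.handle;
	}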

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
---
 drivers/gpu/drm/i915/Makefile           |   1 +
 drivers/gpu/drm/i915/i915_dma.c         |   1 +
 drivers/gpu/drm/i915/i915_drv.h         |  19 +-
 drivers/gpu/drm/i915/i915_gem.c         |   3 +
 drivers/gpu/drm/i915/i915_gem_userptr.c | 413 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c   |   2 +
 include/uapi/drm/i915_drm.h             |  16 ++
 7 files changed, 454 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 45071d18c730..1dc14e51f3bd 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -15,6 +15,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
 	  i915_gem_gtt.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_userptr.o \
 	  i915_sysfs.o \
 	  i915_trace_points.o \
 	  i915_ums.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index f053d1099d3b..c34f2e7ae6d9 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1919,6 +1919,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE2, i915_gem_create2_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 72199e9fa9f4..4f0969b89a3c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -43,6 +43,7 @@
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
+#include <linux/mmu_notifier.h>
 
 /* General customization:
  */
@@ -354,6 +355,7 @@ struct drm_i915_error_state {
 		u32 tiling:2;
 		u32 dirty:1;
 		u32 purgeable:1;
+		u32 userptr:1;
 		s32 ring:4;
 		u32 cache_level:3;
 	} **active_bo, **pinned_bo;
@@ -1598,6 +1600,7 @@ struct drm_i915_gem_object_ops {
 	 */
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
+	void (*release)(struct drm_i915_gem_object *);
 };
 
 struct drm_i915_gem_object {
@@ -1711,9 +1714,21 @@ struct drm_i915_gem_object {
 
 	/** for phy allocated objects */
 	struct drm_i915_gem_phys_object *phys_obj;
+
+	union {
+		struct i915_gem_userptr {
+			uintptr_t ptr;
+			unsigned read_only :1;
+
+			struct mm_struct *mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+			struct mmu_notifier mn;
+#endif
+			struct work_struct *work;
+		} userptr;
+	};
 };
 #define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base)
-
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /**
@@ -2015,6 +2030,8 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
 int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
 int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5636896cc288..8e30fb2df237 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4346,6 +4346,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->base.import_attach)
 		drm_prime_gem_destroy(&obj->base, NULL);
 
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 000000000000..7be1e8c079ce
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -0,0 +1,413 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_context.h>
+#include <linux/mmu_notifier.h>
+#include <linux/mempolicy.h>
+#include <linux/swap.h>
+
+#if defined(CONFIG_MMU_NOTIFIER)
+static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *mn,
+						       struct mm_struct *mm,
+						       unsigned long start,
+						       unsigned long end)
+{
+	struct drm_i915_gem_object *obj = container_of(mn, struct drm_i915_gem_object, userptr.mn);
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_vma *vma, *tmp;
+	bool was_interruptible;
+	int ret;
+
+	BUG_ON(obj->userptr.mm != mm);
+
+	if (obj->userptr.ptr >= end ||
+	    obj->userptr.ptr + obj->base.size <= start)
+		return;
+
+	if (obj->pages == NULL) /* opportunistic check */
+		return;
+
+	mutex_lock(&dev->struct_mutex);
+	was_interruptible = dev_priv->mm.interruptible;
+	dev_priv->mm.interruptible = false;
+
+	list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
+		ret = i915_vma_unbind(vma);
+		WARN_ON(ret && ret != -EIO);
+	}
+	WARN_ON(i915_gem_object_put_pages(obj));
+
+	dev_priv->mm.interruptible = was_interruptible;
+	mutex_unlock(&dev->struct_mutex);
+}
+
+static void i915_gem_userptr_mn_release(struct mmu_notifier *mn,
+					struct mm_struct *mm)
+{
+	/* XXX Schedule an eventual unbind? E.g. hook into require request?
+	 * However, locking will be complicated.
+	 */
+	BUG_ON(!IS_ERR_OR_NULL(container_of(mn, struct drm_i915_gem_object, userptr.mn)->userptr.work));
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
+	.release = i915_gem_userptr_mn_release,
+};
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+	if (obj->userptr.mn.ops)
+		mmu_notifier_unregister(&obj->userptr.mn, obj->userptr.mm);
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	if (flags & I915_USERPTR_UNSYNCHRONIZED)
+		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+
+	obj->userptr.mn.ops = &i915_gem_userptr_notifier;
+	return mmu_notifier_register(&obj->userptr.mn, obj->userptr.mm);
+}
+
+#else
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
+		return -ENODEV;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return 0;
+}
+#endif
+
+struct get_pages_work {
+	struct work_struct work;
+	struct drm_i915_gem_object *obj;
+	struct task_struct *task;
+};
+
+static void
+__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
+{
+	struct get_pages_work *work = container_of(_work, typeof(*work), work);
+	struct drm_i915_gem_object *obj = work->obj;
+	struct drm_device *dev = obj->base.dev;
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	ret = -ENOMEM;
+	pinned = 0;
+
+	pvec = kmalloc(num_pages*sizeof(struct page *),
+		       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+	if (pvec == NULL)
+		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+	if (pvec != NULL) {
+		struct mm_struct *mm = obj->userptr.mm;
+
+		down_read(&mm->mmap_sem);
+		while (pinned < num_pages) {
+			ret = get_user_pages(work->task, mm,
+					     obj->userptr.ptr + pinned * PAGE_SIZE,
+					     num_pages - pinned,
+					     !obj->userptr.read_only, 0,
+					     pvec + pinned, NULL);
+			if (ret < 0)
+				break;
+
+			pinned += ret;
+		}
+		up_read(&mm->mmap_sem);
+	}
+
+	mutex_lock(&dev->struct_mutex);
+	if (obj->pages) {
+		ret = 0;
+	} else if (pinned == num_pages) {
+		struct sg_table *st;
+
+		st = kmalloc(sizeof(*st), GFP_KERNEL);
+		if (st == NULL || sg_alloc_table(st, num_pages, GFP_KERNEL)) {
+			kfree(st);
+			ret = -ENOMEM;
+		} else {
+			struct scatterlist *sg;
+			int n;
+
+			for_each_sg(st->sgl, sg, num_pages, n)
+				sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+
+			obj->pages = st;
+			list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list);
+			pinned = 0;
+			ret = 0;
+		}
+	}
+
+	obj->userptr.work = ERR_PTR(ret);
+	drm_gem_object_unreference(&obj->base);
+	mutex_unlock(&dev->struct_mutex);
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+
+	put_task_struct(work->task);
+	kfree(work);
+}
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 *
+	 * Fortunately, we can hook into the mmu_notifier in order to
+	 * discard the page references prior to anything nasty happening
+	 * to the vma (discard or cloning) which should prevent the more
+	 * egregious cases from causing harm.
+	 */
+
+	pvec = NULL;
+	pinned = 0;
+	if (obj->userptr.mm == current->mm) {
+		pvec = kmalloc(num_pages*sizeof(struct page *),
+			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+		if (pvec == NULL) {
+			pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+			if (pvec == NULL)
+				return -ENOMEM;
+		}
+
+		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
+					       !obj->userptr.read_only, pvec);
+	}
+	if (pinned < num_pages) {
+		if (pinned < 0) {
+			ret = pinned;
+			pinned = 0;
+		} else {
+			ret = -EAGAIN;
+			if (obj->userptr.work == NULL) {
+				struct get_pages_work *work;
+
+				work = kmalloc(sizeof(*work), GFP_KERNEL);
+				if (work != NULL) {
+					obj->userptr.work = &work->work;
+
+					work->obj = obj;
+					drm_gem_object_reference(&obj->base);
+
+					work->task = current;
+					get_task_struct(work->task);
+
+					INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
+					//queue_work(to_i915(obj->base.dev)->wq, &work->work);
+					schedule_work(&work->work);
+				} else
+					ret = -ENOMEM;
+			} else {
+				if (IS_ERR(obj->userptr.work)) {
+					ret = PTR_ERR(obj->userptr.work);
+					obj->userptr.work = NULL;
+				}
+			}
+		}
+	} else {
+		struct sg_table *st;
+
+		st = kmalloc(sizeof(*st), GFP_KERNEL);
+		if (st == NULL || sg_alloc_table(st, num_pages, GFP_KERNEL)) {
+			kfree(st);
+			ret = -ENOMEM;
+		} else {
+			struct scatterlist *sg;
+			int n;
+
+			for_each_sg(st->sgl, sg, num_pages, n)
+				sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+
+			obj->pages = st;
+			pinned = 0;
+			ret = 0;
+		}
+	}
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+	return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+	struct scatterlist *sg;
+	int i;
+
+	if (obj->madv != I915_MADV_WILLNEED)
+		obj->dirty = 0;
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+		struct page *page = sg_page(sg);
+
+		if (obj->dirty)
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+	}
+	obj->dirty = 0;
+
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+	i915_gem_userptr_release__mmu_notifier(obj);
+
+	mmput(obj->userptr.mm);
+	obj->userptr.mm = NULL;
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.get_pages = i915_gem_userptr_get_pages,
+	.put_pages = i915_gem_userptr_put_pages,
+	.release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some normal memory from the process
+ * context - user memory.
+ *
+ * We impose several restrictions upon the memory being mapped
+ * into the GPU.
+ * 1. It must be page aligned (both start/end addresses, i.e ptr and size).
+ * 2. We only allow a bo as large as we could in theory map into the GTT,
+ *    that is we limit the size to the total size of the GTT.
+ * 3. The bo is marked as being snoopable. The backing pages are left
+ *    accessible directly by the CPU, but reads by the GPU may incur the cost
+ *    of a snoop (unless you have an LLC architecture).
+ *
+ * Synchronisation between multiple users and the GPU is left to userspace
+ * through the normal set-domain-ioctl. The kernel will enforce that the
+ * GPU relinquishes the VMA before it is returned back to the system
+ * i.e. upon free(), munmap() or process termination. However, the userspace
+ * malloc() library may not immediately relinquish the VMA after free() and
+ * instead reuse it whilst the GPU is still reading and writing to the VMA.
+ * Caveat emptor.
+ *
+ * If you think this is a good interface to use to pass GPU memory between
+ * drivers, please use dma-buf instead. In fact, wherever possible use
+ * dma-buf instead.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_userptr *args = data;
+	struct drm_i915_gem_object *obj;
+	int ret;
+	u32 handle;
+
+	if (args->flags & ~(I915_USERPTR_READ_ONLY |
+			    I915_USERPTR_UNSYNCHRONIZED))
+		return -EINVAL;
+
+	if (offset_in_page(args->user_ptr | args->user_size))
+		return -EINVAL;
+
+	if (args->user_size > dev_priv->gtt.base.total)
+		return -E2BIG;
+
+	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
+		return -EFAULT;
+
+	/* Allocate the new object */
+	obj = i915_gem_object_alloc(dev);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	drm_gem_private_object_init(dev, &obj->base, args->user_size);
+	i915_gem_object_init(obj, &i915_gem_userptr_ops);
+	obj->cache_level = I915_CACHE_LLC;
+	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+
+	obj->userptr.ptr = args->user_ptr;
+	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
+
+	/* And keep a pointer to the current->mm for resolving the user pages
+	 * at binding. This means that we need to hook into the mmu_notifier
+	 * in order to detect if the mmu is destroyed.
+	 */
+	ret = 0;
+	obj->userptr.mm = get_task_mm(current);
+	if (obj->userptr.mm)
+		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
+	if (ret == 0)
+		ret = drm_gem_handle_create(file, &obj->base, &handle);
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&obj->base);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index ae8cf61b8ce3..cce9f559e3d7 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -201,6 +201,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
+		err_puts(m, err->userptr ? " userptr" : "");
 		err_puts(m, err->ring != -1 ? " " : "");
 		err_puts(m, ring_str(err->ring));
 		err_puts(m, i915_cache_level_str(err->cache_level));
@@ -584,6 +585,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->tiling = obj->tiling_mode;
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
+	err->userptr = obj->userptr.mm != 0;
 	err->ring = obj->ring ? obj->ring->id : -1;
 	err->cache_level = obj->cache_level;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 37c8073a8246..6c145a0be250 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -224,6 +224,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_REG_READ		0x31
 #define DRM_I915_GET_RESET_STATS	0x32
 #define DRM_I915_GEM_CREATE2		0x33
+#define DRM_I915_GEM_USERPTR		0x34
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -275,6 +276,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
+#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1129,4 +1131,18 @@ struct drm_i915_reset_stats {
 	__u32 pad;
 };
 
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u64 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 #endif /* _UAPI_I915_DRM_H_ */
-- 
1.8.5.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
@ 2013-08-14 10:59 Chris Wilson
  0 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2013-08-14 10:59 UTC (permalink / raw)
  To: intel-gfx

By exporting the ability to map user addresses and insert PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has widespread implications, from
faster rendering of client-side software rasterisers (chromium) and
mitigation of stalls due to readback (firefox) to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).

v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquiring the mutex for a lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forward porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
    to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.

NB: We still have performance concerns over the use of the linear lists
and unfiltered notifies in mmu_notifier which do not scale to our use
case, where we may have many thousands of objects being tracked.
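
As an aside, the don't-copy-on-fork behaviour that
__i915_gem_userptr_set_vma_behaviour() applies below can also be
requested by userspace itself. A rough, illustrative sketch of the
equivalent userspace-side preparation (not part of this patch; assumes
ordinary POSIX/Linux madvise() and posix_memalign()):

	#include <stdlib.h>
	#include <sys/mman.h>
	#include <unistd.h>

	/* Allocate a page-aligned buffer and ask the kernel not to
	 * duplicate this range into child processes on fork(), which
	 * sidesteps the cow issues mentioned in v10.
	 */
	static void *alloc_nofork(size_t size)
	{
		void *ptr;

		if (posix_memalign(&ptr, sysconf(_SC_PAGESIZE), size))
			return NULL;

		if (madvise(ptr, size, MADV_DONTFORK)) {
			free(ptr);
			return NULL;
		}

		return ptr;
	}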

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile           |    1 +
 drivers/gpu/drm/i915/i915_dma.c         |    1 +
 drivers/gpu/drm/i915/i915_drv.h         |   18 +-
 drivers/gpu/drm/i915/i915_gem.c         |    3 +
 drivers/gpu/drm/i915/i915_gem_userptr.c |  416 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c   |    2 +
 include/uapi/drm/i915_drm.h             |   16 ++
 7 files changed, 456 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 9d498e5..2369cfe 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -16,6 +16,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
 	  i915_gem_gtt.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_userptr.o \
 	  i915_sysfs.o \
 	  i915_trace_points.o \
 	  i915_ums.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 3cab741..23a9374 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1894,6 +1894,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE2, i915_gem_create2_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c2da7e9..d6c6626 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -43,6 +43,7 @@
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
+#include <linux/mmu_notifier.h>
 
 /* General customization:
  */
@@ -320,6 +321,7 @@ struct drm_i915_error_state {
 		u32 tiling:2;
 		u32 dirty:1;
 		u32 purgeable:1;
+		u32 userptr:1;
 		s32 ring:4;
 		u32 cache_level:2;
 	} **active_bo, **pinned_bo;
@@ -1296,6 +1298,7 @@ struct drm_i915_gem_object_ops {
 	 */
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
+	void (*release)(struct drm_i915_gem_object *);
 };
 
 struct drm_i915_gem_object {
@@ -1425,9 +1428,20 @@ struct drm_i915_gem_object {
 
 	/** for phy allocated objects */
 	struct drm_i915_gem_phys_object *phys_obj;
+
+	union {
+		struct i915_gem_userptr {
+			uintptr_t ptr;
+			bool read_only;
+
+			struct mm_struct *mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+			struct mmu_notifier mn;
+#endif
+		} userptr;
+	};
 };
 #define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base)
-
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /**
@@ -1733,6 +1747,8 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
 int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
 int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index af7b8f4..0244c3e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4200,6 +4200,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->base.import_attach)
 		drm_prime_gem_destroy(&obj->base, NULL);
 
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 0000000..3859ad3
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_notifier.h>
+#include <linux/mempolicy.h>
+#include <linux/swap.h>
+
+#if defined(CONFIG_MMU_NOTIFIER)
+static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *mn,
+						       struct mm_struct *mm,
+						       unsigned long start,
+						       unsigned long end)
+{
+	struct drm_i915_gem_object *obj;
+	struct drm_device *dev;
+
+	/* XXX race between obj unref and mmu notifier? */
+	obj = container_of(mn, struct drm_i915_gem_object, userptr.mn);
+	BUG_ON(obj->userptr.mm != mm);
+
+	if (obj->userptr.ptr >= end ||
+	    obj->userptr.ptr + obj->base.size <= start)
+		return;
+
+	if (obj->pages == NULL) /* opportunistic check */
+		return;
+
+	dev = obj->base.dev;
+	mutex_lock(&dev->struct_mutex);
+	if (i915_gem_obj_bound_any(obj)) {
+		struct drm_i915_private *dev_priv = dev->dev_private;
+		struct i915_vma *vma;
+		bool was_interruptible;
+		int ret;
+
+		was_interruptible = dev_priv->mm.interruptible;
+		dev_priv->mm.interruptible = false;
+
+		list_for_each_entry(vma, &obj->vma_list, vma_link) {
+			ret = i915_vma_unbind(vma);
+			WARN_ON(ret && ret != -EIO);
+		}
+
+		dev_priv->mm.interruptible = was_interruptible;
+	}
+
+	WARN_ON(i915_gem_object_put_pages(obj));
+	mutex_unlock(&dev->struct_mutex);
+}
+
+static void i915_gem_userptr_mn_release(struct mmu_notifier *mn,
+					struct mm_struct *mm)
+{
+	struct drm_i915_gem_object *obj;
+
+	obj = container_of(mn, struct drm_i915_gem_object, userptr.mn);
+	BUG_ON(obj->userptr.mm != mm);
+	obj->userptr.mm = NULL;
+
+	/* XXX Schedule an eventual unbind? E.g. hook into require request?
+	 * However, locking will be complicated.
+	 */
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
+	.release = i915_gem_userptr_mn_release,
+};
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+	if (obj->userptr.mn.ops && obj->userptr.mm) {
+		mmu_notifier_unregister(&obj->userptr.mn, obj->userptr.mm);
+		obj->userptr.mm = NULL;
+	}
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	if (flags & I915_USERPTR_UNSYNCHRONIZED)
+		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+
+	obj->userptr.mn.ops = &i915_gem_userptr_notifier;
+	return mmu_notifier_register(&obj->userptr.mn, obj->userptr.mm);
+}
+
+#else
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
+		return -ENODEV;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return 0;
+}
+#endif
+
+static int
+__i915_gem_userptr_set_vma_behaviour(struct drm_i915_gem_object *obj,
+				     unsigned behaviour)
+{
+	struct mm_struct *mm = obj->userptr.mm;
+	unsigned long start = obj->userptr.ptr;
+	unsigned long end = start + obj->base.size;
+	struct vm_area_struct *vma, *prev;
+	unsigned new_flags;
+	int ret = 0;
+
+	down_write(&mm->mmap_sem);
+
+	vma = find_vma_prev(mm, start, &prev);
+	if (vma && start > vma->vm_start)
+		prev = vma;
+
+	new_flags = vma->vm_flags;
+	switch (behaviour) {
+	case MADV_DOFORK:
+		new_flags &= ~VM_DONTCOPY;
+		break;
+	case MADV_DONTFORK:
+		new_flags |= VM_DONTCOPY;
+		break;
+	}
+	if (vma->vm_flags == new_flags)
+		goto unlock;
+
+	prev = vma_merge(mm, prev, start, end, new_flags,
+			 vma->anon_vma, vma->vm_file,
+			 vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT),
+			 vma_policy(vma));
+	if (prev) {
+		vma = prev;
+	} else {
+		if (start != vma->vm_start) {
+			ret = split_vma(mm, vma, start, 1);
+			if (ret)
+				goto unlock;
+		}
+
+		if (end != vma->vm_end) {
+			ret = split_vma(mm, vma, end, 0);
+			if (ret)
+				goto unlock;
+		}
+	}
+
+	vma->vm_flags = new_flags;
+
+unlock:
+	up_write(&mm->mmap_sem);
+	return ret;
+}
+
+static int
+i915_gem_userptr_init__vma_behaviour(struct drm_i915_gem_object *obj,
+				     unsigned flags)
+{
+	if (flags & I915_USERPTR_UNSYNCHRONIZED)
+		return 0;
+
+	return __i915_gem_userptr_set_vma_behaviour(obj, MADV_DONTFORK);
+}
+
+static int
+i915_gem_userptr_release__vma_behaviour(struct drm_i915_gem_object *obj)
+{
+	/* XXX How to track/prevent user changes? */
+	return __i915_gem_userptr_set_vma_behaviour(obj, MADV_DOFORK);
+}
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	struct page **pvec;
+	int n, pinned, ret;
+
+	if (obj->userptr.mm == NULL)
+		return -EFAULT;
+
+	if (!access_ok(obj->userptr.read_only ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)obj->userptr.ptr, obj->base.size))
+		return -EFAULT;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 *
+	 * Fortunately, we can hook into the mmu_notifier in order to
+	 * discard the page references prior to anything nasty happening
+	 * to the vma (discard or cloning) which should prevent the more
+	 * egregious cases from causing harm.
+	 */
+
+	pvec = kmalloc(num_pages*sizeof(struct page *),
+		       GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+	if (pvec == NULL) {
+		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+		if (pvec == NULL)
+			return -ENOMEM;
+	}
+
+	pinned = 0;
+	if (obj->userptr.mm == current->mm)
+		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
+					       !obj->userptr.read_only, pvec);
+	if (pinned < num_pages) {
+		struct mm_struct *mm = obj->userptr.mm;
+
+		mutex_unlock(&obj->base.dev->struct_mutex);
+		down_read(&mm->mmap_sem);
+
+		ret = get_user_pages(current, mm,
+				     obj->userptr.ptr + (pinned << PAGE_SHIFT),
+				     num_pages - pinned,
+				     !obj->userptr.read_only, 0,
+				     pvec + pinned,
+				     NULL);
+
+		up_read(&mm->mmap_sem);
+		mutex_lock(&obj->base.dev->struct_mutex);
+
+		if (ret > 0)
+			pinned += ret;
+
+		if (obj->userptr.mm == NULL || obj->pages || pinned < num_pages) {
+			ret = obj->pages ? 0 : -EFAULT;
+			goto cleanup_pinned;
+		}
+	}
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL) {
+		ret = -ENOMEM;
+		goto cleanup_pinned;
+	}
+
+	if (sg_alloc_table(st, num_pages, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto cleanup_st;
+	}
+
+	for_each_sg(st->sgl, sg, num_pages, n)
+		sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+	drm_free_large(pvec);
+
+	obj->pages = st;
+	return 0;
+
+cleanup_st:
+	kfree(st);
+cleanup_pinned:
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+	return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+	struct scatterlist *sg;
+	int i;
+
+	if (obj->madv != I915_MADV_WILLNEED)
+		obj->dirty = 0;
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+		struct page *page = sg_page(sg);
+
+		if (obj->dirty)
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+	}
+	obj->dirty = 0;
+
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+	i915_gem_userptr_release__mmu_notifier(obj);
+	i915_gem_userptr_release__vma_behaviour(obj);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.get_pages = i915_gem_userptr_get_pages,
+	.put_pages = i915_gem_userptr_put_pages,
+	.release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some user memory.
+ *
+ * We impose several restrictions upon the memory being mapped
+ * into the GPU.
+ * 1. It must be page aligned (both start/end addresses, i.e ptr and size).
+ * 2. We mark the pages as MADV_DONTFORK so that they will not be cloned
+ *    to child processes.
+ * 3. We only allow a bo as large as we could in theory map into the GTT,
+ *    that is we limit the size to the size of the GTT.
+ * 4. The bo is marked as being snoopable.
+ *
+ * Synchronisation between multiple users and the GPU is left to userspace
+ * through the normal set-domain-ioctl. The kernel will enforce that the
+ * GPU relinquishes the VMA before it is returned back to the system
+ * i.e. upon free(), munmap() or process termination. However, the userspace
+ * malloc() library may not immediately relinquish the VMA after free() and
+ * instead reuse it whilst the GPU is still reading and writing to the VMA.
+ * Caveat emptor.
+ *
+ * If you think this is a good interface to use to pass GPU memory between
+ * drivers, please use dma-buf instead. In fact, wherever possible use
+ * dma-buf instead.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_userptr *args = data;
+	struct drm_i915_gem_object *obj;
+	int ret;
+	u32 handle;
+
+	if (args->flags & ~(I915_USERPTR_READ_ONLY |
+			    I915_USERPTR_UNSYNCHRONIZED))
+		return -EINVAL;
+
+	if (offset_in_page(args->user_ptr | args->user_size))
+		return -EINVAL;
+
+	if (args->user_size > dev_priv->gtt.mappable_end)
+		return -E2BIG;
+
+	/* Allocate the new object */
+	obj = i915_gem_object_alloc(dev);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	drm_gem_private_object_init(dev, &obj->base, args->user_size);
+	i915_gem_object_init(obj, &i915_gem_userptr_ops);
+	obj->cache_level = I915_CACHE_LLC;
+
+	obj->userptr.ptr = args->user_ptr;
+	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
+
+	/* And keep a pointer to the current->mm for resolving the user pages
+	 * at binding. This means that we need to hook into the mmu_notifier
+	 * in order to detect if the mmu is destroyed.
+	 */
+	obj->userptr.mm = current->mm;
+	ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
+	if (ret == 0)
+		ret = i915_gem_userptr_init__vma_behaviour(obj, args->flags);
+	if (ret == 0)
+		ret = drm_gem_handle_create(file, &obj->base, &handle);
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference(&obj->base);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 558e568..fe47274 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -199,6 +199,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
+		err_puts(m, err->userptr ? " userptr" : "");
 		err_puts(m, err->ring != -1 ? " " : "");
 		err_puts(m, ring_str(err->ring));
 		err_puts(m, i915_cache_level_str(err->cache_level));
@@ -554,6 +555,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->tiling = obj->tiling_mode;
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
+	err->userptr = obj->userptr.mm != 0;
 	err->ring = obj->ring ? obj->ring->id : -1;
 	err->cache_level = obj->cache_level;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 9a1cfba..6d5f963 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -223,6 +223,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_GET_CACHING	0x30
 #define DRM_I915_REG_READ		0x31
 #define DRM_I915_GEM_CREATE2		0x32
+#define DRM_I915_GEM_USERPTR		0x33
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -273,6 +274,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
+#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1110,4 +1112,18 @@ struct drm_i915_reg_read {
 	__u64 offset;
 	__u64 val; /* Return value */
 };
+
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u64 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
 #endif /* _UAPI_I915_DRM_H_ */
-- 
1.7.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2013-04-08 19:24     ` Daniel Vetter
  2013-04-08 21:48       ` Chris Wilson
  2013-04-08 22:06       ` Eric Anholt
@ 2013-06-24 21:36       ` Jesse Barnes
  2 siblings, 0 replies; 34+ messages in thread
From: Jesse Barnes @ 2013-06-24 21:36 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Mon, 8 Apr 2013 21:24:58 +0200
Daniel Vetter <daniel@ffwll.ch> wrote:

> On Mon, Apr 8, 2013 at 7:40 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > On Mon, Apr 08, 2013 at 07:18:11PM +0200, Daniel Vetter wrote:
> >> On Tue, Feb 12, 2013 at 02:17:22PM +0000, Chris Wilson wrote:
> >> > By exporting the ability to map user address and inserting PTEs
> >> > representing their backing pages into the GTT, we can exploit UMA in order
> >> > to utilize normal application data as a texture source or even as a
> >> > render target (depending upon the capabilities of the chipset). This has
> >> > a number of uses, with zero-copy downloads to the GPU and efficient
> >> > readback making the intermixed streaming of CPU and GPU operations
> >> > fairly efficient. This ability has many widespread implications from
> >> > faster rendering of client-side software rasterisers (chromium),
> >> > mitigation of stalls due to read back (firefox) and to faster pipelining
> >> > of texture data (such as pixel buffer objects in GL or data blobs in CL).
> >> >
> >> > v2: Compile with CONFIG_MMU_NOTIFIER
> >> > v3: We can sleep while performing invalidate-range, which we can utilise
> >> > to drop our page references prior to the kernel manipulating the vma
> >> > (for either discard or cloning) and so protect normal users.
> >> > v4: Only run the invalidate notifier if the range intercepts the bo.
> >> > v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
> >> >
> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>
> >> Quick bikeshed:
> >> - Still not really in favour of the in-page gtt offset handling ... I
> >>   still think that this is just a fancy mmap api, and it better reject
> >>   attempts to not map anything aligned to a full page outright.
> >
> > Strongly disagree.
> 
> Ok, let's dig out the beaten arguments here ;-)
> - Imo the gtt_offset frobbery is a bit fragile, and getting this right
> in the face of ppgtt won't make it better. And yes the only reason we
> still have that field is that you've shot down any patch to remove it
> citing userptr here. So "it's here already" doesn't count ;-)
> - Userptr for i915 is an mmap interface, and that works on pages,
> lying to userspace isn't great.
> - I don't see why userspace can't do this themselves. I've seen that
> it makes things easier in SNA/X, but for a general purpose interface
> that argument doesn't cut too much.
> - I'm also a bit afraid that our code implicitly assumes that
> size/offset are always page-aligned and I kinda want to avoid that we
> have to audit for such issues from here on. We've blown up in the past
> assuming that size > 0 already, I think we're set to blow up on this
> one here.
> 
> In any case, if you really want to stick to this I want this to be
> explictly track in an obj->reloc_gtt_offset_adjustment or something
> which is very loudly yelling at people to make sure no one trips over
> it. Tracking the adjustment in a separate field, which would only ever
> be used in the reloc code would address all my concerns (safe for the
> api ugliness one).

Resurrecting this again.

I'm of two minds on the API here: on the one hand, it can be nicer for
the kernel to handle this stuff if it can be done easily, and save
userspace the trouble.  But OTOH, staying consistent with existing
page-based interfaces makes things a little less jarring...

> 
> >> - I915_USERPTR_UNSYNCHRONIZED ... eeek. That means that despite everyone
> >>   having mmu notifiers enabled in their distro config, you make sure sna
> >>   doesn't hit it. Imo not enough testing coverage ;-) Or this there
> >>   another reason behind this than "mmu notifiers are too slow"?
> >>
> >>   Generally I'm a bit sloppy with going root-only for legacy X stuff (like
> >>   scanline waits), but this here looks very much generally useful. So not
> >>   exemption-material imo.
> >
> > Strongly disagree. Most of my machines do not have mmu-notifiers and
> > would still like to benefit from userptr and I see no reason why we need
> > to force mmu-notifiers.
> 
> Note that I didn't shout against the mmu_notifier-less support
> (although I'm honestly not too happy about it), what I don't like is
> the override bit disabling the mmu_notifiers even if we have them.
> Since that will mean that the code won't be tested through SNA, and so
> has a good chance of being buggy. Once mesa comes around and uses it,
> it'll nicely blow up. And one of the reason Jesse is breathing down my
> neck to merge this is "other guys are interested in this at intel".

I think we'll need good test cases to cover things regardless.  And
yes, an mmu notifier version that doesn't require root would be more
generally useful than a root only interface (or are those items
unrelated at this point?).  Having a flag for root only operation for
clients that know what they're doing is fine though, IMO.

I think one of the nice use cases the Mesa guys have is to save an
extra copy in glReadPixels for certain types of objects, which means
non-root is a requirement.  And it seems like we could do a
glReadPixels extension that would end up being zero copy with this
interface (provided it had pixel accessor functions too to deal with
de-swizzling).

> 
> >> - On a quick read I've seen some gtt mmap support remnants. This scares
> >>   me, a roundabout njet! would appease. Though I think that should already
> >>   happen with the checks we have to reject snoopable buffers?
> >
> > That's because there are platforms where it is theorectically possible
> > and whilst I have no use case for it, I wanted to make it work
> > nevertheless. I still think it is possible, but I could not see a way to
> > do so without completely replacing the drm vm code.
> 
> If I understand things correctly we should be able to block this
> simply by refusing to create an mmap offset for a userptr backed
> object.

Presumably the user has it mapped already through some other means, so
this shouldn't be a big limitation.  Would it simplify things a lot
and/or drop a lot of code?

> >> - I think we should reject set_cacheing requests on usersptr objects.
> >
> > I don't think that is strictly required, just like we should not limit
> > the user from using set_tiling. (Though the user is never actually going
> > to tell the kernel about such tiling...).
> 
> Yeah, I guess we could allow people to shot their foot off. Otoh it
> adds another dimension to the userptr interface, which we need to make
> sure it works. Similar besides set_tiling is probably also
> prime_export.
> 
> The reason I'd prefer to lock things down is that we have fairly nice
> coverage for normal gem objects wrt exercising corner-cases with igt.
> If we don't disallow the same corner-cases with userptr, I'd prefer
> the tests to also cover those ... Disallowing is cheaper ;-)

Do we have a good use case for allowing the tiling and/or caching
requests?  If not, yeah I'd just say punt on it for now.

So, any chance we can land this for 3.12?  Would be nice to have a text
file or something describing usage too...

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2013-04-08 21:48       ` Chris Wilson
@ 2013-04-15 18:37         ` Daniel Vetter
  0 siblings, 0 replies; 34+ messages in thread
From: Daniel Vetter @ 2013-04-15 18:37 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx

On Mon, Apr 8, 2013 at 11:48 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Mon, Apr 08, 2013 at 09:24:58PM +0200, Daniel Vetter wrote:
>> On Mon, Apr 8, 2013 at 7:40 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> > On Mon, Apr 08, 2013 at 07:18:11PM +0200, Daniel Vetter wrote:
>> >> On Tue, Feb 12, 2013 at 02:17:22PM +0000, Chris Wilson wrote:
>> >> > By exporting the ability to map user address and inserting PTEs
>> >> > representing their backing pages into the GTT, we can exploit UMA in order
>> >> > to utilize normal application data as a texture source or even as a
>> >> > render target (depending upon the capabilities of the chipset). This has
>> >> > a number of uses, with zero-copy downloads to the GPU and efficient
>> >> > readback making the intermixed streaming of CPU and GPU operations
>> >> > fairly efficient. This ability has many widespread implications from
>> >> > faster rendering of client-side software rasterisers (chromium),
>> >> > mitigation of stalls due to read back (firefox) and to faster pipelining
>> >> > of texture data (such as pixel buffer objects in GL or data blobs in CL).
>> >> >
>> >> > v2: Compile with CONFIG_MMU_NOTIFIER
>> >> > v3: We can sleep while performing invalidate-range, which we can utilise
>> >> > to drop our page references prior to the kernel manipulating the vma
>> >> > (for either discard or cloning) and so protect normal users.
>> >> > v4: Only run the invalidate notifier if the range intercepts the bo.
>> >> > v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
>> >> >
>> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> >>
>> >> Quick bikeshed:
>> >> - Still not really in favour of the in-page gtt offset handling ... I
>> >>   still think that this is just a fancy mmap api, and it better reject
>> >>   attempts to not map anything aligned to a full page outright.
>> >
>> > Strongly disagree.
>>
>> Ok, let's dig out the beaten arguments here ;-)
>> - Imo the gtt_offset frobbery is a bit fragile, and getting this right
>> in the face of ppgtt won't make it better. And yes the only reason we
>> still have that field is that you've shot down any patch to remove it
>> citing userptr here. So "it's here already" doesn't count ;-)
>> - Userptr for i915 is an mmap interface, and that works on pages,
>> lying to userspace isn't great.
>
> No. Due to the nature of existing decades old userspace, I need to map
> byte ranges, so I do not view this as a simple equivalence to mmapping
> a bo.

See below, I need to roll this up from behind ...

>> - I don't see why userspace can't do this themselves. I've seen that
>> it makes things easier in SNA/X, but for a general purpose interface
>> that argument doesn't cut too much.
>
> I have completely opposite viewpoint: A general purpose interface is not
> a page interface, and that this interface trades a small amount of
> kernel complexity (tracking the offset_in_page) so that userspace has a
> flexible interface that matches its requirements.

mmap is the general-purpose map-something-into-cpu-address-space
thingy, and it works on pages, too. So I still don't buy this, and Eric
seems to agree.

But anyway, if you're convinced I'm grumpily ok with those semantics.

>> - I'm also a bit afraid that our code implicitly assumes that
>> size/offset are always page-aligned and I kinda want to avoid that we
>> have to audit for such issues from here on. We've blown up in the past
>> assuming that size > 0 already, I think we're set to blow up on this
>> one here.
>
> Now that we can distinguish between size and num_pages, there is no
> longer a need for size to be page aligned (and is currently redundant).
>
>> In any case, if you really want to stick to this I want this to be
>> explictly track in an obj->reloc_gtt_offset_adjustment or something
>
> Sure, let's call it obj->gtt_offset:12;

As long as it's something with the high 20 bits zero I'm ok. Since
with ppgtt we'll soon have tons of different ->gtt_offsets, your
current approach would mean keeping this offset in several different
places (and I'd really, really dislike doing that, see all the stuff
I've been fighting in modeset land). So I want this separate and
explicit.

>> which is very loudly yelling at people to make sure no one trips over
>> it. Tracking the adjustment in a separate field, which would only ever
>> be used in the reloc code would address all my concerns (safe for the
>> api ugliness one).
>
> And everywhere that deals in GTT addresses.

I've ignored the other cases since I don't see a use-case, but that's
a point to be addressed more below. So looking at the other places we
use gtt addresses, I see two major pieces:
- pwrite/pread: Since we also need to deal in struct pages for cpu
access it's easiest to just add the static offset at the beginning.
Again, much clearer when this offset is explicit and stored someplace
separate.
- gtt mmaps: I have no idea how you plan to coax the mmap syscall into
cooperation here.

So addressing your point above that SNA has to deal with 25+ years of
userspace legacy: I really want this to be explicitly done in the
kernel in all the places we need it. At which point I honestly don't
understand why that special offset can't be moved into the kgem
abstraction. For normal objects it would be 0, for un-aligned userptr
objects it would be non-0. In all cases kgem would add that offset
when emitting a relocation.

Now if the argument is that this adds measurable overhead to
relocation emitting, I want this overhead to be measured to justify
adding this to the kernel.
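
(To be concrete about what I'm asking for: the userspace-side
bookkeeping amounts to roughly the sketch below. The names are invented
for illustration and this is not actual kgem code.)

	#include <stdint.h>

	struct wrapped_bo {
		uint32_t handle;
		uint32_t offset_in_page;	/* 0 for ordinary bos */
	};

	/* Fold the sub-page offset into every relocation delta when
	 * emitting relocations, instead of having the kernel track it.
	 */
	static uint64_t reloc_delta(const struct wrapped_bo *bo,
				    uint64_t delta)
	{
		return delta + bo->offset_in_page;
	}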

>> >> - I915_USERPTR_UNSYNCHRONIZED ... eeek. That means that despite everyone
>> >>   having mmu notifiers enabled in their distro config, you make sure sna
>> >>   doesn't hit it. Imo not enough testing coverage ;-) Or this there
>> >>   another reason behind this than "mmu notifiers are too slow"?
>> >>
>> >>   Generally I'm a bit sloppy with going root-only for legacy X stuff (like
>> >>   scanline waits), but this here looks very much generally useful. So not
>> >>   exemption-material imo.
>> >
>> > Strongly disagree. Most of my machines do not have mmu-notifiers and
>> > would still like to benefit from userptr and I see no reason why we need
>> > to force mmu-notifiers.
>>
>> Note that I didn't shout against the mmu_notifier-less support
>> (although I'm honestly not too happy about it), what I don't like is
>> the override bit disabling the mmu_notifiers even if we have them.
>> Since that will mean that the code won't be tested through SNA, and so
>> has a good chance of being buggy. Once mesa comes around and uses it,
>> it'll nicely blow up. And one of the reason Jesse is breathing down my
>> neck to merge this is "other guys are interested in this at intel".
>
> I don't see how you can reconcile that viewpoint. In order for userspace
> to be able to use userptr without mmu-notifiers it has to be very
> careful about managing synchronisation of such pointers across vma
> events. So userptr should only be allowed to opt-in to such a precarious
> arrangement. And because it was so easy for you to shoot yourself and
> the entire system in the head by getting such synchronisation wrong, the
> suggestion was to only allow root access to the shotgun. Once it is opting
> in, behaviour of userptr should then be consistent across all
> configurations i.e. it should always avoid the mmu-notifier if userspace
> asks.

So first one thing to clear out: I think we don't need to care about
semantics around unmap, fork and friends - the kernel will hold
references to the backing storage pages in both cases. So I think both
cases are safe from userspace trying to trick the kernel.

Second thing to get out of the way: After all this discussion I think
it's ok to call fork&friends (though not unmap) as simply undefined
behaviour - the kernel might pick the old or the new address space
however it sees fit. Again, not something we need to care about. Now a
big exception would be if the behaviour around doing an early unmap
after a batchbuffer would differ between mmu-notifier based userptr
and pinning userptr. If that's indeed the case I'm seriously wondering
whether it's a good idea to merge both implementations under the same
interface. In that case I'd _much_ prefer we'd just stick with mmu
notifiers and have the same semantics everywhere.

So now the real deal: My argument for originally not merging userptr
is that pinning tons of GFP_MOVABLE memory isn't nice. Presuming our
shrinker works (big if, I know) there shouldn't be an issue with
memory exhaustion in either case. And imo we really should keep page
migration working given how people constantly invent new uses of it.
Now I see that most of them are for servers, but:
- server-y stuff tends to trickle down
- we seem to have a server gpu product in preparation (the recent pile
of display/PCH-less patches from Ben).
So I want to keep page migration working, even when we have tons of
userptr-backed gem bos around.

Now looking at SNA userptr and this patch, the result is that SNA
won't ever use mmu-notifier-based userptr. That feels a lot like
adding a bit of code (mmu-notifier-based userptr support) to sneak in
a different feature (pinning userptr support). If mmu notifiers are
too damn slow, we need to fix that (or work around it with some
cache), not hide it by flat-out not using them.

Also, Eric's persistent ranting about root-only interfaces moved me
away from them, so now I'm even leaning a bit towards simply requiring
mmu-notifiers for userptr. It feels at least like the Right Thing ;-)

> As for testing using SNA, you can just ask SNA to only use mmu-notifier
> backed userptr and so only run when mmu-notifiers are available.

Given how good we are at testing ugly corner cases (much better, but
still with gaping holes apparently), I want mmu-notifier-based
userptr to be tested on as many machines as possible. Not just on mine,
once when merging, followed by beautiful fireworks once mesa
starts to use it and it has all regressed.

>> >> - On a quick read I've seen some gtt mmap support remnants. This scares
>> >>   me, a roundabout njet! would appease. Though I think that should already
>> >>   happen with the checks we have to reject snoopable buffers?
>> >
>> > That's because there are platforms where it is theoretically possible
>> > and whilst I have no use case for it, I wanted to make it work
>> > nevertheless. I still think it is possible, but I could not see a way to
>> > do so without completely replacing the drm vm code.
>>
>> If I understand things correctly we should be able to block this
>> simply by refusing to create an mmap offset for a userptr backed
>> object.
>
> But I want to be able to support GTT mappings wherever possible. The
> basic principle of making sure every bo is first class, so that there
> are no surprises. Such as stolen, dmabuf...

I can buy that argument, but in that case I _really_ want full
test-coverage of all the exposed interfaces. userptr is imo a big
addition with high potential to expose new "interesting" corner cases,
so I want to (ab)use the opportunity to improve our testing coverage.
If that's around, I'll happily accept unrestricted userptr support.
But see below for some comments.

>> >> - I think we should reject set_caching requests on userptr objects.
>> >
>> > I don't think that is strictly required, just like we should not limit
>> > the user from using set_tiling. (Though the user is never actually going
>> > to tell the kernel about such tiling...).
>>
>> Yeah, I guess we could allow people to shoot their foot off. Otoh it
>> adds another dimension to the userptr interface, which we need to make
>> sure it works. Similar besides set_tiling is probably also
>> prime_export.
>>
>> The reason I'd prefer to lock things down is that we have fairly nice
>> coverage for normal gem objects wrt exercising corner-cases with igt.
>> If we don't disallow the same corner-cases with userptr, I'd prefer
>> the tests to also cover those ... Disallowing is cheaper ;-)
>
> See above: every bo should be first class...

Already mentioned above, but I do have my doubts that the in-page
offset support really does mesh with gtt (and cpu mmaps fwiw) in a
first-class manner.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2013-04-08 19:24     ` Daniel Vetter
  2013-04-08 21:48       ` Chris Wilson
@ 2013-04-08 22:06       ` Eric Anholt
  2013-06-24 21:36       ` Jesse Barnes
  2 siblings, 0 replies; 34+ messages in thread
From: Eric Anholt @ 2013-04-08 22:06 UTC (permalink / raw)
  To: Daniel Vetter, Chris Wilson


[-- Attachment #1.1: Type: text/plain, Size: 3720 bytes --]

Daniel Vetter <daniel@ffwll.ch> writes:

> On Mon, Apr 8, 2013 at 7:40 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> On Mon, Apr 08, 2013 at 07:18:11PM +0200, Daniel Vetter wrote:
>>> On Tue, Feb 12, 2013 at 02:17:22PM +0000, Chris Wilson wrote:
>>> > By exporting the ability to map user address and inserting PTEs
>>> > representing their backing pages into the GTT, we can exploit UMA in order
>>> > to utilize normal application data as a texture source or even as a
>>> > render target (depending upon the capabilities of the chipset). This has
>>> > a number of uses, with zero-copy downloads to the GPU and efficient
>>> > readback making the intermixed streaming of CPU and GPU operations
>>> > fairly efficient. This ability has many widespread implications from
>>> > faster rendering of client-side software rasterisers (chromium),
>>> > mitigation of stalls due to read back (firefox) and to faster pipelining
>>> > of texture data (such as pixel buffer objects in GL or data blobs in CL).
>>> >
>>> > v2: Compile with CONFIG_MMU_NOTIFIER
>>> > v3: We can sleep while performing invalidate-range, which we can utilise
>>> > to drop our page references prior to the kernel manipulating the vma
>>> > (for either discard or cloning) and so protect normal users.
>>> > v4: Only run the invalidate notifier if the range intercepts the bo.
>>> > v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
>>> >
>>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>
>>> Quick bikeshed:
>>> - Still not really in favour of the in-page gtt offset handling ... I
>>>   still think that this is just a fancy mmap api, and it better reject
>>>   attempts to not map anything aligned to a full page outright.
>>
>> Strongly disagree.
>
> Ok, let's dig out the beaten arguments here ;-)
> - Imo the gtt_offset frobbery is a bit fragile, and getting this right
> in the face of ppgtt won't make it better. And yes the only reason we
> still have that field is that you've shot down any patch to remove it
> citing userptr here. So "it's here already" doesn't count ;-)

Agreed -- given that I need to look at byte offsets for alignment issues
on basically all my usages of memory, having my data have part of its
intra-page offset hidden in the kernel at creation time would be bad for
Mesa.

Access to data is controlled at a page level, so I think this kernel
interface should act at a page level.

>>> - I915_USERPTR_UNSYNCHRONIZED ... eeek. That means that despite everyone
>>>   having mmu notifiers enabled in their distro config, you make sure sna
>>>   doesn't hit it. Imo not enough testing coverage ;-) Or is there
>>>   another reason behind this than "mmu notifiers are too slow"?
>>>
>>>   Generally I'm a bit sloppy with going root-only for legacy X stuff (like
>>>   scanline waits), but this here looks very much generally useful. So not
>>>   exemption-material imo.
>>
>> Strongly disagree. Most of my machines do not have mmu-notifiers and
>> would still like to benefit from userptr and I see no reason why we need
>> to force mmu-notifiers.
>
> Note that I didn't shout against the mmu_notifier-less support
> (although I'm honestly not too happy about it), what I don't like is
> the override bit disabling the mmu_notifiers even if we have them.
> Since that will mean that the code won't be tested through SNA, and so
> has a good chance of being buggy. Once mesa comes around and uses it,
> it'll nicely blow up. And one of the reasons Jesse is breathing down my
> neck to merge this is "other guys are interested in this at intel".

I hate root-only interfaces.  It helps lock in root-only X, and means
that X gets special treatment compared to the 3D driver.

[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2013-04-08 19:24     ` Daniel Vetter
@ 2013-04-08 21:48       ` Chris Wilson
  2013-04-15 18:37         ` Daniel Vetter
  2013-04-08 22:06       ` Eric Anholt
  2013-06-24 21:36       ` Jesse Barnes
  2 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2013-04-08 21:48 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Mon, Apr 08, 2013 at 09:24:58PM +0200, Daniel Vetter wrote:
> On Mon, Apr 8, 2013 at 7:40 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > On Mon, Apr 08, 2013 at 07:18:11PM +0200, Daniel Vetter wrote:
> >> On Tue, Feb 12, 2013 at 02:17:22PM +0000, Chris Wilson wrote:
> >> > By exporting the ability to map user address and inserting PTEs
> >> > representing their backing pages into the GTT, we can exploit UMA in order
> >> > to utilize normal application data as a texture source or even as a
> >> > render target (depending upon the capabilities of the chipset). This has
> >> > a number of uses, with zero-copy downloads to the GPU and efficient
> >> > readback making the intermixed streaming of CPU and GPU operations
> >> > fairly efficient. This ability has many widespread implications from
> >> > faster rendering of client-side software rasterisers (chromium),
> >> > mitigation of stalls due to read back (firefox) and to faster pipelining
> >> > of texture data (such as pixel buffer objects in GL or data blobs in CL).
> >> >
> >> > v2: Compile with CONFIG_MMU_NOTIFIER
> >> > v3: We can sleep while performing invalidate-range, which we can utilise
> >> > to drop our page references prior to the kernel manipulating the vma
> >> > (for either discard or cloning) and so protect normal users.
> >> > v4: Only run the invalidate notifier if the range intercepts the bo.
> >> > v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
> >> >
> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>
> >> Quick bikeshed:
> >> - Still not really in favour of the in-page gtt offset handling ... I
> >>   still think that this is just a fancy mmap api, and it better reject
> >>   attempts to not map anything aligned to a full page outright.
> >
> > Strongly disagree.
> 
> Ok, let's dig out the beaten arguments here ;-)
> - Imo the gtt_offset frobbery is a bit fragile, and getting this right
> in the face of ppgtt won't make it better. And yes the only reason we
> still have that field is that you've shot down any patch to remove it
> citing userptr here. So "it's here already" doesn't count ;-)
> - Userptr for i915 is an mmap interface, and that works on pages,
> lying to userspace isn't great.

No. Due to the nature of existing decades old userspace, I need to map
byte ranges, so I do not view this as a simple equivalence to mmapping
a bo.

> - I don't see why userspace can't do this themselves. I've seen that
> it makes things easier in SNA/X, but for a general purpose interface
> that argument doesn't cut too much.

I have the completely opposite viewpoint: a general purpose interface is not
a page interface, and this interface trades a small amount of
kernel complexity (tracking the offset_in_page) so that userspace has a
flexible interface that matches its requirements.

> - I'm also a bit afraid that our code implicitly assumes that
> size/offset are always page-aligned and I kinda want to avoid that we
> have to audit for such issues from here on. We've blown up in the past
> assuming that size > 0 already, I think we're set to blow up on this
> one here.

Now that we can distinguish between size and num_pages, there is no
longer a need for size to be page aligned (and is currently redundant).
 
> In any case, if you really want to stick to this I want this to be
> explicitly tracked in an obj->reloc_gtt_offset_adjustment or something

Sure, let's call it obj->gtt_offset:12;

> which is very loudly yelling at people to make sure no one trips over
> it. Tracking the adjustment in a separate field, which would only ever
> be used in the reloc code would address all my concerns (save for the
> api ugliness one).

And everywhere that deals in GTT addresses.
 
> >> - I915_USERPTR_UNSYNCHRONIZED ... eeek. That means that despite everyone
> >>   having mmu notifiers enabled in their distro config, you make sure sna
> >>   doesn't hit it. Imo not enough testing coverage ;-) Or is there
> >>   another reason behind this than "mmu notifiers are too slow"?
> >>
> >>   Generally I'm a bit sloppy with going root-only for legacy X stuff (like
> >>   scanline waits), but this here looks very much generally useful. So not
> >>   exemption-material imo.
> >
> > Strongly disagree. Most of my machines do not have mmu-notifiers and
> > would still like to benefit from userptr and I see no reason why we need
> > to force mmu-notifiers.
> 
> Note that I didn't shout against the mmu_notifier-less support
> (although I'm honestly not too happy about it), what I don't like is
> the override bit disabling the mmu_notifiers even if we have them.
> Since that will mean that the code won't be tested through SNA, and so
> has a good chance of being buggy. Once mesa comes around and uses it,
> it'll nicely blow up. And one of the reasons Jesse is breathing down my
> neck to merge this is "other guys are interested in this at intel".

I don't see how you can reconcile that viewpoint. In order for userspace
to be able to use userptr without mmu-notifiers it has to be very
careful about managing synchronisation of such pointers across vma
events. So userptr should only be allowed to opt-in to such a precarious
arrangement. And because it was so easy for you to shoot yourself and
the entire system in the head by getting such synchronisation wrong, the
suggestion was to only allow root access to the shotgun. Once it has opted
in, the behaviour of userptr should then be consistent across all
configurations i.e. it should always avoid the mmu-notifier if userspace
asks.

As for testing using SNA, you can just ask SNA to only use mmu-notifier
backed userptr and so only run when mmu-notifiers are available.
 
> >> - On a quick read I've seen some gtt mmap support remnants. This scares
> >>   me, a roundabout njet! would appease. Though I think that should already
> >>   happen with the checks we have to reject snoopable buffers?
> >
> > That's because there are platforms where it is theoretically possible
> > and whilst I have no use case for it, I wanted to make it work
> > nevertheless. I still think it is possible, but I could not see a way to
> > do so without completely replacing the drm vm code.
> 
> If I understand things correctly we should be able to block this
> simply by refusing to create an mmap offset for a userptr backed
> object.

But I want to be able to support GTT mappings wherever possible. The
basic principle of making sure every bo is first class, so that there
are no surprises. Such as stolen, dmabuf...
 
> >> - I think we should reject set_caching requests on userptr objects.
> >
> > I don't think that is strictly required, just like we should not limit
> > the user from using set_tiling. (Though the user is never actually going
> > to tell the kernel about such tiling...).
> 
> Yeah, I guess we could allow people to shoot their foot off. Otoh it
> adds another dimension to the userptr interface, which we need to make
> sure it works. Similar besides set_tiling is probably also
> prime_export.
> 
> The reason I'd prefer to lock things down is that we have fairly nice
> coverage for normal gem objects wrt exercising corner-cases with igt.
> If we don't disallow the same corner-cases with userptr, I'd prefer
> the tests to also cover those ... Disallowing is cheaper ;-)

See above: every bo should be first class...
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2013-04-08 17:40   ` Chris Wilson
@ 2013-04-08 19:24     ` Daniel Vetter
  2013-04-08 21:48       ` Chris Wilson
                         ` (2 more replies)
  0 siblings, 3 replies; 34+ messages in thread
From: Daniel Vetter @ 2013-04-08 19:24 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx

On Mon, Apr 8, 2013 at 7:40 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Mon, Apr 08, 2013 at 07:18:11PM +0200, Daniel Vetter wrote:
>> On Tue, Feb 12, 2013 at 02:17:22PM +0000, Chris Wilson wrote:
>> > By exporting the ability to map user address and inserting PTEs
>> > representing their backing pages into the GTT, we can exploit UMA in order
>> > to utilize normal application data as a texture source or even as a
>> > render target (depending upon the capabilities of the chipset). This has
>> > a number of uses, with zero-copy downloads to the GPU and efficient
>> > readback making the intermixed streaming of CPU and GPU operations
>> > fairly efficient. This ability has many widespread implications from
>> > faster rendering of client-side software rasterisers (chromium),
>> > mitigation of stalls due to read back (firefox) and to faster pipelining
>> > of texture data (such as pixel buffer objects in GL or data blobs in CL).
>> >
>> > v2: Compile with CONFIG_MMU_NOTIFIER
>> > v3: We can sleep while performing invalidate-range, which we can utilise
>> > to drop our page references prior to the kernel manipulating the vma
>> > (for either discard or cloning) and so protect normal users.
>> > v4: Only run the invalidate notifier if the range intercepts the bo.
>> > v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>
>> Quick bikeshed:
>> - Still not really in favour of the in-page gtt offset handling ... I
>>   still think that this is just a fancy mmap api, and it better reject
>>   attempts to not map anything aligned to a full page outright.
>
> Strongly disagree.

Ok, let's dig out the beaten arguments here ;-)
- Imo the gtt_offset frobbery is a bit fragile, and getting this right
in the face of ppgtt won't make it better. And yes the only reason we
still have that field is that you've shot down any patch to remove it
citing userptr here. So "it's here already" doesn't count ;-)
- Userptr for i915 is an mmap interface, and that works on pages,
lying to userspace isn't great.
- I don't see why userspace can't do this themselves. I've seen that
it makes things easier in SNA/X, but for a general purpose interface
that argument doesn't cut too much.
- I'm also a bit afraid that our code implicitly assumes that
size/offset are always page-aligned and I kinda want to avoid that we
have to audit for such issues from here on. We've blown up in the past
assuming that size > 0 already, I think we're set to blow up on this
one here.

In any case, if you really want to stick to this I want this to be
explicitly tracked in an obj->reloc_gtt_offset_adjustment or something
which is very loudly yelling at people to make sure no one trips over
it. Tracking the adjustment in a separate field, which would only ever
be used in the reloc code would address all my concerns (save for the
api ugliness one).
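
Something like this minimal sketch is what I have in mind (the field and
helper names are made up, nothing like this exists in the driver today):

#include <stdint.h>

/* Hypothetical: gtt_offset stays page-aligned, the byte bias lives in its
 * own loudly-named field and is applied in exactly one place.
 */
struct sketch_gem_object {
	uint32_t gtt_offset;			/* always page-aligned */
	uint32_t reloc_gtt_offset_adjustment;	/* offset_in_page(user_ptr) or 0 */
};

/* Address written into the batch for a relocation against obj; only the
 * reloc code adds the adjustment, everything else keeps seeing aligned
 * offsets.
 */
static uint32_t sketch_reloc_target_address(const struct sketch_gem_object *obj,
					    uint32_t delta)
{
	return obj->gtt_offset + obj->reloc_gtt_offset_adjustment + delta;
}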

>> - I915_USERPTR_UNSYNCHRONIZED ... eeek. That means that despite everyone
>>   having mmu notifiers enabled in their distro config, you make sure sna
>>   doesn't hit it. Imo not enough testing coverage ;-) Or is there
>>   another reason behind this than "mmu notifiers are too slow"?
>>
>>   Generally I'm a bit sloppy with going root-only for legacy X stuff (like
>>   scanline waits), but this here looks very much generally useful. So not
>>   exemption-material imo.
>
> Strongly disagree. Most of my machines do not have mmu-notifiers and
> would still like to benefit from userptr and I see no reason why we need
> to force mmu-notifiers.

Note that I didn't shout against the mmu_notifier-less support
(although I'm honestly not too happy about it), what I don't like is
the override bit disabling the mmu_notifiers even if we have them.
Since that will mean that the code won't be tested through SNA, and so
has a good chance of being buggy. Once mesa comes around and uses it,
it'll nicely blow up. And one of the reasons Jesse is breathing down my
neck to merge this is "other guys are interested in this at intel".

>> - On a quick read I've seen some gtt mmap support remnants. This scares
>>   me, a roundabout njet! would appease. Though I think that should already
>>   happen with the checks we have to reject snoopable buffers?
>
> That's because there are platforms where it is theoretically possible
> and whilst I have no use case for it, I wanted to make it work
> nevertheless. I still think it is possible, but I could not see a way to
> do so without completely replacing the drm vm code.

If I understand things correctly we should be able to block this
simply by refusing to create an mmap offset for a userptr backed
object.

>> - I think we should reject set_caching requests on userptr objects.
>
> I don't think that is strictly required, just like we should not limit
> the user from using set_tiling. (Though the user is never actually going
> to tell the kernel about such tiling...).

Yeah, I guess we could allow people to shoot their foot off. Otoh it
adds another dimension to the userptr interface, which we need to make
sure it works. Similar besides set_tiling is probably also
prime_export.

The reason I'd prefer to lock things down is that we have fairly nice
coverage for normal gem objects wrt exercising corner-cases with igt.
If we don't disallow the same corner-cases with userptr, I'd prefer
the tests to also cover those ... Disallowing is cheaper ;-)

>> - union drm_i915_gem_objects freaked me out shortly, until I've noticed
>>   that it's only used for our private slab. Maybe an explicit max in
>>   there? Also, this somewhat defeats that you've moved the userptr stuff
>>   out of the base class - this way we won't save any memory ...
>
> The alternative is to use a union inside the object. Long ago, I had a
> few more objects in there.

Yeah, the entire sg rework nicely unified things \o/

Still, the union seems to be killable with a few lines of patch-diff
on top of this ...

>> - Basic igt to check the above api corner-cases return -EINVAL would be
>>   nice.
>
> Been sitting around for ages, just waiting for the interface to be
> agreed upon.
>
>> - I need to check for deadlocks around the mmu notifier handling. Iirc
>>   that thing takes all mm locks, and our own bo unref code can be called
>>   from all kinds of interesting places. Since each vma object also holds a
>>   ref onto a gem bo I suspect that we do have some fun here ...
>
> The notifier itself takes no locks, so the locking is whatever state the
> caller sets up. The current locking order is (mm, struct mutex) i.e. the
> same ordering as used by the notifier callbacks.

Oops, too scared by the fbdev_notifier - the mmu notifier indeed
uses srcu, so we should be fine here. I need to read the callchains
in-depth though and maybe ask lockdep what it thinks about this first
before I can put my concerns at ease ...
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2013-04-08 17:18 ` Daniel Vetter
@ 2013-04-08 17:40   ` Chris Wilson
  2013-04-08 19:24     ` Daniel Vetter
  0 siblings, 1 reply; 34+ messages in thread
From: Chris Wilson @ 2013-04-08 17:40 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Mon, Apr 08, 2013 at 07:18:11PM +0200, Daniel Vetter wrote:
> On Tue, Feb 12, 2013 at 02:17:22PM +0000, Chris Wilson wrote:
> > By exporting the ability to map user address and inserting PTEs
> > representing their backing pages into the GTT, we can exploit UMA in order
> > to utilize normal application data as a texture source or even as a
> > render target (depending upon the capabilities of the chipset). This has
> > a number of uses, with zero-copy downloads to the GPU and efficient
> > readback making the intermixed streaming of CPU and GPU operations
> > fairly efficient. This ability has many widespread implications from
> > faster rendering of client-side software rasterisers (chromium),
> > mitigation of stalls due to read back (firefox) and to faster pipelining
> > of texture data (such as pixel buffer objects in GL or data blobs in CL).
> > 
> > v2: Compile with CONFIG_MMU_NOTIFIER
> > v3: We can sleep while performing invalidate-range, which we can utilise
> > to drop our page references prior to the kernel manipulating the vma
> > (for either discard or cloning) and so protect normal users.
> > v4: Only run the invalidate notifier if the range intercepts the bo.
> > v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Quick bikeshed:
> - Still not really in favour of the in-page gtt offset handling ... I
>   still think that this is just a fancy mmap api, and it better reject
>   attempts to not map anything aligned to a full page outright.

Strongly disagree.
 
> - I915_USERPTR_UNSYNCHRONIZED ... eeek. That means that despite everyone
>   having mmu notifiers enabled in their distro config, you make sure sna
>   doesn't hit it. Imo not enough testing coverage ;-) Or is there
>   another reason behind this than "mmu notifiers are too slow"?
> 
>   Generally I'm a bit sloppy with going root-only for legacy X stuff (like
>   scanline waits), but this here looks very much generally useful. So not
>   exemption-material imo.

Strongly disagree. Most of my machines do not have mmu-notifiers and
would still like to benefit from userptr and I see no reason why we need
to force mmu-notifiers.

> - On a quick read I've seen some gtt mmap support remnants. This scares
>   me, a roundabout njet! would appease. Though I think that should already
>   happen with the checks we have to reject snoopable buffers?

That's because there are platforms where it is theoretically possible
and whilst I have no use case for it, I wanted to make it work
nevertheless. I still think it is possible, but I could not see a way to
do so without completely replacing the drm vm code.
 
> - I think we should reject set_caching requests on userptr objects.

I don't think that is strictly required, just like we should not limit
the user from using set_tiling. (Though the user is never actually going
to tell the kernel about such tiling...)
 
> - union drm_i915_gem_objects freaked me out shortly, until I've noticed
>   that it's only used for our private slab. Maybe an explicit max in
>   there? Also, this somewhat defeats that you've moved the userptr stuff
>   out of the base class - this way we won't save any memory ...

The alternative is to use a union inside the object. Long ago, I had a
few more objects in there.
 
> - Basic igt to check the above api corner-cases return -EINVAL would be
>   nice.

Been sitting around for ages, just waiting for the interface to be
agreed upon.
 
> - I need to check for deadlocks around the mmu notifier handling. Iirc
>   that thing takes all mm locks, and our own bo unref code can be called
>   from all kinds of interesting places. Since each vma object also holds a
>   ref onto a gem bo I suspect that we do have some fun here ...

The notifier itself takes no locks, so the locking is whatever state the
caller sets up. The current locking order is (mm, struct mutex) i.e. the
same ordering as used by the notifier callbacks.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2013-02-12 14:17 Chris Wilson
  2013-02-13 22:24 ` Reese, Armin C
@ 2013-04-08 17:18 ` Daniel Vetter
  2013-04-08 17:40   ` Chris Wilson
  1 sibling, 1 reply; 34+ messages in thread
From: Daniel Vetter @ 2013-04-08 17:18 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Feb 12, 2013 at 02:17:22PM +0000, Chris Wilson wrote:
> By exporting the ability to map user address and inserting PTEs
> representing their backing pages into the GTT, we can exploit UMA in order
> to utilize normal application data as a texture source or even as a
> render target (depending upon the capabilities of the chipset). This has
> a number of uses, with zero-copy downloads to the GPU and efficient
> readback making the intermixed streaming of CPU and GPU operations
> fairly efficient. This ability has many widespread implications from
> faster rendering of client-side software rasterisers (chromium),
> mitigation of stalls due to read back (firefox) and to faster pipelining
> of texture data (such as pixel buffer objects in GL or data blobs in CL).
> 
> v2: Compile with CONFIG_MMU_NOTIFIER
> v3: We can sleep while performing invalidate-range, which we can utilise
> to drop our page references prior to the kernel manipulating the vma
> (for either discard or cloning) and so protect normal users.
> v4: Only run the invalidate notifier if the range intercepts the bo.
> v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Quick bikeshed:
- Still not really in favour of the in-page gtt offset handling ... I
  still think that this is just a fancy mmap api, and it better reject
  attempts to not map anything aligned to a full page outright.

- I915_USERPTR_UNSYNCHRONIZED ... eeek. That means that despite everyone
  having mmu notifiers enabled in their distro config, you make sure sna
  doesn't hit it. Imo not enough testing coverage ;-) Or is there
  another reason behind this than "mmu notifiers are too slow"?

  Generally I'm a bit sloppy with going root-only for legacy X stuff (like
  scanline waits), but this here looks very much generally useful. So not
  exemption-material imo.

- On a quick read I've seen some gtt mmap support remnants. This scares
  me, a roundabout njet! would appease. Though I think that should already
  happen with the checks we have to reject snoopable buffers?

- I think we should reject set_caching requests on userptr objects.

- union drm_i915_gem_objects freaked me out shortly, until I've noticed
  that it's only used for our private slab. Maybe an explicit max in
  there? Also, this somewhat defeats that you've moved the userptr stuff
  out of the base class - this way we won't save any memory ...

- Basic igt to check the above api corner-cases return -EINVAL would be
  nice.

- I need to check for deadlocks around the mmu notifier handling. Iirc
  that thing takes all mm locks, and our own bo unref code can be called
  from all kinds of interesting places. Since each vma object also holds a
  ref onto a gem bo I suspect that we do have some fun here ...

Cheers, Daniel

> ---
>  drivers/gpu/drm/i915/Makefile              |    1 +
>  drivers/gpu/drm/i915/i915_dma.c            |    1 +
>  drivers/gpu/drm/i915/i915_drv.h            |   22 ++
>  drivers/gpu/drm/i915/i915_gem.c            |   31 ++-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |    7 +-
>  drivers/gpu/drm/i915/i915_gem_userptr.c    |  329 ++++++++++++++++++++++++++++
>  include/uapi/drm/i915_drm.h                |   16 ++
>  7 files changed, 393 insertions(+), 14 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 91f3ac6..42858f6 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -14,6 +14,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
>  	  i915_gem_gtt.o \
>  	  i915_gem_stolen.o \
>  	  i915_gem_tiling.o \
> +	  i915_gem_userptr.o \
>  	  i915_sysfs.o \
>  	  i915_trace_points.o \
>  	  i915_ums.o \
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 4fa6beb..9b1984c 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1883,6 +1883,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
>  	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
> +	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED),
>  };
>  
>  int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 923dc0a..90070f4 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -42,6 +42,7 @@
>  #include <linux/backlight.h>
>  #include <linux/intel-iommu.h>
>  #include <linux/kref.h>
> +#include <linux/mmu_notifier.h>
>  #include <linux/pm_qos.h>
>  
>  /* General customization:
> @@ -1076,6 +1077,7 @@ struct drm_i915_gem_object_ops {
>  	 */
>  	int (*get_pages)(struct drm_i915_gem_object *);
>  	void (*put_pages)(struct drm_i915_gem_object *);
> +	void (*release)(struct drm_i915_gem_object *);
>  };
>  
>  struct drm_i915_gem_object {
> @@ -1222,6 +1224,23 @@ struct drm_i915_gem_object {
>  };
>  #define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base)
>  
> +struct i915_gem_userptr_object {
> +	struct drm_i915_gem_object gem;
> +	uintptr_t user_ptr;
> +	size_t user_size;
> +	int read_only;
> +
> +	struct mm_struct *mm;
> +#if defined(CONFIG_MMU_NOTIFIER)
> +	struct mmu_notifier mn;
> +#endif
> +};
> +
> +union drm_i915_gem_objects {
> +	struct drm_i915_gem_object base;
> +	struct i915_gem_userptr_object userptr;
> +};
> +
>  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
>  
>  /**
> @@ -1501,6 +1520,8 @@ int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
>  			   struct drm_file *file_priv);
>  int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
>  			   struct drm_file *file_priv);
> +int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
> +			   struct drm_file *file);
>  int i915_gem_set_tiling(struct drm_device *dev, void *data,
>  			struct drm_file *file_priv);
>  int i915_gem_get_tiling(struct drm_device *dev, void *data,
> @@ -1554,6 +1575,7 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>  	BUG_ON(obj->pages_pin_count == 0);
>  	obj->pages_pin_count--;
>  }
> +int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
>  
>  int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
>  int i915_gem_object_sync(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 73b1e9e..65a36bf 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1336,20 +1336,18 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
>  	struct drm_device *dev = obj->base.dev;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> -	pgoff_t page_offset;
> +	pgoff_t offset;
>  	unsigned long pfn;
>  	int ret = 0;
>  	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
>  
> -	/* We don't use vmf->pgoff since that has the fake offset */
> -	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
> -		PAGE_SHIFT;
> -
>  	ret = i915_mutex_lock_interruptible(dev);
>  	if (ret)
>  		goto out;
>  
> -	trace_i915_gem_object_fault(obj, page_offset, true, write);
> +	trace_i915_gem_object_fault(obj,
> +				    (unsigned long)(vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT,
> +				    true, write);
>  
>  	/* Access to snoopable pages through the GTT is incoherent. */
>  	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
> @@ -1372,8 +1370,10 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  
>  	obj->fault_mappable = true;
>  
> -	pfn = ((dev_priv->gtt.mappable_base + obj->gtt_offset) >> PAGE_SHIFT) +
> -		page_offset;
> +	/* We don't use vmf->pgoff since that has the fake offset */
> +	offset = (unsigned long)vmf->virtual_address - vma->vm_start;
> +	offset += obj->gtt_offset;
> +	pfn = (dev_priv->gtt.mappable_base + offset) >> PAGE_SHIFT;
>  
>  	/* Finally, remap it using the new GTT offset */
>  	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
> @@ -1565,6 +1565,12 @@ i915_gem_mmap_gtt(struct drm_file *file,
>  		goto out;
>  	}
>  
> +	if (offset_in_page(obj->gtt_offset)) {
> +		DRM_ERROR("Attempting to mmap an unaligned buffer\n");
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
>  	ret = i915_gem_object_create_mmap_offset(obj);
>  	if (ret)
>  		goto out;
> @@ -2495,9 +2501,9 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
>  	/* Avoid an unnecessary call to unbind on rebind. */
>  	obj->map_and_fenceable = true;
>  
> +	obj->gtt_offset -= obj->gtt_space->start;
>  	drm_mm_put_block(obj->gtt_space);
>  	obj->gtt_space = NULL;
> -	obj->gtt_offset = 0;
>  
>  	return 0;
>  }
> @@ -2987,7 +2993,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
>  	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
>  
>  	obj->gtt_space = node;
> -	obj->gtt_offset = node->start;
> +	obj->gtt_offset += node->start;
>  
>  	fenceable =
>  		node->size == fence_size &&
> @@ -3800,6 +3806,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	if (obj->base.import_attach)
>  		drm_prime_gem_destroy(&obj->base, NULL);
>  
> +	if (obj->ops->release)
> +		obj->ops->release(obj);
> +
>  	drm_gem_object_release(&obj->base);
>  	i915_gem_info_remove_obj(dev_priv, obj->base.size);
>  
> @@ -4101,7 +4110,7 @@ i915_gem_load(struct drm_device *dev)
>  
>  	dev_priv->slab =
>  		kmem_cache_create("i915_gem_object",
> -				  sizeof(struct drm_i915_gem_object), 0,
> +				  sizeof(union drm_i915_gem_objects), 0,
>  				  SLAB_HWCACHE_ALIGN,
>  				  NULL);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 2726910..a3e68af 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -254,14 +254,16 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
>  		return -EFAULT;
>  
>  	reloc->delta += target_offset;
> +	reloc->offset += obj->gtt_offset;
>  	if (use_cpu_reloc(obj)) {
> -		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
> +		uint32_t page_offset = offset_in_page(reloc->offset);
>  		char *vaddr;
>  
>  		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
>  		if (ret)
>  			return ret;
>  
> +		reloc->offset -= obj->gtt_space->start;
>  		vaddr = kmap_atomic(i915_gem_object_get_page(obj,
>  							     reloc->offset >> PAGE_SHIFT));
>  		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
> @@ -280,11 +282,10 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
>  			return ret;
>  
>  		/* Map the page containing the relocation we're going to perform.  */
> -		reloc->offset += obj->gtt_offset;
>  		reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
>  						      reloc->offset & PAGE_MASK);
>  		reloc_entry = (uint32_t __iomem *)
> -			(reloc_page + (reloc->offset & ~PAGE_MASK));
> +			(reloc_page + offset_in_page(reloc->offset));
>  		iowrite32(reloc->delta, reloc_entry);
>  		io_mapping_unmap_atomic(reloc_page);
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> new file mode 100644
> index 0000000..f93fa1b
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -0,0 +1,329 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "drmP.h"
> +#include "i915_drm.h"
> +#include "i915_drv.h"
> +#include "i915_trace.h"
> +#include "intel_drv.h"
> +#include <linux/mmu_notifier.h>
> +#include <linux/swap.h>
> +
> +static struct i915_gem_userptr_object *to_userptr_object(struct drm_i915_gem_object *obj)
> +{
> +	return container_of(obj, struct i915_gem_userptr_object, gem);
> +}
> +
> +#if defined(CONFIG_MMU_NOTIFIER)
> +static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *mn,
> +						       struct mm_struct *mm,
> +						       unsigned long start,
> +						       unsigned long end)
> +{
> +	struct i915_gem_userptr_object *vmap;
> +	struct drm_device *dev;
> +
> +	/* XXX race between obj unref and mmu notifier? */
> +	vmap = container_of(mn, struct i915_gem_userptr_object, mn);
> +	BUG_ON(vmap->mm != mm);
> +
> +	if (vmap->user_ptr >= end || vmap->user_ptr + vmap->user_size <= start)
> +		return;
> +
> +	if (vmap->gem.pages == NULL) /* opportunistic check */
> +		return;
> +
> +	dev = vmap->gem.base.dev;
> +	mutex_lock(&dev->struct_mutex);
> +	if (vmap->gem.gtt_space) {
> +		struct drm_i915_private *dev_priv = dev->dev_private;
> +		bool was_interruptible;
> +		int ret;
> +
> +		was_interruptible = dev_priv->mm.interruptible;
> +		dev_priv->mm.interruptible = false;
> +
> +		ret = i915_gem_object_unbind(&vmap->gem);
> +		BUG_ON(ret && ret != -EIO);
> +
> +		dev_priv->mm.interruptible = was_interruptible;
> +	}
> +
> +	BUG_ON(i915_gem_object_put_pages(&vmap->gem));
> +	mutex_unlock(&dev->struct_mutex);
> +}
> +
> +static void i915_gem_userptr_mn_release(struct mmu_notifier *mn,
> +					struct mm_struct *mm)
> +{
> +	struct i915_gem_userptr_object *vmap;
> +
> +	vmap = container_of(mn, struct i915_gem_userptr_object, mn);
> +	BUG_ON(vmap->mm != mm);
> +	vmap->mm = NULL;
> +
> +	/* XXX Schedule an eventual unbind? E.g. hook into require request?
> +	 * However, locking will be complicated.
> +	 */
> +}
> +
> +static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
> +	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
> +	.release = i915_gem_userptr_mn_release,
> +};
> +
> +static void
> +i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
> +{
> +	if (vmap->mn.ops && vmap->mm) {
> +		mmu_notifier_unregister(&vmap->mn, vmap->mm);
> +		BUG_ON(vmap->mm);
> +	}
> +}
> +
> +static int
> +i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap,
> +				    unsigned flags)
> +{
> +	if (flags & I915_USERPTR_UNSYNCHRONIZED)
> +		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
> +
> +	vmap->mn.ops = &i915_gem_userptr_notifier;
> +	return mmu_notifier_register(&vmap->mn, vmap->mm);
> +}
> +
> +#else
> +
> +static void
> +i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
> +{
> +}
> +
> +static int
> +i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap,
> +				    unsigned flags)
> +{
> +	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
> +		return -ENODEV;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	return 0;
> +}
> +#endif
> +
> +static int
> +i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
> +	int num_pages = obj->base.size >> PAGE_SHIFT;
> +	struct sg_table *st;
> +	struct scatterlist *sg;
> +	struct page **pvec;
> +	int n, pinned, ret;
> +
> +	if (vmap->mm == NULL)
> +		return -EFAULT;
> +
> +	if (!access_ok(vmap->read_only ? VERIFY_READ : VERIFY_WRITE,
> +		       (char __user *)vmap->user_ptr, vmap->user_size))
> +		return -EFAULT;
> +
> +	/* If userspace should engineer that these pages are replaced in
> +	 * the vma between us binding this page into the GTT and completion
> +	 * of rendering... Their loss. If they change the mapping of their
> +	 * pages they need to create a new bo to point to the new vma.
> +	 *
> +	 * However, that still leaves open the possibility of the vma
> +	 * being copied upon fork. Which falls under the same userspace
> +	 * synchronisation issue as a regular bo, except that this time
> +	 * the process may not be expecting that a particular piece of
> +	 * memory is tied to the GPU.
> +	 *
> +	 * Fortunately, we can hook into the mmu_notifier in order to
> +	 * discard the page references prior to anything nasty happening
> +	 * to the vma (discard or cloning) which should prevent the more
> +	 * egregious cases from causing harm.
> +	 */
> +
> +	pvec = kmalloc(num_pages*sizeof(struct page *),
> +		       GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
> +	if (pvec == NULL) {
> +		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
> +		if (pvec == NULL)
> +			return -ENOMEM;
> +	}
> +
> +	pinned = 0;
> +	if (vmap->mm == current->mm)
> +		pinned = __get_user_pages_fast(vmap->user_ptr, num_pages,
> +					       !vmap->read_only, pvec);
> +	if (pinned < num_pages) {
> +		struct mm_struct *mm = vmap->mm;
> +		ret = 0;
> +		mutex_unlock(&obj->base.dev->struct_mutex);
> +		down_read(&mm->mmap_sem);
> +		if (vmap->mm != NULL)
> +			ret = get_user_pages(current, mm,
> +					     vmap->user_ptr + (pinned << PAGE_SHIFT),
> +					     num_pages - pinned,
> +					     !vmap->read_only, 0,
> +					     pvec + pinned,
> +					     NULL);
> +		up_read(&mm->mmap_sem);
> +		mutex_lock(&obj->base.dev->struct_mutex);
> +		if (ret > 0)
> +			pinned += ret;
> +
> +		if (obj->pages || pinned < num_pages) {
> +			ret = obj->pages ? 0 : -EFAULT;
> +			goto cleanup_pinned;
> +		}
> +	}
> +
> +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> +	if (st == NULL) {
> +		ret = -ENOMEM;
> +		goto cleanup_pinned;
> +	}
> +
> +	if (sg_alloc_table(st, num_pages, GFP_KERNEL)) {
> +		ret = -ENOMEM;
> +		goto cleanup_st;
> +	}
> +
> +	for_each_sg(st->sgl, sg, num_pages, n)
> +		sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
> +	drm_free_large(pvec);
> +
> +	obj->pages = st;
> +	return 0;
> +
> +cleanup_st:
> +	kfree(st);
> +cleanup_pinned:
> +	release_pages(pvec, pinned, 0);
> +	drm_free_large(pvec);
> +	return ret;
> +}
> +
> +static void
> +i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct scatterlist *sg;
> +	int i;
> +
> +	if (obj->madv != I915_MADV_WILLNEED)
> +		obj->dirty = 0;
> +
> +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> +		struct page *page = sg_page(sg);
> +
> +		if (obj->dirty)
> +			set_page_dirty(page);
> +
> +		mark_page_accessed(page);
> +		page_cache_release(page);
> +	}
> +	obj->dirty = 0;
> +
> +	sg_free_table(obj->pages);
> +	kfree(obj->pages);
> +}
> +
> +static void
> +i915_gem_userptr_release(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
> +
> +	i915_gem_userptr_release__mmu_notifier(vmap);
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
> +	.get_pages = i915_gem_userptr_get_pages,
> +	.put_pages = i915_gem_userptr_put_pages,
> +	.release = i915_gem_userptr_release,
> +};
> +
> +/**
> + * Creates a new mm object that wraps some user memory.
> + */
> +int
> +i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_gem_userptr *args = data;
> +	struct i915_gem_userptr_object *obj;
> +	loff_t first_data_page, last_data_page;
> +	int num_pages;
> +	int ret;
> +	u32 handle;
> +
> +	if (args->flags & ~(I915_USERPTR_READ_ONLY | I915_USERPTR_UNSYNCHRONIZED))
> +		return -EINVAL;
> +
> +	first_data_page = args->user_ptr / PAGE_SIZE;
> +	last_data_page = (args->user_ptr + args->user_size - 1) / PAGE_SIZE;
> +	num_pages = last_data_page - first_data_page + 1;
> +	if (num_pages * PAGE_SIZE > dev_priv->gtt.total)
> +		return -E2BIG;
> +
> +	/* Allocate the new object */
> +	obj = i915_gem_object_alloc(dev);
> +	if (obj == NULL)
> +		return -ENOMEM;
> +
> +	if (drm_gem_private_object_init(dev, &obj->gem.base,
> +					num_pages * PAGE_SIZE)) {
> +		i915_gem_object_free(&obj->gem);
> +		return -ENOMEM;
> +	}
> +
> +	i915_gem_object_init(&obj->gem, &i915_gem_userptr_ops);
> +	obj->gem.cache_level = I915_CACHE_LLC_MLC;
> +
> +	obj->gem.gtt_offset = offset_in_page(args->user_ptr);
> +	obj->user_ptr = args->user_ptr;
> +	obj->user_size = args->user_size;
> +	obj->read_only = args->flags & I915_USERPTR_READ_ONLY;
> +
> +	/* And keep a pointer to the current->mm for resolving the user pages
> +	 * at binding. This means that we need to hook into the mmu_notifier
> +	 * in order to detect if the mmu is destroyed.
> +	 */
> +	obj->mm = current->mm;
> +	ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
> +	if (ret)
> +		return ret;
> +
> +	ret = drm_gem_handle_create(file, &obj->gem.base, &handle);
> +	/* drop reference from allocate - handle holds it now */
> +	drm_gem_object_unreference(&obj->gem.base);
> +	if (ret)
> +		return ret;
> +
> +	args->handle = handle;
> +	return 0;
> +}
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 07d5941..20e39be 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -198,6 +198,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_GEM_SET_CACHING	0x2f
>  #define DRM_I915_GEM_GET_CACHING	0x30
>  #define DRM_I915_REG_READ		0x31
> +#define DRM_I915_GEM_USERPTR		0x32
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -247,6 +248,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
>  #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>  #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
> +#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
>  
>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>   * on the security mechanisms provided by hardware.
> @@ -980,4 +982,18 @@ struct drm_i915_reg_read {
>  	__u64 offset;
>  	__u64 val; /* Return value */
>  };
> +
> +struct drm_i915_gem_userptr {
> +	__u64 user_ptr;
> +	__u32 user_size;
> +	__u32 flags;
> +#define I915_USERPTR_READ_ONLY 0x1
> +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> +	/**
> +	 * Returned handle for the object.
> +	 *
> +	 * Object handles are nonzero.
> +	 */
> +	__u32 handle;
> +};
>  #endif /* _UAPI_I915_DRM_H_ */
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2013-02-13 22:24 ` Reese, Armin C
@ 2013-02-13 23:20   ` Chris Wilson
  0 siblings, 0 replies; 34+ messages in thread
From: Chris Wilson @ 2013-02-13 23:20 UTC (permalink / raw)
  To: Reese, Armin C; +Cc: intel-gfx

On Wed, Feb 13, 2013 at 10:24:59PM +0000, Reese, Armin C wrote:
> Thanks for the patch, Chris.
> 
> This is exactly what we were looking for to replace the VMAP feature you submitted a couple of years ago.  We need a method to quickly move data from user mode allocations into video memory (by mapping backing pages into the GTT).
> 
> The Interface appears simple enough, yet fulfills our needs.
> 
> I have one question ... what exactly does the I915_USERPTR_UNSYNCHRONIZED flag do?

Not so loud, Daniel might notice. It is a flag for the user to ask for a
get_user_pages mapping that ignored the synchronization issues
associated with cloning a vma. That is, the caller fully understood that to
fork and share those pages would require userspace synchronization and
that the caller was also responsible for ensuring that the mapping
was destroyed before the vma was released - or else the vma could be
reused by the process/shared-memory whilst it was still active.

The intention was to work around the limitations and lack of mmu-notifiers
in many cases. I made it a privileged flag because normal users aren't
really meant to be able to cause so much interprocess havoc. Though
really having the GPU also access that memory is not so much more
dangerous than sharing that memory with another process, so I am open to
the suggestion that maybe we should allow normal users to shoot
themselves in the foot as well.
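
As a usage sketch against the uapi proposed in this patch (the struct
layout and flag names are taken from the patch itself, the helper is
invented for illustration and the interface may still change before
merging):

#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>	/* struct drm_i915_gem_userptr, as added by this patch */

static int create_userptr_bo(int drm_fd, void *ptr, uint32_t size,
			     unsigned int flags, uint32_t *handle)
{
	struct drm_i915_gem_userptr arg;

	memset(&arg, 0, sizeof(arg));
	arg.user_ptr = (uintptr_t)ptr;
	arg.user_size = size;
	/* flags: 0 for mmu-notifier tracking, or I915_USERPTR_UNSYNCHRONIZED
	 * (CAP_SYS_ADMIN only in this patch) to skip it entirely.
	 */
	arg.flags = flags;

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
		return -errno;

	*handle = arg.handle;	/* nonzero gem handle wrapping the user pages */
	return 0;
}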

Also note that the current interface prohibits GTT mmapping of a userptr bo
on LLC machines if the pointer is not page-aligned. I still think I can
lift that restriction, but my initial attempts were futile, so I'm
ignoring that corner case unless there is a use-case.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2013-02-12 14:17 Chris Wilson
@ 2013-02-13 22:24 ` Reese, Armin C
  2013-02-13 23:20   ` Chris Wilson
  2013-04-08 17:18 ` Daniel Vetter
  1 sibling, 1 reply; 34+ messages in thread
From: Reese, Armin C @ 2013-02-13 22:24 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Thanks for the patch, Chris.

This is exactly what we were looking for to replace the VMAP feature you submitted a couple of years ago.  We need a method to quickly move data from user mode allocations into video memory (by mapping backing pages into the GTT).

The Interface appears simple enough, yet fulfills our needs.

I have one question ... what exactly does the I915_USERPTR_UNSYNCHRONIZED flag do?

Thanks,
Armin

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
@ 2013-02-12 14:17 Chris Wilson
  2013-02-13 22:24 ` Reese, Armin C
  2013-04-08 17:18 ` Daniel Vetter
  0 siblings, 2 replies; 34+ messages in thread
From: Chris Wilson @ 2013-02-12 14:17 UTC (permalink / raw)
  To: intel-gfx

By exporting the ability to map user addresses and insert PTEs
representing their backing pages into the GTT, we can exploit UMA in
order to utilize normal application data as a texture source or even as
a render target (depending upon the capabilities of the chipset). This
has a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has widespread implications, ranging
from faster rendering of client-side software rasterisers (chromium)
and mitigation of stalls due to readback (firefox) to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).

v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
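
A minimal sketch of the intended userspace usage (assumptions: an open
i915 DRM fd, the uapi additions below on the include path, and a size
that is a multiple of the page size; helper name illustrative only,
error handling elided):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Illustrative only: wrap page-aligned application memory as a bo so
 * that it may also be GTT mmapped later (see v5 above). */
static uint32_t create_userptr_bo(int fd, uint32_t size, int read_only)
{
	struct drm_i915_gem_userptr arg;
	void *ptr;

	if (posix_memalign(&ptr, sysconf(_SC_PAGESIZE), size))
		return 0;

	memset(&arg, 0, sizeof(arg));
	arg.user_ptr = (uintptr_t)ptr;
	arg.user_size = size;
	/* without I915_USERPTR_UNSYNCHRONIZED this path relies on
	 * CONFIG_MMU_NOTIFIER being enabled (-ENODEV otherwise) */
	arg.flags = read_only ? I915_USERPTR_READ_ONLY : 0;

	if (ioctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg)) {
		free(ptr);
		return 0;
	}

	return arg.handle; /* object handles are nonzero */
}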

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile              |    1 +
 drivers/gpu/drm/i915/i915_dma.c            |    1 +
 drivers/gpu/drm/i915/i915_drv.h            |   22 ++
 drivers/gpu/drm/i915/i915_gem.c            |   31 ++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    7 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c    |  329 ++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h                |   16 ++
 7 files changed, 393 insertions(+), 14 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 91f3ac6..42858f6 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -14,6 +14,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
 	  i915_gem_gtt.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_userptr.o \
 	  i915_sysfs.o \
 	  i915_trace_points.o \
 	  i915_ums.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 4fa6beb..9b1984c 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1883,6 +1883,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 923dc0a..90070f4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -42,6 +42,7 @@
 #include <linux/backlight.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
+#include <linux/mmu_notifier.h>
 #include <linux/pm_qos.h>
 
 /* General customization:
@@ -1076,6 +1077,7 @@ struct drm_i915_gem_object_ops {
 	 */
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
+	void (*release)(struct drm_i915_gem_object *);
 };
 
 struct drm_i915_gem_object {
@@ -1222,6 +1224,23 @@ struct drm_i915_gem_object {
 };
 #define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base)
 
+struct i915_gem_userptr_object {
+	struct drm_i915_gem_object gem;
+	uintptr_t user_ptr;
+	size_t user_size;
+	int read_only;
+
+	struct mm_struct *mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+	struct mmu_notifier mn;
+#endif
+};
+
+union drm_i915_gem_objects {
+	struct drm_i915_gem_object base;
+	struct i915_gem_userptr_object userptr;
+};
+
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /**
@@ -1501,6 +1520,8 @@ int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
 int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
 int i915_gem_set_tiling(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int i915_gem_get_tiling(struct drm_device *dev, void *data,
@@ -1554,6 +1575,7 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 	BUG_ON(obj->pages_pin_count == 0);
 	obj->pages_pin_count--;
 }
+int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 73b1e9e..65a36bf 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1336,20 +1336,18 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	pgoff_t page_offset;
+	pgoff_t offset;
 	unsigned long pfn;
 	int ret = 0;
 	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
 
-	/* We don't use vmf->pgoff since that has the fake offset */
-	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
-		PAGE_SHIFT;
-
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
 		goto out;
 
-	trace_i915_gem_object_fault(obj, page_offset, true, write);
+	trace_i915_gem_object_fault(obj,
+				    (unsigned long)(vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT,
+				    true, write);
 
 	/* Access to snoopable pages through the GTT is incoherent. */
 	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
@@ -1372,8 +1370,10 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	obj->fault_mappable = true;
 
-	pfn = ((dev_priv->gtt.mappable_base + obj->gtt_offset) >> PAGE_SHIFT) +
-		page_offset;
+	/* We don't use vmf->pgoff since that has the fake offset */
+	offset = (unsigned long)vmf->virtual_address - vma->vm_start;
+	offset += obj->gtt_offset;
+	pfn = (dev_priv->gtt.mappable_base + offset) >> PAGE_SHIFT;
 
 	/* Finally, remap it using the new GTT offset */
 	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
@@ -1565,6 +1565,12 @@ i915_gem_mmap_gtt(struct drm_file *file,
 		goto out;
 	}
 
+	if (offset_in_page(obj->gtt_offset)) {
+		DRM_ERROR("Attempting to mmap an unaligned buffer\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
 	ret = i915_gem_object_create_mmap_offset(obj);
 	if (ret)
 		goto out;
@@ -2495,9 +2501,9 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	/* Avoid an unnecessary call to unbind on rebind. */
 	obj->map_and_fenceable = true;
 
+	obj->gtt_offset -= obj->gtt_space->start;
 	drm_mm_put_block(obj->gtt_space);
 	obj->gtt_space = NULL;
-	obj->gtt_offset = 0;
 
 	return 0;
 }
@@ -2987,7 +2993,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
 
 	obj->gtt_space = node;
-	obj->gtt_offset = node->start;
+	obj->gtt_offset += node->start;
 
 	fenceable =
 		node->size == fence_size &&
@@ -3800,6 +3806,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->base.import_attach)
 		drm_prime_gem_destroy(&obj->base, NULL);
 
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
@@ -4101,7 +4110,7 @@ i915_gem_load(struct drm_device *dev)
 
 	dev_priv->slab =
 		kmem_cache_create("i915_gem_object",
-				  sizeof(struct drm_i915_gem_object), 0,
+				  sizeof(union drm_i915_gem_objects), 0,
 				  SLAB_HWCACHE_ALIGN,
 				  NULL);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 2726910..a3e68af 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -254,14 +254,16 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		return -EFAULT;
 
 	reloc->delta += target_offset;
+	reloc->offset += obj->gtt_offset;
 	if (use_cpu_reloc(obj)) {
-		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
+		uint32_t page_offset = offset_in_page(reloc->offset);
 		char *vaddr;
 
 		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
 		if (ret)
 			return ret;
 
+		reloc->offset -= obj->gtt_space->start;
 		vaddr = kmap_atomic(i915_gem_object_get_page(obj,
 							     reloc->offset >> PAGE_SHIFT));
 		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
@@ -280,11 +282,10 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 			return ret;
 
 		/* Map the page containing the relocation we're going to perform.  */
-		reloc->offset += obj->gtt_offset;
 		reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
 						      reloc->offset & PAGE_MASK);
 		reloc_entry = (uint32_t __iomem *)
-			(reloc_page + (reloc->offset & ~PAGE_MASK));
+			(reloc_page + offset_in_page(reloc->offset));
 		iowrite32(reloc->delta, reloc_entry);
 		io_mapping_unmap_atomic(reloc_page);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 0000000..f93fa1b
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -0,0 +1,329 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_notifier.h>
+#include <linux/swap.h>
+
+static struct i915_gem_userptr_object *to_userptr_object(struct drm_i915_gem_object *obj)
+{
+	return container_of(obj, struct i915_gem_userptr_object, gem);
+}
+
+#if defined(CONFIG_MMU_NOTIFIER)
+static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *mn,
+						       struct mm_struct *mm,
+						       unsigned long start,
+						       unsigned long end)
+{
+	struct i915_gem_userptr_object *vmap;
+	struct drm_device *dev;
+
+	/* XXX race between obj unref and mmu notifier? */
+	vmap = container_of(mn, struct i915_gem_userptr_object, mn);
+	BUG_ON(vmap->mm != mm);
+
+	if (vmap->user_ptr >= end || vmap->user_ptr + vmap->user_size <= start)
+		return;
+
+	if (vmap->gem.pages == NULL) /* opportunistic check */
+		return;
+
+	dev = vmap->gem.base.dev;
+	mutex_lock(&dev->struct_mutex);
+	if (vmap->gem.gtt_space) {
+		struct drm_i915_private *dev_priv = dev->dev_private;
+		bool was_interruptible;
+		int ret;
+
+		was_interruptible = dev_priv->mm.interruptible;
+		dev_priv->mm.interruptible = false;
+
+		ret = i915_gem_object_unbind(&vmap->gem);
+		BUG_ON(ret && ret != -EIO);
+
+		dev_priv->mm.interruptible = was_interruptible;
+	}
+
+	BUG_ON(i915_gem_object_put_pages(&vmap->gem));
+	mutex_unlock(&dev->struct_mutex);
+}
+
+static void i915_gem_userptr_mn_release(struct mmu_notifier *mn,
+					struct mm_struct *mm)
+{
+	struct i915_gem_userptr_object *vmap;
+
+	vmap = container_of(mn, struct i915_gem_userptr_object, mn);
+	BUG_ON(vmap->mm != mm);
+	vmap->mm = NULL;
+
+	/* XXX Schedule an eventual unbind? E.g. hook into require request?
+	 * However, locking will be complicated.
+	 */
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
+	.release = i915_gem_userptr_mn_release,
+};
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+	if (vmap->mn.ops && vmap->mm) {
+		mmu_notifier_unregister(&vmap->mn, vmap->mm);
+		BUG_ON(vmap->mm);
+	}
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap,
+				    unsigned flags)
+{
+	if (flags & I915_USERPTR_UNSYNCHRONIZED)
+		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+
+	vmap->mn.ops = &i915_gem_userptr_notifier;
+	return mmu_notifier_register(&vmap->mn, vmap->mm);
+}
+
+#else
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap,
+				    unsigned flags)
+{
+	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
+		return -ENODEV;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return 0;
+}
+#endif
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
+	int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	struct page **pvec;
+	int n, pinned, ret;
+
+	if (vmap->mm == NULL)
+		return -EFAULT;
+
+	if (!access_ok(vmap->read_only ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)vmap->user_ptr, vmap->user_size))
+		return -EFAULT;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 *
+	 * Fortunately, we can hook into the mmu_notifier in order to
+	 * discard the page references prior to anything nasty happening
+	 * to the vma (discard or cloning) which should prevent the more
+	 * egregious cases from causing harm.
+	 */
+
+	pvec = kmalloc(num_pages*sizeof(struct page *),
+		       GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+	if (pvec == NULL) {
+		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+		if (pvec == NULL)
+			return -ENOMEM;
+	}
+
+	pinned = 0;
+	if (vmap->mm == current->mm)
+		pinned = __get_user_pages_fast(vmap->user_ptr, num_pages,
+					       !vmap->read_only, pvec);
+	if (pinned < num_pages) {
+		struct mm_struct *mm = vmap->mm;
+		ret = 0;
+		mutex_unlock(&obj->base.dev->struct_mutex);
+		down_read(&mm->mmap_sem);
+		if (vmap->mm != NULL)
+			ret = get_user_pages(current, mm,
+					     vmap->user_ptr + (pinned << PAGE_SHIFT),
+					     num_pages - pinned,
+					     !vmap->read_only, 0,
+					     pvec + pinned,
+					     NULL);
+		up_read(&mm->mmap_sem);
+		mutex_lock(&obj->base.dev->struct_mutex);
+		if (ret > 0)
+			pinned += ret;
+
+		if (obj->pages || pinned < num_pages) {
+			ret = obj->pages ? 0 : -EFAULT;
+			goto cleanup_pinned;
+		}
+	}
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL) {
+		ret = -ENOMEM;
+		goto cleanup_pinned;
+	}
+
+	if (sg_alloc_table(st, num_pages, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto cleanup_st;
+	}
+
+	for_each_sg(st->sgl, sg, num_pages, n)
+		sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+	drm_free_large(pvec);
+
+	obj->pages = st;
+	return 0;
+
+cleanup_st:
+	kfree(st);
+cleanup_pinned:
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+	return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+	struct scatterlist *sg;
+	int i;
+
+	if (obj->madv != I915_MADV_WILLNEED)
+		obj->dirty = 0;
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+		struct page *page = sg_page(sg);
+
+		if (obj->dirty)
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+	}
+	obj->dirty = 0;
+
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
+
+	i915_gem_userptr_release__mmu_notifier(vmap);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.get_pages = i915_gem_userptr_get_pages,
+	.put_pages = i915_gem_userptr_put_pages,
+	.release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some user memory.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_userptr *args = data;
+	struct i915_gem_userptr_object *obj;
+	loff_t first_data_page, last_data_page;
+	int num_pages;
+	int ret;
+	u32 handle;
+
+	if (args->flags & ~(I915_USERPTR_READ_ONLY | I915_USERPTR_UNSYNCHRONIZED))
+		return -EINVAL;
+
+	first_data_page = args->user_ptr / PAGE_SIZE;
+	last_data_page = (args->user_ptr + args->user_size - 1) / PAGE_SIZE;
+	num_pages = last_data_page - first_data_page + 1;
+	if (num_pages * PAGE_SIZE > dev_priv->gtt.total)
+		return -E2BIG;
+
+	/* Allocate the new object */
+	obj = i915_gem_object_alloc(dev);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	if (drm_gem_private_object_init(dev, &obj->gem.base,
+					num_pages * PAGE_SIZE)) {
+		i915_gem_object_free(&obj->gem);
+		return -ENOMEM;
+	}
+
+	i915_gem_object_init(&obj->gem, &i915_gem_userptr_ops);
+	obj->gem.cache_level = I915_CACHE_LLC_MLC;
+
+	obj->gem.gtt_offset = offset_in_page(args->user_ptr);
+	obj->user_ptr = args->user_ptr;
+	obj->user_size = args->user_size;
+	obj->read_only = args->flags & I915_USERPTR_READ_ONLY;
+
+	/* And keep a pointer to the current->mm for resolving the user pages
+	 * at binding. This means that we need to hook into the mmu_notifier
+	 * in order to detect if the mmu is destroyed.
+	 */
+	obj->mm = current->mm;
+	ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
+	if (ret)
+		return ret;
+
+	ret = drm_gem_handle_create(file, &obj->gem.base, &handle);
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference(&obj->gem.base);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 07d5941..20e39be 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -198,6 +198,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_SET_CACHING	0x2f
 #define DRM_I915_GEM_GET_CACHING	0x30
 #define DRM_I915_REG_READ		0x31
+#define DRM_I915_GEM_USERPTR		0x32
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -247,6 +248,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
+#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -980,4 +982,18 @@ struct drm_i915_reg_read {
 	__u64 offset;
 	__u64 val; /* Return value */
 };
+
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u32 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
 #endif /* _UAPI_I915_DRM_H_ */
-- 
1.7.10.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2014-05-16 16:39 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-01-28 10:34 New API for creating bo from user pages Chris Wilson
2014-01-28 10:34 ` [PATCH 1/3] lib: Export interval_tree Chris Wilson
2014-01-28 10:34 ` [PATCH 2/3] drm/i915: Do not call retire_requests from wait_for_rendering Chris Wilson
2014-01-28 10:34 ` [PATCH 3/3] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl Chris Wilson
2014-01-28 13:16   ` [PATCH] " Chris Wilson
2014-01-29 20:25     ` Daniel Vetter
2014-01-29 21:53       ` Chris Wilson
2014-01-29 21:58         ` Daniel Vetter
2014-01-30 11:06           ` Chris Wilson
2014-02-03 15:13             ` Tvrtko Ursulin
2014-01-29 20:34     ` Daniel Vetter
2014-01-29 21:52       ` Chris Wilson
2014-02-03 15:28       ` Tvrtko Ursulin
2014-02-04 10:56         ` Daniel Vetter
2014-02-05 15:55           ` Jesse Barnes
  -- strict thread matches above, loose matches on Subject: below --
2014-05-16 13:22 Chris Wilson
2014-05-16 15:34 ` Volkin, Bradley D
2014-05-16 16:39   ` Daniel Vetter
2014-01-21 15:07 [PATCH 3/3] " Chris Wilson
2014-01-22  9:46 ` [PATCH] " Chris Wilson
2014-01-24  9:00   ` Chris Wilson
2014-01-27 17:56     ` Volkin, Bradley D
2014-01-27 18:09       ` Chris Wilson
2014-01-15 11:10 Chris Wilson
2013-08-14 10:59 Chris Wilson
2013-02-12 14:17 Chris Wilson
2013-02-13 22:24 ` Reese, Armin C
2013-02-13 23:20   ` Chris Wilson
2013-04-08 17:18 ` Daniel Vetter
2013-04-08 17:40   ` Chris Wilson
2013-04-08 19:24     ` Daniel Vetter
2013-04-08 21:48       ` Chris Wilson
2013-04-15 18:37         ` Daniel Vetter
2013-04-08 22:06       ` Eric Anholt
2013-06-24 21:36       ` Jesse Barnes
