All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 01/41] drm/i915: Move user fault tracking to a separate list
Date: Thu, 20 Oct 2016 16:03:43 +0100	[thread overview]
Message-ID: <20161020150423.4560-2-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20161020150423.4560-1-chris@chris-wilson.co.uk>

We want to decouple RPM and struct_mutex, but currently RPM has to walk
the list of bound objects and remove userspace mmapping before we
suspend (otherwise userspace may continue to access the GTT whilst it is
powered down). This currently requires the struct_mutex to walk the
bound_list, but if we move that to a separate list and lock we can take
the first step towards removing the struct_mutex.

v2: Split runtime suspend unmapping vs regular unmapping, to make the
locking (and barriers) clearer. Add the object to the userfault_list
prior to inserting the first PTE, the race between add/revoke depends
upon struct_mutex for regular unmappings and rpm for runtime-suspend.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> #v1
---
 drivers/gpu/drm/i915/i915_debugfs.c   |  2 +-
 drivers/gpu/drm/i915/i915_drv.h       | 20 +++++++++++------
 drivers/gpu/drm/i915/i915_gem.c       | 42 +++++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_evict.c |  2 +-
 drivers/gpu/drm/i915/i915_gem_fence.c |  2 +-
 5 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 20638d22bbad..1b402eebd3d9 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -107,7 +107,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj)
 
 static char get_global_flag(struct drm_i915_gem_object *obj)
 {
-	return obj->fault_mappable ? 'g' : ' ';
+	return !list_empty(&obj->userfault_link) ? 'g' : ' ';
 }
 
 static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5b2b7f3c6e76..489cf5201ee0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1361,6 +1361,14 @@ struct i915_gem_mm {
 	 */
 	struct list_head unbound_list;
 
+	/** Protects access to the userfault_list */
+	spinlock_t userfault_lock;
+
+	/** List of all objects in gtt_space, currently mmaped by userspace.
+	 * All objects within this list must also be on bound_list.
+	 */
+	struct list_head userfault_list;
+
 	/** Usable portion of the GTT for GEM */
 	unsigned long stolen_base; /* limited to low memory (32-bit) */
 
@@ -2205,6 +2213,11 @@ struct drm_i915_gem_object {
 	struct drm_mm_node *stolen;
 	struct list_head global_list;
 
+	/**
+	 * Whether the object is currently in the GGTT mmap.
+	 */
+	struct list_head userfault_link;
+
 	/** Used in execbuf to temporarily hold a ref */
 	struct list_head obj_exec_link;
 
@@ -2232,13 +2245,6 @@ struct drm_i915_gem_object {
 	 */
 	unsigned int madv:2;
 
-	/**
-	 * Whether the current gtt mapping needs to be mappable (and isn't just
-	 * mappable by accident). Track pin and fault separate for a more
-	 * accurate mappable working set.
-	 */
-	unsigned int fault_mappable:1;
-
 	/*
 	 * Is the object to be mapped as read-only to the GPU
 	 * Only honoured if hardware has relevant pte bit
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8ed8e24025ac..b7a6bcd15bc9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1839,16 +1839,19 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	if (ret)
 		goto err_unpin;
 
+	/* Mark as being mmapped into userspace for later revocation */
+	spin_lock(&dev_priv->mm.userfault_lock);
+	if (list_empty(&obj->userfault_link))
+		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
+	spin_unlock(&dev_priv->mm.userfault_lock);
+
 	/* Finally, remap it using the new GTT offset */
 	ret = remap_io_mapping(area,
 			       area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT),
 			       (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
 			       min_t(u64, vma->size, area->vm_end - area->vm_start),
 			       &ggtt->mappable);
-	if (ret)
-		goto err_unpin;
 
-	obj->fault_mappable = true;
 err_unpin:
 	__i915_vma_unpin(vma);
 err_unlock:
@@ -1916,13 +1919,22 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 void
 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	bool zap = false;
+
 	/* Serialisation between user GTT access and our code depends upon
 	 * revoking the CPU's PTE whilst the mutex is held. The next user
 	 * pagefault then has to wait until we release the mutex.
 	 */
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	if (!obj->fault_mappable)
+	spin_lock(&i915->mm.userfault_lock);
+	if (!list_empty(&obj->userfault_link)) {
+		list_del_init(&obj->userfault_link);
+		zap = true;
+	}
+	spin_unlock(&i915->mm.userfault_lock);
+	if (!zap)
 		return;
 
 	drm_vma_node_unmap(&obj->base.vma_node,
@@ -1936,8 +1948,6 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	 * memory writes before touching registers / GSM.
 	 */
 	wmb();
-
-	obj->fault_mappable = false;
 }
 
 void
@@ -1945,8 +1955,19 @@ i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_gem_object *obj;
 
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
-		i915_gem_release_mmap(obj);
+	spin_lock(&dev_priv->mm.userfault_lock);
+	while ((obj = list_first_entry_or_null(&dev_priv->mm.userfault_list,
+					       struct drm_i915_gem_object,
+					       userfault_link))) {
+		list_del_init(&obj->userfault_link);
+		spin_unlock(&dev_priv->mm.userfault_lock);
+
+		drm_vma_node_unmap(&obj->base.vma_node,
+				   obj->base.dev->anon_inode->i_mapping);
+
+		spin_lock(&dev_priv->mm.userfault_lock);
+	}
+	spin_unlock(&dev_priv->mm.userfault_lock);
 }
 
 /**
@@ -4108,6 +4129,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	int i;
 
 	INIT_LIST_HEAD(&obj->global_list);
+	INIT_LIST_HEAD(&obj->userfault_link);
 	for (i = 0; i < I915_NUM_ENGINES; i++)
 		init_request_active(&obj->last_read[i],
 				    i915_gem_object_retire__read);
@@ -4521,6 +4543,7 @@ int i915_gem_init(struct drm_device *dev)
 	int ret;
 
 	mutex_lock(&dev->struct_mutex);
+	spin_lock_init(&dev_priv->mm.userfault_lock);
 
 	if (!i915.enable_execlists) {
 		dev_priv->gt.resume = intel_legacy_submission_resume;
@@ -4640,6 +4663,7 @@ i915_gem_load_init(struct drm_device *dev)
 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
+	INIT_LIST_HEAD(&dev_priv->mm.userfault_list);
 	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
 			  i915_gem_retire_work_handler);
 	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index b5e9e669f50f..a934f372c5ce 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -56,7 +56,7 @@ mark_free(struct i915_vma *vma, unsigned int flags, struct list_head *unwind)
 	if (WARN_ON(!list_empty(&vma->exec_list)))
 		return false;
 
-	if (flags & PIN_NONFAULT && vma->obj->fault_mappable)
+	if (flags & PIN_NONFAULT && !list_empty(&vma->obj->userfault_link))
 		return false;
 
 	list_add(&vma->exec_list, unwind);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index a6daf2deab74..67013179b8ed 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -391,7 +391,7 @@ void i915_gem_restore_fences(struct drm_device *dev)
 		 */
 		if (vma && !i915_gem_object_is_tiled(vma->obj)) {
 			GEM_BUG_ON(!reg->dirty);
-			GEM_BUG_ON(vma->obj->fault_mappable);
+			GEM_BUG_ON(!list_empty(&vma->obj->userfault_link));
 
 			list_move(&reg->link, &dev_priv->mm.fence_list);
 			vma->fence = NULL;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2016-10-20 15:04 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-10-20 15:03 Multiple timelines, multiple times Chris Wilson
2016-10-20 15:03 ` Chris Wilson [this message]
2016-10-20 15:03 ` [PATCH 02/41] drm/i915: Use RPM as the barrier for controlling user mmap access Chris Wilson
2016-10-20 15:03 ` [PATCH 03/41] drm/i915: Remove superfluous locking around userfault_list Chris Wilson
2016-10-20 15:03 ` [PATCH 04/41] drm/i915: Remove RPM sequence checking Chris Wilson
2016-10-21 10:19   ` Imre Deak
2016-10-20 15:03 ` [PATCH 05/41] drm/i915: Move fence cancellation to runtime suspend Chris Wilson
2016-10-21 12:48   ` Imre Deak
2016-10-21 13:52     ` Chris Wilson
2016-10-21 14:05     ` [PATCH v2] " Chris Wilson
2016-10-24  9:55       ` Imre Deak
2016-10-20 15:03 ` [PATCH 06/41] drm/i915: Support asynchronous waits on struct fence from i915_gem_request Chris Wilson
2016-10-20 15:03 ` [PATCH 07/41] drm/i915: Allow i915_sw_fence_await_sw_fence() to allocate Chris Wilson
2016-10-20 15:03 ` [PATCH 08/41] drm/i915: Remove superfluous wait_for_error() from throttle-ioctl Chris Wilson
2016-10-20 15:41   ` Joonas Lahtinen
2016-10-20 15:03 ` [PATCH 09/41] drm/i915: Rearrange i915_wait_request() accounting with callers Chris Wilson
2016-10-20 15:03 ` [PATCH 10/41] drm/i915: Remove unused i915_gem_active_wait() in favour of _unlocked() Chris Wilson
2016-10-20 15:03 ` [PATCH 11/41] drm/i915: Defer active reference until required Chris Wilson
2016-10-20 15:03 ` [PATCH 12/41] drm/i915: Introduce an internal allocator for disposable private objects Chris Wilson
2016-10-20 16:22   ` Tvrtko Ursulin
2016-10-20 20:36     ` Chris Wilson
2016-10-21  7:21       ` Tvrtko Ursulin
2016-10-21  7:50         ` Chris Wilson
2016-10-21  7:53           ` Tvrtko Ursulin
2016-10-21  7:56   ` [PATCH v4] " Chris Wilson
2016-10-21  8:07     ` Tvrtko Ursulin
2016-10-20 15:03 ` [PATCH 13/41] drm/i915: Reuse the active golden render state batch Chris Wilson
2016-10-20 15:03 ` [PATCH 14/41] drm/i915: Markup GEM API with lockdep asserts Chris Wilson
2016-10-20 15:03 ` [PATCH 15/41] drm/i915: Use a radixtree for random access to the object's backing storage Chris Wilson
2016-10-20 15:55   ` Tvrtko Ursulin
2016-10-20 15:03 ` [PATCH 16/41] drm/i915: Use radixtree to jump start intel_partial_pages() Chris Wilson
2016-10-20 15:03 ` [PATCH 17/41] drm/i915: Refactor object page API Chris Wilson
2016-10-20 15:04 ` [PATCH 18/41] drm/i915: Pass around sg_table to get_pages/put_pages backend Chris Wilson
2016-10-20 15:04 ` [PATCH 19/41] drm/i915: Move object backing storage manipulation to its own locking Chris Wilson
2016-10-20 15:04 ` [PATCH 20/41] drm/i915/dmabuf: Acquire the backing storage outside of struct_mutex Chris Wilson
2016-10-20 15:04 ` [PATCH 21/41] drm/i915: Implement pread without struct-mutex Chris Wilson
2016-10-20 15:04 ` [PATCH 22/41] drm/i915: Implement pwrite " Chris Wilson
2016-10-20 15:04 ` [PATCH 23/41] drm/i915: Acquire the backing storage outside of struct_mutex in set-domain Chris Wilson
2016-10-20 15:04 ` [PATCH 24/41] drm/i915: Move object release to a freelist + worker Chris Wilson
2016-10-20 15:04 ` [PATCH 25/41] drm/i915: Use lockless object free Chris Wilson
2016-10-20 15:04 ` [PATCH 26/41] drm/i915: Move GEM activity tracking into a common struct reservation_object Chris Wilson
2016-10-20 15:04 ` [PATCH 27/41] drm/i915: Restore nonblocking awaits for modesetting Chris Wilson
2016-10-20 15:04 ` [PATCH 28/41] drm/i915: Combine seqno + tracking into a global timeline struct Chris Wilson
2016-10-20 15:04 ` [PATCH 29/41] drm/i915: Queue the idling context switch after all other timelines Chris Wilson
2016-10-20 15:04 ` [PATCH 30/41] drm/i915: Wait first for submission, before waiting for request completion Chris Wilson
2016-10-20 15:04 ` [PATCH 31/41] drm/i915: Introduce a global_seqno for each request Chris Wilson
2016-10-20 15:04 ` [PATCH 32/41] drm/i915: Rename ->emit_request to ->emit_breadcrumb Chris Wilson
2016-10-20 15:04 ` [PATCH 33/41] drm/i915: Record space required for breadcrumb emission Chris Wilson
2016-10-20 15:04 ` [PATCH 34/41] drm/i915: Defer " Chris Wilson
2016-10-20 15:04 ` [PATCH 35/41] drm/i915: Move the global sync optimisation to the timeline Chris Wilson
2016-10-20 15:04 ` [PATCH 36/41] drm/i915: Create a unique name for the context Chris Wilson
2016-10-20 15:04 ` [PATCH 37/41] drm/i915: Reserve space in the global seqno during request allocation Chris Wilson
2016-10-20 15:04 ` [PATCH 38/41] drm/i915: Defer setting of global seqno on request to submission Chris Wilson
2016-10-20 15:04 ` [PATCH 39/41] drm/i915: Enable multiple timelines Chris Wilson
2016-10-20 15:04 ` [PATCH 40/41] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
2016-10-20 15:04 ` [PATCH 41/41] drm/i915: Support explicit fencing for execbuf Chris Wilson
  -- strict thread matches above, loose matches on Subject: below --
2016-10-14 12:17 Fencing, fencing, fencing Chris Wilson
2016-10-14 12:17 ` [PATCH 01/41] drm/i915: Move user fault tracking to a separate list Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161020150423.4560-2-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.