All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/5] drm/i915: Move user fault tracking to a separate list
@ 2016-10-14 11:05 Chris Wilson
  2016-10-14 11:05 ` [PATCH v2 2/5] drm/i915: Use RPM as the barrier for controlling user mmap access Chris Wilson
                   ` (4 more replies)
  0 siblings, 5 replies; 7+ messages in thread
From: Chris Wilson @ 2016-10-14 11:05 UTC (permalink / raw)
  To: intel-gfx

We want to decouple RPM and struct_mutex, but currently RPM has to walk
the list of bound objects and remove userspace mmapping before we
suspend (otherwise userspace may continue to access the GTT whilst it is
powered down). This currently requires the struct_mutex to walk the
bound_list, but if we move that to a separate list and lock we can take
the first step towards removing the struct_mutex.

v2: Split runtime suspend unmapping vs regular unmapping, to make the
locking (and barriers) clearer. Add the object to the userfault_list
prior to inserting the first PTE, the race between add/revoke depends
upon struct_mutex for regular unmappings and rpm for runtime-suspend.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> #v1
---
 drivers/gpu/drm/i915/i915_debugfs.c   |  2 +-
 drivers/gpu/drm/i915/i915_drv.h       | 20 +++++++++++------
 drivers/gpu/drm/i915/i915_gem.c       | 42 +++++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_evict.c |  2 +-
 drivers/gpu/drm/i915/i915_gem_fence.c |  2 +-
 5 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index b20c1ccbd427..18f840def1b2 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -107,7 +107,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj)
 
 static char get_global_flag(struct drm_i915_gem_object *obj)
 {
-	return obj->fault_mappable ? 'g' : ' ';
+	return !list_empty(&obj->userfault_link) ? 'g' : ' ';
 }
 
 static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9e830b58b06b..593bf3c2063d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1360,6 +1360,14 @@ struct i915_gem_mm {
 	 */
 	struct list_head unbound_list;
 
+	/** Protects access to the userfault_list */
+	spinlock_t userfault_lock;
+
+	/** List of all objects in gtt_space, currently mmaped by userspace.
+	 * All objects within this list must also be on bound_list.
+	 */
+	struct list_head userfault_list;
+
 	/** Usable portion of the GTT for GEM */
 	unsigned long stolen_base; /* limited to low memory (32-bit) */
 
@@ -2208,6 +2216,11 @@ struct drm_i915_gem_object {
 	struct drm_mm_node *stolen;
 	struct list_head global_list;
 
+	/**
+	 * Whether the object is currently in the GGTT mmap.
+	 */
+	struct list_head userfault_link;
+
 	/** Used in execbuf to temporarily hold a ref */
 	struct list_head obj_exec_link;
 
@@ -2235,13 +2248,6 @@ struct drm_i915_gem_object {
 	 */
 	unsigned int madv:2;
 
-	/**
-	 * Whether the current gtt mapping needs to be mappable (and isn't just
-	 * mappable by accident). Track pin and fault separate for a more
-	 * accurate mappable working set.
-	 */
-	unsigned int fault_mappable:1;
-
 	/*
 	 * Is the object to be mapped as read-only to the GPU
 	 * Only honoured if hardware has relevant pte bit
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fb460cc2857c..b568694ec370 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1841,16 +1841,19 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	if (ret)
 		goto err_unpin;
 
+	/* Mark as being mmapped into userspace for later revocation */
+	spin_lock(&dev_priv->mm.userfault_lock);
+	if (list_empty(&obj->userfault_link))
+		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
+	spin_unlock(&dev_priv->mm.userfault_lock);
+
 	/* Finally, remap it using the new GTT offset */
 	ret = remap_io_mapping(area,
 			       area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT),
 			       (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
 			       min_t(u64, vma->size, area->vm_end - area->vm_start),
 			       &ggtt->mappable);
-	if (ret)
-		goto err_unpin;
 
-	obj->fault_mappable = true;
 err_unpin:
 	__i915_vma_unpin(vma);
 err_unlock:
@@ -1918,13 +1921,22 @@ err:
 void
 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	bool zap = false;
+
 	/* Serialisation between user GTT access and our code depends upon
 	 * revoking the CPU's PTE whilst the mutex is held. The next user
 	 * pagefault then has to wait until we release the mutex.
 	 */
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	if (!obj->fault_mappable)
+	spin_lock(&i915->mm.userfault_lock);
+	if (!list_empty(&obj->userfault_link)) {
+		list_del_init(&obj->userfault_link);
+		zap = true;
+	}
+	spin_unlock(&i915->mm.userfault_lock);
+	if (!zap)
 		return;
 
 	drm_vma_node_unmap(&obj->base.vma_node,
@@ -1938,8 +1950,6 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	 * memory writes before touching registers / GSM.
 	 */
 	wmb();
-
-	obj->fault_mappable = false;
 }
 
 void
@@ -1947,8 +1957,19 @@ i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_gem_object *obj;
 
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
-		i915_gem_release_mmap(obj);
+	spin_lock(&dev_priv->mm.userfault_lock);
+	while ((obj = list_first_entry_or_null(&dev_priv->mm.userfault_list,
+					       struct drm_i915_gem_object,
+					       userfault_link))) {
+		list_del_init(&obj->userfault_link);
+		spin_unlock(&dev_priv->mm.userfault_lock);
+
+		drm_vma_node_unmap(&obj->base.vma_node,
+				   obj->base.dev->anon_inode->i_mapping);
+
+		spin_lock(&dev_priv->mm.userfault_lock);
+	}
+	spin_unlock(&dev_priv->mm.userfault_lock);
 }
 
 /**
@@ -4109,6 +4130,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	int i;
 
 	INIT_LIST_HEAD(&obj->global_list);
+	INIT_LIST_HEAD(&obj->userfault_link);
 	for (i = 0; i < I915_NUM_ENGINES; i++)
 		init_request_active(&obj->last_read[i],
 				    i915_gem_object_retire__read);
@@ -4509,6 +4531,7 @@ int i915_gem_init(struct drm_device *dev)
 	int ret;
 
 	mutex_lock(&dev->struct_mutex);
+	spin_lock_init(&dev_priv->mm.userfault_lock);
 
 	if (!i915.enable_execlists) {
 		dev_priv->gt.resume = intel_legacy_submission_resume;
@@ -4628,6 +4651,7 @@ i915_gem_load_init(struct drm_device *dev)
 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
+	INIT_LIST_HEAD(&dev_priv->mm.userfault_list);
 	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
 			  i915_gem_retire_work_handler);
 	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index b5e9e669f50f..a934f372c5ce 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -56,7 +56,7 @@ mark_free(struct i915_vma *vma, unsigned int flags, struct list_head *unwind)
 	if (WARN_ON(!list_empty(&vma->exec_list)))
 		return false;
 
-	if (flags & PIN_NONFAULT && vma->obj->fault_mappable)
+	if (flags & PIN_NONFAULT && !list_empty(&vma->obj->userfault_link))
 		return false;
 
 	list_add(&vma->exec_list, unwind);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index 2c7ba0ee127c..f7081f4b5d22 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -391,7 +391,7 @@ void i915_gem_restore_fences(struct drm_device *dev)
 		 */
 		if (vma && !i915_gem_object_is_tiled(vma->obj)) {
 			GEM_BUG_ON(!reg->dirty);
-			GEM_BUG_ON(vma->obj->fault_mappable);
+			GEM_BUG_ON(!list_empty(&vma->obj->userfault_link));
 
 			list_move(&reg->link, &dev_priv->mm.fence_list);
 			vma->fence = NULL;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 2/5] drm/i915: Use RPM as the barrier for controlling user mmap access
  2016-10-14 11:05 [PATCH v2 1/5] drm/i915: Move user fault tracking to a separate list Chris Wilson
@ 2016-10-14 11:05 ` Chris Wilson
  2016-10-14 11:05 ` [PATCH v2 3/5] drm/i915: Remove superfluous locking around userfault_list Chris Wilson
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2016-10-14 11:05 UTC (permalink / raw)
  To: intel-gfx

We can remove the false coupling between RPM and struct mutex by the
observation that we can use the RPM wakeref as the barrier around user
mmap access. That is as we tear down the user's PTE atomically from
within rpm suspend and then to fault in new PTE requires the rpm
wakeref, means that no user access is possible through those PTE without
RPM being awake. Having made that observation, we can then remove the
presumption of having to take rpm outside of struct_mutex and so allow
fine grained acquisition of a wakeref around hw access rather than
having to remember to acquire the wakeref early on.

v2: Rejig placement of the new intel_runtime_pm_get() to be as tight
as possible around the GTT pread/pwrite.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c    | 22 --------------------
 drivers/gpu/drm/i915/i915_drv.c        | 19 -----------------
 drivers/gpu/drm/i915/i915_gem.c        | 37 ++++++++++++++++++----------------
 drivers/gpu/drm/i915/i915_gem_gtt.c    | 17 ++++++++++++----
 drivers/gpu/drm/i915/i915_gem_tiling.c |  4 ----
 5 files changed, 33 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 18f840def1b2..ab44b71d079d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1396,14 +1396,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 static int ironlake_drpc_info(struct seq_file *m)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
 	u32 rgvmodectl, rstdbyctl;
 	u16 crstandvid;
-	int ret;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
 	intel_runtime_pm_get(dev_priv);
 
 	rgvmodectl = I915_READ(MEMMODECTL);
@@ -1411,7 +1406,6 @@ static int ironlake_drpc_info(struct seq_file *m)
 	crstandvid = I915_READ16(CRSTANDVID);
 
 	intel_runtime_pm_put(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
 
 	seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN));
 	seq_printf(m, "Boost freq: %d\n",
@@ -2091,12 +2085,7 @@ static const char *swizzle_string(unsigned swizzle)
 static int i915_swizzle_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	int ret;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
 	intel_runtime_pm_get(dev_priv);
 
 	seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
@@ -2136,7 +2125,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
 		seq_puts(m, "L-shaped memory detected\n");
 
 	intel_runtime_pm_put(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
 }
@@ -4799,13 +4787,9 @@ i915_wedged_set(void *data, u64 val)
 	if (i915_reset_in_progress(&dev_priv->gpu_error))
 		return -EAGAIN;
 
-	intel_runtime_pm_get(dev_priv);
-
 	i915_handle_error(dev_priv, val,
 			  "Manually setting wedged to %llu", val);
 
-	intel_runtime_pm_put(dev_priv);
-
 	return 0;
 }
 
@@ -5040,22 +5024,16 @@ static int
 i915_cache_sharing_get(void *data, u64 *val)
 {
 	struct drm_i915_private *dev_priv = data;
-	struct drm_device *dev = &dev_priv->drm;
 	u32 snpcr;
-	int ret;
 
 	if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv)))
 		return -ENODEV;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
 	intel_runtime_pm_get(dev_priv);
 
 	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
 
 	intel_runtime_pm_put(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
 
 	*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 8c3d4761dfa0..d08d6617e3b6 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2290,24 +2290,6 @@ static int intel_runtime_suspend(struct device *kdev)
 
 	DRM_DEBUG_KMS("Suspending device\n");
 
-	/*
-	 * We could deadlock here in case another thread holding struct_mutex
-	 * calls RPM suspend concurrently, since the RPM suspend will wait
-	 * first for this RPM suspend to finish. In this case the concurrent
-	 * RPM resume will be followed by its RPM suspend counterpart. Still
-	 * for consistency return -EAGAIN, which will reschedule this suspend.
-	 */
-	if (!mutex_trylock(&dev->struct_mutex)) {
-		DRM_DEBUG_KMS("device lock contention, deffering suspend\n");
-		/*
-		 * Bump the expiration timestamp, otherwise the suspend won't
-		 * be rescheduled.
-		 */
-		pm_runtime_mark_last_busy(kdev);
-
-		return -EAGAIN;
-	}
-
 	disable_rpm_wakeref_asserts(dev_priv);
 
 	/*
@@ -2315,7 +2297,6 @@ static int intel_runtime_suspend(struct device *kdev)
 	 * an RPM reference.
 	 */
 	i915_gem_release_all_mmaps(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
 
 	intel_guc_suspend(dev);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b568694ec370..d1ab59fb7a02 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -826,6 +826,7 @@ i915_gem_gtt_pread(struct drm_device *dev,
 	uint64_t offset;
 	int ret;
 
+	intel_runtime_pm_get(to_i915(dev));
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
 	if (!IS_ERR(vma)) {
 		node.start = i915_ggtt_offset(vma);
@@ -927,6 +928,7 @@ out_unpin:
 		i915_vma_unpin(vma);
 	}
 out:
+	intel_runtime_pm_put(to_i915(dev));
 	return ret;
 }
 
@@ -1061,12 +1063,9 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 	ret = i915_gem_shmem_pread(dev, obj, args, file);
 
 	/* pread for non shmem backed objects */
-	if (ret == -EFAULT || ret == -ENODEV) {
-		intel_runtime_pm_get(to_i915(dev));
+	if (ret == -EFAULT || ret == -ENODEV)
 		ret = i915_gem_gtt_pread(dev, obj, args->size,
 					args->offset, args->data_ptr);
-		intel_runtime_pm_put(to_i915(dev));
-	}
 
 	i915_gem_object_put(obj);
 	mutex_unlock(&dev->struct_mutex);
@@ -1127,6 +1126,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 	if (i915_gem_object_is_tiled(obj))
 		return -EFAULT;
 
+	intel_runtime_pm_get(i915);
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 				       PIN_MAPPABLE | PIN_NONBLOCK);
 	if (!IS_ERR(vma)) {
@@ -1236,6 +1236,7 @@ out_unpin:
 		i915_vma_unpin(vma);
 	}
 out:
+	intel_runtime_pm_put(i915);
 	return ret;
 }
 
@@ -1468,12 +1469,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	 * perspective, requiring manual detiling by the client.
 	 */
 	if (!i915_gem_object_has_struct_page(obj) ||
-	    cpu_write_needs_clflush(obj)) {
-		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
+	    cpu_write_needs_clflush(obj))
 		/* Note that the gtt paths might fail with non-page-backed user
 		 * pointers (e.g. gtt mappings when moving data between
-		 * textures). Fallback to the shmem path in that case. */
-	}
+		 * textures). Fallback to the shmem path in that case.
+		 */
+		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
 
 	if (ret == -EFAULT || ret == -ENOSPC) {
 		if (obj->phys_handle)
@@ -1842,6 +1843,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 		goto err_unpin;
 
 	/* Mark as being mmapped into userspace for later revocation */
+	assert_rpm_wakelock_held(dev_priv);
 	spin_lock(&dev_priv->mm.userfault_lock);
 	if (list_empty(&obj->userfault_link))
 		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
@@ -1927,8 +1929,13 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	/* Serialisation between user GTT access and our code depends upon
 	 * revoking the CPU's PTE whilst the mutex is held. The next user
 	 * pagefault then has to wait until we release the mutex.
+	 *
+	 * Note that RPM complicates somewhat by adding an additional
+	 * requirement that operations to the GGTT be made holding the RPM
+	 * wakeref.
 	 */
 	lockdep_assert_held(&i915->drm.struct_mutex);
+	intel_runtime_pm_get(i915);
 
 	spin_lock(&i915->mm.userfault_lock);
 	if (!list_empty(&obj->userfault_link)) {
@@ -1937,7 +1944,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	}
 	spin_unlock(&i915->mm.userfault_lock);
 	if (!zap)
-		return;
+		goto out;
 
 	drm_vma_node_unmap(&obj->base.vma_node,
 			   obj->base.dev->anon_inode->i_mapping);
@@ -1950,6 +1957,9 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	 * memory writes before touching registers / GSM.
 	 */
 	wmb();
+
+out:
+	intel_runtime_pm_put(i915);
 }
 
 void
@@ -3478,7 +3488,6 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_caching *args = data;
 	struct drm_i915_gem_object *obj;
 	enum i915_cache_level level;
@@ -3507,11 +3516,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	intel_runtime_pm_get(dev_priv);
-
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
-		goto rpm_put;
+		return ret;
 
 	obj = i915_gem_object_lookup(file, args->handle);
 	if (!obj) {
@@ -3520,13 +3527,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 	}
 
 	ret = i915_gem_object_set_cache_level(obj, level);
-
 	i915_gem_object_put(obj);
 unlock:
 	mutex_unlock(&dev->struct_mutex);
-rpm_put:
-	intel_runtime_pm_put(dev_priv);
-
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0a45063b465f..c6e743f96e6d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2602,6 +2602,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 			 enum i915_cache_level cache_level,
 			 u32 flags)
 {
+	struct drm_i915_private *i915 = to_i915(vma->vm->dev);
 	struct drm_i915_gem_object *obj = vma->obj;
 	u32 pte_flags = 0;
 	int ret;
@@ -2614,8 +2615,10 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	if (obj->gt_ro)
 		pte_flags |= PTE_READ_ONLY;
 
+	intel_runtime_pm_get(i915);
 	vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
 				cache_level, pte_flags);
+	intel_runtime_pm_put(i915);
 
 	/*
 	 * Without aliasing PPGTT there's no difference between
@@ -2631,6 +2634,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 				 enum i915_cache_level cache_level,
 				 u32 flags)
 {
+	struct drm_i915_private *i915 = to_i915(vma->vm->dev);
 	u32 pte_flags;
 	int ret;
 
@@ -2645,14 +2649,15 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 
 
 	if (flags & I915_VMA_GLOBAL_BIND) {
+		intel_runtime_pm_get(i915);
 		vma->vm->insert_entries(vma->vm,
 					vma->pages, vma->node.start,
 					cache_level, pte_flags);
+		intel_runtime_pm_put(i915);
 	}
 
 	if (flags & I915_VMA_LOCAL_BIND) {
-		struct i915_hw_ppgtt *appgtt =
-			to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
+		struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
 		appgtt->base.insert_entries(&appgtt->base,
 					    vma->pages, vma->node.start,
 					    cache_level, pte_flags);
@@ -2663,13 +2668,17 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 
 static void ggtt_unbind_vma(struct i915_vma *vma)
 {
-	struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
+	struct drm_i915_private *i915 = to_i915(vma->vm->dev);
+	struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
 	const u64 size = min(vma->size, vma->node.size);
 
-	if (vma->flags & I915_VMA_GLOBAL_BIND)
+	if (vma->flags & I915_VMA_GLOBAL_BIND) {
+		intel_runtime_pm_get(i915);
 		vma->vm->clear_range(vma->vm,
 				     vma->node.start, size,
 				     true);
+		intel_runtime_pm_put(i915);
+	}
 
 	if (vma->flags & I915_VMA_LOCAL_BIND && appgtt)
 		appgtt->base.clear_range(&appgtt->base,
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index a14b1e3d4c78..08f796a4f5f6 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -204,8 +204,6 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	intel_runtime_pm_get(dev_priv);
-
 	mutex_lock(&dev->struct_mutex);
 	if (obj->pin_display || obj->framebuffer_references) {
 		err = -EBUSY;
@@ -301,8 +299,6 @@ err:
 	i915_gem_object_put(obj);
 	mutex_unlock(&dev->struct_mutex);
 
-	intel_runtime_pm_put(dev_priv);
-
 	return err;
 }
 
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 3/5] drm/i915: Remove superfluous locking around userfault_list
  2016-10-14 11:05 [PATCH v2 1/5] drm/i915: Move user fault tracking to a separate list Chris Wilson
  2016-10-14 11:05 ` [PATCH v2 2/5] drm/i915: Use RPM as the barrier for controlling user mmap access Chris Wilson
@ 2016-10-14 11:05 ` Chris Wilson
  2016-10-17  7:44   ` Joonas Lahtinen
  2016-10-14 11:05 ` [PATCH v2 4/5] drm/i915: Remove RPM sequence checking Chris Wilson
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 7+ messages in thread
From: Chris Wilson @ 2016-10-14 11:05 UTC (permalink / raw)
  To: intel-gfx

Now that we have reduced the access to the list to either (a) under the
struct_mutex whilst holding the RPM wakeref (so that concurrent writers to
the list are serialised by struct_mutex) and (b) under the atomic
runtime suspend (which cannot run concurrently with any other accessor due
to the atomic nature of the runtime suspend) we can remove the extra
locking around the list itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h |  3 ---
 drivers/gpu/drm/i915/i915_gem.c | 33 ++++++++++++---------------------
 2 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 593bf3c2063d..b3c00cc6ab32 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1360,9 +1360,6 @@ struct i915_gem_mm {
 	 */
 	struct list_head unbound_list;
 
-	/** Protects access to the userfault_list */
-	spinlock_t userfault_lock;
-
 	/** List of all objects in gtt_space, currently mmaped by userspace.
 	 * All objects within this list must also be on bound_list.
 	 */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d1ab59fb7a02..8819fafbcc7a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1844,10 +1844,8 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 
 	/* Mark as being mmapped into userspace for later revocation */
 	assert_rpm_wakelock_held(dev_priv);
-	spin_lock(&dev_priv->mm.userfault_lock);
 	if (list_empty(&obj->userfault_link))
 		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
-	spin_unlock(&dev_priv->mm.userfault_lock);
 
 	/* Finally, remap it using the new GTT offset */
 	ret = remap_io_mapping(area,
@@ -1924,7 +1922,6 @@ void
 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	bool zap = false;
 
 	/* Serialisation between user GTT access and our code depends upon
 	 * revoking the CPU's PTE whilst the mutex is held. The next user
@@ -1937,15 +1934,10 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	lockdep_assert_held(&i915->drm.struct_mutex);
 	intel_runtime_pm_get(i915);
 
-	spin_lock(&i915->mm.userfault_lock);
-	if (!list_empty(&obj->userfault_link)) {
-		list_del_init(&obj->userfault_link);
-		zap = true;
-	}
-	spin_unlock(&i915->mm.userfault_lock);
-	if (!zap)
+	if (list_empty(&obj->userfault_link))
 		goto out;
 
+	list_del_init(&obj->userfault_link);
 	drm_vma_node_unmap(&obj->base.vma_node,
 			   obj->base.dev->anon_inode->i_mapping);
 
@@ -1965,21 +1957,21 @@ out:
 void
 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_gem_object *obj;
+	struct drm_i915_gem_object *obj, *on;
 
-	spin_lock(&dev_priv->mm.userfault_lock);
-	while ((obj = list_first_entry_or_null(&dev_priv->mm.userfault_list,
-					       struct drm_i915_gem_object,
-					       userfault_link))) {
-		list_del_init(&obj->userfault_link);
-		spin_unlock(&dev_priv->mm.userfault_lock);
+	/*
+	 * Only called during RPM suspend. All users of the userfault_list
+	 * must be holding an RPM wakeref to ensure that this can not
+	 * run concurrently with themselves (and use the struct_mutex for
+	 * protection between themselves).
+	 */
 
+	list_for_each_entry_safe(obj, on,
+				 &dev_priv->mm.userfault_list, userfault_link) {
+		list_del_init(&obj->userfault_link);
 		drm_vma_node_unmap(&obj->base.vma_node,
 				   obj->base.dev->anon_inode->i_mapping);
-
-		spin_lock(&dev_priv->mm.userfault_lock);
 	}
-	spin_unlock(&dev_priv->mm.userfault_lock);
 }
 
 /**
@@ -4534,7 +4526,6 @@ int i915_gem_init(struct drm_device *dev)
 	int ret;
 
 	mutex_lock(&dev->struct_mutex);
-	spin_lock_init(&dev_priv->mm.userfault_lock);
 
 	if (!i915.enable_execlists) {
 		dev_priv->gt.resume = intel_legacy_submission_resume;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 4/5] drm/i915: Remove RPM sequence checking
  2016-10-14 11:05 [PATCH v2 1/5] drm/i915: Move user fault tracking to a separate list Chris Wilson
  2016-10-14 11:05 ` [PATCH v2 2/5] drm/i915: Use RPM as the barrier for controlling user mmap access Chris Wilson
  2016-10-14 11:05 ` [PATCH v2 3/5] drm/i915: Remove superfluous locking around userfault_list Chris Wilson
@ 2016-10-14 11:05 ` Chris Wilson
  2016-10-14 11:05 ` [PATCH v2 5/5] drm/i915: Move fence cancellation to runtime suspend Chris Wilson
  2016-10-14 13:20 ` ✓ Fi.CI.BAT: success for series starting with [v2,1/5] drm/i915: Move user fault tracking to a separate list Patchwork
  4 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2016-10-14 11:05 UTC (permalink / raw)
  To: intel-gfx

We only used the RPM sequence checking inside the lowlevel GTT
accessors, when we had to rely on callers taking the wakeref on our
behalf. Now that we take the RPM wakeref inside the GTT management
routines themselves, we can forgo the sanitycheck of the callers.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  1 -
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 55 +--------------------------------
 drivers/gpu/drm/i915/intel_drv.h        | 17 ----------
 drivers/gpu/drm/i915/intel_pm.c         |  1 -
 drivers/gpu/drm/i915/intel_runtime_pm.c |  3 +-
 5 files changed, 2 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b3c00cc6ab32..7c686145861b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1690,7 +1690,6 @@ struct skl_wm_level {
  */
 struct i915_runtime_pm {
 	atomic_t wakeref_count;
-	atomic_t atomic_seq;
 	bool suspended;
 	bool irqs_enabled;
 };
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c6e743f96e6d..430ed500054c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2323,16 +2323,11 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 	gen8_pte_t __iomem *pte =
 		(gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
 		(offset >> PAGE_SHIFT);
-	int rpm_atomic_seq;
-
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
 	gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
 
 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
-
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
@@ -2346,11 +2341,8 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	gen8_pte_t __iomem *gtt_entries;
 	gen8_pte_t gtt_entry;
 	dma_addr_t addr;
-	int rpm_atomic_seq;
 	int i = 0;
 
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
-
 	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
 
 	for_each_sgt_dma(addr, sgt_iter, st) {
@@ -2374,8 +2366,6 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	 */
 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
-
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
 struct insert_entries {
@@ -2414,16 +2404,11 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm,
 	gen6_pte_t __iomem *pte =
 		(gen6_pte_t __iomem *)dev_priv->ggtt.gsm +
 		(offset >> PAGE_SHIFT);
-	int rpm_atomic_seq;
-
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
 	iowrite32(vm->pte_encode(addr, level, true, flags), pte);
 
 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
-
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
 /*
@@ -2443,11 +2428,8 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	gen6_pte_t __iomem *gtt_entries;
 	gen6_pte_t gtt_entry;
 	dma_addr_t addr;
-	int rpm_atomic_seq;
 	int i = 0;
 
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
-
 	gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
 
 	for_each_sgt_dma(addr, sgt_iter, st) {
@@ -2470,8 +2452,6 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	 */
 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
-
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
 static void nop_clear_range(struct i915_address_space *vm,
@@ -2486,7 +2466,6 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 				  uint64_t length,
 				  bool use_scratch)
 {
-	struct drm_i915_private *dev_priv = to_i915(vm->dev);
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	unsigned first_entry = start >> PAGE_SHIFT;
 	unsigned num_entries = length >> PAGE_SHIFT;
@@ -2494,9 +2473,6 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
 	int i;
-	int rpm_atomic_seq;
-
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
 	if (WARN(num_entries > max_entries,
 		 "First entry = %d; Num entries = %d (max=%d)\n",
@@ -2509,8 +2485,6 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 	for (i = 0; i < num_entries; i++)
 		gen8_set_pte(&gtt_base[i], scratch_pte);
 	readl(gtt_base);
-
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
@@ -2518,7 +2492,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 				  uint64_t length,
 				  bool use_scratch)
 {
-	struct drm_i915_private *dev_priv = to_i915(vm->dev);
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	unsigned first_entry = start >> PAGE_SHIFT;
 	unsigned num_entries = length >> PAGE_SHIFT;
@@ -2526,9 +2499,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
 	int i;
-	int rpm_atomic_seq;
-
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
 	if (WARN(num_entries > max_entries,
 		 "First entry = %d; Num entries = %d (max=%d)\n",
@@ -2541,8 +2511,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
 	readl(gtt_base);
-
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
 static void i915_ggtt_insert_page(struct i915_address_space *vm,
@@ -2551,16 +2519,10 @@ static void i915_ggtt_insert_page(struct i915_address_space *vm,
 				  enum i915_cache_level cache_level,
 				  u32 unused)
 {
-	struct drm_i915_private *dev_priv = to_i915(vm->dev);
 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-	int rpm_atomic_seq;
-
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
 	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
-
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
@@ -2568,17 +2530,11 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 				     uint64_t start,
 				     enum i915_cache_level cache_level, u32 unused)
 {
-	struct drm_i915_private *dev_priv = to_i915(vm->dev);
 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-	int rpm_atomic_seq;
-
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
 	intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
 
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
-
 }
 
 static void i915_ggtt_clear_range(struct i915_address_space *vm,
@@ -2586,16 +2542,7 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm,
 				  uint64_t length,
 				  bool unused)
 {
-	struct drm_i915_private *dev_priv = to_i915(vm->dev);
-	unsigned first_entry = start >> PAGE_SHIFT;
-	unsigned num_entries = length >> PAGE_SHIFT;
-	int rpm_atomic_seq;
-
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
-
-	intel_gtt_clear_range(first_entry, num_entries);
-
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
+	intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
 }
 
 static int ggtt_bind_vma(struct i915_vma *vma,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 07b93f23b8bf..083c7a3746bd 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1648,23 +1648,6 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
 		DRM_DEBUG_DRIVER("RPM wakelock ref not held during HW access");
 }
 
-static inline int
-assert_rpm_atomic_begin(struct drm_i915_private *dev_priv)
-{
-	int seq = atomic_read(&dev_priv->pm.atomic_seq);
-
-	assert_rpm_wakelock_held(dev_priv);
-
-	return seq;
-}
-
-static inline void
-assert_rpm_atomic_end(struct drm_i915_private *dev_priv, int begin_seq)
-{
-	WARN_ONCE(atomic_read(&dev_priv->pm.atomic_seq) != begin_seq,
-		  "HW access outside of RPM atomic section\n");
-}
-
 /**
  * disable_rpm_wakeref_asserts - disable the RPM assert checks
  * @dev_priv: i915 device instance
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 15f21c6eb4f4..e7660537ccea 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -8040,5 +8040,4 @@ void intel_pm_setup(struct drm_device *dev)
 
 	dev_priv->pm.suspended = false;
 	atomic_set(&dev_priv->pm.wakeref_count, 0);
-	atomic_set(&dev_priv->pm.atomic_seq, 0);
 }
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 6c11168facd6..ca0500abcf34 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -2738,8 +2738,7 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
 	struct device *kdev = &pdev->dev;
 
 	assert_rpm_wakelock_held(dev_priv);
-	if (atomic_dec_and_test(&dev_priv->pm.wakeref_count))
-		atomic_inc(&dev_priv->pm.atomic_seq);
+	atomic_dec(&dev_priv->pm.wakeref_count);
 
 	pm_runtime_mark_last_busy(kdev);
 	pm_runtime_put_autosuspend(kdev);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 5/5] drm/i915: Move fence cancellation to runtime suspend
  2016-10-14 11:05 [PATCH v2 1/5] drm/i915: Move user fault tracking to a separate list Chris Wilson
                   ` (2 preceding siblings ...)
  2016-10-14 11:05 ` [PATCH v2 4/5] drm/i915: Remove RPM sequence checking Chris Wilson
@ 2016-10-14 11:05 ` Chris Wilson
  2016-10-14 13:20 ` ✓ Fi.CI.BAT: success for series starting with [v2,1/5] drm/i915: Move user fault tracking to a separate list Patchwork
  4 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2016-10-14 11:05 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

At the moment, we have dependency on the RPM as a barrier itself in both
i915_gem_release_all_mmaps() and i915_gem_restore_fences().
i915_gem_restore_fences() is also called along !runtime pm paths, but we
can move the markup of lost fences alongside releasing the mmaps into a
common i915_gem_runtime_suspend(). This has the advantage of locating
all the tricky barrier dependencies into one location.

Suggested-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.c       |  6 ++----
 drivers/gpu/drm/i915/i915_drv.h       |  3 ++-
 drivers/gpu/drm/i915/i915_gem.c       | 25 +++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_gem_fence.c | 12 +++++-------
 4 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d08d6617e3b6..6473bf58fbe5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2267,10 +2267,8 @@ static int vlv_resume_prepare(struct drm_i915_private *dev_priv,
 
 	vlv_check_no_gt_access(dev_priv);
 
-	if (rpm_resume) {
+	if (rpm_resume)
 		intel_init_clock_gating(dev);
-		i915_gem_restore_fences(dev);
-	}
 
 	return ret;
 }
@@ -2296,7 +2294,7 @@ static int intel_runtime_suspend(struct device *kdev)
 	 * We are safe here against re-faults, since the fault handler takes
 	 * an RPM reference.
 	 */
-	i915_gem_release_all_mmaps(dev_priv);
+	i915_gem_runtime_suspend(dev_priv);
 
 	intel_guc_suspend(dev);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7c686145861b..cc703c38b397 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3125,9 +3125,10 @@ void i915_vma_destroy(struct i915_vma *vma);
 
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
 int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
-void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
 void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 
+void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
+
 int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 
 static inline int __sg_page_count(struct scatterlist *sg)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8819fafbcc7a..d904e5f5d146 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1954,10 +1954,10 @@ out:
 	intel_runtime_pm_put(i915);
 }
 
-void
-i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
+void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_gem_object *obj, *on;
+	int i;
 
 	/*
 	 * Only called during RPM suspend. All users of the userfault_list
@@ -1972,6 +1972,27 @@ i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
 		drm_vma_node_unmap(&obj->base.vma_node,
 				   obj->base.dev->anon_inode->i_mapping);
 	}
+
+	/* The fence will be lost when the device powers down. If any were
+	 * in use by hardware (i.e. they are pinned), we should not be powering
+	 * down! All other fences will be reacquired by the user upon waking.
+	 */
+	for (i = 0; i < dev_priv->num_fence_regs; i++) {
+		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
+		struct i915_vma *vma;
+
+		if (WARN_ON(reg->pin_count))
+			continue;
+
+		vma = fetch_and_zero(&reg->vma);
+		if (!vma)
+			continue;
+
+		GEM_BUG_ON(!list_empty(&vma->obj->userfault_link));
+
+		list_move(&reg->link, &dev_priv->mm.fence_list);
+		vma->fence = NULL;
+	}
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index f7081f4b5d22..230edc821823 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -343,6 +343,9 @@ i915_vma_get_fence(struct i915_vma *vma)
 	struct drm_i915_fence_reg *fence;
 	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
 
+	/* Note that we revoke fences on runtime suspend. Therefore the user
+	 * must keep the device awake whilst using the fence.
+	 */
 	assert_rpm_wakelock_held(to_i915(vma->vm->dev));
 
 	/* Just update our place in the LRU if our fence is getting reused. */
@@ -368,19 +371,14 @@ i915_vma_get_fence(struct i915_vma *vma)
  * @dev: DRM device
  *
  * Restore the hw fence state to match the software tracking again, to be called
- * after a gpu reset and on resume.
+ * after a gpu reset and on resume. Note that on runtime suspend we only cancel
+ * the fences, to be reacquired by the user later.
  */
 void i915_gem_restore_fences(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	int i;
 
-	/* Note that this may be called outside of struct_mutex, by
-	 * runtime suspend/resume. The barrier we require is enforced by
-	 * rpm itself - all access to fences/GTT are only within an rpm
-	 * wakeref, and to acquire that wakeref you must pass through here.
-	 */
-
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
 		struct i915_vma *vma = reg->vma;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [v2,1/5] drm/i915: Move user fault tracking to a separate list
  2016-10-14 11:05 [PATCH v2 1/5] drm/i915: Move user fault tracking to a separate list Chris Wilson
                   ` (3 preceding siblings ...)
  2016-10-14 11:05 ` [PATCH v2 5/5] drm/i915: Move fence cancellation to runtime suspend Chris Wilson
@ 2016-10-14 13:20 ` Patchwork
  4 siblings, 0 replies; 7+ messages in thread
From: Patchwork @ 2016-10-14 13:20 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2,1/5] drm/i915: Move user fault tracking to a separate list
URL   : https://patchwork.freedesktop.org/series/13775/
State : success

== Summary ==

Series 13775v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/13775/revisions/1/mbox/

Test kms_pipe_crc_basic:
        Subgroup read-crc-pipe-b-frame-sequence:
                dmesg-warn -> PASS       (fi-skl-6770hq)
Test vgem_basic:
        Subgroup unload:
                skip       -> PASS       (fi-kbl-7200u)
                skip       -> PASS       (fi-skl-6700k)

fi-bdw-5557u     total:246  pass:231  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050     total:246  pass:204  dwarn:0   dfail:0   fail:0   skip:42 
fi-bxt-t5700     total:246  pass:216  dwarn:0   dfail:0   fail:0   skip:30 
fi-hsw-4770      total:246  pass:224  dwarn:0   dfail:0   fail:0   skip:22 
fi-hsw-4770r     total:246  pass:224  dwarn:0   dfail:0   fail:0   skip:22 
fi-ilk-650       total:246  pass:184  dwarn:0   dfail:0   fail:2   skip:60 
fi-ivb-3520m     total:246  pass:221  dwarn:0   dfail:0   fail:0   skip:25 
fi-ivb-3770      total:246  pass:221  dwarn:0   dfail:0   fail:0   skip:25 
fi-kbl-7200u     total:246  pass:222  dwarn:0   dfail:0   fail:0   skip:24 
fi-skl-6260u     total:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hq    total:246  pass:223  dwarn:0   dfail:0   fail:0   skip:23 
fi-skl-6700k     total:246  pass:221  dwarn:1   dfail:0   fail:0   skip:24 
fi-skl-6770hq    total:246  pass:230  dwarn:1   dfail:0   fail:1   skip:14 
fi-snb-2520m     total:246  pass:210  dwarn:0   dfail:0   fail:0   skip:36 
fi-snb-2600      total:246  pass:209  dwarn:0   dfail:0   fail:0   skip:37 

Results at /archive/results/CI_IGT_test/Patchwork_2718/

e086610ff079f1bf1fe91d4ab175443590cacb8d drm-intel-nightly: 2016y-10m-14d-11h-43m-09s UTC integration manifest
2ee5600 drm/i915: Move user fault tracking to a separate list

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 3/5] drm/i915: Remove superfluous locking around userfault_list
  2016-10-14 11:05 ` [PATCH v2 3/5] drm/i915: Remove superfluous locking around userfault_list Chris Wilson
@ 2016-10-17  7:44   ` Joonas Lahtinen
  0 siblings, 0 replies; 7+ messages in thread
From: Joonas Lahtinen @ 2016-10-17  7:44 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-10-14 at 12:05 +0100, Chris Wilson wrote:
> Now that we have reduced the access to the list to either (a) under the
> struct_mutex whilst holding the RPM wakeref (so that concurrent writers to
> the list are serialised by struct_mutex) and (b) under the atomic
> runtime suspend (which cannot run concurrently with any other accessor due
> to the atomic nature of the runtime suspend) we can remove the extra
> locking around the list itself.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-10-17  7:44 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-14 11:05 [PATCH v2 1/5] drm/i915: Move user fault tracking to a separate list Chris Wilson
2016-10-14 11:05 ` [PATCH v2 2/5] drm/i915: Use RPM as the barrier for controlling user mmap access Chris Wilson
2016-10-14 11:05 ` [PATCH v2 3/5] drm/i915: Remove superfluous locking around userfault_list Chris Wilson
2016-10-17  7:44   ` Joonas Lahtinen
2016-10-14 11:05 ` [PATCH v2 4/5] drm/i915: Remove RPM sequence checking Chris Wilson
2016-10-14 11:05 ` [PATCH v2 5/5] drm/i915: Move fence cancellation to runtime suspend Chris Wilson
2016-10-14 13:20 ` ✓ Fi.CI.BAT: success for series starting with [v2,1/5] drm/i915: Move user fault tracking to a separate list Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.