* RPM vs struct mutex fixes
@ 2016-10-20  7:51 Chris Wilson
  2016-10-20  7:51 ` [PATCH 1/5] drm/i915: Move user fault tracking to a separate list Chris Wilson
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Chris Wilson @ 2016-10-20  7:51 UTC (permalink / raw)
  To: intel-gfx

A little hiccup, caught by 0day but not by BAT, has been resolved with
commit ebc0808fa2da ("drm/i915: Restrict pagefault disabling to just
around copy_from_user()"); let's try again.

I'm still puzzling over how BAT missed the warning.
-Chris


* [PATCH 1/5] drm/i915: Move user fault tracking to a separate list
  2016-10-20  7:51 RPM vs struct mutex fixes Chris Wilson
@ 2016-10-20  7:51 ` Chris Wilson
  2016-10-20  7:51 ` [PATCH 2/5] drm/i915: Use RPM as the barrier for controlling user mmap access Chris Wilson
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2016-10-20  7:51 UTC (permalink / raw)
  To: intel-gfx

We want to decouple RPM and struct_mutex, but currently RPM has to walk
the list of bound objects and revoke the userspace mmappings before we
suspend (otherwise userspace may continue to access the GTT whilst it is
powered down). Walking the bound_list requires the struct_mutex, but if
we track the faulted objects on a separate list, under its own lock, we
can take the first step towards removing the struct_mutex.

v2: Split runtime-suspend unmapping from regular unmapping, to make the
locking (and barriers) clearer. Add the object to the userfault_list
prior to inserting the first PTE; the race between add/revoke is
resolved by struct_mutex for regular unmappings and by RPM for
runtime suspend.
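
As an aside, the core trick is that an empty userfault_link doubles as
the old fault_mappable flag, and list_del_init() makes revocation
idempotent. A standalone userspace model of that list discipline
(mimicking list.h; not the driver code):

#include <stdio.h>
#include <stdbool.h>

struct list_head { struct list_head *prev, *next; };

static void INIT_LIST_HEAD(struct list_head *h) { h->prev = h->next = h; }
static bool list_empty(const struct list_head *h) { return h->next == h; }
static void list_add(struct list_head *n, struct list_head *h)
{
        n->next = h->next; n->prev = h;
        h->next->prev = n; h->next = n;
}
static void list_del_init(struct list_head *n)
{
        n->prev->next = n->next; n->next->prev = n->prev;
        INIT_LIST_HEAD(n);
}

struct object { struct list_head userfault_link; };

int main(void)
{
        struct list_head userfault_list;
        struct object obj;

        INIT_LIST_HEAD(&userfault_list);
        INIT_LIST_HEAD(&obj.userfault_link);    /* starts unmapped */

        /* fault handler: mark as mmapped exactly once */
        if (list_empty(&obj.userfault_link))
                list_add(&obj.userfault_link, &userfault_list);
        printf("mapped: %d\n", !list_empty(&obj.userfault_link)); /* 1 */

        /* revoke: idempotent, safe to repeat */
        list_del_init(&obj.userfault_link);
        list_del_init(&obj.userfault_link);
        printf("mapped: %d\n", !list_empty(&obj.userfault_link)); /* 0 */
        return 0;
}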

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> #v1
---
 drivers/gpu/drm/i915/i915_debugfs.c   |  2 +-
 drivers/gpu/drm/i915/i915_drv.h       | 20 +++++++++++------
 drivers/gpu/drm/i915/i915_gem.c       | 42 +++++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_evict.c |  2 +-
 drivers/gpu/drm/i915/i915_gem_fence.c |  2 +-
 5 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 20638d22bbad..1b402eebd3d9 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -107,7 +107,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj)
 
 static char get_global_flag(struct drm_i915_gem_object *obj)
 {
-	return obj->fault_mappable ? 'g' : ' ';
+	return !list_empty(&obj->userfault_link) ? 'g' : ' ';
 }
 
 static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b339c916f7a9..60a3d879840a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1360,6 +1360,14 @@ struct i915_gem_mm {
 	 */
 	struct list_head unbound_list;
 
+	/** Protects access to the userfault_list */
+	spinlock_t userfault_lock;
+
+	/** List of all objects in gtt_space, currently mmaped by userspace.
+	 * All objects within this list must also be on bound_list.
+	 */
+	struct list_head userfault_list;
+
 	/** Usable portion of the GTT for GEM */
 	unsigned long stolen_base; /* limited to low memory (32-bit) */
 
@@ -2204,6 +2212,11 @@ struct drm_i915_gem_object {
 	struct drm_mm_node *stolen;
 	struct list_head global_list;
 
+	/**
+	 * Whether the object is currently in the GGTT mmap.
+	 */
+	struct list_head userfault_link;
+
 	/** Used in execbuf to temporarily hold a ref */
 	struct list_head obj_exec_link;
 
@@ -2231,13 +2244,6 @@ struct drm_i915_gem_object {
 	 */
 	unsigned int madv:2;
 
-	/**
-	 * Whether the current gtt mapping needs to be mappable (and isn't just
-	 * mappable by accident). Track pin and fault separate for a more
-	 * accurate mappable working set.
-	 */
-	unsigned int fault_mappable:1;
-
 	/*
 	 * Is the object to be mapped as read-only to the GPU
 	 * Only honoured if hardware has relevant pte bit
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8ed8e24025ac..b7a6bcd15bc9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1839,16 +1839,19 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	if (ret)
 		goto err_unpin;
 
+	/* Mark as being mmapped into userspace for later revocation */
+	spin_lock(&dev_priv->mm.userfault_lock);
+	if (list_empty(&obj->userfault_link))
+		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
+	spin_unlock(&dev_priv->mm.userfault_lock);
+
 	/* Finally, remap it using the new GTT offset */
 	ret = remap_io_mapping(area,
 			       area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT),
 			       (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
 			       min_t(u64, vma->size, area->vm_end - area->vm_start),
 			       &ggtt->mappable);
-	if (ret)
-		goto err_unpin;
 
-	obj->fault_mappable = true;
 err_unpin:
 	__i915_vma_unpin(vma);
 err_unlock:
@@ -1916,13 +1919,22 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 void
 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	bool zap = false;
+
 	/* Serialisation between user GTT access and our code depends upon
 	 * revoking the CPU's PTE whilst the mutex is held. The next user
 	 * pagefault then has to wait until we release the mutex.
 	 */
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	if (!obj->fault_mappable)
+	spin_lock(&i915->mm.userfault_lock);
+	if (!list_empty(&obj->userfault_link)) {
+		list_del_init(&obj->userfault_link);
+		zap = true;
+	}
+	spin_unlock(&i915->mm.userfault_lock);
+	if (!zap)
 		return;
 
 	drm_vma_node_unmap(&obj->base.vma_node,
@@ -1936,8 +1948,6 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	 * memory writes before touching registers / GSM.
 	 */
 	wmb();
-
-	obj->fault_mappable = false;
 }
 
 void
@@ -1945,8 +1955,19 @@ i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_gem_object *obj;
 
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
-		i915_gem_release_mmap(obj);
+	spin_lock(&dev_priv->mm.userfault_lock);
+	while ((obj = list_first_entry_or_null(&dev_priv->mm.userfault_list,
+					       struct drm_i915_gem_object,
+					       userfault_link))) {
+		list_del_init(&obj->userfault_link);
+		spin_unlock(&dev_priv->mm.userfault_lock);
+
+		drm_vma_node_unmap(&obj->base.vma_node,
+				   obj->base.dev->anon_inode->i_mapping);
+
+		spin_lock(&dev_priv->mm.userfault_lock);
+	}
+	spin_unlock(&dev_priv->mm.userfault_lock);
 }
 
 /**
@@ -4108,6 +4129,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	int i;
 
 	INIT_LIST_HEAD(&obj->global_list);
+	INIT_LIST_HEAD(&obj->userfault_link);
 	for (i = 0; i < I915_NUM_ENGINES; i++)
 		init_request_active(&obj->last_read[i],
 				    i915_gem_object_retire__read);
@@ -4521,6 +4543,7 @@ int i915_gem_init(struct drm_device *dev)
 	int ret;
 
 	mutex_lock(&dev->struct_mutex);
+	spin_lock_init(&dev_priv->mm.userfault_lock);
 
 	if (!i915.enable_execlists) {
 		dev_priv->gt.resume = intel_legacy_submission_resume;
@@ -4640,6 +4663,7 @@ i915_gem_load_init(struct drm_device *dev)
 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
+	INIT_LIST_HEAD(&dev_priv->mm.userfault_list);
 	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
 			  i915_gem_retire_work_handler);
 	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index b5e9e669f50f..a934f372c5ce 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -56,7 +56,7 @@ mark_free(struct i915_vma *vma, unsigned int flags, struct list_head *unwind)
 	if (WARN_ON(!list_empty(&vma->exec_list)))
 		return false;
 
-	if (flags & PIN_NONFAULT && vma->obj->fault_mappable)
+	if (flags & PIN_NONFAULT && !list_empty(&vma->obj->userfault_link))
 		return false;
 
 	list_add(&vma->exec_list, unwind);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index a6daf2deab74..67013179b8ed 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -391,7 +391,7 @@ void i915_gem_restore_fences(struct drm_device *dev)
 		 */
 		if (vma && !i915_gem_object_is_tiled(vma->obj)) {
 			GEM_BUG_ON(!reg->dirty);
-			GEM_BUG_ON(vma->obj->fault_mappable);
+			GEM_BUG_ON(!list_empty(&vma->obj->userfault_link));
 
 			list_move(&reg->link, &dev_priv->mm.fence_list);
 			vma->fence = NULL;
-- 
2.9.3


* [PATCH 2/5] drm/i915: Use RPM as the barrier for controlling user mmap access
  2016-10-20  7:51 RPM vs struct mutex fixes Chris Wilson
  2016-10-20  7:51 ` [PATCH 1/5] drm/i915: Move user fault tracking to a separate list Chris Wilson
@ 2016-10-20  7:51 ` Chris Wilson
  2016-10-20  7:51 ` [PATCH 3/5] drm/i915: Remove superfluous locking around userfault_list Chris Wilson
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2016-10-20  7:51 UTC (permalink / raw)
  To: intel-gfx

We can remove the false coupling between RPM and struct_mutex by
observing that we can use the RPM wakeref as the barrier around user
mmap access. That is, since we tear down the user's PTEs atomically
from within rpm suspend, and faulting in new PTEs requires the rpm
wakeref, no user access is possible through those PTEs without RPM
being awake. Having made that observation, we can then remove the
presumption of having to take rpm outside of struct_mutex and so allow
fine-grained acquisition of a wakeref around hw access, rather than
having to remember to acquire the wakeref early on.

v2: Rejig placement of the new intel_runtime_pm_get() to be as tight
as possible around the GTT pread/pwrite.
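
For illustration, a minimal userspace model of that barrier argument
(invented flags, not the driver code): PTEs are revoked before the
device powers down and can only be faulted back in under a wakeref, so
a live PTE implies the device is awake:

#include <assert.h>
#include <stdio.h>

static int suspended;   /* device runtime-suspended? */
static int pte_valid;   /* does userspace hold a live GTT PTE? */

static void runtime_suspend(void)
{
        pte_valid = 0;          /* revoke all user mmaps first... */
        suspended = 1;          /* ...then power down */
}

static void gtt_fault(void)
{
        suspended = 0;          /* taking the rpm wakeref resumes the device */
        pte_valid = 1;          /* only now is the PTE inserted */
}

static void user_access(void)
{
        assert(!pte_valid || !suspended);       /* the invariant we rely on */
}

int main(void)
{
        gtt_fault();
        user_access();          /* fine: device awake */
        runtime_suspend();
        user_access();          /* also fine: the PTE was revoked */
        puts("wakeref barrier holds");
        return 0;
}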

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c    | 56 ++++++++++++++++++++--------------
 drivers/gpu/drm/i915/i915_drv.c        | 19 ------------
 drivers/gpu/drm/i915/i915_gem.c        | 42 +++++++++++++------------
 drivers/gpu/drm/i915/i915_gem_gtt.c    | 17 ++++++++---
 drivers/gpu/drm/i915/i915_gem_tiling.c |  4 ---
 5 files changed, 69 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 1b402eebd3d9..88c9d222a7e6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -743,17 +743,32 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 			   I915_READ(VLV_IIR_RW));
 		seq_printf(m, "Display IMR:\t%08x\n",
 			   I915_READ(VLV_IMR));
-		for_each_pipe(dev_priv, pipe)
+		for_each_pipe(dev_priv, pipe) {
+			enum intel_display_power_domain power_domain;
+
+			power_domain = POWER_DOMAIN_PIPE(pipe);
+			if (!intel_display_power_get_if_enabled(dev_priv,
+								power_domain)) {
+				seq_printf(m, "Pipe %c power disabled\n",
+					   pipe_name(pipe));
+				continue;
+			}
+
 			seq_printf(m, "Pipe %c stat:\t%08x\n",
 				   pipe_name(pipe),
 				   I915_READ(PIPESTAT(pipe)));
 
+			intel_display_power_put(dev_priv, power_domain);
+		}
+
+		intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
 		seq_printf(m, "Port hotplug:\t%08x\n",
 			   I915_READ(PORT_HOTPLUG_EN));
 		seq_printf(m, "DPFLIPSTAT:\t%08x\n",
 			   I915_READ(VLV_DPFLIPSTAT));
 		seq_printf(m, "DPINVGTT:\t%08x\n",
 			   I915_READ(DPINVGTT));
+		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
 
 		for (i = 0; i < 4; i++) {
 			seq_printf(m, "GT Interrupt IMR %d:\t%08x\n",
@@ -1396,14 +1411,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 static int ironlake_drpc_info(struct seq_file *m)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
 	u32 rgvmodectl, rstdbyctl;
 	u16 crstandvid;
-	int ret;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
 	intel_runtime_pm_get(dev_priv);
 
 	rgvmodectl = I915_READ(MEMMODECTL);
@@ -1411,7 +1421,6 @@ static int ironlake_drpc_info(struct seq_file *m)
 	crstandvid = I915_READ16(CRSTANDVID);
 
 	intel_runtime_pm_put(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
 
 	seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN));
 	seq_printf(m, "Boost freq: %d\n",
@@ -1757,6 +1766,7 @@ static int i915_sr_status(struct seq_file *m, void *unused)
 	bool sr_enabled = false;
 
 	intel_runtime_pm_get(dev_priv);
+	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
 
 	if (HAS_PCH_SPLIT(dev_priv))
 		sr_enabled = I915_READ(WM1_LP_ILK) & WM1_LP_SR_EN;
@@ -1770,6 +1780,7 @@ static int i915_sr_status(struct seq_file *m, void *unused)
 	else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
 
+	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
 	intel_runtime_pm_put(dev_priv);
 
 	seq_printf(m, "self-refresh: %s\n",
@@ -2091,12 +2102,7 @@ static const char *swizzle_string(unsigned swizzle)
 static int i915_swizzle_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	int ret;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
 	intel_runtime_pm_get(dev_priv);
 
 	seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
@@ -2136,7 +2142,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
 		seq_puts(m, "L-shaped memory detected\n");
 
 	intel_runtime_pm_put(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
 }
@@ -2542,11 +2547,22 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
 		enabled = I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE;
 	else {
 		for_each_pipe(dev_priv, pipe) {
+			enum transcoder cpu_transcoder =
+				intel_pipe_to_cpu_transcoder(dev_priv, pipe);
+			enum intel_display_power_domain power_domain;
+
+			power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder);
+			if (!intel_display_power_get_if_enabled(dev_priv,
+								power_domain))
+				continue;
+
 			stat[pipe] = I915_READ(VLV_PSRSTAT(pipe)) &
 				VLV_EDP_PSR_CURR_STATE_MASK;
 			if ((stat[pipe] == VLV_EDP_PSR_ACTIVE_NORFB_UP) ||
 			    (stat[pipe] == VLV_EDP_PSR_ACTIVE_SF_UPDATE))
 				enabled = true;
+
+			intel_display_power_put(dev_priv, power_domain);
 		}
 	}
 
@@ -3094,6 +3110,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
+	intel_runtime_pm_get(dev_priv);
+
 	for_each_engine(engine, dev_priv, id) {
 		struct intel_breadcrumbs *b = &engine->breadcrumbs;
 		struct drm_i915_gem_request *rq;
@@ -3213,6 +3231,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 		seq_puts(m, "\n");
 	}
 
+	intel_runtime_pm_put(dev_priv);
+
 	return 0;
 }
 
@@ -4798,13 +4818,9 @@ i915_wedged_set(void *data, u64 val)
 	if (i915_reset_in_progress(&dev_priv->gpu_error))
 		return -EAGAIN;
 
-	intel_runtime_pm_get(dev_priv);
-
 	i915_handle_error(dev_priv, val,
 			  "Manually setting wedged to %llu", val);
 
-	intel_runtime_pm_put(dev_priv);
-
 	return 0;
 }
 
@@ -5039,22 +5055,16 @@ static int
 i915_cache_sharing_get(void *data, u64 *val)
 {
 	struct drm_i915_private *dev_priv = data;
-	struct drm_device *dev = &dev_priv->drm;
 	u32 snpcr;
-	int ret;
 
 	if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv)))
 		return -ENODEV;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
 	intel_runtime_pm_get(dev_priv);
 
 	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
 
 	intel_runtime_pm_put(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
 
 	*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 912d5348e3e7..885d33f341f3 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2301,24 +2301,6 @@ static int intel_runtime_suspend(struct device *kdev)
 
 	DRM_DEBUG_KMS("Suspending device\n");
 
-	/*
-	 * We could deadlock here in case another thread holding struct_mutex
-	 * calls RPM suspend concurrently, since the RPM suspend will wait
-	 * first for this RPM suspend to finish. In this case the concurrent
-	 * RPM resume will be followed by its RPM suspend counterpart. Still
-	 * for consistency return -EAGAIN, which will reschedule this suspend.
-	 */
-	if (!mutex_trylock(&dev->struct_mutex)) {
-		DRM_DEBUG_KMS("device lock contention, deffering suspend\n");
-		/*
-		 * Bump the expiration timestamp, otherwise the suspend won't
-		 * be rescheduled.
-		 */
-		pm_runtime_mark_last_busy(kdev);
-
-		return -EAGAIN;
-	}
-
 	disable_rpm_wakeref_asserts(dev_priv);
 
 	/*
@@ -2326,7 +2308,6 @@ static int intel_runtime_suspend(struct device *kdev)
 	 * an RPM reference.
 	 */
 	i915_gem_release_all_mmaps(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
 
 	intel_guc_suspend(dev);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b7a6bcd15bc9..744939f652fc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -826,6 +826,7 @@ i915_gem_gtt_pread(struct drm_device *dev,
 	uint64_t offset;
 	int ret;
 
+	intel_runtime_pm_get(to_i915(dev));
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
 	if (!IS_ERR(vma)) {
 		node.start = i915_ggtt_offset(vma);
@@ -926,6 +927,7 @@ i915_gem_gtt_pread(struct drm_device *dev,
 		i915_vma_unpin(vma);
 	}
 out:
+	intel_runtime_pm_put(to_i915(dev));
 	return ret;
 }
 
@@ -1060,12 +1062,9 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 	ret = i915_gem_shmem_pread(dev, obj, args, file);
 
 	/* pread for non shmem backed objects */
-	if (ret == -EFAULT || ret == -ENODEV) {
-		intel_runtime_pm_get(to_i915(dev));
+	if (ret == -EFAULT || ret == -ENODEV)
 		ret = i915_gem_gtt_pread(dev, obj, args->size,
 					args->offset, args->data_ptr);
-		intel_runtime_pm_put(to_i915(dev));
-	}
 
 	i915_gem_object_put(obj);
 	mutex_unlock(&dev->struct_mutex);
@@ -1126,6 +1125,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 	if (i915_gem_object_is_tiled(obj))
 		return -EFAULT;
 
+	intel_runtime_pm_get(i915);
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 				       PIN_MAPPABLE | PIN_NONBLOCK);
 	if (!IS_ERR(vma)) {
@@ -1234,6 +1234,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 		i915_vma_unpin(vma);
 	}
 out:
+	intel_runtime_pm_put(i915);
 	return ret;
 }
 
@@ -1466,12 +1467,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	 * perspective, requiring manual detiling by the client.
 	 */
 	if (!i915_gem_object_has_struct_page(obj) ||
-	    cpu_write_needs_clflush(obj)) {
-		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
+	    cpu_write_needs_clflush(obj))
 		/* Note that the gtt paths might fail with non-page-backed user
 		 * pointers (e.g. gtt mappings when moving data between
-		 * textures). Fallback to the shmem path in that case. */
-	}
+		 * textures). Fallback to the shmem path in that case.
+		 */
+		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
 
 	if (ret == -EFAULT || ret == -ENOSPC) {
 		if (obj->phys_handle)
@@ -1840,6 +1841,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 		goto err_unpin;
 
 	/* Mark as being mmapped into userspace for later revocation */
+	assert_rpm_wakelock_held(dev_priv);
 	spin_lock(&dev_priv->mm.userfault_lock);
 	if (list_empty(&obj->userfault_link))
 		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
@@ -1925,8 +1927,13 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	/* Serialisation between user GTT access and our code depends upon
 	 * revoking the CPU's PTE whilst the mutex is held. The next user
 	 * pagefault then has to wait until we release the mutex.
+	 *
+	 * Note that RPM complicates somewhat by adding an additional
+	 * requirement that operations to the GGTT be made holding the RPM
+	 * wakeref.
 	 */
 	lockdep_assert_held(&i915->drm.struct_mutex);
+	intel_runtime_pm_get(i915);
 
 	spin_lock(&i915->mm.userfault_lock);
 	if (!list_empty(&obj->userfault_link)) {
@@ -1935,7 +1942,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	}
 	spin_unlock(&i915->mm.userfault_lock);
 	if (!zap)
-		return;
+		goto out;
 
 	drm_vma_node_unmap(&obj->base.vma_node,
 			   obj->base.dev->anon_inode->i_mapping);
@@ -1948,6 +1955,9 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	 * memory writes before touching registers / GSM.
 	 */
 	wmb();
+
+out:
+	intel_runtime_pm_put(i915);
 }
 
 void
@@ -3476,7 +3486,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct drm_i915_gem_caching *args = data;
 	struct drm_i915_gem_object *obj;
 	enum i915_cache_level level;
@@ -3493,23 +3503,21 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 		 * cacheline, whereas normally such cachelines would get
 		 * invalidated.
 		 */
-		if (!HAS_LLC(dev) && !HAS_SNOOP(dev))
+		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
 			return -ENODEV;
 
 		level = I915_CACHE_LLC;
 		break;
 	case I915_CACHING_DISPLAY:
-		level = HAS_WT(dev_priv) ? I915_CACHE_WT : I915_CACHE_NONE;
+		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	intel_runtime_pm_get(dev_priv);
-
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
-		goto rpm_put;
+		return ret;
 
 	obj = i915_gem_object_lookup(file, args->handle);
 	if (!obj) {
@@ -3518,13 +3526,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 	}
 
 	ret = i915_gem_object_set_cache_level(obj, level);
-
 	i915_gem_object_put(obj);
 unlock:
 	mutex_unlock(&dev->struct_mutex);
-rpm_put:
-	intel_runtime_pm_put(dev_priv);
-
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 062fb0ad75da..33036359c170 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2667,6 +2667,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 			 enum i915_cache_level cache_level,
 			 u32 flags)
 {
+	struct drm_i915_private *i915 = to_i915(vma->vm->dev);
 	struct drm_i915_gem_object *obj = vma->obj;
 	u32 pte_flags = 0;
 	int ret;
@@ -2679,8 +2680,10 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	if (obj->gt_ro)
 		pte_flags |= PTE_READ_ONLY;
 
+	intel_runtime_pm_get(i915);
 	vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
 				cache_level, pte_flags);
+	intel_runtime_pm_put(i915);
 
 	/*
 	 * Without aliasing PPGTT there's no difference between
@@ -2696,6 +2699,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 				 enum i915_cache_level cache_level,
 				 u32 flags)
 {
+	struct drm_i915_private *i915 = to_i915(vma->vm->dev);
 	u32 pte_flags;
 	int ret;
 
@@ -2710,14 +2714,15 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 
 
 	if (flags & I915_VMA_GLOBAL_BIND) {
+		intel_runtime_pm_get(i915);
 		vma->vm->insert_entries(vma->vm,
 					vma->pages, vma->node.start,
 					cache_level, pte_flags);
+		intel_runtime_pm_put(i915);
 	}
 
 	if (flags & I915_VMA_LOCAL_BIND) {
-		struct i915_hw_ppgtt *appgtt =
-			to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
+		struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
 		appgtt->base.insert_entries(&appgtt->base,
 					    vma->pages, vma->node.start,
 					    cache_level, pte_flags);
@@ -2728,12 +2733,16 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 
 static void ggtt_unbind_vma(struct i915_vma *vma)
 {
-	struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
+	struct drm_i915_private *i915 = to_i915(vma->vm->dev);
+	struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
 	const u64 size = min(vma->size, vma->node.size);
 
-	if (vma->flags & I915_VMA_GLOBAL_BIND)
+	if (vma->flags & I915_VMA_GLOBAL_BIND) {
+		intel_runtime_pm_get(i915);
 		vma->vm->clear_range(vma->vm,
 				     vma->node.start, size);
+		intel_runtime_pm_put(i915);
+	}
 
 	if (vma->flags & I915_VMA_LOCAL_BIND && appgtt)
 		appgtt->base.clear_range(&appgtt->base,
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index c21bc0068d20..71f80d2a487c 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -205,8 +205,6 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	intel_runtime_pm_get(dev_priv);
-
 	mutex_lock(&dev->struct_mutex);
 	if (obj->pin_display || obj->framebuffer_references) {
 		err = -EBUSY;
@@ -302,8 +300,6 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 	i915_gem_object_put(obj);
 	mutex_unlock(&dev->struct_mutex);
 
-	intel_runtime_pm_put(dev_priv);
-
 	return err;
 }
 
-- 
2.9.3


* [PATCH 3/5] drm/i915: Remove superfluous locking around userfault_list
  2016-10-20  7:51 RPM vs struct mutex fixes Chris Wilson
  2016-10-20  7:51 ` [PATCH 1/5] drm/i915: Move user fault tracking to a separate list Chris Wilson
  2016-10-20  7:51 ` [PATCH 2/5] drm/i915: Use RPM as the barrier for controlling user mmap access Chris Wilson
@ 2016-10-20  7:51 ` Chris Wilson
  2016-10-20  7:51 ` [PATCH 4/5] drm/i915: Remove RPM sequence checking Chris Wilson
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2016-10-20  7:51 UTC (permalink / raw)
  To: intel-gfx

Now that we have reduced access to the list to either (a) under the
struct_mutex whilst holding the RPM wakeref (so that concurrent writers
to the list are serialised by struct_mutex) or (b) under the atomic
runtime suspend (which cannot run concurrently with any other accessor
due to the atomic nature of the runtime suspend), we can remove the
extra locking around the list itself.
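
With the spinlock gone, draining the list on suspend becomes a plain
list_for_each_entry_safe() walk. The point of the _safe variant,
modelled here with a simplified standalone singly-linked list (not the
driver code), is that the next pointer is cached before the current
node is unlinked:

#include <stdio.h>
#include <stddef.h>

struct node { struct node *next; int id; };

int main(void)
{
        struct node c = { NULL, 3 }, b = { &c, 2 }, a = { &b, 1 };
        struct node *it = &a, *next;

        while (it) {
                next = it->next;        /* cache before unlinking */
                printf("revoking mmap for object %d\n", it->id);
                it->next = NULL;        /* unlink: safe, next is cached */
                it = next;
        }
        return 0;
}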

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  3 ---
 drivers/gpu/drm/i915/i915_gem.c | 33 ++++++++++++---------------------
 2 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 60a3d879840a..c083ed37572b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1360,9 +1360,6 @@ struct i915_gem_mm {
 	 */
 	struct list_head unbound_list;
 
-	/** Protects access to the userfault_list */
-	spinlock_t userfault_lock;
-
 	/** List of all objects in gtt_space, currently mmaped by userspace.
 	 * All objects within this list must also be on bound_list.
 	 */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 744939f652fc..64a88ce4b3c6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1842,10 +1842,8 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 
 	/* Mark as being mmapped into userspace for later revocation */
 	assert_rpm_wakelock_held(dev_priv);
-	spin_lock(&dev_priv->mm.userfault_lock);
 	if (list_empty(&obj->userfault_link))
 		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
-	spin_unlock(&dev_priv->mm.userfault_lock);
 
 	/* Finally, remap it using the new GTT offset */
 	ret = remap_io_mapping(area,
@@ -1922,7 +1920,6 @@ void
 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	bool zap = false;
 
 	/* Serialisation between user GTT access and our code depends upon
 	 * revoking the CPU's PTE whilst the mutex is held. The next user
@@ -1935,15 +1932,10 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	lockdep_assert_held(&i915->drm.struct_mutex);
 	intel_runtime_pm_get(i915);
 
-	spin_lock(&i915->mm.userfault_lock);
-	if (!list_empty(&obj->userfault_link)) {
-		list_del_init(&obj->userfault_link);
-		zap = true;
-	}
-	spin_unlock(&i915->mm.userfault_lock);
-	if (!zap)
+	if (list_empty(&obj->userfault_link))
 		goto out;
 
+	list_del_init(&obj->userfault_link);
 	drm_vma_node_unmap(&obj->base.vma_node,
 			   obj->base.dev->anon_inode->i_mapping);
 
@@ -1963,21 +1955,21 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 void
 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_gem_object *obj;
+	struct drm_i915_gem_object *obj, *on;
 
-	spin_lock(&dev_priv->mm.userfault_lock);
-	while ((obj = list_first_entry_or_null(&dev_priv->mm.userfault_list,
-					       struct drm_i915_gem_object,
-					       userfault_link))) {
-		list_del_init(&obj->userfault_link);
-		spin_unlock(&dev_priv->mm.userfault_lock);
+	/*
+	 * Only called during RPM suspend. All users of the userfault_list
+	 * must be holding an RPM wakeref to ensure that this can not
+	 * run concurrently with themselves (and use the struct_mutex for
+	 * protection between themselves).
+	 */
 
+	list_for_each_entry_safe(obj, on,
+				 &dev_priv->mm.userfault_list, userfault_link) {
+		list_del_init(&obj->userfault_link);
 		drm_vma_node_unmap(&obj->base.vma_node,
 				   obj->base.dev->anon_inode->i_mapping);
-
-		spin_lock(&dev_priv->mm.userfault_lock);
 	}
-	spin_unlock(&dev_priv->mm.userfault_lock);
 }
 
 /**
@@ -4547,7 +4539,6 @@ int i915_gem_init(struct drm_device *dev)
 	int ret;
 
 	mutex_lock(&dev->struct_mutex);
-	spin_lock_init(&dev_priv->mm.userfault_lock);
 
 	if (!i915.enable_execlists) {
 		dev_priv->gt.resume = intel_legacy_submission_resume;
-- 
2.9.3


* [PATCH 4/5] drm/i915: Remove RPM sequence checking
  2016-10-20  7:51 RPM vs struct mutex fixes Chris Wilson
                   ` (2 preceding siblings ...)
  2016-10-20  7:51 ` [PATCH 3/5] drm/i915: Remove superfluous locking around userfault_list Chris Wilson
@ 2016-10-20  7:51 ` Chris Wilson
  2016-10-20  7:51 ` [PATCH 5/5] drm/i915: Move fence cancellation to runtime suspend Chris Wilson
  2016-10-20  9:17 ` ✗ Fi.CI.BAT: warning for series starting with [1/5] drm/i915: Move user fault tracking to a separate list Patchwork
  5 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2016-10-20  7:51 UTC (permalink / raw)
  To: intel-gfx

We only used the RPM sequence checking inside the low-level GTT
accessors, when we had to rely on callers taking the wakeref on our
behalf. Now that we take the RPM wakeref inside the GTT management
routines themselves, we can forgo the sanity check of the callers.
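
For reference, what the removed check guarded against, as a standalone
model (simplified from the assert_rpm_atomic_begin/end() pair in the
diff below): the sequence only advances when the last wakeref is
dropped, so a sequence that is stable across a HW access proves a
wakeref covered it:

#include <assert.h>
#include <stdio.h>

static int wakeref_count;
static int atomic_seq;

static void rpm_get(void) { wakeref_count++; }
static void rpm_put(void)
{
        if (--wakeref_count == 0)
                atomic_seq++;   /* device may now suspend */
}

static void hw_access(void)
{
        int seq = atomic_seq;           /* assert_rpm_atomic_begin() */
        /* ... touch registers / GSM ... */
        assert(seq == atomic_seq);      /* assert_rpm_atomic_end() */
}

int main(void)
{
        rpm_get();
        hw_access();    /* ok: wakeref held, the seq cannot advance */
        rpm_put();
        puts("access covered by a wakeref");
        return 0;
}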

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  1 -
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 55 +--------------------------------
 drivers/gpu/drm/i915/intel_drv.h        | 17 ----------
 drivers/gpu/drm/i915/intel_pm.c         |  1 -
 drivers/gpu/drm/i915/intel_runtime_pm.c |  3 +-
 5 files changed, 2 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c083ed37572b..c87074b8ef61 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1686,7 +1686,6 @@ struct skl_wm_level {
  */
 struct i915_runtime_pm {
 	atomic_t wakeref_count;
-	atomic_t atomic_seq;
 	bool suspended;
 	bool irqs_enabled;
 };
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 33036359c170..947d5ad51fb7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2395,16 +2395,11 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 	gen8_pte_t __iomem *pte =
 		(gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
 		(offset >> PAGE_SHIFT);
-	int rpm_atomic_seq;
-
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
 	gen8_set_pte(pte, gen8_pte_encode(addr, level));
 
 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
-
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
@@ -2418,11 +2413,8 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	gen8_pte_t __iomem *gtt_entries;
 	gen8_pte_t gtt_entry;
 	dma_addr_t addr;
-	int rpm_atomic_seq;
 	int i = 0;
 
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
-
 	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
 
 	for_each_sgt_dma(addr, sgt_iter, st) {
@@ -2446,8 +2438,6 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	 */
 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
-
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
 struct insert_entries {
@@ -2486,16 +2476,11 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm,
 	gen6_pte_t __iomem *pte =
 		(gen6_pte_t __iomem *)dev_priv->ggtt.gsm +
 		(offset >> PAGE_SHIFT);
-	int rpm_atomic_seq;
-
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
 	iowrite32(vm->pte_encode(addr, level, flags), pte);
 
 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
-
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
 /*
@@ -2515,11 +2500,8 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	gen6_pte_t __iomem *gtt_entries;
 	gen6_pte_t gtt_entry;
 	dma_addr_t addr;
-	int rpm_atomic_seq;
 	int i = 0;
 
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
-
 	gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
 
 	for_each_sgt_dma(addr, sgt_iter, st) {
@@ -2542,8 +2524,6 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	 */
 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
-
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
 static void nop_clear_range(struct i915_address_space *vm,
@@ -2554,7 +2534,6 @@ static void nop_clear_range(struct i915_address_space *vm,
 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 				  uint64_t start, uint64_t length)
 {
-	struct drm_i915_private *dev_priv = to_i915(vm->dev);
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	unsigned first_entry = start >> PAGE_SHIFT;
 	unsigned num_entries = length >> PAGE_SHIFT;
@@ -2562,9 +2541,6 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
 	int i;
-	int rpm_atomic_seq;
-
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
 	if (WARN(num_entries > max_entries,
 		 "First entry = %d; Num entries = %d (max=%d)\n",
@@ -2576,15 +2552,12 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 	for (i = 0; i < num_entries; i++)
 		gen8_set_pte(&gtt_base[i], scratch_pte);
 	readl(gtt_base);
-
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 				  uint64_t start,
 				  uint64_t length)
 {
-	struct drm_i915_private *dev_priv = to_i915(vm->dev);
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	unsigned first_entry = start >> PAGE_SHIFT;
 	unsigned num_entries = length >> PAGE_SHIFT;
@@ -2592,9 +2565,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
 	int i;
-	int rpm_atomic_seq;
-
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
 	if (WARN(num_entries > max_entries,
 		 "First entry = %d; Num entries = %d (max=%d)\n",
@@ -2607,8 +2577,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
 	readl(gtt_base);
-
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
 static void i915_ggtt_insert_page(struct i915_address_space *vm,
@@ -2617,16 +2585,10 @@ static void i915_ggtt_insert_page(struct i915_address_space *vm,
 				  enum i915_cache_level cache_level,
 				  u32 unused)
 {
-	struct drm_i915_private *dev_priv = to_i915(vm->dev);
 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-	int rpm_atomic_seq;
-
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
 	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
-
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
@@ -2634,33 +2596,18 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 				     uint64_t start,
 				     enum i915_cache_level cache_level, u32 unused)
 {
-	struct drm_i915_private *dev_priv = to_i915(vm->dev);
 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-	int rpm_atomic_seq;
-
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
 	intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
 
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
-
 }
 
 static void i915_ggtt_clear_range(struct i915_address_space *vm,
 				  uint64_t start,
 				  uint64_t length)
 {
-	struct drm_i915_private *dev_priv = to_i915(vm->dev);
-	unsigned first_entry = start >> PAGE_SHIFT;
-	unsigned num_entries = length >> PAGE_SHIFT;
-	int rpm_atomic_seq;
-
-	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
-
-	intel_gtt_clear_range(first_entry, num_entries);
-
-	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
+	intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
 }
 
 static int ggtt_bind_vma(struct i915_vma *vma,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index c06a33e0ff19..95a7d3005a74 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1664,23 +1664,6 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
 		DRM_DEBUG_DRIVER("RPM wakelock ref not held during HW access");
 }
 
-static inline int
-assert_rpm_atomic_begin(struct drm_i915_private *dev_priv)
-{
-	int seq = atomic_read(&dev_priv->pm.atomic_seq);
-
-	assert_rpm_wakelock_held(dev_priv);
-
-	return seq;
-}
-
-static inline void
-assert_rpm_atomic_end(struct drm_i915_private *dev_priv, int begin_seq)
-{
-	WARN_ONCE(atomic_read(&dev_priv->pm.atomic_seq) != begin_seq,
-		  "HW access outside of RPM atomic section\n");
-}
-
 /**
  * disable_rpm_wakeref_asserts - disable the RPM assert checks
  * @dev_priv: i915 device instance
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 0a9e7f2045d4..f212b71f5bdb 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -8047,5 +8047,4 @@ void intel_pm_setup(struct drm_device *dev)
 
 	dev_priv->pm.suspended = false;
 	atomic_set(&dev_priv->pm.wakeref_count, 0);
-	atomic_set(&dev_priv->pm.atomic_seq, 0);
 }
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index ee56a8756c07..82edba2f3589 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -2736,8 +2736,7 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
 	struct device *kdev = &pdev->dev;
 
 	assert_rpm_wakelock_held(dev_priv);
-	if (atomic_dec_and_test(&dev_priv->pm.wakeref_count))
-		atomic_inc(&dev_priv->pm.atomic_seq);
+	atomic_dec(&dev_priv->pm.wakeref_count);
 
 	pm_runtime_mark_last_busy(kdev);
 	pm_runtime_put_autosuspend(kdev);
-- 
2.9.3


* [PATCH 5/5] drm/i915: Move fence cancellation to runtime suspend
  2016-10-20  7:51 RPM vs struct mutex fixes Chris Wilson
                   ` (3 preceding siblings ...)
  2016-10-20  7:51 ` [PATCH 4/5] drm/i915: Remove RPM sequence checking Chris Wilson
@ 2016-10-20  7:51 ` Chris Wilson
  2016-10-20  9:17 ` ✗ Fi.CI.BAT: warning for series starting with [1/5] drm/i915: Move user fault tracking to a separate list Patchwork
  5 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2016-10-20  7:51 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

At the moment, we depend on RPM as a barrier in both
i915_gem_release_all_mmaps() and i915_gem_restore_fences().
i915_gem_restore_fences() is also called along !runtime-pm paths, but
we can move the marking of lost fences, alongside releasing the mmaps,
into a common i915_gem_runtime_suspend(). This has the advantage of
locating all the tricky barrier dependencies in one place.
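
A simplified standalone sketch of the cancellation loop this adds to
i915_gem_runtime_suspend() (stub types, not the driver structs): a
pinned fence triggers a warning since the device should not be powering
down, and every other register is detached for the user to reacquire on
waking:

#include <stdio.h>
#include <stddef.h>

struct fence_reg;
struct vma { struct fence_reg *fence; };
struct fence_reg { int pin_count; struct vma *vma; };

static void cancel_fences(struct fence_reg *regs, int n)
{
        for (int i = 0; i < n; i++) {
                struct vma *vma = regs[i].vma;

                if (regs[i].pin_count) {        /* WARN_ON in the driver */
                        fprintf(stderr, "fence %d still pinned!\n", i);
                        continue;
                }
                if (!vma)
                        continue;

                regs[i].vma = NULL;     /* fetch_and_zero() */
                vma->fence = NULL;      /* reacquired by the user on waking */
        }
}

int main(void)
{
        struct vma v = { NULL };
        struct fence_reg regs[2] = { { 0, &v }, { 0, NULL } };

        v.fence = &regs[0];
        cancel_fences(regs, 2);
        printf("vma fence after suspend: %p\n", (void *)v.fence);
        return 0;
}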

Suggested-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.c       |  6 ++----
 drivers/gpu/drm/i915/i915_drv.h       |  3 ++-
 drivers/gpu/drm/i915/i915_gem.c       | 25 +++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_gem_fence.c | 12 +++++-------
 4 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 885d33f341f3..99e4e044e958 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2278,10 +2278,8 @@ static int vlv_resume_prepare(struct drm_i915_private *dev_priv,
 
 	vlv_check_no_gt_access(dev_priv);
 
-	if (rpm_resume) {
+	if (rpm_resume)
 		intel_init_clock_gating(dev);
-		i915_gem_restore_fences(dev);
-	}
 
 	return ret;
 }
@@ -2307,7 +2305,7 @@ static int intel_runtime_suspend(struct device *kdev)
 	 * We are safe here against re-faults, since the fault handler takes
 	 * an RPM reference.
 	 */
-	i915_gem_release_all_mmaps(dev_priv);
+	i915_gem_runtime_suspend(dev_priv);
 
 	intel_guc_suspend(dev);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c87074b8ef61..4505ece9e302 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3129,9 +3129,10 @@ void i915_vma_destroy(struct i915_vma *vma);
 
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
 int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
-void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
 void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 
+void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
+
 int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 
 static inline int __sg_page_count(struct scatterlist *sg)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 64a88ce4b3c6..41088c31ada6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1952,10 +1952,10 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	intel_runtime_pm_put(i915);
 }
 
-void
-i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
+void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_gem_object *obj, *on;
+	int i;
 
 	/*
 	 * Only called during RPM suspend. All users of the userfault_list
@@ -1970,6 +1970,27 @@ i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
 		drm_vma_node_unmap(&obj->base.vma_node,
 				   obj->base.dev->anon_inode->i_mapping);
 	}
+
+	/* The fence will be lost when the device powers down. If any were
+	 * in use by hardware (i.e. they are pinned), we should not be powering
+	 * down! All other fences will be reacquired by the user upon waking.
+	 */
+	for (i = 0; i < dev_priv->num_fence_regs; i++) {
+		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
+		struct i915_vma *vma;
+
+		if (WARN_ON(reg->pin_count))
+			continue;
+
+		vma = fetch_and_zero(&reg->vma);
+		if (!vma)
+			continue;
+
+		GEM_BUG_ON(!list_empty(&vma->obj->userfault_link));
+
+		list_move(&reg->link, &dev_priv->mm.fence_list);
+		vma->fence = NULL;
+	}
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index 67013179b8ed..3c5a8082cac3 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -343,6 +343,9 @@ i915_vma_get_fence(struct i915_vma *vma)
 	struct drm_i915_fence_reg *fence;
 	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
 
+	/* Note that we revoke fences on runtime suspend. Therefore the user
+	 * must keep the device awake whilst using the fence.
+	 */
 	assert_rpm_wakelock_held(to_i915(vma->vm->dev));
 
 	/* Just update our place in the LRU if our fence is getting reused. */
@@ -368,19 +371,14 @@ i915_vma_get_fence(struct i915_vma *vma)
  * @dev: DRM device
  *
  * Restore the hw fence state to match the software tracking again, to be called
- * after a gpu reset and on resume.
+ * after a gpu reset and on resume. Note that on runtime suspend we only cancel
+ * the fences, to be reacquired by the user later.
  */
 void i915_gem_restore_fences(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	int i;
 
-	/* Note that this may be called outside of struct_mutex, by
-	 * runtime suspend/resume. The barrier we require is enforced by
-	 * rpm itself - all access to fences/GTT are only within an rpm
-	 * wakeref, and to acquire that wakeref you must pass through here.
-	 */
-
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
 		struct i915_vma *vma = reg->vma;
-- 
2.9.3


* ✗ Fi.CI.BAT: warning for series starting with [1/5] drm/i915: Move user fault tracking to a separate list
  2016-10-20  7:51 RPM vs struct mutex fixes Chris Wilson
                   ` (4 preceding siblings ...)
  2016-10-20  7:51 ` [PATCH 5/5] drm/i915: Move fence cancellation to runtime suspend Chris Wilson
@ 2016-10-20  9:17 ` Patchwork
  5 siblings, 0 replies; 7+ messages in thread
From: Patchwork @ 2016-10-20  9:17 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/5] drm/i915: Move user fault tracking to a separate list
URL   : https://patchwork.freedesktop.org/series/14080/
State : warning

== Summary ==

Series 14080v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/14080/revisions/1/mbox/

Test drv_module_reload_basic:
                pass       -> DMESG-WARN (fi-skl-6770hq)

fi-bdw-5557u     total:246  pass:231  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050     total:246  pass:204  dwarn:0   dfail:0   fail:0   skip:42 
fi-bxt-t5700     total:246  pass:216  dwarn:0   dfail:0   fail:0   skip:30 
fi-byt-j1900     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-byt-n2820     total:246  pass:211  dwarn:0   dfail:0   fail:0   skip:35 
fi-hsw-4770      total:246  pass:224  dwarn:0   dfail:0   fail:0   skip:22 
fi-hsw-4770r     total:246  pass:224  dwarn:0   dfail:0   fail:0   skip:22 
fi-ilk-650       total:246  pass:185  dwarn:0   dfail:0   fail:1   skip:60 
fi-ivb-3520m     total:246  pass:221  dwarn:0   dfail:0   fail:0   skip:25 
fi-ivb-3770      total:246  pass:221  dwarn:0   dfail:0   fail:0   skip:25 
fi-kbl-7200u     total:246  pass:222  dwarn:0   dfail:0   fail:0   skip:24 
fi-skl-6260u     total:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hq    total:246  pass:222  dwarn:1   dfail:0   fail:0   skip:23 
fi-skl-6700k     total:246  pass:221  dwarn:1   dfail:0   fail:0   skip:24 
fi-skl-6770hq    total:246  pass:231  dwarn:1   dfail:0   fail:0   skip:14 
fi-snb-2520m     total:246  pass:210  dwarn:0   dfail:0   fail:0   skip:36 
fi-snb-2600      total:246  pass:209  dwarn:0   dfail:0   fail:0   skip:37 

Results at /archive/results/CI_IGT_test/Patchwork_2772/

0b12b48da3bc580102665ee9efd7e834f6def00f drm-intel-nightly: 2016y-10m-20d-07h-52m-31s UTC integration manifest
852c2c5 drm/i915: Move fence cancellation to runtime suspend
29f36a3 drm/i915: Remove RPM sequence checking
3b789c0b drm/i915: Remove superfluous locking around userfault_list
95c7dc2 drm/i915: Use RPM as the barrier for controlling user mmap access
72207d3 drm/i915: Move user fault tracking to a separate list

