* [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim
@ 2019-01-07 11:54 Chris Wilson
  2019-01-07 11:54 ` [PATCH 02/46] drm/i915: Report the number of closed vma held by each context in debugfs Chris Wilson
                   ` (50 more replies)
  0 siblings, 51 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx

Ignore trying to shrink from i915 if we fail to acquire the struct_mutex
in the shrinker while performing direct-reclaim. The trade-off is (much)
lower latency for non-i915 clients, at an increased risk of being unable
to obtain a page from direct-reclaim without hitting the oom-notifier.
The proviso is that we still try hard to obtain the lock for kswapd so
that we can reap under heavy memory pressure.
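
In other words, the locking policy reduces to something like the
following sketch (simplified and illustrative only; shrinker_lock() in
the diff below is the real implementation, and try_shrinker_lock /
from_kswapd here are made-up names):

  #include <linux/mutex.h>

  /* Sketch: trylock, bail for direct-reclaim, block only for kswapd. */
  static bool try_shrinker_lock(struct mutex *lock, bool from_kswapd)
  {
  	if (mutex_trylock(lock))
  		return true;

  	/* Direct-reclaim: return immediately instead of spinning. */
  	if (!from_kswapd)
  		return false;

  	/*
  	 * kswapd: block for the lock, tainted with the shrinker
  	 * subclass so lockdep keeps watching this "recursive" path.
  	 */
  	mutex_lock_nested(lock, 1 /* I915_MM_SHRINKER */);
  	return true;
  }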

v2: Taint all mutexes taken within the shrinker with the struct_mutex
subclass as an early warning system, and drop I915_SHRINK_ACTIVE from
vmap to reduce the number of dangerous paths. We also have to drop
I915_SHRINK_ACTIVE from the oom-notifier to be able to make the same
claim that ACTIVE is only used from outside context, which fits in with
a longer-term strategy of avoiding stalls due to scanning active objects
during shrinking.

The danger in using the subclass struct_mutex is that we declare
ourselves more knowledgeable than lockdep and deprive ourselves of
automatic coverage. Instead, we require ourselves to mark up any mutex
taken inside the shrinker in order to detect lock-inversion, and if we
miss any we are doomed to a deadlock at the worst possible moment.
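
The mark-up itself amounts to priming lockdep once at init time;
roughly, with struct_mutex standing in for &i915->drm.struct_mutex (a
distilled sketch of the i915_gem_shrinker_taints_mutex() change in the
diff below):

  /*
   * Teach lockdep, at init time, that @mutex may be taken both under
   * fs_reclaim and under the shrinker's struct_mutex subclass, so any
   * inversion is reported on the first acquisition rather than as a
   * deadlock under memory pressure.
   */
  fs_reclaim_acquire(GFP_KERNEL);
  mutex_acquire(&struct_mutex->dep_map, I915_MM_SHRINKER, 0, _RET_IP_);
  mutex_lock(mutex);
  mutex_unlock(mutex);
  mutex_release(&struct_mutex->dep_map, 0, _RET_IP_);
  fs_reclaim_release(GFP_KERNEL);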

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |  7 +--
 drivers/gpu/drm/i915/i915_gem_gtt.c      |  8 +--
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 68 ++++++++++++++++--------
 3 files changed, 54 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7fa2a405c5fe..17a017645c5d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2899,9 +2899,9 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 	__i915_gem_object_unpin_pages(obj);
 }
 
-enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock */
+enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
 	I915_MM_NORMAL = 0,
-	I915_MM_SHRINKER
+	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
 };
 
 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
@@ -3187,7 +3187,8 @@ unsigned long i915_gem_shrink(struct drm_i915_private *i915,
 unsigned long i915_gem_shrink_all(struct drm_i915_private *i915);
 void i915_gem_shrinker_register(struct drm_i915_private *i915);
 void i915_gem_shrinker_unregister(struct drm_i915_private *i915);
-void i915_gem_shrinker_taints_mutex(struct mutex *mutex);
+void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
+				    struct mutex *mutex);
 
 /* i915_gem_tiling.c */
 static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d4c5973ea33d..5cc8968eb3bf 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -483,7 +483,7 @@ static void i915_address_space_init(struct i915_address_space *vm,
 	 * attempt holding the lock is immediately reported by lockdep.
 	 */
 	mutex_init(&vm->mutex);
-	i915_gem_shrinker_taints_mutex(&vm->mutex);
+	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
 
 	GEM_BUG_ON(!vm->total);
 	drm_mm_init(&vm->mm, 0, vm->total);
@@ -2245,7 +2245,8 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 				     DMA_ATTR_NO_WARN))
 			return 0;
 
-		/* If the DMA remap fails, one cause can be that we have
+		/*
+		 * If the DMA remap fails, one cause can be that we have
 		 * too many objects pinned in a small remapping table,
 		 * such as swiotlb. Incrementally purge all other objects and
 		 * try again - if there are no more pages to remove from
@@ -2255,8 +2256,7 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 	} while (i915_gem_shrink(to_i915(obj->base.dev),
 				 obj->base.size >> PAGE_SHIFT, NULL,
 				 I915_SHRINK_BOUND |
-				 I915_SHRINK_UNBOUND |
-				 I915_SHRINK_ACTIVE));
+				 I915_SHRINK_UNBOUND));
 
 	return -ENOSPC;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index ea90d3a0d511..72d6ea0cac7e 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -36,7 +36,9 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 
-static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
+static bool shrinker_lock(struct drm_i915_private *i915,
+			  unsigned int flags,
+			  bool *unlock)
 {
 	switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) {
 	case MUTEX_TRYLOCK_RECURSIVE:
@@ -45,15 +47,11 @@ static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
 
 	case MUTEX_TRYLOCK_FAILED:
 		*unlock = false;
-		preempt_disable();
-		do {
-			cpu_relax();
-			if (mutex_trylock(&i915->drm.struct_mutex)) {
-				*unlock = true;
-				break;
-			}
-		} while (!need_resched());
-		preempt_enable();
+		if (flags & I915_SHRINK_ACTIVE) {
+			mutex_lock_nested(&i915->drm.struct_mutex,
+					  I915_MM_SHRINKER);
+			*unlock = true;
+		}
 		return *unlock;
 
 	case MUTEX_TRYLOCK_SUCCESS:
@@ -160,7 +158,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	unsigned long scanned = 0;
 	bool unlock;
 
-	if (!shrinker_lock(i915, &unlock))
+	if (!shrinker_lock(i915, flags, &unlock))
 		return 0;
 
 	/*
@@ -357,7 +355,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 
 	sc->nr_scanned = 0;
 
-	if (!shrinker_lock(i915, &unlock))
+	if (!shrinker_lock(i915, 0, &unlock))
 		return SHRINK_STOP;
 
 	freed = i915_gem_shrink(i915,
@@ -397,7 +395,7 @@ shrinker_lock_uninterruptible(struct drm_i915_private *i915, bool *unlock,
 	do {
 		if (i915_gem_wait_for_idle(i915,
 					   0, MAX_SCHEDULE_TIMEOUT) == 0 &&
-		    shrinker_lock(i915, unlock))
+		    shrinker_lock(i915, 0, unlock))
 			break;
 
 		schedule_timeout_killable(1);
@@ -421,7 +419,11 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 	struct drm_i915_gem_object *obj;
 	unsigned long unevictable, bound, unbound, freed_pages;
 
-	freed_pages = i915_gem_shrink_all(i915);
+	intel_runtime_pm_get(i915);
+	freed_pages = i915_gem_shrink(i915, -1UL, NULL,
+				      I915_SHRINK_BOUND |
+				      I915_SHRINK_UNBOUND);
+	intel_runtime_pm_put(i915);
 
 	/* Because we may be allocating inside our own driver, we cannot
 	 * assert that there are no objects with pinned pages that are not
@@ -447,10 +449,6 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 		pr_info("Purging GPU memory, %lu pages freed, "
 			"%lu pages still pinned.\n",
 			freed_pages, unevictable);
-	if (unbound || bound)
-		pr_err("%lu and %lu pages still available in the "
-		       "bound and unbound GPU page lists.\n",
-		       bound, unbound);
 
 	*(unsigned long *)ptr += freed_pages;
 	return NOTIFY_DONE;
@@ -480,7 +478,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 	freed_pages += i915_gem_shrink(i915, -1UL, NULL,
 				       I915_SHRINK_BOUND |
 				       I915_SHRINK_UNBOUND |
-				       I915_SHRINK_ACTIVE |
 				       I915_SHRINK_VMAPS);
 	intel_runtime_pm_put(i915);
 
@@ -533,13 +530,40 @@ void i915_gem_shrinker_unregister(struct drm_i915_private *i915)
 	unregister_shrinker(&i915->mm.shrinker);
 }
 
-void i915_gem_shrinker_taints_mutex(struct mutex *mutex)
+void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
+				    struct mutex *mutex)
 {
+	bool unlock = false;
+
 	if (!IS_ENABLED(CONFIG_LOCKDEP))
 		return;
 
+	if (!lockdep_is_held_type(&i915->drm.struct_mutex, -1)) {
+		mutex_acquire(&i915->drm.struct_mutex.dep_map,
+			      I915_MM_NORMAL, 0, _RET_IP_);
+		unlock = true;
+	}
+
 	fs_reclaim_acquire(GFP_KERNEL);
-	mutex_lock(mutex);
-	mutex_unlock(mutex);
+
+	/*
+	 * As we invariably rely on the struct_mutex within the shrinker,
+	 * but have a complicated recursion dance, taint all the mutexes used
+	 * within the shrinker with the struct_mutex. For completeness, we
+	 * taint with all subclass of struct_mutex, even though we should
+	 * only need tainting by I915_MM_NORMAL to catch possible ABBA
+	 * deadlocks from using struct_mutex inside @mutex.
+	 */
+	mutex_acquire(&i915->drm.struct_mutex.dep_map,
+		      I915_MM_SHRINKER, 0, _RET_IP_);
+
+	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
+	mutex_release(&mutex->dep_map, 0, _RET_IP_);
+
+	mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
+
 	fs_reclaim_release(GFP_KERNEL);
+
+	if (unlock)
+		mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
 }
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread

* [PATCH 02/46] drm/i915: Report the number of closed vma held by each context in debugfs
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 12:35   ` Mika Kuoppala
  2019-01-07 11:54 ` [PATCH 03/46] drm/i915: Track all held rpm wakerefs Chris Wilson
                   ` (49 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx

Include the total size of closed vma when reporting the per_ctx_stats of
debugfs/i915_gem_objects.

Whilst adjusting the context tracking, note that we can simply use our
list of contexts in i915->contexts rather than take the roundabout route
via dev->filelist and the per-file context idr, with the result that we
can show objects allocated to different vm (i.e. contexts within a file).

We change the output to show every context of each client, with its own
unique set of objects (for full-ppgtt machines, i.e. gen7+; for older
hardware all objects are in the global gtt and so cannot be associated
with a single context). That should result in no loss of information,
and for gen7+, no duplication of active objects.
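
The shape of the new traversal is roughly as follows (an illustrative
sketch only; print_context_stats() in the diff carries the real
details):

  struct i915_gem_context *ctx;

  /*
   * Walk every context directly, instead of dev->filelist plus the
   * per-file context idr, so stats naturally group by vm (context).
   */
  list_for_each_entry(ctx, &i915->contexts.list, link) {
  	struct file_stats stats = { .vm = &ctx->ppgtt->vm };

  	/* ... accumulate each object's size against stats.vm ... */
  }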

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 124 +++++++++++-----------------
 1 file changed, 47 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 193823048f96..c77326a7d058 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -297,11 +297,12 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
 }
 
 struct file_stats {
-	struct drm_i915_file_private *file_priv;
+	struct i915_address_space *vm;
 	unsigned long count;
 	u64 total, unbound;
 	u64 global, shared;
 	u64 active, inactive;
+	u64 closed;
 };
 
 static int per_file_stats(int id, void *ptr, void *data)
@@ -326,9 +327,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 		if (i915_vma_is_ggtt(vma)) {
 			stats->global += vma->node.size;
 		} else {
-			struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vma->vm);
-
-			if (ppgtt->vm.file != stats->file_priv)
+			if (vma->vm != stats->vm)
 				continue;
 		}
 
@@ -336,6 +335,9 @@ static int per_file_stats(int id, void *ptr, void *data)
 			stats->active += vma->node.size;
 		else
 			stats->inactive += vma->node.size;
+
+		if (i915_vma_is_closed(vma))
+			stats->closed += vma->node.size;
 	}
 
 	return 0;
@@ -343,7 +345,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 
 #define print_file_stats(m, name, stats) do { \
 	if (stats.count) \
-		seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound)\n", \
+		seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound, %llu closed)\n", \
 			   name, \
 			   stats.count, \
 			   stats.total, \
@@ -351,20 +353,19 @@ static int per_file_stats(int id, void *ptr, void *data)
 			   stats.inactive, \
 			   stats.global, \
 			   stats.shared, \
-			   stats.unbound); \
+			   stats.unbound, \
+			   stats.closed); \
 } while (0)
 
 static void print_batch_pool_stats(struct seq_file *m,
 				   struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_gem_object *obj;
-	struct file_stats stats;
 	struct intel_engine_cs *engine;
+	struct file_stats stats = {};
 	enum intel_engine_id id;
 	int j;
 
-	memset(&stats, 0, sizeof(stats));
-
 	for_each_engine(engine, dev_priv, id) {
 		for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) {
 			list_for_each_entry(obj,
@@ -377,44 +378,47 @@ static void print_batch_pool_stats(struct seq_file *m,
 	print_file_stats(m, "[k]batch pool", stats);
 }
 
-static int per_file_ctx_stats(int idx, void *ptr, void *data)
+static void print_context_stats(struct seq_file *m,
+				struct drm_i915_private *i915)
 {
-	struct i915_gem_context *ctx = ptr;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
+	struct file_stats kstats = {};
+	struct i915_gem_context *ctx;
 
-	for_each_engine(engine, ctx->i915, id) {
-		struct intel_context *ce = to_intel_context(ctx, engine);
+	list_for_each_entry(ctx, &i915->contexts.list, link) {
+		struct intel_engine_cs *engine;
+		enum intel_engine_id id;
 
-		if (ce->state)
-			per_file_stats(0, ce->state->obj, data);
-		if (ce->ring)
-			per_file_stats(0, ce->ring->vma->obj, data);
-	}
+		for_each_engine(engine, i915, id) {
+			struct intel_context *ce = to_intel_context(ctx, engine);
 
-	return 0;
-}
+			if (ce->state)
+				per_file_stats(0, ce->state->obj, &kstats);
+			if (ce->ring)
+				per_file_stats(0, ce->ring->vma->obj, &kstats);
+		}
 
-static void print_context_stats(struct seq_file *m,
-				struct drm_i915_private *dev_priv)
-{
-	struct drm_device *dev = &dev_priv->drm;
-	struct file_stats stats;
-	struct drm_file *file;
+		if (!IS_ERR_OR_NULL(ctx->file_priv)) {
+			struct file_stats stats = { .vm = &ctx->ppgtt->vm, };
+			struct drm_file *file = ctx->file_priv->file;
+			struct task_struct *task;
+			char name[80];
 
-	memset(&stats, 0, sizeof(stats));
+			spin_lock(&file->table_lock);
+			idr_for_each(&file->object_idr, per_file_stats, &stats);
+			spin_unlock(&file->table_lock);
 
-	mutex_lock(&dev->struct_mutex);
-	if (dev_priv->kernel_context)
-		per_file_ctx_stats(0, dev_priv->kernel_context, &stats);
+			rcu_read_lock();
+			task = pid_task(ctx->pid ?: file->pid, PIDTYPE_PID);
+			snprintf(name, sizeof(name), "%s/%d",
+				 task ? task->comm : "<unknown>",
+				 ctx->user_handle);
+			rcu_read_unlock();
 
-	list_for_each_entry(file, &dev->filelist, lhead) {
-		struct drm_i915_file_private *fpriv = file->driver_priv;
-		idr_for_each(&fpriv->context_idr, per_file_ctx_stats, &stats);
+			print_file_stats(m, name, stats);
+		}
 	}
-	mutex_unlock(&dev->struct_mutex);
 
-	print_file_stats(m, "[k]contexts", stats);
+	print_file_stats(m, "[k]contexts", kstats);
 }
 
 static int i915_gem_object_info(struct seq_file *m, void *data)
@@ -426,14 +430,9 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 	u64 size, mapped_size, purgeable_size, dpy_size, huge_size;
 	struct drm_i915_gem_object *obj;
 	unsigned int page_sizes = 0;
-	struct drm_file *file;
 	char buf[80];
 	int ret;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
-
 	seq_printf(m, "%u objects, %llu bytes\n",
 		   dev_priv->mm.object_count,
 		   dev_priv->mm.object_memory);
@@ -514,43 +513,14 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 					buf, sizeof(buf)));
 
 	seq_putc(m, '\n');
-	print_batch_pool_stats(m, dev_priv);
-	mutex_unlock(&dev->struct_mutex);
-
-	mutex_lock(&dev->filelist_mutex);
-	print_context_stats(m, dev_priv);
-	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
-		struct file_stats stats;
-		struct drm_i915_file_private *file_priv = file->driver_priv;
-		struct i915_request *request;
-		struct task_struct *task;
-
-		mutex_lock(&dev->struct_mutex);
 
-		memset(&stats, 0, sizeof(stats));
-		stats.file_priv = file->driver_priv;
-		spin_lock(&file->table_lock);
-		idr_for_each(&file->object_idr, per_file_stats, &stats);
-		spin_unlock(&file->table_lock);
-		/*
-		 * Although we have a valid reference on file->pid, that does
-		 * not guarantee that the task_struct who called get_pid() is
-		 * still alive (e.g. get_pid(current) => fork() => exit()).
-		 * Therefore, we need to protect this ->comm access using RCU.
-		 */
-		request = list_first_entry_or_null(&file_priv->mm.request_list,
-						   struct i915_request,
-						   client_link);
-		rcu_read_lock();
-		task = pid_task(request && request->gem_context->pid ?
-				request->gem_context->pid : file->pid,
-				PIDTYPE_PID);
-		print_file_stats(m, task ? task->comm : "<unknown>", stats);
-		rcu_read_unlock();
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
 
-		mutex_unlock(&dev->struct_mutex);
-	}
-	mutex_unlock(&dev->filelist_mutex);
+	print_batch_pool_stats(m, dev_priv);
+	print_context_stats(m, dev_priv);
+	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
 }
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread

* [PATCH 03/46] drm/i915: Track all held rpm wakerefs
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
  2019-01-07 11:54 ` [PATCH 02/46] drm/i915: Report the number of closed vma held by each context in debugfs Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 13:14   ` Mika Kuoppala
                     ` (2 more replies)
  2019-01-07 11:54 ` [PATCH 04/46] drm/i915: Markup paired operations on wakerefs Chris Wilson
                   ` (48 subsequent siblings)
  50 siblings, 3 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Every time we take a wakeref, record the stack trace of where it was
taken, clearing the set if we ever drop back to no owners. For debugging
an rpm leak, we can look at all the current wakerefs and check if they
have a matching rpm_put.
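
The capture itself is a small stackdepot dance; roughly (a sketch using
the stacktrace/stackdepot API of this era, mirroring
track_intel_runtime_pm_wakeref() in the diff below):

  #include <linux/stackdepot.h>
  #include <linux/stacktrace.h>

  static depot_stack_handle_t capture_wakeref_site(void)
  {
  	unsigned long entries[8];
  	struct stack_trace trace = {
  		.entries = entries,
  		.max_entries = ARRAY_SIZE(entries),
  		.skip = 1, /* omit this helper from the trace */
  	};

  	save_stack_trace(&trace);

  	/*
  	 * Returns a small deduplicated handle, or 0 on allocation
  	 * failure; cheap enough to record on every wakeref.
  	 */
  	return depot_save_stack(&trace, GFP_NOWAIT | __GFP_NOWARN);
  }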

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/Kconfig.debug            |   2 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |   6 +
 drivers/gpu/drm/i915/i915_drv.c               |   8 +-
 drivers/gpu/drm/i915/i915_drv.h               |   7 +
 drivers/gpu/drm/i915/intel_drv.h              |  44 ++-
 drivers/gpu/drm/i915/intel_runtime_pm.c       | 267 ++++++++++++++++--
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   8 +-
 7 files changed, 292 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 9e36ffb5eb7c..a97929c47466 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -21,11 +21,11 @@ config DRM_I915_DEBUG
         select DEBUG_FS
         select PREEMPT_COUNT
         select I2C_CHARDEV
+        select STACKDEPOT
         select DRM_DP_AUX_CHARDEV
         select X86_MSR # used by igt/pm_rpm
         select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
         select DRM_DEBUG_MM if DRM=y
-        select STACKDEPOT if DRM=y # for DRM_DEBUG_MM
 	select DRM_DEBUG_SELFTEST
 	select SW_SYNC # signaling validation framework (igt/syncobj*)
 	select DRM_I915_SW_FENCE_DEBUG_OBJECTS
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c77326a7d058..3a369245d7e6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2702,6 +2702,12 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
 		   pci_power_name(pdev->current_state),
 		   pdev->current_state);
 
+	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)) {
+		struct drm_printer p = drm_seq_file_printer(m);
+
+		print_intel_runtime_pm_wakeref(dev_priv, &p);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 17fca3ba343e..e2f4753ca21f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -906,6 +906,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv)
 	mutex_init(&dev_priv->pps_mutex);
 
 	i915_memcpy_init_early(dev_priv);
+	intel_runtime_pm_init_early(dev_priv);
 
 	ret = i915_workqueues_init(dev_priv);
 	if (ret < 0)
@@ -1808,8 +1809,7 @@ void i915_driver_unload(struct drm_device *dev)
 	i915_driver_cleanup_mmio(dev_priv);
 
 	enable_rpm_wakeref_asserts(dev_priv);
-
-	WARN_ON(atomic_read(&dev_priv->runtime_pm.wakeref_count));
+	intel_runtime_pm_cleanup(dev_priv);
 }
 
 static void i915_driver_release(struct drm_device *dev)
@@ -2011,6 +2011,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
 
 out:
 	enable_rpm_wakeref_asserts(dev_priv);
+	if (!dev_priv->uncore.user_forcewake.count)
+		intel_runtime_pm_cleanup(dev_priv);
 
 	return ret;
 }
@@ -2966,7 +2968,7 @@ static int intel_runtime_suspend(struct device *kdev)
 	}
 
 	enable_rpm_wakeref_asserts(dev_priv);
-	WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count));
+	intel_runtime_pm_cleanup(dev_priv);
 
 	if (intel_uncore_arm_unclaimed_mmio_detection(dev_priv))
 		DRM_ERROR("Unclaimed access detected prior to suspending\n");
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 17a017645c5d..60b98103aba3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -45,6 +45,7 @@
 #include <linux/pm_qos.h>
 #include <linux/reservation.h>
 #include <linux/shmem_fs.h>
+#include <linux/stackdepot.h>
 
 #include <drm/drmP.h>
 #include <drm/intel-gtt.h>
@@ -1156,6 +1157,12 @@ struct i915_runtime_pm {
 	atomic_t wakeref_count;
 	bool suspended;
 	bool irqs_enabled;
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+	spinlock_t debug_lock;
+	depot_stack_handle_t *debug_owners;
+	unsigned long debug_count;
+#endif
 };
 
 enum intel_pipe_crc_source {
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 1a11c2beb7f3..ac513fd70315 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -41,6 +41,8 @@
 #include <drm/drm_atomic.h>
 #include <media/cec-notifier.h>
 
+struct drm_printer;
+
 /**
  * __wait_for - magic wait macro
  *
@@ -2084,6 +2086,7 @@ bool intel_psr_enabled(struct intel_dp *intel_dp);
 void intel_init_quirks(struct drm_i915_private *dev_priv);
 
 /* intel_runtime_pm.c */
+void intel_runtime_pm_init_early(struct drm_i915_private *dev_priv);
 int intel_power_domains_init(struct drm_i915_private *);
 void intel_power_domains_cleanup(struct drm_i915_private *dev_priv);
 void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume);
@@ -2106,6 +2109,7 @@ void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume);
 void bxt_display_core_uninit(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_enable(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_disable(struct drm_i915_private *dev_priv);
+void intel_runtime_pm_cleanup(struct drm_i915_private *dev_priv);
 const char *
 intel_display_power_domain_str(enum intel_display_power_domain domain);
 
@@ -2123,23 +2127,23 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
 			    u8 req_slices);
 
 static inline void
-assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv)
+assert_rpm_device_not_suspended(struct drm_i915_private *i915)
 {
-	WARN_ONCE(dev_priv->runtime_pm.suspended,
+	WARN_ONCE(i915->runtime_pm.suspended,
 		  "Device suspended during HW access\n");
 }
 
 static inline void
-assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
+assert_rpm_wakelock_held(struct drm_i915_private *i915)
 {
-	assert_rpm_device_not_suspended(dev_priv);
-	WARN_ONCE(!atomic_read(&dev_priv->runtime_pm.wakeref_count),
+	assert_rpm_device_not_suspended(i915);
+	WARN_ONCE(!atomic_read(&i915->runtime_pm.wakeref_count),
 		  "RPM wakelock ref not held during HW access");
 }
 
 /**
  * disable_rpm_wakeref_asserts - disable the RPM assert checks
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function disable asserts that check if we hold an RPM wakelock
  * reference, while keeping the device-not-suspended checks still enabled.
@@ -2156,14 +2160,14 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
  * enable_rpm_wakeref_asserts().
  */
 static inline void
-disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
+disable_rpm_wakeref_asserts(struct drm_i915_private *i915)
 {
-	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
+	atomic_inc(&i915->runtime_pm.wakeref_count);
 }
 
 /**
  * enable_rpm_wakeref_asserts - re-enable the RPM assert checks
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function re-enables the RPM assert checks after disabling them with
  * disable_rpm_wakeref_asserts. It's meant to be used only in special
@@ -2173,15 +2177,25 @@ disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
  * disable_rpm_wakeref_asserts().
  */
 static inline void
-enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
+enable_rpm_wakeref_asserts(struct drm_i915_private *i915)
 {
-	atomic_dec(&dev_priv->runtime_pm.wakeref_count);
+	atomic_dec(&i915->runtime_pm.wakeref_count);
 }
 
-void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
-bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv);
-void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
-void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
+void intel_runtime_pm_get(struct drm_i915_private *i915);
+bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
+void intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
+void intel_runtime_pm_put(struct drm_i915_private *i915);
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
+				    struct drm_printer *p);
+#else
+static inline void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
+						  struct drm_printer *p)
+{
+}
+#endif
 
 void chv_phy_powergate_lanes(struct intel_encoder *encoder,
 			     bool override, unsigned int mask);
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 9e9501f82f06..67d71cc604f1 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -29,6 +29,8 @@
 #include <linux/pm_runtime.h>
 #include <linux/vgaarb.h>
 
+#include <drm/drm_print.h>
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 
@@ -49,6 +51,189 @@
  * present for a given platform.
  */
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+
+#include <linux/sort.h>
+
+#define STACKDEPTH 8
+
+static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+	spin_lock_init(&i915->runtime_pm.debug_lock);
+}
+
+static noinline void
+track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+	unsigned long entries[STACKDEPTH];
+	struct stack_trace trace = {
+		.entries = entries,
+		.max_entries = ARRAY_SIZE(entries),
+		.skip = 1
+	};
+	unsigned long flags;
+	depot_stack_handle_t stack, *stacks;
+
+	if (!HAS_RUNTIME_PM(i915))
+		return;
+
+	save_stack_trace(&trace);
+	if (trace.nr_entries &&
+	    trace.entries[trace.nr_entries - 1] == ULONG_MAX)
+		trace.nr_entries--;
+
+	stack = depot_save_stack(&trace, GFP_NOWAIT | __GFP_NOWARN);
+	if (!stack)
+		return;
+
+	spin_lock_irqsave(&rpm->debug_lock, flags);
+	stacks = krealloc(rpm->debug_owners,
+			  (rpm->debug_count + 1) * sizeof(*stacks),
+			  GFP_NOWAIT | __GFP_NOWARN);
+	if (stacks) {
+		stacks[rpm->debug_count++] = stack;
+		rpm->debug_owners = stacks;
+	}
+	spin_unlock_irqrestore(&rpm->debug_lock, flags);
+}
+
+static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+	depot_stack_handle_t *stacks;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rpm->debug_lock, flags);
+	stacks = fetch_and_zero(&rpm->debug_owners);
+	rpm->debug_count = 0;
+	spin_unlock_irqrestore(&rpm->debug_lock, flags);
+
+	kfree(stacks);
+}
+
+static int cmphandle(const void *_a, const void *_b)
+{
+	const depot_stack_handle_t * const a = _a, * const b = _b;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void __print_intel_runtime_pm_wakeref(struct drm_printer *p,
+					     depot_stack_handle_t *stacks,
+					     unsigned long count)
+{
+	unsigned long entries[STACKDEPTH];
+	unsigned long i;
+	char *buf;
+
+	drm_printf(p, "Wakeref count: %lu\n", count);
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		return;
+
+	sort(stacks, count, sizeof(*stacks), cmphandle, NULL);
+
+	for (i = 0; i < count; i++) {
+		struct stack_trace trace = {
+			.entries = entries,
+			.max_entries = ARRAY_SIZE(entries),
+		};
+		depot_stack_handle_t stack = stacks[i];
+		unsigned long rep;
+
+		rep = 1;
+		while (i + 1 < count && stacks[i + 1] == stack)
+			rep++, i++;
+		depot_fetch_stack(stack, &trace);
+		snprint_stack_trace(buf, PAGE_SIZE, &trace, 2);
+		drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
+	}
+
+	kfree(buf);
+}
+
+static void show_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+	depot_stack_handle_t *stacks;
+	unsigned long flags, count;
+	struct drm_printer p;
+
+	spin_lock_irqsave(&rpm->debug_lock, flags);
+	stacks = fetch_and_zero(&rpm->debug_owners);
+	count = fetch_and_zero(&rpm->debug_count);
+	spin_unlock_irqrestore(&rpm->debug_lock, flags);
+	if (!count)
+		return;
+
+	p = drm_debug_printer("i915");
+	__print_intel_runtime_pm_wakeref(&p, stacks, count);
+
+	kfree(stacks);
+}
+
+void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
+				    struct drm_printer *p)
+{
+	depot_stack_handle_t *stacks = NULL;
+	unsigned long count = 0;
+
+	do {
+		struct i915_runtime_pm *rpm = &i915->runtime_pm;
+		unsigned long alloc = count;
+		depot_stack_handle_t *s;
+
+		spin_lock_irq(&rpm->debug_lock);
+		count = rpm->debug_count;
+		if (count == alloc) {
+			memcpy(stacks,
+			       rpm->debug_owners,
+			       count * sizeof(*stacks));
+		}
+		spin_unlock_irq(&rpm->debug_lock);
+		if (count == alloc)
+			break;
+
+		s = krealloc(stacks, count * sizeof(*stacks), GFP_KERNEL);
+		if (!s)
+			goto out;
+
+		stacks = s;
+	} while (1);
+
+	__print_intel_runtime_pm_wakeref(p, stacks, count);
+
+out:
+	kfree(stacks);
+}
+
+#else
+
+static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+}
+
+static void track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+}
+
+static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+}
+
+static void show_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+}
+
+#endif
+
 bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
 					 enum i915_power_well_id power_well_id);
 
@@ -3986,7 +4171,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
 
 /**
  * intel_runtime_pm_get - grab a runtime pm reference
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function grabs a device-level runtime pm reference (mostly used for GEM
  * code to ensure the GTT or GT is on) and ensures that it is powered up.
@@ -3994,22 +4179,24 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
  * Any runtime pm reference obtained by this function must have a symmetric
  * call to intel_runtime_pm_put() to release the reference again.
  */
-void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_get(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 	int ret;
 
 	ret = pm_runtime_get_sync(kdev);
 	WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret);
 
-	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
-	assert_rpm_wakelock_held(dev_priv);
+	atomic_inc(&i915->runtime_pm.wakeref_count);
+	assert_rpm_wakelock_held(i915);
+
+	track_intel_runtime_pm_wakeref(i915);
 }
 
 /**
  * intel_runtime_pm_get_if_in_use - grab a runtime pm reference if device in use
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function grabs a device-level runtime pm reference if the device is
  * already in use and ensures that it is powered up. It is illegal to try
@@ -4020,10 +4207,10 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
  *
  * Returns: True if the wakeref was acquired, or False otherwise.
  */
-bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
+bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
 {
 	if (IS_ENABLED(CONFIG_PM)) {
-		struct pci_dev *pdev = dev_priv->drm.pdev;
+		struct pci_dev *pdev = i915->drm.pdev;
 		struct device *kdev = &pdev->dev;
 
 		/*
@@ -4036,15 +4223,17 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
 			return false;
 	}
 
-	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
-	assert_rpm_wakelock_held(dev_priv);
+	atomic_inc(&i915->runtime_pm.wakeref_count);
+	assert_rpm_wakelock_held(i915);
+
+	track_intel_runtime_pm_wakeref(i915);
 
 	return true;
 }
 
 /**
  * intel_runtime_pm_get_noresume - grab a runtime pm reference
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function grabs a device-level runtime pm reference (mostly used for GEM
  * code to ensure the GTT or GT is on).
@@ -4059,32 +4248,35 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
  * Any runtime pm reference obtained by this function must have a symmetric
  * call to intel_runtime_pm_put() to release the reference again.
  */
-void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
-	assert_rpm_wakelock_held(dev_priv);
+	assert_rpm_wakelock_held(i915);
 	pm_runtime_get_noresume(kdev);
 
-	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
+	atomic_inc(&i915->runtime_pm.wakeref_count);
+
+	track_intel_runtime_pm_wakeref(i915);
 }
 
 /**
  * intel_runtime_pm_put - release a runtime pm reference
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function drops the device-level runtime pm reference obtained by
  * intel_runtime_pm_get() and might power down the corresponding
  * hardware block right away if this is the last reference.
  */
-void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_put(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
-	assert_rpm_wakelock_held(dev_priv);
-	atomic_dec(&dev_priv->runtime_pm.wakeref_count);
+	assert_rpm_wakelock_held(i915);
+	if (atomic_dec_and_test(&i915->runtime_pm.wakeref_count))
+		untrack_intel_runtime_pm_wakeref(i915);
 
 	pm_runtime_mark_last_busy(kdev);
 	pm_runtime_put_autosuspend(kdev);
@@ -4092,7 +4284,7 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
 
 /**
  * intel_runtime_pm_enable - enable runtime pm
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function enables runtime pm at the end of the driver load sequence.
  *
@@ -4100,9 +4292,9 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
  * subordinate display power domains. That is done by
  * intel_power_domains_enable().
  */
-void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_enable(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
 	/*
@@ -4124,7 +4316,7 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
 	 * so the driver's own RPM reference tracking asserts also work on
 	 * platforms without RPM support.
 	 */
-	if (!HAS_RUNTIME_PM(dev_priv)) {
+	if (!HAS_RUNTIME_PM(i915)) {
 		int ret;
 
 		pm_runtime_dont_use_autosuspend(kdev);
@@ -4142,17 +4334,36 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
 	pm_runtime_put_autosuspend(kdev);
 }
 
-void intel_runtime_pm_disable(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_disable(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
 	/* Transfer rpm ownership back to core */
-	WARN(pm_runtime_get_sync(&dev_priv->drm.pdev->dev) < 0,
+	WARN(pm_runtime_get_sync(kdev) < 0,
 	     "Failed to pass rpm ownership back to core\n");
 
 	pm_runtime_dont_use_autosuspend(kdev);
 
-	if (!HAS_RUNTIME_PM(dev_priv))
+	if (!HAS_RUNTIME_PM(i915))
 		pm_runtime_put(kdev);
 }
+
+void intel_runtime_pm_cleanup(struct drm_i915_private *i915)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+
+	if (WARN(atomic_read(&rpm->wakeref_count),
+		 "i915->runtime_pm.wakeref_count=%d on cleanup\n",
+		 atomic_read(&rpm->wakeref_count))) {
+		show_intel_runtime_pm_wakeref(i915);
+		atomic_set(&rpm->wakeref_count, 0);
+	}
+
+	untrack_intel_runtime_pm_wakeref(i915);
+}
+
+void intel_runtime_pm_init_early(struct drm_i915_private *i915)
+{
+	init_intel_runtime_pm_wakeref(i915);
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 43ed8b28aeaa..0eb283e7fc96 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -154,15 +154,17 @@ struct drm_i915_private *mock_gem_device(void)
 	pdev->dev.archdata.iommu = (void *)-1;
 #endif
 
+	i915 = (struct drm_i915_private *)(pdev + 1);
+	pci_set_drvdata(pdev, i915);
+
+	intel_runtime_pm_init_early(i915);
+
 	dev_pm_domain_set(&pdev->dev, &pm_domain);
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_dont_use_autosuspend(&pdev->dev);
 	if (pm_runtime_enabled(&pdev->dev))
 		WARN_ON(pm_runtime_get_sync(&pdev->dev));
 
-	i915 = (struct drm_i915_private *)(pdev + 1);
-	pci_set_drvdata(pdev, i915);
-
 	err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev);
 	if (err) {
 		pr_err("Failed to initialise mock GEM device: err=%d\n", err);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread

* [PATCH 04/46] drm/i915: Markup paired operations on wakerefs
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
  2019-01-07 11:54 ` [PATCH 02/46] drm/i915: Report the number of closed vma held by each context in debugfs Chris Wilson
  2019-01-07 11:54 ` [PATCH 03/46] drm/i915: Track all held rpm wakerefs Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-08 16:23   ` Mika Kuoppala
  2019-01-07 11:54 ` [PATCH 05/46] drm/i915: Track GT wakeref Chris Wilson
                   ` (47 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

The majority of runtime-pm operations are bounded and scoped within a
function; for these it is easy to verify that the wakerefs are handled
correctly. We can employ the compiler to help us, and reduce the number
of wakerefs tracked when debugging, by passing around cookies provided
by the various rpm_get functions to their rpm_put counterpart. This
makes the pairing explicit, and given the required wakeref cookie the
compiler can verify that we pass an initialised value to the rpm_put
(quite handy for double-checking error paths).

For regular builds, the compiler should be able to eliminate the unused
local variables and the program growth should be minimal. Fwiw, it came
out as a net improvement, as gcc was able to refactor rpm_get and
rpm_get_if_in_use together.

v2: Just s/rpm_put/rpm_put_unchecked/ everywhere, leaving the manual
mark-up for smaller, more targeted patches.
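
Once call sites are converted in the follow-up patches, the intended
pattern looks roughly like this (a sketch of the end state this series
works towards; this patch only introduces the intel_wakeref_t cookie
type and the _unchecked put):

  intel_wakeref_t wakeref;

  wakeref = intel_runtime_pm_get(i915);	/* cookie = stackdepot handle */

  /* ... access hardware under the wakeref ... */

  /* The compiler warns if @wakeref could be used uninitialised. */
  intel_runtime_pm_put(i915, wakeref);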

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/Kconfig.debug            |  1 +
 drivers/gpu/drm/i915/gvt/aperture_gm.c        |  8 +-
 drivers/gpu/drm/i915/gvt/gvt.h                |  2 +-
 drivers/gpu/drm/i915/gvt/sched_policy.c       |  2 +-
 drivers/gpu/drm/i915/gvt/scheduler.c          |  4 +-
 drivers/gpu/drm/i915/i915_debugfs.c           | 54 +++++------
 drivers/gpu/drm/i915/i915_drv.h               |  2 +
 drivers/gpu/drm/i915/i915_gem.c               | 20 ++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  2 +-
 drivers/gpu/drm/i915/i915_gem_fence_reg.c     |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  8 +-
 drivers/gpu/drm/i915/i915_gem_shrinker.c      | 10 +--
 drivers/gpu/drm/i915/i915_irq.c               |  2 +-
 drivers/gpu/drm/i915/i915_perf.c              |  4 +-
 drivers/gpu/drm/i915/i915_pmu.c               |  6 +-
 drivers/gpu/drm/i915/i915_sysfs.c             | 12 +--
 drivers/gpu/drm/i915/intel_display.c          |  2 +-
 drivers/gpu/drm/i915/intel_drv.h              | 15 +++-
 drivers/gpu/drm/i915/intel_engine_cs.c        |  4 +-
 drivers/gpu/drm/i915/intel_fbdev.c            |  4 +-
 drivers/gpu/drm/i915/intel_guc_log.c          |  6 +-
 drivers/gpu/drm/i915/intel_hotplug.c          |  2 +-
 drivers/gpu/drm/i915/intel_huc.c              |  2 +-
 drivers/gpu/drm/i915/intel_panel.c            |  2 +-
 drivers/gpu/drm/i915/intel_runtime_pm.c       | 89 +++++++++++++++----
 drivers/gpu/drm/i915/intel_uncore.c           |  2 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c   |  2 +-
 drivers/gpu/drm/i915/selftests/i915_gem.c     | 10 +--
 .../drm/i915/selftests/i915_gem_coherency.c   |  2 +-
 .../gpu/drm/i915/selftests/i915_gem_context.c | 10 +--
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  2 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  4 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  |  6 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |  8 +-
 drivers/gpu/drm/i915/selftests/intel_guc.c    |  4 +-
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |  6 +-
 drivers/gpu/drm/i915/selftests/intel_lrc.c    | 10 +--
 .../drm/i915/selftests/intel_workarounds.c    | 10 +--
 38 files changed, 203 insertions(+), 138 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index a97929c47466..ad4d71161dda 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -173,6 +173,7 @@ config DRM_I915_DEBUG_RUNTIME_PM
 	bool "Enable extra state checking for runtime PM"
 	depends on DRM_I915
 	default n
+	select STACKDEPOT
 	help
 	  Choose this option to turn on extra state checking for the
 	  runtime PM functionality. This may introduce overhead during
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 359d37d5c958..1fa2f65c3cd1 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -180,7 +180,7 @@ static void free_vgpu_fence(struct intel_vgpu *vgpu)
 	}
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 }
 
 static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
@@ -206,7 +206,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
 	_clear_vgpu_fence(vgpu);
 
 	mutex_unlock(&dev_priv->drm.struct_mutex);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	return 0;
 out_free_fence:
 	gvt_vgpu_err("Failed to alloc fences\n");
@@ -219,7 +219,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
 		vgpu->fence.regs[i] = NULL;
 	}
 	mutex_unlock(&dev_priv->drm.struct_mutex);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	return -ENOSPC;
 }
 
@@ -317,7 +317,7 @@ void intel_vgpu_reset_resource(struct intel_vgpu *vgpu)
 
 	intel_runtime_pm_get(dev_priv);
 	_clear_vgpu_fence(vgpu);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index b4ab1dad0143..435c746c3f73 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -597,7 +597,7 @@ static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv)
 
 static inline void mmio_hw_access_post(struct drm_i915_private *dev_priv)
 {
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
index c32e7d5e8629..f04b3b965bfc 100644
--- a/drivers/gpu/drm/i915/gvt/sched_policy.c
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@@ -474,6 +474,6 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
 		}
 	}
 	spin_unlock_bh(&scheduler->mmio_context_lock);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	mutex_unlock(&vgpu->gvt->sched_lock);
 }
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 1ad8c5e1455d..3816dcae2185 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -997,7 +997,7 @@ static int workload_thread(void *priv)
 			intel_uncore_forcewake_put(gvt->dev_priv,
 					FORCEWAKE_ALL);
 
-		intel_runtime_pm_put(gvt->dev_priv);
+		intel_runtime_pm_put_unchecked(gvt->dev_priv);
 		if (ret && (vgpu_is_vm_unhealthy(ret)))
 			enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
 	}
@@ -1451,7 +1451,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
 		mutex_lock(&dev_priv->drm.struct_mutex);
 		ret = intel_gvt_scan_and_shadow_workload(workload);
 		mutex_unlock(&dev_priv->drm.struct_mutex);
-		intel_runtime_pm_put(dev_priv);
+		intel_runtime_pm_put_unchecked(dev_priv);
 	}
 
 	if (ret && (vgpu_is_vm_unhealthy(ret))) {
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3a369245d7e6..6b8da14f213b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -877,7 +877,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 		}
 	}
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return 0;
 }
@@ -953,7 +953,7 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
 
 	intel_runtime_pm_get(i915);
 	gpu = i915_capture_gpu_state(i915);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	if (IS_ERR(gpu))
 		return PTR_ERR(gpu);
 
@@ -1226,7 +1226,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 	seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq);
 	seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	return ret;
 }
 
@@ -1292,7 +1292,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 
 	intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer))
 		seq_printf(m, "Hangcheck active, timer fires in %dms\n",
@@ -1579,7 +1579,7 @@ static int i915_drpc_info(struct seq_file *m, void *unused)
 	else
 		err = ironlake_drpc_info(m);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return err;
 }
@@ -1632,7 +1632,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused)
 	}
 
 	mutex_unlock(&fbc->lock);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return 0;
 }
@@ -1695,7 +1695,7 @@ static int i915_ips_status(struct seq_file *m, void *unused)
 			seq_puts(m, "Currently: disabled\n");
 	}
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return 0;
 }
@@ -1723,7 +1723,7 @@ static int i915_sr_status(struct seq_file *m, void *unused)
 		sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
 
 	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	seq_printf(m, "self-refresh: %s\n", enableddisabled(sr_enabled));
 
@@ -1756,7 +1756,7 @@ static int i915_emon_status(struct seq_file *m, void *unused)
 	seq_printf(m, "GFX power: %ld\n", gfx);
 	seq_printf(m, "Total power: %ld\n", chipset + gfx);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return 0;
 }
@@ -1805,7 +1805,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 	mutex_unlock(&dev_priv->pcu_lock);
 
 out:
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	return ret;
 }
 
@@ -2017,7 +2017,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
 	if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
 		seq_puts(m, "L-shaped memory detected\n");
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return 0;
 }
@@ -2067,7 +2067,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 			act_freq = intel_get_cagf(dev_priv,
 						  I915_READ(GEN6_RPSTAT1));
 		}
-		intel_runtime_pm_put(dev_priv);
+		intel_runtime_pm_put_unchecked(dev_priv);
 	}
 
 	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
@@ -2160,7 +2160,7 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data)
 
 	intel_runtime_pm_get(dev_priv);
 	seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return 0;
 }
@@ -2192,7 +2192,7 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
 	for (i = 0; i < 16; i++)
 		seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i)));
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return 0;
 }
@@ -2601,7 +2601,7 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
 			   dev_priv->psr.last_exit);
 	}
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	return 0;
 }
 
@@ -2632,7 +2632,7 @@ i915_edp_psr_debug_set(void *data, u64 val)
 	drm_modeset_drop_locks(&ctx);
 	drm_modeset_acquire_fini(&ctx);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return ret;
 }
@@ -2665,7 +2665,7 @@ static int i915_energy_uJ(struct seq_file *m, void *data)
 	intel_runtime_pm_get(dev_priv);
 
 	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) {
-		intel_runtime_pm_put(dev_priv);
+		intel_runtime_pm_put_unchecked(dev_priv);
 		return -ENODEV;
 	}
 
@@ -2673,7 +2673,7 @@ static int i915_energy_uJ(struct seq_file *m, void *data)
 	power = I915_READ(MCH_SECP_NRG_STTS);
 	power = (1000000 * power) >> units; /* convert to uJ */
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	seq_printf(m, "%llu", power);
 
@@ -2775,7 +2775,7 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
 	seq_printf(m, "ssp base: 0x%08x\n", I915_READ(CSR_SSP_BASE));
 	seq_printf(m, "htp: 0x%08x\n", I915_READ(CSR_HTP_SKL));
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return 0;
 }
@@ -3114,7 +3114,7 @@ static int i915_display_info(struct seq_file *m, void *unused)
 	drm_connector_list_iter_end(&conn_iter);
 	mutex_unlock(&dev->mode_config.mutex);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return 0;
 }
@@ -3139,7 +3139,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 	for_each_engine(engine, dev_priv, id)
 		intel_engine_dump(engine, &p, "%s\n", engine->name);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return 0;
 }
@@ -3265,7 +3265,7 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf,
 	dev_priv->wm.distrust_bios_wm = true;
 	dev_priv->ipc_enabled = enable;
 	intel_enable_ipc(dev_priv);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return len;
 }
@@ -4090,7 +4090,7 @@ i915_drop_caches_set(void *data, u64 val)
 		i915_gem_drain_freed_objects(i915);
 
 out:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 
 	return ret;
 }
@@ -4112,7 +4112,7 @@ i915_cache_sharing_get(void *data, u64 *val)
 
 	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
 
@@ -4140,7 +4140,7 @@ i915_cache_sharing_set(void *data, u64 val)
 	snpcr |= (val << GEN6_MBC_SNPCR_SHIFT);
 	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	return 0;
 }
 
@@ -4388,7 +4388,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 		gen10_sseu_device_status(dev_priv, &sseu);
 	}
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	i915_print_sseu_info(m, false, &sseu);
 
@@ -4416,7 +4416,7 @@ static int i915_forcewake_release(struct inode *inode, struct file *file)
 		return 0;
 
 	intel_uncore_forcewake_user_put(i915);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 60b98103aba3..464ff89d1464 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -131,6 +131,8 @@ bool i915_error_injected(void);
 	__i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \
 		      fmt, ##__VA_ARGS__)
 
+typedef depot_stack_handle_t intel_wakeref_t;
+
 enum hpd_pin {
 	HPD_NONE = 0,
 	HPD_TV = HPD_NONE,     /* TV is known to be unreliable */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 987acbb8280f..5ac32ea4c8fa 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -176,7 +176,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
 
 	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ);
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 
 	return i915->gt.epoch;
 }
@@ -815,7 +815,7 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
 
 	spin_unlock_irq(&dev_priv->uncore.lock);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 }
 
 static void
@@ -1150,7 +1150,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		i915_vma_unpin(vma);
 	}
 out_unlock:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	return ret;
@@ -1357,7 +1357,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		i915_vma_unpin(vma);
 	}
 out_rpm:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 out_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
 	return ret;
@@ -1969,7 +1969,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 err_unlock:
 	mutex_unlock(&dev->struct_mutex);
 err_rpm:
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	i915_gem_object_unpin_pages(obj);
 err:
 	switch (ret) {
@@ -2069,7 +2069,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	wmb();
 
 out:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 }
 
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
@@ -4766,7 +4766,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 		if (on)
 			cond_resched();
 	}
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 }
 
 static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
@@ -4902,7 +4902,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	intel_engines_sanitize(i915, false);
 
 	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 
 	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -4966,12 +4966,12 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	if (WARN_ON(!intel_engines_are_idle(i915)))
 		i915_gem_set_wedged(i915); /* no hope, discard everything */
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	return 0;
 
 err_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 5b7cd7add63e..a52fa42ed8b1 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2425,7 +2425,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		eb_release_vmas(&eb);
 	mutex_unlock(&dev->struct_mutex);
 err_rpm:
-	intel_runtime_pm_put(eb.i915);
+	intel_runtime_pm_put_unchecked(eb.i915);
 	i915_gem_context_put(eb.ctx);
 err_destroy:
 	eb_destroy(&eb);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 24df2e2a8fc1..1f72f5047945 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -259,7 +259,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 	 */
 	if (intel_runtime_pm_get_if_in_use(fence->i915)) {
 		fence_write(fence, vma);
-		intel_runtime_pm_put(fence->i915);
+		intel_runtime_pm_put_unchecked(fence->i915);
 	}
 
 	if (vma) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5cc8968eb3bf..6dac9614f7ba 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2537,7 +2537,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 
 	intel_runtime_pm_get(i915);
 	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 
 	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
 
@@ -2557,7 +2557,7 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
 
 	intel_runtime_pm_get(i915);
 	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 }
 
 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
@@ -2591,7 +2591,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	if (flags & I915_VMA_GLOBAL_BIND) {
 		intel_runtime_pm_get(i915);
 		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
-		intel_runtime_pm_put(i915);
+		intel_runtime_pm_put_unchecked(i915);
 	}
 
 	return 0;
@@ -2604,7 +2604,7 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
 	if (vma->flags & I915_VMA_GLOBAL_BIND) {
 		intel_runtime_pm_get(i915);
 		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
-		intel_runtime_pm_put(i915);
+		intel_runtime_pm_put_unchecked(i915);
 	}
 
 	if (vma->flags & I915_VMA_LOCAL_BIND) {
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 72d6ea0cac7e..16693dd4d019 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -266,7 +266,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	}
 
 	if (flags & I915_SHRINK_BOUND)
-		intel_runtime_pm_put(i915);
+		intel_runtime_pm_put_unchecked(i915);
 
 	i915_retire_requests(i915);
 
@@ -300,7 +300,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
 				I915_SHRINK_BOUND |
 				I915_SHRINK_UNBOUND |
 				I915_SHRINK_ACTIVE);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 
 	return freed;
 }
@@ -378,7 +378,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 					 I915_SHRINK_ACTIVE |
 					 I915_SHRINK_BOUND |
 					 I915_SHRINK_UNBOUND);
-		intel_runtime_pm_put(i915);
+		intel_runtime_pm_put_unchecked(i915);
 	}
 
 	shrinker_unlock(i915, unlock);
@@ -423,7 +423,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 	freed_pages = i915_gem_shrink(i915, -1UL, NULL,
 				      I915_SHRINK_BOUND |
 				      I915_SHRINK_UNBOUND);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 
 	/* Because we may be allocating inside our own driver, we cannot
 	 * assert that there are no objects with pinned pages that are not
@@ -479,7 +479,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 				       I915_SHRINK_BOUND |
 				       I915_SHRINK_UNBOUND |
 				       I915_SHRINK_VMAPS);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 
 	/* We also want to clear any cached iomaps as they wrap vmap */
 	list_for_each_entry_safe(vma, next,
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index fbb094ecf6c9..72b799c5e8f6 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3375,7 +3375,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 	wake_up_all(&dev_priv->gpu_error.reset_queue);
 
 out:
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 }
 
 /* Called from drm generic code, passed 'crtc' which
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 5b1ae5ed97b3..e4dfd1477c78 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1365,7 +1365,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 	free_oa_buffer(dev_priv);
 
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
@@ -2123,7 +2123,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	put_oa_config(dev_priv, stream->oa_config);
 
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 err_config:
 	if (stream->ctx)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index d6c8f8fdfda5..c99fcfce79d5 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -210,7 +210,7 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 	if (fw)
 		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 }
 
 static void
@@ -231,7 +231,7 @@ frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 		    intel_runtime_pm_get_if_in_use(dev_priv)) {
 			val = intel_get_cagf(dev_priv,
 					     I915_READ_NOTRACE(GEN6_RPSTAT1));
-			intel_runtime_pm_put(dev_priv);
+			intel_runtime_pm_put_unchecked(dev_priv);
 		}
 
 		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
@@ -448,7 +448,7 @@ static u64 get_rc6(struct drm_i915_private *i915)
 
 	if (intel_runtime_pm_get_if_in_use(i915)) {
 		val = __get_rc6(i915);
-		intel_runtime_pm_put(i915);
+		intel_runtime_pm_put_unchecked(i915);
 
 		/*
 		 * If we are coming back from being runtime suspended we must
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index c0cfe7ae2ba5..53c20e103d56 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -46,7 +46,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv,
 
 	intel_runtime_pm_get(dev_priv);
 	res = intel_rc6_residency_us(dev_priv, reg);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return DIV_ROUND_CLOSEST_ULL(res, 1000);
 }
@@ -274,7 +274,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
 	}
 	mutex_unlock(&dev_priv->pcu_lock);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n", ret);
 }
@@ -371,7 +371,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 	    val > rps->max_freq ||
 	    val < rps->min_freq_softlimit) {
 		mutex_unlock(&dev_priv->pcu_lock);
-		intel_runtime_pm_put(dev_priv);
+		intel_runtime_pm_put_unchecked(dev_priv);
 		return -EINVAL;
 	}
 
@@ -392,7 +392,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 
 	mutex_unlock(&dev_priv->pcu_lock);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return ret ?: count;
 }
@@ -429,7 +429,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 	    val > rps->max_freq ||
 	    val > rps->max_freq_softlimit) {
 		mutex_unlock(&dev_priv->pcu_lock);
-		intel_runtime_pm_put(dev_priv);
+		intel_runtime_pm_put_unchecked(dev_priv);
 		return -EINVAL;
 	}
 
@@ -446,7 +446,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 
 	mutex_unlock(&dev_priv->pcu_lock);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return ret ?: count;
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 696e6f5680df..c6000aa47a8d 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2102,7 +2102,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 err:
 	atomic_dec(&dev_priv->gpu_error.pending_fb_pin);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	return vma;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index ac513fd70315..a1e4e1033289 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -29,6 +29,7 @@
 #include <linux/i2c.h>
 #include <linux/hdmi.h>
 #include <linux/sched/clock.h>
+#include <linux/stackdepot.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include <drm/drm_crtc.h>
@@ -2182,10 +2183,16 @@ enable_rpm_wakeref_asserts(struct drm_i915_private *i915)
 	atomic_dec(&i915->runtime_pm.wakeref_count);
 }
 
-void intel_runtime_pm_get(struct drm_i915_private *i915);
-bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
-void intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
-void intel_runtime_pm_put(struct drm_i915_private *i915);
+intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915);
+intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
+intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
+
+void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref);
+#else
+#define intel_runtime_pm_put(i915, wref) intel_runtime_pm_put_unchecked(i915)
+#endif
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
 void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 236cd040f271..85131166589c 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -928,7 +928,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
 	if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
 		idle = false;
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return idle;
 }
@@ -1485,7 +1485,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 
 	if (intel_runtime_pm_get_if_in_use(engine->i915)) {
 		intel_engine_print_registers(engine, m);
-		intel_runtime_pm_put(engine->i915);
+		intel_runtime_pm_put_unchecked(engine->i915);
 	} else {
 		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
 	}
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index fb5bb5b32a60..11d877b908e2 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -277,7 +277,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	ifbdev->vma = vma;
 	ifbdev->vma_flags = flags;
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
 	vga_switcheroo_client_fb_set(pdev, info);
 	return 0;
@@ -285,7 +285,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 out_unpin:
 	intel_unpin_fb_vma(vma, flags);
 out_unlock:
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index d3ebdbc0182e..1b1581a42aa1 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -445,7 +445,7 @@ static void guc_log_capture_logs(struct intel_guc_log *log)
 	 */
 	intel_runtime_pm_get(dev_priv);
 	guc_action_flush_log_complete(guc);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 }
 
 int intel_guc_log_create(struct intel_guc_log *log)
@@ -528,7 +528,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
 	ret = guc_action_control_log(guc, GUC_LOG_LEVEL_IS_VERBOSE(level),
 				     GUC_LOG_LEVEL_IS_ENABLED(level),
 				     GUC_LOG_LEVEL_TO_VERBOSITY(level));
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	if (ret) {
 		DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret);
 		goto out_unlock;
@@ -610,7 +610,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
 
 	intel_runtime_pm_get(i915);
 	guc_action_flush_log(guc);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 
 	/* GuC would have updated log buffer by now, so capture it */
 	guc_log_capture_logs(log);
diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c
index e24174d08fed..067277ca7cff 100644
--- a/drivers/gpu/drm/i915/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/intel_hotplug.c
@@ -262,7 +262,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
 		dev_priv->display.hpd_irq_setup(dev_priv);
 	spin_unlock_irq(&dev_priv->irq_lock);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 }
 
 bool intel_encoder_hotplug(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c
index bc27b691d824..c2b076e9bada 100644
--- a/drivers/gpu/drm/i915/intel_huc.c
+++ b/drivers/gpu/drm/i915/intel_huc.c
@@ -122,7 +122,7 @@ int intel_huc_check_status(struct intel_huc *huc)
 
 	intel_runtime_pm_get(dev_priv);
 	status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return status;
 }
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index ee3e0842d542..c2b7455a023e 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -1213,7 +1213,7 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd)
 	ret = scale_hw_to_user(connector, hw_level, bd->props.max_brightness);
 
 	drm_modeset_unlock(&dev->mode_config.connection_mutex);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 67d71cc604f1..38c22fb7152e 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -62,7 +62,7 @@ static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
 	spin_lock_init(&i915->runtime_pm.debug_lock);
 }
 
-static noinline void
+static noinline depot_stack_handle_t
 track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
 {
 	struct i915_runtime_pm *rpm = &i915->runtime_pm;
@@ -76,7 +76,7 @@ track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
 	depot_stack_handle_t stack, *stacks;
 
 	if (!HAS_RUNTIME_PM(i915))
-		return;
+		return -1;
 
 	save_stack_trace(&trace);
 	if (trace.nr_entries &&
@@ -85,7 +85,7 @@ track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
 
 	stack = depot_save_stack(&trace, GFP_NOWAIT | __GFP_NOWARN);
 	if (!stack)
-		return;
+		return -1;
 
 	spin_lock_irqsave(&rpm->debug_lock, flags);
 	stacks = krealloc(rpm->debug_owners,
@@ -94,8 +94,55 @@ track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
 	if (stacks) {
 		stacks[rpm->debug_count++] = stack;
 		rpm->debug_owners = stacks;
+	} else {
+		stack = -1;
 	}
 	spin_unlock_irqrestore(&rpm->debug_lock, flags);
+
+	return stack;
+}
+
+static void cancel_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
+					    depot_stack_handle_t stack)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+	unsigned long flags, n;
+	bool found = false;
+
+	if (unlikely(stack == -1))
+		return;
+
+	spin_lock_irqsave(&rpm->debug_lock, flags);
+	for (n = rpm->debug_count; n--; ) {
+		if (rpm->debug_owners[n] == stack) {
+			memmove(rpm->debug_owners + n,
+				rpm->debug_owners + n + 1,
+				(--rpm->debug_count - n) * sizeof(stack));
+			found = true;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&rpm->debug_lock, flags);
+
+	if (WARN(!found,
+		 "Unmatched wakeref (tracking %lu), count %u\n",
+		 rpm->debug_count, atomic_read(&rpm->wakeref_count))) {
+		unsigned long entries[STACKDEPTH];
+		struct stack_trace trace = {
+			.entries = entries,
+			.max_entries = ARRAY_SIZE(entries),
+		};
+		char *buf;
+
+		buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		if (!buf)
+			return;
+
+		depot_fetch_stack(stack, &trace);
+		snprint_stack_trace(buf, PAGE_SIZE, &trace, 0);
+		DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf);
+		kfree(buf);
+	}
 }
 
 static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
@@ -220,8 +267,10 @@ static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
 {
 }
 
-static void track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+static depot_stack_handle_t
+track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
 {
+	return -1;
 }
 
 static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
@@ -1823,7 +1872,7 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
 	mutex_unlock(&power_domains->lock);
 
 	if (!is_enabled)
-		intel_runtime_pm_put(dev_priv);
+		intel_runtime_pm_put_unchecked(dev_priv);
 
 	return is_enabled;
 }
@@ -1857,7 +1906,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
 
 	mutex_unlock(&power_domains->lock);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 }
 
 #define I830_PIPES_POWER_DOMAINS (		\
@@ -3965,7 +4014,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume)
 void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv)
 {
 	/* Keep the power well enabled, but cancel its rpm wakeref. */
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	/* Remove the refcount we took to keep power well support disabled. */
 	if (!i915_modparams.disable_power_well)
@@ -4179,7 +4228,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
  * Any runtime pm reference obtained by this function must have a symmetric
  * call to intel_runtime_pm_put() to release the reference again.
  */
-void intel_runtime_pm_get(struct drm_i915_private *i915)
+intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915)
 {
 	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
@@ -4191,7 +4240,7 @@ void intel_runtime_pm_get(struct drm_i915_private *i915)
 	atomic_inc(&i915->runtime_pm.wakeref_count);
 	assert_rpm_wakelock_held(i915);
 
-	track_intel_runtime_pm_wakeref(i915);
+	return track_intel_runtime_pm_wakeref(i915);
 }
 
 /**
@@ -4207,7 +4256,7 @@ void intel_runtime_pm_get(struct drm_i915_private *i915)
  *
  * Returns: True if the wakeref was acquired, or False otherwise.
  */
-bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
+intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
 {
 	if (IS_ENABLED(CONFIG_PM)) {
 		struct pci_dev *pdev = i915->drm.pdev;
@@ -4220,15 +4269,13 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
 		 * atm to the late/early system suspend/resume handlers.
 		 */
 		if (pm_runtime_get_if_in_use(kdev) <= 0)
-			return false;
+			return 0;
 	}
 
 	atomic_inc(&i915->runtime_pm.wakeref_count);
 	assert_rpm_wakelock_held(i915);
 
-	track_intel_runtime_pm_wakeref(i915);
-
-	return true;
+	return track_intel_runtime_pm_wakeref(i915);
 }
 
 /**
@@ -4248,7 +4295,7 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
  * Any runtime pm reference obtained by this function must have a symmetric
  * call to intel_runtime_pm_put() to release the reference again.
  */
-void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
+intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
 {
 	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
@@ -4258,7 +4305,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
 
 	atomic_inc(&i915->runtime_pm.wakeref_count);
 
-	track_intel_runtime_pm_wakeref(i915);
+	return track_intel_runtime_pm_wakeref(i915);
 }
 
 /**
@@ -4269,7 +4316,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
  * intel_runtime_pm_get() and might power down the corresponding
  * hardware block right away if this is the last reference.
  */
-void intel_runtime_pm_put(struct drm_i915_private *i915)
+void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915)
 {
 	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
@@ -4282,6 +4329,14 @@ void intel_runtime_pm_put(struct drm_i915_private *i915)
 	pm_runtime_put_autosuspend(kdev);
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref)
+{
+	cancel_intel_runtime_pm_wakeref(i915, wref);
+	intel_runtime_pm_put_unchecked(i915);
+}
+#endif
+
 /**
  * intel_runtime_pm_enable - enable runtime pm
  * @i915: i915 device instance
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index fff468f17d2d..8d4c76ac0e7d 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1709,7 +1709,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
 		reg->val = I915_READ8(entry->offset_ldw);
 	else
 		ret = -EINVAL;
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index a0c7cbc212ba..731dfd3d3fc8 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1789,7 +1789,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
 	err = i915_subtests(tests, ctx);
 
 out_unlock:
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	mock_file_free(dev_priv, file);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index bdcc53e15e75..762e1a7125f5 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -32,7 +32,7 @@ static int switch_to_context(struct drm_i915_private *i915,
 		i915_request_add(rq);
 	}
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 
 	return err;
 }
@@ -76,7 +76,7 @@ static void simulate_hibernate(struct drm_i915_private *i915)
 	 */
 	trash_stolen(i915);
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 }
 
 static int pm_prepare(struct drm_i915_private *i915)
@@ -98,7 +98,7 @@ static void pm_suspend(struct drm_i915_private *i915)
 	i915_gem_suspend_gtt_mappings(i915);
 	i915_gem_suspend_late(i915);
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 }
 
 static void pm_hibernate(struct drm_i915_private *i915)
@@ -110,7 +110,7 @@ static void pm_hibernate(struct drm_i915_private *i915)
 	i915_gem_freeze(i915);
 	i915_gem_freeze_late(i915);
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 }
 
 static void pm_resume(struct drm_i915_private *i915)
@@ -125,7 +125,7 @@ static void pm_resume(struct drm_i915_private *i915)
 	i915_gem_sanitize(i915);
 	i915_gem_resume(i915);
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 }
 
 static int igt_gem_suspend(void *arg)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index f7392c1ffe75..eea4fc2445ae 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -376,7 +376,7 @@ static int igt_gem_coherency(void *arg)
 		}
 	}
 unlock:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	kfree(offsets);
 	return err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index d00cdf3c2939..6e1a0711d201 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -243,7 +243,7 @@ static int live_nop_switch(void *arg)
 	}
 
 out_unlock:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	mock_file_free(i915, file);
 	return err;
@@ -609,7 +609,7 @@ static int igt_ctx_exec(void *arg)
 
 			intel_runtime_pm_get(i915);
 			err = gpu_fill(obj, ctx, engine, dw);
-			intel_runtime_pm_put(i915);
+			intel_runtime_pm_put_unchecked(i915);
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
@@ -715,7 +715,7 @@ static int igt_ctx_readonly(void *arg)
 
 			intel_runtime_pm_get(i915);
 			err = gpu_fill(obj, ctx, engine, dw);
-			intel_runtime_pm_put(i915);
+			intel_runtime_pm_put_unchecked(i915);
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
@@ -1067,7 +1067,7 @@ static int igt_vm_isolation(void *arg)
 		count, RUNTIME_INFO(i915)->num_rings);
 
 out_rpm:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 out_unlock:
 	if (end_live_test(&t))
 		err = -EIO;
@@ -1200,7 +1200,7 @@ static int igt_switch_to_kernel_context(void *arg)
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	kernel_context_close(ctx);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 4365979d8222..8d22f73a9b63 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -464,7 +464,7 @@ static int igt_evict_contexts(void *arg)
 	}
 	if (drm_mm_node_allocated(&hole))
 		drm_mm_remove_node(&hole);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index a9ed0ecc94e2..87cb0602a5fc 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -295,7 +295,7 @@ static int lowlevel_hole(struct drm_i915_private *i915,
 
 			intel_runtime_pm_get(i915);
 			vm->insert_entries(vm, &mock_vma, I915_CACHE_NONE, 0);
-			intel_runtime_pm_put(i915);
+			intel_runtime_pm_put_unchecked(i915);
 		}
 		count = n;
 
@@ -1216,7 +1216,7 @@ static int igt_ggtt_page(void *arg)
 	kfree(order);
 out_remove:
 	ggtt->vm.clear_range(&ggtt->vm, tmp.start, tmp.size);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	drm_mm_remove_node(&tmp);
 out_unpin:
 	i915_gem_object_unpin_pages(obj);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index be7ecb66ad11..b03890c590d7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -444,7 +444,7 @@ next_tiling: ;
 	}
 
 out_unlock:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 out:
@@ -508,7 +508,7 @@ static void disable_retire_worker(struct drm_i915_private *i915)
 	if (!i915->gt.active_requests++) {
 		intel_runtime_pm_get(i915);
 		i915_gem_unpark(i915);
-		intel_runtime_pm_put(i915);
+		intel_runtime_pm_put_unchecked(i915);
 	}
 	mutex_unlock(&i915->drm.struct_mutex);
 	cancel_delayed_work_sync(&i915->gt.retire_work);
@@ -590,7 +590,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
 		mutex_lock(&i915->drm.struct_mutex);
 		intel_runtime_pm_get(i915);
 		err = make_obj_busy(obj);
-		intel_runtime_pm_put(i915);
+		intel_runtime_pm_put_unchecked(i915);
 		mutex_unlock(&i915->drm.struct_mutex);
 		if (err) {
 			pr_err("[loop %d] Failed to busy the object\n", loop);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 07e557815308..e8880cabd5c7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -403,7 +403,7 @@ static int live_nop_request(void *arg)
 	}
 
 out_unlock:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -553,7 +553,7 @@ static int live_empty_request(void *arg)
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
 out_unlock:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -731,7 +731,7 @@ static int live_all_engines(void *arg)
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
 out_unlock:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -860,7 +860,7 @@ static int live_sequential_engines(void *arg)
 		i915_request_put(request[id]);
 	}
 out_unlock:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c
index 32cba4cae31a..3590ba3d8897 100644
--- a/drivers/gpu/drm/i915/selftests/intel_guc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_guc.c
@@ -225,7 +225,7 @@ static int igt_guc_clients(void *args)
 	guc_clients_create(guc);
 	guc_clients_enable(guc);
 unlock:
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
 }
@@ -337,7 +337,7 @@ static int igt_guc_doorbells(void *arg)
 			guc_client_free(clients[i]);
 		}
 unlock:
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 0aadbd9c7d56..33bd3c4b6fa3 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -402,7 +402,7 @@ static int igt_wedged_reset(void *arg)
 	i915_reset(i915, ALL_ENGINES, NULL);
 	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	igt_global_reset_unlock(i915);
 
@@ -1636,7 +1636,7 @@ static int igt_atomic_reset(void *arg)
 	force_reset(i915);
 
 unlock:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	igt_global_reset_unlock(i915);
 
@@ -1679,7 +1679,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	i915_modparams.enable_hangcheck = saved_hangcheck;
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 00caaa00f02f..ac1b18a17f3c 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -65,7 +65,7 @@ static int live_sanitycheck(void *arg)
 	igt_spinner_fini(&spin);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -158,7 +158,7 @@ static int live_preempt(void *arg)
 	igt_spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -251,7 +251,7 @@ static int live_late_preempt(void *arg)
 	igt_spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
@@ -374,7 +374,7 @@ static int live_preempt_hang(void *arg)
 	igt_spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -627,7 +627,7 @@ static int live_preempt_smoke(void *arg)
 err_batch:
 	i915_gem_object_put(smoke.batch);
 err_unlock:
-	intel_runtime_pm_put(smoke.i915);
+	intel_runtime_pm_put_unchecked(smoke.i915);
 	mutex_unlock(&smoke.i915->drm.struct_mutex);
 	kfree(smoke.contexts);
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index c2b3cd8fcc34..54f5c2de3d08 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -49,7 +49,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 
 	intel_runtime_pm_get(engine->i915);
 	rq = i915_request_alloc(engine, ctx);
-	intel_runtime_pm_put(engine->i915);
+	intel_runtime_pm_put_unchecked(engine->i915);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_pin;
@@ -196,7 +196,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
 	else
 		rq = i915_request_alloc(engine, ctx);
 
-	intel_runtime_pm_put(engine->i915);
+	intel_runtime_pm_put_unchecked(engine->i915);
 
 	kernel_context_close(ctx);
 
@@ -255,7 +255,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
 
 	intel_runtime_pm_get(i915);
 	err = reset(engine);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 
 	if (want_spin) {
 		igt_spinner_end(&spin);
@@ -364,7 +364,7 @@ live_gpu_reset_gt_engine_workarounds(void *arg)
 	ok = verify_gt_engine_wa(i915, "after reset");
 
 out:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	igt_global_reset_unlock(i915);
 
 	return ok ? 0 : -ESRCH;
@@ -443,7 +443,7 @@ live_engine_reset_gt_engine_workarounds(void *arg)
 	}
 
 err:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 	igt_global_reset_unlock(i915);
 	kernel_context_close(ctx);
 
-- 
2.20.1
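
A minimal sketch of the calling convention the new API establishes (the
wrapper function below is hypothetical; the intel_* identifiers are the
ones declared in the diff above):

	static void example_user(struct drm_i915_private *i915)
	{
		intel_wakeref_t wakeref;

		/* Acquire a wakeref and record where it was taken. */
		wakeref = intel_runtime_pm_get(i915);

		/* ... access the hardware ... */

		/* Hand the cookie back so the debug build can match it. */
		intel_runtime_pm_put(i915, wakeref);
	}

On CONFIG_DRM_I915_DEBUG_RUNTIME_PM builds the put calls
cancel_intel_runtime_pm_wakeref() to strike the cookie from the
debug_owners array; elsewhere the macro simply falls back to
intel_runtime_pm_put_unchecked(), which is also what the mass
conversion above uses for callers not yet taught to carry their
wakeref.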


* [PATCH 05/46] drm/i915: Track GT wakeref
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Record the wakeref used to keep the device awake while the GPU is
executing requests, and be sure to cancel the tracking upon parking.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  2 +-
 drivers/gpu/drm/i915/i915_gem.c | 11 +++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 464ff89d1464..a20bd2ec48de 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1966,7 +1966,7 @@ struct drm_i915_private {
 		 * In order to reduce the effect on performance, there
 		 * is a slight delay before we do so.
 		 */
-		bool awake;
+		intel_wakeref_t awake;
 
 		/**
 		 * The number of times we have woken up.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5ac32ea4c8fa..27f207cbabd9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -139,6 +139,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev)
 
 static u32 __i915_gem_park(struct drm_i915_private *i915)
 {
+	intel_wakeref_t wakeref;
+
 	GEM_TRACE("\n");
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -169,14 +171,15 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
 	i915_pmu_gt_parked(i915);
 	i915_vma_parked(i915);
 
-	i915->gt.awake = false;
+	wakeref = fetch_and_zero(&i915->gt.awake);
+	GEM_BUG_ON(!wakeref);
 
 	if (INTEL_GEN(i915) >= 6)
 		gen6_rps_idle(i915);
 
 	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ);
 
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	return i915->gt.epoch;
 }
@@ -205,7 +208,8 @@ void i915_gem_unpark(struct drm_i915_private *i915)
 	if (i915->gt.awake)
 		return;
 
-	intel_runtime_pm_get_noresume(i915);
+	i915->gt.awake = intel_runtime_pm_get_noresume(i915);
+	GEM_BUG_ON(!i915->gt.awake);
 
 	/*
 	 * It seems that the DMC likes to transition between the DC states a lot
@@ -220,7 +224,6 @@ void i915_gem_unpark(struct drm_i915_private *i915)
 	 */
 	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
 
-	i915->gt.awake = true;
 	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
 		i915->gt.epoch = 1;
 
-- 
2.20.1
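
A condensed sketch of the park/unpark pairing this implements (the
wrapper functions are hypothetical; gt.awake, fetch_and_zero() and the
runtime-pm calls are those used in the diff above):

	static void example_unpark(struct drm_i915_private *i915)
	{
		/* Stash the cookie for as long as the GT stays awake. */
		i915->gt.awake = intel_runtime_pm_get_noresume(i915);
		GEM_BUG_ON(!i915->gt.awake);
	}

	static void example_park(struct drm_i915_private *i915)
	{
		/* Retrieve-and-clear the cookie taken on unpark. */
		intel_wakeref_t wakeref = fetch_and_zero(&i915->gt.awake);

		GEM_BUG_ON(!wakeref);
		intel_runtime_pm_put(i915, wakeref);
	}

Storing the cookie in gt.awake also preserves the boolean it replaces:
a zero cookie still reads as "parked", while a nonzero one identifies
exactly which acquisition is keeping the GT awake.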


* [PATCH 06/46] drm/i915: Track the rpm wakerefs for error handling
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Keep hold of the local wakeref used in error handling, and cancel the
tracking upon release so that leaks can be identified.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 72b799c5e8f6..3272bd02c3cf 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3292,6 +3292,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 		       const char *fmt, ...)
 {
 	struct intel_engine_cs *engine;
+	intel_wakeref_t wakeref;
 	unsigned int tmp;
 	char error_msg[80];
 	char *msg = NULL;
@@ -3313,7 +3314,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 	 * isn't the case at least when we get here by doing a
 	 * simulated reset via debugfs, so get an RPM reference.
 	 */
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	engine_mask &= INTEL_INFO(dev_priv)->ring_mask;
 
@@ -3375,7 +3376,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 	wake_up_all(&dev_priv->gpu_error.reset_queue);
 
 out:
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 }
 
 /* Called from drm generic code, passed 'crtc' which
-- 
2.20.1
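
Reduced to its essentials, the conversion looks like the sketch below
(the wrapper and its nothing_to_do parameter are hypothetical; the
pattern is the one applied to i915_handle_error() above):

	static void example_error_handler(struct drm_i915_private *i915,
					  bool nothing_to_do)
	{
		intel_wakeref_t wakeref;

		wakeref = intel_runtime_pm_get(i915);

		if (nothing_to_do)
			goto out;

		/* ... error handling that may bail out early ... */
out:
		/* Every exit releases the one reference taken above. */
		intel_runtime_pm_put(i915, wakeref);
	}

Because the cookie lives in a local, each early exit through the out
label releases exactly the reference this function took, and a missing
put shows up as an unmatched cookie in the debug tracking rather than
as a silent refcount imbalance.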


* [PATCH 07/46] drm/i915: Mark up sysfs with rpm wakeref tracking
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

As sysfs has a simple pattern of taking an rpm wakeref around the user
access, we can track the local reference and drop it as soon as
possible.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_sysfs.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 53c20e103d56..2cbbf165d179 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -42,11 +42,12 @@ static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
 static u32 calc_residency(struct drm_i915_private *dev_priv,
 			  i915_reg_t reg)
 {
+	intel_wakeref_t wakeref;
 	u64 res;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	res = intel_rc6_residency_us(dev_priv, reg);
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return DIV_ROUND_CLOSEST_ULL(res, 1000);
 }
@@ -258,9 +259,10 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
 				    struct device_attribute *attr, char *buf)
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+	intel_wakeref_t wakeref;
 	int ret;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	mutex_lock(&dev_priv->pcu_lock);
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
@@ -274,7 +276,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
 	}
 	mutex_unlock(&dev_priv->pcu_lock);
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n", ret);
 }
@@ -354,6 +356,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	intel_wakeref_t wakeref;
 	u32 val;
 	ssize_t ret;
 
@@ -361,7 +364,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 	if (ret)
 		return ret;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	mutex_lock(&dev_priv->pcu_lock);
 
@@ -371,7 +374,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 	    val > rps->max_freq ||
 	    val < rps->min_freq_softlimit) {
 		mutex_unlock(&dev_priv->pcu_lock);
-		intel_runtime_pm_put_unchecked(dev_priv);
+		intel_runtime_pm_put(dev_priv, wakeref);
 		return -EINVAL;
 	}
 
@@ -392,7 +395,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 
 	mutex_unlock(&dev_priv->pcu_lock);
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return ret ?: count;
 }
@@ -412,6 +415,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	intel_wakeref_t wakeref;
 	u32 val;
 	ssize_t ret;
 
@@ -419,7 +423,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 	if (ret)
 		return ret;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	mutex_lock(&dev_priv->pcu_lock);
 
@@ -429,7 +433,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 	    val > rps->max_freq ||
 	    val > rps->max_freq_softlimit) {
 		mutex_unlock(&dev_priv->pcu_lock);
-		intel_runtime_pm_put_unchecked(dev_priv);
+		intel_runtime_pm_put(dev_priv, wakeref);
 		return -EINVAL;
 	}
 
@@ -446,7 +450,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 
 	mutex_unlock(&dev_priv->pcu_lock);
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return ret ?: count;
 }
-- 
2.20.1
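
One variant worth noting before the debugfs conversion that follows:
intel_runtime_pm_get_if_in_use() now returns a cookie rather than a
bool, with zero reserved to mean the device was asleep and no
reference was taken. A sketch of the resulting idiom (the wrapper
below is hypothetical):

	static void example_if_awake(struct drm_i915_private *i915)
	{
		intel_wakeref_t wakeref;

		wakeref = intel_runtime_pm_get_if_in_use(i915);
		if (wakeref) {
			/* ... read registers while demonstrably awake ... */
			intel_runtime_pm_put(i915, wakeref);
		}
	}

The cookie thus doubles as the success flag, so nothing is lost by
dropping the old bool return.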


* [PATCH 08/46] drm/i915: Mark up debugfs with rpm wakeref tracking
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

As debugfs has a simple pattern of taking an rpm wakeref around the user
access, we can track the local reference and drop it as soon as
possible.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 135 +++++++++++++++++-----------
 1 file changed, 82 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 6b8da14f213b..d667b05e7ca4 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -674,9 +674,10 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int i, pipe;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	if (IS_CHERRYVIEW(dev_priv)) {
 		seq_printf(m, "Master Interrupt Control:\t%08x\n",
@@ -877,7 +878,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 		}
 	}
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -950,10 +951,11 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
 	struct i915_gpu_state *gpu;
+	intel_wakeref_t wakeref;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	gpu = i915_capture_gpu_state(i915);
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	if (IS_ERR(gpu))
 		return PTR_ERR(gpu);
 
@@ -1012,9 +1014,10 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	intel_wakeref_t wakeref;
 	int ret = 0;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	if (IS_GEN(dev_priv, 5)) {
 		u16 rgvswctl = I915_READ16(MEMSWCTL);
@@ -1226,7 +1229,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 	seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq);
 	seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq);
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	return ret;
 }
 
@@ -1265,6 +1268,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	u64 acthd[I915_NUM_ENGINES];
 	u32 seqno[I915_NUM_ENGINES];
 	struct intel_instdone instdone;
+	intel_wakeref_t wakeref;
 	enum intel_engine_id id;
 
 	if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
@@ -1283,7 +1287,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 		return 0;
 	}
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	for_each_engine(engine, dev_priv, id) {
 		acthd[id] = intel_engine_get_active_head(engine);
@@ -1292,7 +1296,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 
 	intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer))
 		seq_printf(m, "Hangcheck active, timer fires in %dms\n",
@@ -1568,9 +1572,10 @@ static int gen6_drpc_info(struct seq_file *m)
 static int i915_drpc_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 	int err;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		err = vlv_drpc_info(m);
@@ -1579,7 +1584,7 @@ static int i915_drpc_info(struct seq_file *m, void *unused)
 	else
 		err = ironlake_drpc_info(m);
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return err;
 }
@@ -1601,11 +1606,12 @@ static int i915_fbc_status(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct intel_fbc *fbc = &dev_priv->fbc;
+	intel_wakeref_t wakeref;
 
 	if (!HAS_FBC(dev_priv))
 		return -ENODEV;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	mutex_lock(&fbc->lock);
 
 	if (intel_fbc_is_active(dev_priv))
@@ -1632,7 +1638,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused)
 	}
 
 	mutex_unlock(&fbc->lock);
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -1677,11 +1683,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_false_color_fops,
 static int i915_ips_status(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 
 	if (!HAS_IPS(dev_priv))
 		return -ENODEV;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	seq_printf(m, "Enabled by kernel parameter: %s\n",
 		   yesno(i915_modparams.enable_ips));
@@ -1695,7 +1702,7 @@ static int i915_ips_status(struct seq_file *m, void *unused)
 			seq_puts(m, "Currently: disabled\n");
 	}
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -1703,9 +1710,10 @@ static int i915_ips_status(struct seq_file *m, void *unused)
 static int i915_sr_status(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 	bool sr_enabled = false;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
 
 	if (INTEL_GEN(dev_priv) >= 9)
@@ -1723,7 +1731,7 @@ static int i915_sr_status(struct seq_file *m, void *unused)
 		sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
 
 	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	seq_printf(m, "self-refresh: %s\n", enableddisabled(sr_enabled));
 
@@ -1735,29 +1743,30 @@ static int i915_emon_status(struct seq_file *m, void *unused)
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct drm_device *dev = &dev_priv->drm;
 	unsigned long temp, chipset, gfx;
+	intel_wakeref_t wakeref;
 	int ret;
 
 	if (!IS_GEN(dev_priv, 5))
 		return -ENODEV;
 
-	intel_runtime_pm_get(dev_priv);
-
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
 		return ret;
 
+	wakeref = intel_runtime_pm_get(dev_priv);
+
 	temp = i915_mch_val(dev_priv);
 	chipset = i915_chipset_val(dev_priv);
 	gfx = i915_gfx_val(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
 
+	intel_runtime_pm_put(dev_priv, wakeref);
+
 	seq_printf(m, "GMCH temp: %ld\n", temp);
 	seq_printf(m, "Chipset power: %ld\n", chipset);
 	seq_printf(m, "GFX power: %ld\n", gfx);
 	seq_printf(m, "Total power: %ld\n", chipset + gfx);
 
-	intel_runtime_pm_put_unchecked(dev_priv);
-
 	return 0;
 }
 
@@ -1766,13 +1775,14 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	unsigned int max_gpu_freq, min_gpu_freq;
+	intel_wakeref_t wakeref;
 	int gpu_freq, ia_freq;
 	int ret;
 
 	if (!HAS_LLC(dev_priv))
 		return -ENODEV;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
 	if (ret)
@@ -1805,7 +1815,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 	mutex_unlock(&dev_priv->pcu_lock);
 
 out:
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	return ret;
 }
 
@@ -1978,8 +1988,9 @@ static const char *swizzle_string(unsigned swizzle)
 static int i915_swizzle_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
 		   swizzle_string(dev_priv->mm.bit_6_swizzle_x));
@@ -2017,7 +2028,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
 	if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
 		seq_puts(m, "L-shaped memory detected\n");
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -2054,9 +2065,11 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	struct drm_device *dev = &dev_priv->drm;
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	u32 act_freq = rps->cur_freq;
+	intel_wakeref_t wakeref;
 	struct drm_file *file;
 
-	if (intel_runtime_pm_get_if_in_use(dev_priv)) {
+	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
+	if (wakeref) {
 		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
 			mutex_lock(&dev_priv->pcu_lock);
 			act_freq = vlv_punit_read(dev_priv,
@@ -2067,7 +2080,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 			act_freq = intel_get_cagf(dev_priv,
 						  I915_READ(GEN6_RPSTAT1));
 		}
-		intel_runtime_pm_put_unchecked(dev_priv);
+		intel_runtime_pm_put(dev_priv, wakeref);
 	}
 
 	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
@@ -2150,6 +2163,7 @@ static int i915_llc(struct seq_file *m, void *data)
 static int i915_huc_load_status_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 	struct drm_printer p;
 
 	if (!HAS_HUC(dev_priv))
@@ -2158,9 +2172,9 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data)
 	p = drm_seq_file_printer(m);
 	intel_uc_fw_dump(&dev_priv->huc.fw, &p);
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -2168,6 +2182,7 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data)
 static int i915_guc_load_status_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 	struct drm_printer p;
 	u32 tmp, i;
 
@@ -2177,7 +2192,7 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
 	p = drm_seq_file_printer(m);
 	intel_uc_fw_dump(&dev_priv->guc.fw, &p);
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	tmp = I915_READ(GUC_STATUS);
 
@@ -2192,7 +2207,7 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
 	for (i = 0; i < 16; i++)
 		seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i)));
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -2550,6 +2565,7 @@ psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m)
 static int i915_edp_psr_status(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 	u32 psrperf = 0;
 	bool enabled = false;
 	bool sink_support;
@@ -2562,7 +2578,7 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
 	if (!sink_support)
 		return 0;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	mutex_lock(&dev_priv->psr.lock);
 	seq_printf(m, "PSR mode: %s\n",
@@ -2601,7 +2617,7 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
 			   dev_priv->psr.last_exit);
 	}
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	return 0;
 }
 
@@ -2610,6 +2626,7 @@ i915_edp_psr_debug_set(void *data, u64 val)
 {
 	struct drm_i915_private *dev_priv = data;
 	struct drm_modeset_acquire_ctx ctx;
+	intel_wakeref_t wakeref;
 	int ret;
 
 	if (!CAN_PSR(dev_priv))
@@ -2617,7 +2634,7 @@ i915_edp_psr_debug_set(void *data, u64 val)
 
 	DRM_DEBUG_KMS("Setting PSR debug to %llx\n", val);
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE);
 
@@ -2632,7 +2649,7 @@ i915_edp_psr_debug_set(void *data, u64 val)
 	drm_modeset_drop_locks(&ctx);
 	drm_modeset_acquire_fini(&ctx);
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return ret;
 }
@@ -2657,15 +2674,16 @@ static int i915_energy_uJ(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	unsigned long long power;
+	intel_wakeref_t wakeref;
 	u32 units;
 
 	if (INTEL_GEN(dev_priv) < 6)
 		return -ENODEV;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) {
-		intel_runtime_pm_put_unchecked(dev_priv);
+		intel_runtime_pm_put(dev_priv, wakeref);
 		return -ENODEV;
 	}
 
@@ -2673,7 +2691,7 @@ static int i915_energy_uJ(struct seq_file *m, void *data)
 	power = I915_READ(MCH_SECP_NRG_STTS);
 	power = (1000000 * power) >> units; /* convert to uJ */
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	seq_printf(m, "%llu", power);
 
@@ -2742,6 +2760,7 @@ static int i915_power_domain_info(struct seq_file *m, void *unused)
 static int i915_dmc_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 	struct intel_csr *csr;
 
 	if (!HAS_CSR(dev_priv))
@@ -2749,7 +2768,7 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
 
 	csr = &dev_priv->csr;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	seq_printf(m, "fw loaded: %s\n", yesno(csr->dmc_payload != NULL));
 	seq_printf(m, "path: %s\n", csr->fw_path);
@@ -2775,7 +2794,7 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
 	seq_printf(m, "ssp base: 0x%08x\n", I915_READ(CSR_SSP_BASE));
 	seq_printf(m, "htp: 0x%08x\n", I915_READ(CSR_HTP_SKL));
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -3065,8 +3084,10 @@ static int i915_display_info(struct seq_file *m, void *unused)
 	struct intel_crtc *crtc;
 	struct drm_connector *connector;
 	struct drm_connector_list_iter conn_iter;
+	intel_wakeref_t wakeref;
+
+	wakeref = intel_runtime_pm_get(dev_priv);
 
-	intel_runtime_pm_get(dev_priv);
 	seq_printf(m, "CRTC info\n");
 	seq_printf(m, "---------\n");
 	for_each_intel_crtc(dev, crtc) {
@@ -3114,7 +3135,7 @@ static int i915_display_info(struct seq_file *m, void *unused)
 	drm_connector_list_iter_end(&conn_iter);
 	mutex_unlock(&dev->mode_config.mutex);
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -3123,10 +3144,11 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct intel_engine_cs *engine;
+	intel_wakeref_t wakeref;
 	enum intel_engine_id id;
 	struct drm_printer p;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	seq_printf(m, "GT awake? %s (epoch %u)\n",
 		   yesno(dev_priv->gt.awake), dev_priv->gt.epoch);
@@ -3139,7 +3161,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 	for_each_engine(engine, dev_priv, id)
 		intel_engine_dump(engine, &p, "%s\n", engine->name);
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -3252,6 +3274,7 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf,
 {
 	struct seq_file *m = file->private_data;
 	struct drm_i915_private *dev_priv = m->private;
+	intel_wakeref_t wakeref;
 	int ret;
 	bool enable;
 
@@ -3259,13 +3282,15 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf,
 	if (ret < 0)
 		return ret;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
+
 	if (!dev_priv->ipc_enabled && enable)
 		DRM_INFO("Enabling IPC: WM will be proper only after next commit\n");
 	dev_priv->wm.distrust_bios_wm = true;
 	dev_priv->ipc_enabled = enable;
 	intel_enable_ipc(dev_priv);
-	intel_runtime_pm_put_unchecked(dev_priv);
+
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return len;
 }
@@ -4031,11 +4056,12 @@ static int
 i915_drop_caches_set(void *data, u64 val)
 {
 	struct drm_i915_private *i915 = data;
+	intel_wakeref_t wakeref;
 	int ret = 0;
 
 	DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
 		  val, val & DROP_ALL);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	if (val & DROP_RESET_ACTIVE && !intel_engines_are_idle(i915))
 		i915_gem_set_wedged(i915);
@@ -4090,7 +4116,7 @@ i915_drop_caches_set(void *data, u64 val)
 		i915_gem_drain_freed_objects(i915);
 
 out:
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	return ret;
 }
@@ -4103,16 +4129,17 @@ static int
 i915_cache_sharing_get(void *data, u64 *val)
 {
 	struct drm_i915_private *dev_priv = data;
+	intel_wakeref_t wakeref;
 	u32 snpcr;
 
 	if (!(IS_GEN_RANGE(dev_priv, 6, 7)))
 		return -ENODEV;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
 
@@ -4123,6 +4150,7 @@ static int
 i915_cache_sharing_set(void *data, u64 val)
 {
 	struct drm_i915_private *dev_priv = data;
+	intel_wakeref_t wakeref;
 	u32 snpcr;
 
 	if (!(IS_GEN_RANGE(dev_priv, 6, 7)))
@@ -4131,7 +4159,7 @@ i915_cache_sharing_set(void *data, u64 val)
 	if (val > 3)
 		return -EINVAL;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	DRM_DEBUG_DRIVER("Manually setting uncore sharing to %llu\n", val);
 
 	/* Update the cache sharing policy here as well */
@@ -4140,7 +4168,7 @@ i915_cache_sharing_set(void *data, u64 val)
 	snpcr |= (val << GEN6_MBC_SNPCR_SHIFT);
 	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	return 0;
 }
 
@@ -4362,6 +4390,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct sseu_dev_info sseu;
+	intel_wakeref_t wakeref;
 
 	if (INTEL_GEN(dev_priv) < 8)
 		return -ENODEV;
@@ -4376,7 +4405,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 	sseu.max_eus_per_subslice =
 		RUNTIME_INFO(dev_priv)->sseu.max_eus_per_subslice;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	if (IS_CHERRYVIEW(dev_priv)) {
 		cherryview_sseu_device_status(dev_priv, &sseu);
@@ -4388,7 +4417,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 		gen10_sseu_device_status(dev_priv, &sseu);
 	}
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	i915_print_sseu_info(m, false, &sseu);
 
-- 
2.20.1


* [PATCH 09/46] drm/i915/perf: Track the rpm wakeref
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (6 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 08/46] drm/i915: Mark up debugfs " Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-09 10:30   ` Mika Kuoppala
  2019-01-07 11:54 ` [PATCH 10/46] drm/i915/pmu: Track " Chris Wilson
                   ` (42 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Keep track of the wakeref used to keep the device awake, so that we can
catch any leak.
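
As a toy sketch of the idea (self-contained C with stand-in names, not
the driver's implementation): the cookie returned by each get is stored
alongside the stream and handed back on destruction, so an unbalanced
stream shows up as an unreturned cookie.

    #include <assert.h>
    #include <stdio.h>

    typedef unsigned long wakeref_t;    /* 0 means "no reference held" */

    static int outstanding;             /* wakerefs not yet returned */
    static wakeref_t next_cookie = 1;

    static wakeref_t pm_get(void)
    {
        outstanding++;
        return next_cookie++;           /* unique, non-zero cookie */
    }

    static void pm_put(wakeref_t w)
    {
        assert(w);                      /* put without a matching get */
        outstanding--;
    }

    struct stream {
        wakeref_t wakeref;              /* held for the stream's lifetime */
    };

    static void stream_open(struct stream *s)
    {
        s->wakeref = pm_get();          /* device kept awake from here */
    }

    static void stream_close(struct stream *s)
    {
        pm_put(s->wakeref);             /* pairs with the get in open */
        s->wakeref = 0;
    }

    int main(void)
    {
        struct stream s;

        stream_open(&s);
        stream_close(&s);
        printf("leaked wakerefs: %d\n", outstanding); /* 0 if balanced */
        return 0;
    }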

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  2 ++
 drivers/gpu/drm/i915/i915_perf.c | 10 +++++-----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a20bd2ec48de..bf25ae92f5de 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1320,6 +1320,8 @@ struct i915_perf_stream {
 	 */
 	struct list_head link;
 
+	intel_wakeref_t wakeref;
+
 	/**
 	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
 	 * properties given when opening a stream, representing the contents
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e4dfd1477c78..b0cbad2e83c5 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1364,14 +1364,14 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 
 	free_oa_buffer(dev_priv);
 
+	put_oa_config(dev_priv, stream->oa_config);
+
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, stream->wakeref);
 
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 
-	put_oa_config(dev_priv, stream->oa_config);
-
 	if (dev_priv->perf.oa.spurious_report_rs.missed) {
 		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
 			 dev_priv->perf.oa.spurious_report_rs.missed);
@@ -2087,7 +2087,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	 *   In our case we are expecting that taking pm + FORCEWAKE
 	 *   references will effectively disable RC6.
 	 */
-	intel_runtime_pm_get(dev_priv);
+	stream->wakeref = intel_runtime_pm_get(dev_priv);
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
 	ret = alloc_oa_buffer(dev_priv);
@@ -2123,7 +2123,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	put_oa_config(dev_priv, stream->oa_config);
 
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, stream->wakeref);
 
 err_config:
 	if (stream->ctx)
-- 
2.20.1


* [PATCH 10/46] drm/i915/pmu: Track rpm wakeref
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (7 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 09/46] drm/i915/perf: Track the rpm wakeref Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-09 10:37   ` Mika Kuoppala
  2019-01-07 11:54 ` [PATCH 11/46] drm/i915/guc: Track the " Chris Wilson
                   ` (41 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Track the wakeref used for temporary access to the device, and discard
it upon release so that leaks can be identified.
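
Condensed into a toy sketch (stand-in names, not the driver's API
surface): the opportunistic get yields a zero cookie while the device
is suspended, in which case there is nothing to sample and nothing to
release.

    #include <stdbool.h>
    #include <stdio.h>

    typedef unsigned long wakeref_t;

    static bool device_awake;           /* toy power state */
    static wakeref_t next_cookie = 1;

    /* Take a reference only if the device is already awake. */
    static wakeref_t pm_get_if_in_use(void)
    {
        return device_awake ? next_cookie++ : 0;
    }

    static void pm_put(wakeref_t w)
    {
        (void)w;                        /* toy: nothing to release */
    }

    static void sample(void)
    {
        wakeref_t w = pm_get_if_in_use();

        if (!w) {
            puts("asleep: skip the sample, no reference to drop");
            return;
        }
        puts("awake: read the counters");
        pm_put(w);                      /* return exactly the cookie we got */
    }

    int main(void)
    {
        sample();                       /* suspended path */
        device_awake = true;
        sample();                       /* awake path */
        return 0;
    }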

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index c99fcfce79d5..3d43fc9dd25d 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -167,6 +167,7 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	bool fw = false;
 
 	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
@@ -175,7 +176,8 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 	if (!dev_priv->gt.awake)
 		return;
 
-	if (!intel_runtime_pm_get_if_in_use(dev_priv))
+	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
+	if (!wakeref)
 		return;
 
 	for_each_engine(engine, dev_priv, id) {
@@ -210,7 +212,7 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 	if (fw)
 		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 }
 
 static void
@@ -227,11 +229,15 @@ frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 		u32 val;
 
 		val = dev_priv->gt_pm.rps.cur_freq;
-		if (dev_priv->gt.awake &&
-		    intel_runtime_pm_get_if_in_use(dev_priv)) {
-			val = intel_get_cagf(dev_priv,
-					     I915_READ_NOTRACE(GEN6_RPSTAT1));
-			intel_runtime_pm_put_unchecked(dev_priv);
+		if (dev_priv->gt.awake) {
+			intel_wakeref_t wakeref =
+				intel_runtime_pm_get_if_in_use(dev_priv);
+
+			if (wakeref) {
+				val = intel_get_cagf(dev_priv,
+						     I915_READ_NOTRACE(GEN6_RPSTAT1));
+				intel_runtime_pm_put(dev_priv, wakeref);
+			}
 		}
 
 		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
@@ -443,12 +449,14 @@ static u64 __get_rc6(struct drm_i915_private *i915)
 static u64 get_rc6(struct drm_i915_private *i915)
 {
 #if IS_ENABLED(CONFIG_PM)
+	intel_wakeref_t wakeref;
 	unsigned long flags;
 	u64 val;
 
-	if (intel_runtime_pm_get_if_in_use(i915)) {
+	wakeref = intel_runtime_pm_get_if_in_use(i915);
+	if (wakeref) {
 		val = __get_rc6(i915);
-		intel_runtime_pm_put_unchecked(i915);
+		intel_runtime_pm_put(i915, wakeref);
 
 		/*
 		 * If we are coming back from being runtime suspended we must
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread

* [PATCH 11/46] drm/i915/guc: Track the rpm wakeref
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (8 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 10/46] drm/i915/pmu: Track " Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-09 10:53   ` Mika Kuoppala
  2019-01-07 11:54 ` [PATCH 12/46] drm/i915/gem: Track the rpm wakerefs Chris Wilson
                   ` (40 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Keep track of the wakeref we acquire for interacting with the GuC, so
that we can cancel it upon release and clearly identify any leaks.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_guc_log.c | 15 +++++++++------
 drivers/gpu/drm/i915/intel_huc.c     |  5 +++--
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 1b1581a42aa1..20c0b36d748e 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -436,6 +436,7 @@ static void guc_log_capture_logs(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	intel_wakeref_t wakeref;
 
 	guc_read_update_log_buffer(log);
 
@@ -443,9 +444,9 @@ static void guc_log_capture_logs(struct intel_guc_log *log)
 	 * Generally device is expected to be active only at this
 	 * time, so get/put should be really quick.
 	 */
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	guc_action_flush_log_complete(guc);
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 }
 
 int intel_guc_log_create(struct intel_guc_log *log)
@@ -505,6 +506,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
 {
 	struct intel_guc *guc = log_to_guc(log);
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	intel_wakeref_t wakeref;
 	int ret;
 
 	BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN != 0);
@@ -524,11 +526,11 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
 		goto out_unlock;
 	}
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	ret = guc_action_control_log(guc, GUC_LOG_LEVEL_IS_VERBOSE(level),
 				     GUC_LOG_LEVEL_IS_ENABLED(level),
 				     GUC_LOG_LEVEL_TO_VERBOSITY(level));
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	if (ret) {
 		DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret);
 		goto out_unlock;
@@ -601,6 +603,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
 	struct drm_i915_private *i915 = guc_to_i915(guc);
+	intel_wakeref_t wakeref;
 
 	/*
 	 * Before initiating the forceful flush, wait for any pending/ongoing
@@ -608,9 +611,9 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
 	 */
 	flush_work(&log->relay.flush_work);
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	guc_action_flush_log(guc);
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	/* GuC would have updated log buffer by now, so capture it */
 	guc_log_capture_logs(log);
diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c
index c2b076e9bada..3e8c18b6a42d 100644
--- a/drivers/gpu/drm/i915/intel_huc.c
+++ b/drivers/gpu/drm/i915/intel_huc.c
@@ -115,14 +115,15 @@ int intel_huc_auth(struct intel_huc *huc)
 int intel_huc_check_status(struct intel_huc *huc)
 {
 	struct drm_i915_private *dev_priv = huc_to_i915(huc);
+	intel_wakeref_t wakeref;
 	bool status;
 
 	if (!HAS_HUC(dev_priv))
 		return -ENODEV;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return status;
 }
-- 
2.20.1


* [PATCH 12/46] drm/i915/gem: Track the rpm wakerefs
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (9 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 11/46] drm/i915/guc: Track the " Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-09 11:16   ` Mika Kuoppala
  2019-01-07 11:54 ` [PATCH 13/46] drm/i915/fb: Track " Chris Wilson
                   ` (39 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Keep track of the temporary rpm wakerefs used for user access to the
device, so that we can cancel them upon release and clearly identify any
leaks.
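
One wrinkle in this patch, sketched below with stand-in names: some
paths only use the device opportunistically, falling back to an error
rather than waking it, while others must force a wake; either way the
same cookie flows to a single release point.

    #include <stdio.h>

    typedef unsigned long wakeref_t;

    static int device_awake;            /* toy power state */
    static wakeref_t next_cookie = 1;

    static wakeref_t pm_get(void)       /* force a wake */
    {
        device_awake = 1;
        return next_cookie++;
    }

    static wakeref_t pm_get_if_in_use(void)
    {
        return device_awake ? next_cookie++ : 0;
    }

    static void pm_put(wakeref_t w)
    {
        (void)w;
    }

    static int gtt_write(int has_fallback)
    {
        wakeref_t wakeref;

        if (has_fallback) {
            /* Opportunistic: not worth waking the device. */
            wakeref = pm_get_if_in_use();
            if (!wakeref)
                return -1;              /* e.g. fall back to -EFAULT */
        } else {
            /* No fallback possible: force the wake. */
            wakeref = pm_get();
        }

        /* ... perform the access ... */

        pm_put(wakeref);                /* one release for both paths */
        return 0;
    }

    int main(void)
    {
        printf("opportunistic while asleep: %d\n", gtt_write(1));
        printf("forced wake: %d\n", gtt_write(0));
        return 0;
    }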

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c            | 47 +++++++++++++---------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  5 ++-
 drivers/gpu/drm/i915/i915_gem_fence_reg.c  |  6 ++-
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 22 ++++++----
 drivers/gpu/drm/i915/i915_gem_shrinker.c   | 32 +++++++++------
 drivers/gpu/drm/i915/intel_engine_cs.c     | 12 ++++--
 drivers/gpu/drm/i915/intel_uncore.c        |  5 ++-
 7 files changed, 81 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 27f207cbabd9..e04dadeca879 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -786,6 +786,8 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
 
 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 {
+	intel_wakeref_t wakeref;
+
 	/*
 	 * No actual flushing is required for the GTT write domain for reads
 	 * from the GTT domain. Writes to it "immediately" go to main memory
@@ -812,13 +814,13 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 
 	i915_gem_chipset_flush(dev_priv);
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	spin_lock_irq(&dev_priv->uncore.lock);
 
 	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
 
 	spin_unlock_irq(&dev_priv->uncore.lock);
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 }
 
 static void
@@ -1070,6 +1072,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct i915_ggtt *ggtt = &i915->ggtt;
+	intel_wakeref_t wakeref;
 	struct drm_mm_node node;
 	struct i915_vma *vma;
 	void __user *user_data;
@@ -1080,7 +1083,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 				       PIN_MAPPABLE |
 				       PIN_NONFAULT |
@@ -1153,7 +1156,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		i915_vma_unpin(vma);
 	}
 out_unlock:
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	return ret;
@@ -1254,6 +1257,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct i915_ggtt *ggtt = &i915->ggtt;
+	intel_wakeref_t wakeref;
 	struct drm_mm_node node;
 	struct i915_vma *vma;
 	u64 remain, offset;
@@ -1272,13 +1276,14 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		 * This easily dwarfs any performance advantage from
 		 * using the cache bypass of indirect GGTT access.
 		 */
-		if (!intel_runtime_pm_get_if_in_use(i915)) {
+		wakeref = intel_runtime_pm_get_if_in_use(i915);
+		if (!wakeref) {
 			ret = -EFAULT;
 			goto out_unlock;
 		}
 	} else {
 		/* No backing pages, no fallback, we must force GGTT access */
-		intel_runtime_pm_get(i915);
+		wakeref = intel_runtime_pm_get(i915);
 	}
 
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
@@ -1360,7 +1365,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		i915_vma_unpin(vma);
 	}
 out_rpm:
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 out_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
 	return ret;
@@ -1865,6 +1870,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	bool write = area->vm_flags & VM_WRITE;
+	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	pgoff_t page_offset;
 	int ret;
@@ -1894,7 +1900,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	if (ret)
 		goto err;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
@@ -1972,7 +1978,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 err_unlock:
 	mutex_unlock(&dev->struct_mutex);
 err_rpm:
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	i915_gem_object_unpin_pages(obj);
 err:
 	switch (ret) {
@@ -2045,6 +2051,7 @@ void
 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	intel_wakeref_t wakeref;
 
 	/* Serialisation between user GTT access and our code depends upon
 	 * revoking the CPU's PTE whilst the mutex is held. The next user
@@ -2055,7 +2062,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	 * wakeref.
 	 */
 	lockdep_assert_held(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	if (!obj->userfault_count)
 		goto out;
@@ -2072,7 +2079,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	wmb();
 
 out:
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 }
 
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
@@ -4707,8 +4714,9 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 				    struct llist_node *freed)
 {
 	struct drm_i915_gem_object *obj, *on;
+	intel_wakeref_t wakeref;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	llist_for_each_entry_safe(obj, on, freed, freed) {
 		struct i915_vma *vma, *vn;
 
@@ -4769,7 +4777,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 		if (on)
 			cond_resched();
 	}
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 }
 
 static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
@@ -4878,11 +4886,13 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
 
 void i915_gem_sanitize(struct drm_i915_private *i915)
 {
+	intel_wakeref_t wakeref;
+
 	GEM_TRACE("\n");
 
 	mutex_lock(&i915->drm.struct_mutex);
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
 
 	/*
@@ -4905,7 +4915,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	intel_engines_sanitize(i915, false);
 
 	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -4913,11 +4923,12 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 
 int i915_gem_suspend(struct drm_i915_private *i915)
 {
+	intel_wakeref_t wakeref;
 	int ret;
 
 	GEM_TRACE("\n");
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	intel_suspend_gt_powersave(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
@@ -4969,12 +4980,12 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	if (WARN_ON(!intel_engines_are_idle(i915)))
 		i915_gem_set_wedged(i915); /* no hope, discard everything */
 
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	return 0;
 
 err_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a52fa42ed8b1..76bb1a89e530 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2203,6 +2203,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	struct i915_execbuffer eb;
 	struct dma_fence *in_fence = NULL;
 	struct sync_file *out_fence = NULL;
+	intel_wakeref_t wakeref;
 	int out_fence_fd = -1;
 	int err;
 
@@ -2273,7 +2274,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	 * wakeref that we hold until the GPU has been idle for at least
 	 * 100ms.
 	 */
-	intel_runtime_pm_get(eb.i915);
+	wakeref = intel_runtime_pm_get(eb.i915);
 
 	err = i915_mutex_lock_interruptible(dev);
 	if (err)
@@ -2425,7 +2426,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		eb_release_vmas(&eb);
 	mutex_unlock(&dev->struct_mutex);
 err_rpm:
-	intel_runtime_pm_put_unchecked(eb.i915);
+	intel_runtime_pm_put(eb.i915, wakeref);
 	i915_gem_context_put(eb.ctx);
 err_destroy:
 	eb_destroy(&eb);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 1f72f5047945..e6edcd83450c 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -210,6 +210,7 @@ static void fence_write(struct drm_i915_fence_reg *fence,
 static int fence_update(struct drm_i915_fence_reg *fence,
 			struct i915_vma *vma)
 {
+	intel_wakeref_t wakeref;
 	int ret;
 
 	if (vma) {
@@ -257,9 +258,10 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 	 * If the device is currently powered down, we will defer the write
 	 * to the runtime resume, see i915_gem_restore_fences().
 	 */
-	if (intel_runtime_pm_get_if_in_use(fence->i915)) {
+	wakeref = intel_runtime_pm_get_if_in_use(fence->i915);
+	if (wakeref) {
 		fence_write(fence, vma);
-		intel_runtime_pm_put_unchecked(fence->i915);
+		intel_runtime_pm_put(fence->i915, wakeref);
 	}
 
 	if (vma) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 6dac9614f7ba..4bec10286487 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2528,6 +2528,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
 	struct drm_i915_gem_object *obj = vma->obj;
+	intel_wakeref_t wakeref;
 	u32 pte_flags;
 
 	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
@@ -2535,9 +2536,9 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	if (i915_gem_object_is_readonly(obj))
 		pte_flags |= PTE_READ_ONLY;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
 
@@ -2554,10 +2555,11 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 static void ggtt_unbind_vma(struct i915_vma *vma)
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
+	intel_wakeref_t wakeref;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 }
 
 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
@@ -2589,9 +2591,11 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	}
 
 	if (flags & I915_VMA_GLOBAL_BIND) {
-		intel_runtime_pm_get(i915);
+		intel_wakeref_t wakeref;
+
+		wakeref = intel_runtime_pm_get(i915);
 		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
-		intel_runtime_pm_put_unchecked(i915);
+		intel_runtime_pm_put(i915, wakeref);
 	}
 
 	return 0;
@@ -2602,9 +2606,11 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
 	struct drm_i915_private *i915 = vma->vm->i915;
 
 	if (vma->flags & I915_VMA_GLOBAL_BIND) {
-		intel_runtime_pm_get(i915);
+		intel_wakeref_t wakeref;
+
+		wakeref = intel_runtime_pm_get(i915);
 		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
-		intel_runtime_pm_put_unchecked(i915);
+		intel_runtime_pm_put(i915, wakeref);
 	}
 
 	if (vma->flags & I915_VMA_LOCAL_BIND) {
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 16693dd4d019..bc230e43b98f 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -154,6 +154,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
 		{ &i915->mm.bound_list, I915_SHRINK_BOUND },
 		{ NULL, 0 },
 	}, *phase;
+	intel_wakeref_t wakeref = 0;
 	unsigned long count = 0;
 	unsigned long scanned = 0;
 	bool unlock;
@@ -183,9 +184,11 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	 * device just to recover a little memory. If absolutely necessary,
 	 * we will force the wake during oom-notifier.
 	 */
-	if ((flags & I915_SHRINK_BOUND) &&
-	    !intel_runtime_pm_get_if_in_use(i915))
-		flags &= ~I915_SHRINK_BOUND;
+	if (flags & I915_SHRINK_BOUND) {
+		wakeref = intel_runtime_pm_get_if_in_use(i915);
+		if (!wakeref)
+			flags &= ~I915_SHRINK_BOUND;
+	}
 
 	/*
 	 * As we may completely rewrite the (un)bound list whilst unbinding
@@ -266,7 +269,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	}
 
 	if (flags & I915_SHRINK_BOUND)
-		intel_runtime_pm_put_unchecked(i915);
+		intel_runtime_pm_put(i915, wakeref);
 
 	i915_retire_requests(i915);
 
@@ -293,14 +296,15 @@ i915_gem_shrink(struct drm_i915_private *i915,
  */
 unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
 {
+	intel_wakeref_t wakeref;
 	unsigned long freed;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	freed = i915_gem_shrink(i915, -1UL, NULL,
 				I915_SHRINK_BOUND |
 				I915_SHRINK_UNBOUND |
 				I915_SHRINK_ACTIVE);
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	return freed;
 }
@@ -371,14 +375,16 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 					 I915_SHRINK_BOUND |
 					 I915_SHRINK_UNBOUND);
 	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
-		intel_runtime_pm_get(i915);
+		intel_wakeref_t wakeref;
+
+		wakeref = intel_runtime_pm_get(i915);
 		freed += i915_gem_shrink(i915,
 					 sc->nr_to_scan - sc->nr_scanned,
 					 &sc->nr_scanned,
 					 I915_SHRINK_ACTIVE |
 					 I915_SHRINK_BOUND |
 					 I915_SHRINK_UNBOUND);
-		intel_runtime_pm_put_unchecked(i915);
+		intel_runtime_pm_put(i915, wakeref);
 	}
 
 	shrinker_unlock(i915, unlock);
@@ -418,12 +424,13 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 		container_of(nb, struct drm_i915_private, mm.oom_notifier);
 	struct drm_i915_gem_object *obj;
 	unsigned long unevictable, bound, unbound, freed_pages;
+	intel_wakeref_t wakeref;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	freed_pages = i915_gem_shrink(i915, -1UL, NULL,
 				      I915_SHRINK_BOUND |
 				      I915_SHRINK_UNBOUND);
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	/* Because we may be allocating inside our own driver, we cannot
 	 * assert that there are no objects with pinned pages that are not
@@ -461,6 +468,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
 	struct i915_vma *vma, *next;
 	unsigned long freed_pages = 0;
+	intel_wakeref_t wakeref;
 	bool unlock;
 	int ret;
 
@@ -474,12 +482,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 	if (ret)
 		goto out;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	freed_pages += i915_gem_shrink(i915, -1UL, NULL,
 				       I915_SHRINK_BOUND |
 				       I915_SHRINK_UNBOUND |
 				       I915_SHRINK_VMAPS);
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	/* We also want to clear any cached iomaps as they wrap vmap */
 	list_for_each_entry_safe(vma, next,
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 85131166589c..bf4dae2649ab 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -913,10 +913,12 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
 static bool ring_is_idle(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
+	intel_wakeref_t wakeref;
 	bool idle = true;
 
 	/* If the whole device is asleep, the engine must be idle */
-	if (!intel_runtime_pm_get_if_in_use(dev_priv))
+	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
+	if (!wakeref)
 		return true;
 
 	/* First check that no commands are left in the ring */
@@ -928,7 +930,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
 	if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
 		idle = false;
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return idle;
 }
@@ -1425,6 +1427,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	const struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
 	struct i915_request *rq, *last;
+	intel_wakeref_t wakeref;
 	unsigned long flags;
 	struct rb_node *rb;
 	int count;
@@ -1483,9 +1486,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 
 	rcu_read_unlock();
 
-	if (intel_runtime_pm_get_if_in_use(engine->i915)) {
+	wakeref = intel_runtime_pm_get_if_in_use(engine->i915);
+	if (wakeref) {
 		intel_engine_print_registers(engine, m);
-		intel_runtime_pm_put_unchecked(engine->i915);
+		intel_runtime_pm_put(engine->i915, wakeref);
 	} else {
 		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
 	}
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 8d4c76ac0e7d..d494d92da02c 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1670,6 +1670,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_reg_read *reg = data;
 	struct reg_whitelist const *entry;
+	intel_wakeref_t wakeref;
 	unsigned int flags;
 	int remain;
 	int ret = 0;
@@ -1695,7 +1696,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
 
 	flags = reg->offset & (entry->size - 1);
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	if (entry->size == 8 && flags == I915_REG_READ_8B_WA)
 		reg->val = I915_READ64_2x32(entry->offset_ldw,
 					    entry->offset_udw);
@@ -1709,7 +1710,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
 		reg->val = I915_READ8(entry->offset_ldw);
 	else
 		ret = -EINVAL;
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return ret;
 }
-- 
2.20.1


* [PATCH 13/46] drm/i915/fb: Track rpm wakerefs
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (10 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 12/46] drm/i915/gem: Track the rpm wakerefs Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-09 11:39   ` Mika Kuoppala
  2019-01-07 11:54 ` [PATCH 14/46] drm/i915/hotplug: Track temporary rpm wakeref Chris Wilson
                   ` (38 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Keep track of the rpm wakeref used for framebuffer access, so that we
can cancel it upon release and more clearly identify leaks.
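
Sketched with stand-in names (not the driver's code), the shape of the
pin path is that both the success return and the error unwind funnel
through a release of the same cookie, so neither exit can leak the
reference.

    #include <stdio.h>

    typedef unsigned long wakeref_t;

    static int outstanding;             /* wakerefs not yet returned */
    static wakeref_t next_cookie = 1;

    static wakeref_t pm_get(void)
    {
        outstanding++;
        return next_cookie++;
    }

    static void pm_put(wakeref_t w)
    {
        (void)w;
        outstanding--;
    }

    static int pin_and_fence(int fail)
    {
        wakeref_t wakeref = pm_get();
        int err = 0;

        if (fail) {
            err = -1;                   /* error unwind */
            goto out;
        }
        /* ... pin and fence the framebuffer ... */
    out:
        pm_put(wakeref);                /* both exits release the cookie */
        return err;
    }

    int main(void)
    {
        pin_and_fence(0);
        pin_and_fence(1);
        printf("leaked wakerefs: %d\n", outstanding); /* 0 */
        return 0;
    }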

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 5 +++--
 drivers/gpu/drm/i915/intel_fbdev.c   | 9 +++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c6000aa47a8d..ea70cb8cf50a 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2024,6 +2024,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 	struct drm_device *dev = fb->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	unsigned int pinctl;
 	u32 alignment;
@@ -2047,7 +2048,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 	 * intel_runtime_pm_put(), so it is correct to wrap only the
 	 * pin/unpin/fence and not more.
 	 */
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
 
@@ -2102,7 +2103,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 err:
 	atomic_dec(&dev_priv->gpu_error.pending_fb_pin);
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	return vma;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 11d877b908e2..de14cd78aa0f 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -178,8 +178,9 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	const struct i915_ggtt_view view = {
 		.type = I915_GGTT_VIEW_NORMAL,
 	};
-	struct fb_info *info;
 	struct drm_framebuffer *fb;
+	intel_wakeref_t wakeref;
+	struct fb_info *info;
 	struct i915_vma *vma;
 	unsigned long flags = 0;
 	bool prealloc = false;
@@ -210,7 +211,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	}
 
 	mutex_lock(&dev->struct_mutex);
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	/* Pin the GGTT vma for our access via info->screen_base.
 	 * This also validates that any existing fb inherited from the
@@ -277,7 +278,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	ifbdev->vma = vma;
 	ifbdev->vma_flags = flags;
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	mutex_unlock(&dev->struct_mutex);
 	vga_switcheroo_client_fb_set(pdev, info);
 	return 0;
@@ -285,7 +286,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 out_unpin:
 	intel_unpin_fb_vma(vma, flags);
 out_unlock:
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
-- 
2.20.1


* [PATCH 14/46] drm/i915/hotplug: Track temporary rpm wakeref
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (11 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 13/46] drm/i915/fb: Track " Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-09 11:40   ` Mika Kuoppala
  2019-01-07 11:54 ` [PATCH 15/46] drm/i915/panel: " Chris Wilson
                   ` (37 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Keep track of the temporary rpm wakeref used inside hotplug detection,
so that we can cancel it immediately upon release and clearly identify
leaks.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_hotplug.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c
index 067277ca7cff..6df8820b8b80 100644
--- a/drivers/gpu/drm/i915/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/intel_hotplug.c
@@ -227,9 +227,10 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
 		container_of(work, typeof(*dev_priv),
 			     hotplug.reenable_work.work);
 	struct drm_device *dev = &dev_priv->drm;
+	intel_wakeref_t wakeref;
 	enum hpd_pin pin;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	spin_lock_irq(&dev_priv->irq_lock);
 	for_each_hpd_pin(pin) {
@@ -262,7 +263,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
 		dev_priv->display.hpd_irq_setup(dev_priv);
 	spin_unlock_irq(&dev_priv->irq_lock);
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 }
 
 bool intel_encoder_hotplug(struct intel_encoder *encoder,
-- 
2.20.1


* [PATCH 15/46] drm/i915/panel: Track temporary rpm wakeref
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (12 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 14/46] drm/i915/hotplug: Track temporary rpm wakeref Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-09 11:41   ` Mika Kuoppala
  2019-01-07 11:54 ` [PATCH 16/46] drm/i915/selftests: Mark up rpm wakerefs Chris Wilson
                   ` (36 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Keep track of the temporary rpm wakeref used for panel backlight
access, so that we can cancel it immediately upon release and more
clearly identify leaks.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_panel.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index c2b7455a023e..93a2e4b5c54c 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -1203,17 +1203,18 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd)
 	struct intel_connector *connector = bl_get_data(bd);
 	struct drm_device *dev = connector->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
+	intel_wakeref_t wakeref;
 	u32 hw_level;
 	int ret;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 
 	hw_level = intel_panel_get_backlight(connector);
 	ret = scale_hw_to_user(connector, hw_level, bd->props.max_brightness);
 
 	drm_modeset_unlock(&dev->mode_config.connection_mutex);
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return ret;
 }
-- 
2.20.1


* [PATCH 16/46] drm/i915/selftests: Mark up rpm wakerefs
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (13 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 15/46] drm/i915/panel: " Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-09 12:54   ` Mika Kuoppala
  2019-01-07 11:54 ` [PATCH 17/46] drm/i915: Syntactic sugar for using intel_runtime_pm Chris Wilson
                   ` (35 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Track the temporary wakerefs used within the selftests so that any
leaks can be clearly identified.
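
As a toy sketch of what this buys (stand-in names, not the selftest
framework itself): with cookies, a harness can check that every get was
returned before a test ends, so a deliberately leaked cookie trips the
check.

    #include <stdio.h>

    typedef unsigned long wakeref_t;

    static int outstanding;             /* wakerefs not yet returned */
    static wakeref_t next_cookie = 1;

    static wakeref_t pm_get(void)
    {
        outstanding++;
        return next_cookie++;
    }

    static void pm_put(wakeref_t w)
    {
        (void)w;
        outstanding--;
    }

    static void leaky_test(void)
    {
        wakeref_t w = pm_get();
        (void)w;                        /* oops: never handed back */
    }

    static void balanced_test(void)
    {
        wakeref_t w = pm_get();
        pm_put(w);
    }

    int main(void)
    {
        balanced_test();
        leaky_test();
        if (outstanding)
            fprintf(stderr, "selftests leaked %d wakeref(s)\n", outstanding);
        return outstanding ? 1 : 0;
    }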

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/selftests/huge_pages.c   |  5 ++--
 drivers/gpu/drm/i915/selftests/i915_gem.c     | 29 ++++++++++++-------
 .../drm/i915/selftests/i915_gem_coherency.c   |  5 ++--
 .../gpu/drm/i915/selftests/i915_gem_context.c | 27 ++++++++++-------
 .../gpu/drm/i915/selftests/i915_gem_evict.c   | 11 ++++---
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 10 ++++---
 .../gpu/drm/i915/selftests/i915_gem_object.c  | 18 ++++++++----
 drivers/gpu/drm/i915/selftests/i915_request.c | 22 ++++++++------
 drivers/gpu/drm/i915/selftests/intel_guc.c    | 10 ++++---
 .../gpu/drm/i915/selftests/intel_hangcheck.c  | 15 ++++++----
 drivers/gpu/drm/i915/selftests/intel_lrc.c    | 25 +++++++++-------
 .../drm/i915/selftests/intel_workarounds.c    | 27 ++++++++++-------
 12 files changed, 126 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 731dfd3d3fc8..c7a4599173bb 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1760,6 +1760,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
 	};
 	struct drm_file *file;
 	struct i915_gem_context *ctx;
+	intel_wakeref_t wakeref;
 	int err;
 
 	if (!HAS_PPGTT(dev_priv)) {
@@ -1775,7 +1776,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
 		return PTR_ERR(file);
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	ctx = live_context(dev_priv, file);
 	if (IS_ERR(ctx)) {
@@ -1789,7 +1790,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
 	err = i915_subtests(tests, ctx);
 
 out_unlock:
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	mock_file_free(dev_priv, file);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 762e1a7125f5..01a46c46fe25 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -16,9 +16,10 @@ static int switch_to_context(struct drm_i915_private *i915,
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
@@ -32,7 +33,7 @@ static int switch_to_context(struct drm_i915_private *i915,
 		i915_request_add(rq);
 	}
 
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	return err;
 }
@@ -65,7 +66,9 @@ static void trash_stolen(struct drm_i915_private *i915)
 
 static void simulate_hibernate(struct drm_i915_private *i915)
 {
-	intel_runtime_pm_get(i915);
+	intel_wakeref_t wakeref;
+
+	wakeref = intel_runtime_pm_get(i915);
 
 	/*
 	 * As a final sting in the tail, invalidate stolen. Under a real S4,
@@ -76,7 +79,7 @@ static void simulate_hibernate(struct drm_i915_private *i915)
 	 */
 	trash_stolen(i915);
 
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 }
 
 static int pm_prepare(struct drm_i915_private *i915)
@@ -93,39 +96,45 @@ static int pm_prepare(struct drm_i915_private *i915)
 
 static void pm_suspend(struct drm_i915_private *i915)
 {
-	intel_runtime_pm_get(i915);
+	intel_wakeref_t wakeref;
+
+	wakeref = intel_runtime_pm_get(i915);
 
 	i915_gem_suspend_gtt_mappings(i915);
 	i915_gem_suspend_late(i915);
 
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 }
 
 static void pm_hibernate(struct drm_i915_private *i915)
 {
-	intel_runtime_pm_get(i915);
+	intel_wakeref_t wakeref;
+
+	wakeref = intel_runtime_pm_get(i915);
 
 	i915_gem_suspend_gtt_mappings(i915);
 
 	i915_gem_freeze(i915);
 	i915_gem_freeze_late(i915);
 
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 }
 
 static void pm_resume(struct drm_i915_private *i915)
 {
+	intel_wakeref_t wakeref;
+
 	/*
 	 * Both suspend and hibernate follow the same wakeup path and assume
 	 * that runtime-pm just works.
 	 */
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	intel_engines_sanitize(i915, false);
 	i915_gem_sanitize(i915);
 	i915_gem_resume(i915);
 
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 }
 
 static int igt_gem_suspend(void *arg)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index eea4fc2445ae..fd89a5a33c1a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -279,6 +279,7 @@ static int igt_gem_coherency(void *arg)
 	struct drm_i915_private *i915 = arg;
 	const struct igt_coherency_mode *read, *write, *over;
 	struct drm_i915_gem_object *obj;
+	intel_wakeref_t wakeref;
 	unsigned long count, n;
 	u32 *offsets, *values;
 	int err = 0;
@@ -298,7 +299,7 @@ static int igt_gem_coherency(void *arg)
 	values = offsets + ncachelines;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	for (over = igt_coherency_mode; over->name; over++) {
 		if (!over->set)
 			continue;
@@ -376,7 +377,7 @@ static int igt_gem_coherency(void *arg)
 		}
 	}
 unlock:
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	kfree(offsets);
 	return err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 6e1a0711d201..7a9b1f20b019 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -119,6 +119,7 @@ static int live_nop_switch(void *arg)
 	struct intel_engine_cs *engine;
 	struct i915_gem_context **ctx;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	struct drm_file *file;
 	struct live_test t;
 	unsigned long n;
@@ -140,7 +141,7 @@ static int live_nop_switch(void *arg)
 		return PTR_ERR(file);
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx) {
@@ -243,7 +244,7 @@ static int live_nop_switch(void *arg)
 	}
 
 out_unlock:
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	mock_file_free(i915, file);
 	return err;
@@ -593,6 +594,8 @@ static int igt_ctx_exec(void *arg)
 		}
 
 		for_each_engine(engine, i915, id) {
+			intel_wakeref_t wakeref;
+
 			if (!engine->context_size)
 				continue; /* No logical context support in HW */
 
@@ -607,9 +610,9 @@ static int igt_ctx_exec(void *arg)
 				}
 			}
 
-			intel_runtime_pm_get(i915);
+			wakeref = intel_runtime_pm_get(i915);
 			err = gpu_fill(obj, ctx, engine, dw);
-			intel_runtime_pm_put_unchecked(i915);
+			intel_runtime_pm_put(i915, wakeref);
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
@@ -699,6 +702,8 @@ static int igt_ctx_readonly(void *arg)
 		unsigned int id;
 
 		for_each_engine(engine, i915, id) {
+			intel_wakeref_t wakeref;
+
 			if (!intel_engine_can_store_dword(engine))
 				continue;
 
@@ -713,9 +718,9 @@ static int igt_ctx_readonly(void *arg)
 					i915_gem_object_set_readonly(obj);
 			}
 
-			intel_runtime_pm_get(i915);
+			wakeref = intel_runtime_pm_get(i915);
 			err = gpu_fill(obj, ctx, engine, dw);
-			intel_runtime_pm_put_unchecked(i915);
+			intel_runtime_pm_put(i915, wakeref);
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
@@ -976,6 +981,7 @@ static int igt_vm_isolation(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct i915_gem_context *ctx_a, *ctx_b;
 	struct intel_engine_cs *engine;
+	intel_wakeref_t wakeref;
 	struct drm_file *file;
 	I915_RND_STATE(prng);
 	unsigned long count;
@@ -1022,7 +1028,7 @@ static int igt_vm_isolation(void *arg)
 	GEM_BUG_ON(ctx_b->ppgtt->vm.total != vm_total);
 	vm_total -= I915_GTT_PAGE_SIZE;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	count = 0;
 	for_each_engine(engine, i915, id) {
@@ -1067,7 +1073,7 @@ static int igt_vm_isolation(void *arg)
 		count, RUNTIME_INFO(i915)->num_rings);
 
 out_rpm:
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 out_unlock:
 	if (end_live_test(&t))
 		err = -EIO;
@@ -1165,6 +1171,7 @@ static int igt_switch_to_kernel_context(void *arg)
 	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err;
 
 	/*
@@ -1175,7 +1182,7 @@ static int igt_switch_to_kernel_context(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	ctx = kernel_context(i915);
 	if (IS_ERR(ctx)) {
@@ -1200,7 +1207,7 @@ static int igt_switch_to_kernel_context(void *arg)
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
 
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	kernel_context_close(ctx);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 8d22f73a9b63..e1ff6a1c2cb0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -336,6 +336,7 @@ static int igt_evict_contexts(void *arg)
 		struct drm_mm_node node;
 		struct reserved *next;
 	} *reserved = NULL;
+	intel_wakeref_t wakeref;
 	struct drm_mm_node hole;
 	unsigned long count;
 	int err;
@@ -355,7 +356,7 @@ static int igt_evict_contexts(void *arg)
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	/* Reserve a block so that we know we have enough to fit a few rq */
 	memset(&hole, 0, sizeof(hole));
@@ -400,8 +401,10 @@ static int igt_evict_contexts(void *arg)
 		struct drm_file *file;
 
 		file = mock_file(i915);
-		if (IS_ERR(file))
-			return PTR_ERR(file);
+		if (IS_ERR(file)) {
+			err = PTR_ERR(file);
+			break;
+		}
 
 		count = 0;
 		mutex_lock(&i915->drm.struct_mutex);
@@ -464,7 +467,7 @@ static int igt_evict_contexts(void *arg)
 	}
 	if (drm_mm_node_allocated(&hole))
 		drm_mm_remove_node(&hole);
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 87cb0602a5fc..fea8ab14e79d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -275,6 +275,7 @@ static int lowlevel_hole(struct drm_i915_private *i915,
 
 		for (n = 0; n < count; n++) {
 			u64 addr = hole_start + order[n] * BIT_ULL(size);
+			intel_wakeref_t wakeref;
 
 			GEM_BUG_ON(addr + BIT_ULL(size) > vm->total);
 
@@ -293,9 +294,9 @@ static int lowlevel_hole(struct drm_i915_private *i915,
 			mock_vma.node.size = BIT_ULL(size);
 			mock_vma.node.start = addr;
 
-			intel_runtime_pm_get(i915);
+			wakeref = intel_runtime_pm_get(i915);
 			vm->insert_entries(vm, &mock_vma, I915_CACHE_NONE, 0);
-			intel_runtime_pm_put_unchecked(i915);
+			intel_runtime_pm_put(i915, wakeref);
 		}
 		count = n;
 
@@ -1144,6 +1145,7 @@ static int igt_ggtt_page(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct drm_i915_gem_object *obj;
+	intel_wakeref_t wakeref;
 	struct drm_mm_node tmp;
 	unsigned int *order, n;
 	int err;
@@ -1169,7 +1171,7 @@ static int igt_ggtt_page(void *arg)
 	if (err)
 		goto out_unpin;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	for (n = 0; n < count; n++) {
 		u64 offset = tmp.start + n * PAGE_SIZE;
@@ -1216,7 +1218,7 @@ static int igt_ggtt_page(void *arg)
 	kfree(order);
 out_remove:
 	ggtt->vm.clear_range(&ggtt->vm, tmp.start, tmp.size);
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	drm_mm_remove_node(&tmp);
 out_unpin:
 	i915_gem_object_unpin_pages(obj);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index b03890c590d7..3575e1387c3f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -308,6 +308,7 @@ static int igt_partial_tiling(void *arg)
 	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
 	struct drm_i915_private *i915 = arg;
 	struct drm_i915_gem_object *obj;
+	intel_wakeref_t wakeref;
 	int tiling;
 	int err;
 
@@ -333,7 +334,7 @@ static int igt_partial_tiling(void *arg)
 	}
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	if (1) {
 		IGT_TIMEOUT(end);
@@ -444,7 +445,7 @@ next_tiling: ;
 	}
 
 out_unlock:
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 out:
@@ -506,11 +507,14 @@ static void disable_retire_worker(struct drm_i915_private *i915)
 
 	mutex_lock(&i915->drm.struct_mutex);
 	if (!i915->gt.active_requests++) {
-		intel_runtime_pm_get(i915);
+		intel_wakeref_t wakeref;
+
+		wakeref = intel_runtime_pm_get(i915);
 		i915_gem_unpark(i915);
-		intel_runtime_pm_put_unchecked(i915);
+		intel_runtime_pm_put(i915, wakeref);
 	}
 	mutex_unlock(&i915->drm.struct_mutex);
+
 	cancel_delayed_work_sync(&i915->gt.retire_work);
 	cancel_delayed_work_sync(&i915->gt.idle_work);
 }
@@ -578,6 +582,8 @@ static int igt_mmap_offset_exhaustion(void *arg)
 
 	/* Now fill with busy dead objects that we expect to reap */
 	for (loop = 0; loop < 3; loop++) {
+		intel_wakeref_t wakeref;
+
 		if (i915_terminally_wedged(&i915->gpu_error))
 			break;
 
@@ -588,9 +594,9 @@ static int igt_mmap_offset_exhaustion(void *arg)
 		}
 
 		mutex_lock(&i915->drm.struct_mutex);
-		intel_runtime_pm_get(i915);
+		wakeref = intel_runtime_pm_get(i915);
 		err = make_obj_busy(obj);
-		intel_runtime_pm_put_unchecked(i915);
+		intel_runtime_pm_put(i915, wakeref);
 		mutex_unlock(&i915->drm.struct_mutex);
 		if (err) {
 			pr_err("[loop %d] Failed to busy the object\n", loop);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index e8880cabd5c7..8b73a8c21377 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -332,6 +332,7 @@ static int live_nop_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
+	intel_wakeref_t wakeref;
 	struct live_test t;
 	unsigned int id;
 	int err = -ENODEV;
@@ -342,7 +343,7 @@ static int live_nop_request(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	for_each_engine(engine, i915, id) {
 		struct i915_request *request = NULL;
@@ -403,7 +404,7 @@ static int live_nop_request(void *arg)
 	}
 
 out_unlock:
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -478,8 +479,9 @@ static int live_empty_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
-	struct live_test t;
+	intel_wakeref_t wakeref;
 	struct i915_vma *batch;
+	struct live_test t;
 	unsigned int id;
 	int err = 0;
 
@@ -489,7 +491,7 @@ static int live_empty_request(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	batch = empty_batch(i915);
 	if (IS_ERR(batch)) {
@@ -553,7 +555,7 @@ static int live_empty_request(void *arg)
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
 out_unlock:
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -637,6 +639,7 @@ static int live_all_engines(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
 	struct i915_request *request[I915_NUM_ENGINES];
+	intel_wakeref_t wakeref;
 	struct i915_vma *batch;
 	struct live_test t;
 	unsigned int id;
@@ -648,7 +651,7 @@ static int live_all_engines(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	err = begin_live_test(&t, i915, __func__, "");
 	if (err)
@@ -731,7 +734,7 @@ static int live_all_engines(void *arg)
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
 out_unlock:
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -742,6 +745,7 @@ static int live_sequential_engines(void *arg)
 	struct i915_request *request[I915_NUM_ENGINES] = {};
 	struct i915_request *prev = NULL;
 	struct intel_engine_cs *engine;
+	intel_wakeref_t wakeref;
 	struct live_test t;
 	unsigned int id;
 	int err;
@@ -753,7 +757,7 @@ static int live_sequential_engines(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	err = begin_live_test(&t, i915, __func__, "");
 	if (err)
@@ -860,7 +864,7 @@ static int live_sequential_engines(void *arg)
 		i915_request_put(request[id]);
 	}
 out_unlock:
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c
index 3590ba3d8897..c5e0a0e98fcb 100644
--- a/drivers/gpu/drm/i915/selftests/intel_guc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_guc.c
@@ -137,12 +137,13 @@ static bool client_doorbell_in_sync(struct intel_guc_client *client)
 static int igt_guc_clients(void *args)
 {
 	struct drm_i915_private *dev_priv = args;
+	intel_wakeref_t wakeref;
 	struct intel_guc *guc;
 	int err = 0;
 
 	GEM_BUG_ON(!HAS_GUC(dev_priv));
 	mutex_lock(&dev_priv->drm.struct_mutex);
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	guc = &dev_priv->guc;
 	if (!guc) {
@@ -225,7 +226,7 @@ static int igt_guc_clients(void *args)
 	guc_clients_create(guc);
 	guc_clients_enable(guc);
 unlock:
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
 }
@@ -238,13 +239,14 @@ static int igt_guc_clients(void *args)
 static int igt_guc_doorbells(void *arg)
 {
 	struct drm_i915_private *dev_priv = arg;
+	intel_wakeref_t wakeref;
 	struct intel_guc *guc;
 	int i, err = 0;
 	u16 db_id;
 
 	GEM_BUG_ON(!HAS_GUC(dev_priv));
 	mutex_lock(&dev_priv->drm.struct_mutex);
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	guc = &dev_priv->guc;
 	if (!guc) {
@@ -337,7 +339,7 @@ static int igt_guc_doorbells(void *arg)
 			guc_client_free(clients[i]);
 		}
 unlock:
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 33bd3c4b6fa3..12550b55c42f 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -388,12 +388,13 @@ static int igt_global_reset(void *arg)
 static int igt_wedged_reset(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
+	intel_wakeref_t wakeref;
 
 	/* Check that we can recover a wedged device with a GPU reset */
 
 	igt_global_reset_lock(i915);
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	i915_gem_set_wedged(i915);
 	GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error));
@@ -402,7 +403,7 @@ static int igt_wedged_reset(void *arg)
 	i915_reset(i915, ALL_ENGINES, NULL);
 	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
 
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	igt_global_reset_unlock(i915);
 
@@ -1600,6 +1601,7 @@ static int igt_atomic_reset(void *arg)
 		{ }
 	};
 	struct drm_i915_private *i915 = arg;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that the resets are usable from atomic context */
@@ -1609,7 +1611,7 @@ static int igt_atomic_reset(void *arg)
 
 	igt_global_reset_lock(i915);
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	/* Flush any requests before we get started and check basics */
 	force_reset(i915);
@@ -1636,7 +1638,7 @@ static int igt_atomic_reset(void *arg)
 	force_reset(i915);
 
 unlock:
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	igt_global_reset_unlock(i915);
 
@@ -1660,6 +1662,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(igt_handle_error),
 		SUBTEST(igt_atomic_reset),
 	};
+	intel_wakeref_t wakeref;
 	bool saved_hangcheck;
 	int err;
 
@@ -1669,7 +1672,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 	if (i915_terminally_wedged(&i915->gpu_error))
 		return -EIO; /* we're long past hope of a successful reset */
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
 
 	err = i915_subtests(tests, i915);
@@ -1679,7 +1682,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	i915_modparams.enable_hangcheck = saved_hangcheck;
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index ac1b18a17f3c..e6073cd4719c 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -18,13 +18,14 @@ static int live_sanitycheck(void *arg)
 	struct i915_gem_context *ctx;
 	enum intel_engine_id id;
 	struct igt_spinner spin;
+	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	if (igt_spinner_init(&spin, i915))
 		goto err_unlock;
@@ -65,7 +66,7 @@ static int live_sanitycheck(void *arg)
 	igt_spinner_fini(&spin);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -77,13 +78,14 @@ static int live_preempt(void *arg)
 	struct igt_spinner spin_hi, spin_lo;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	if (igt_spinner_init(&spin_hi, i915))
 		goto err_unlock;
@@ -158,7 +160,7 @@ static int live_preempt(void *arg)
 	igt_spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -171,13 +173,14 @@ static int live_late_preempt(void *arg)
 	struct intel_engine_cs *engine;
 	struct i915_sched_attr attr = {};
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	if (igt_spinner_init(&spin_hi, i915))
 		goto err_unlock;
@@ -251,7 +254,7 @@ static int live_late_preempt(void *arg)
 	igt_spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
@@ -270,6 +273,7 @@ static int live_preempt_hang(void *arg)
 	struct igt_spinner spin_hi, spin_lo;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
@@ -279,7 +283,7 @@ static int live_preempt_hang(void *arg)
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	if (igt_spinner_init(&spin_hi, i915))
 		goto err_unlock;
@@ -374,7 +378,7 @@ static int live_preempt_hang(void *arg)
 	igt_spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -562,6 +566,7 @@ static int live_preempt_smoke(void *arg)
 		.ncontext = 1024,
 	};
 	const unsigned int phase[] = { 0, BATCH };
+	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 	u32 *cs;
 	int n;
@@ -576,7 +581,7 @@ static int live_preempt_smoke(void *arg)
 		return -ENOMEM;
 
 	mutex_lock(&smoke.i915->drm.struct_mutex);
-	intel_runtime_pm_get(smoke.i915);
+	wakeref = intel_runtime_pm_get(smoke.i915);
 
 	smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE);
 	if (IS_ERR(smoke.batch)) {
@@ -627,7 +632,7 @@ static int live_preempt_smoke(void *arg)
 err_batch:
 	i915_gem_object_put(smoke.batch);
 err_unlock:
-	intel_runtime_pm_put_unchecked(smoke.i915);
+	intel_runtime_pm_put(smoke.i915, wakeref);
 	mutex_unlock(&smoke.i915->drm.struct_mutex);
 	kfree(smoke.contexts);
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index 54f5c2de3d08..47e62e1999a9 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -15,10 +15,11 @@
 static struct drm_i915_gem_object *
 read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 {
+	const u32 base = engine->mmio_base;
 	struct drm_i915_gem_object *result;
+	intel_wakeref_t wakeref;
 	struct i915_request *rq;
 	struct i915_vma *vma;
-	const u32 base = engine->mmio_base;
 	u32 srm, *cs;
 	int err;
 	int i;
@@ -47,9 +48,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	if (err)
 		goto err_obj;
 
-	intel_runtime_pm_get(engine->i915);
+	wakeref = intel_runtime_pm_get(engine->i915);
 	rq = i915_request_alloc(engine, ctx);
-	intel_runtime_pm_put_unchecked(engine->i915);
+	intel_runtime_pm_put(engine->i915, wakeref);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_pin;
@@ -183,20 +184,21 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
 {
 	struct i915_gem_context *ctx;
 	struct i915_request *rq;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	ctx = kernel_context(engine->i915);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	intel_runtime_pm_get(engine->i915);
+	wakeref = intel_runtime_pm_get(engine->i915);
 
 	if (spin)
 		rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP);
 	else
 		rq = i915_request_alloc(engine, ctx);
 
-	intel_runtime_pm_put_unchecked(engine->i915);
+	intel_runtime_pm_put(engine->i915, wakeref);
 
 	kernel_context_close(ctx);
 
@@ -228,6 +230,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
 	bool want_spin = reset == do_engine_reset;
 	struct i915_gem_context *ctx;
 	struct igt_spinner spin;
+	intel_wakeref_t wakeref;
 	int err;
 
 	pr_info("Checking %d whitelisted registers (RING_NONPRIV) [%s]\n",
@@ -253,9 +256,9 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
 	if (err)
 		goto out;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	err = reset(engine);
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	if (want_spin) {
 		igt_spinner_end(&spin);
@@ -344,6 +347,7 @@ live_gpu_reset_gt_engine_workarounds(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct i915_gpu_error *error = &i915->gpu_error;
+	intel_wakeref_t wakeref;
 	bool ok;
 
 	if (!intel_has_gpu_reset(i915))
@@ -352,7 +356,7 @@ live_gpu_reset_gt_engine_workarounds(void *arg)
 	pr_info("Verifying after GPU reset...\n");
 
 	igt_global_reset_lock(i915);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	ok = verify_gt_engine_wa(i915, "before reset");
 	if (!ok)
@@ -364,7 +368,7 @@ live_gpu_reset_gt_engine_workarounds(void *arg)
 	ok = verify_gt_engine_wa(i915, "after reset");
 
 out:
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	igt_global_reset_unlock(i915);
 
 	return ok ? 0 : -ESRCH;
@@ -379,6 +383,7 @@ live_engine_reset_gt_engine_workarounds(void *arg)
 	struct igt_spinner spin;
 	enum intel_engine_id id;
 	struct i915_request *rq;
+	intel_wakeref_t wakeref;
 	int ret = 0;
 
 	if (!intel_has_reset_engine(i915))
@@ -389,7 +394,7 @@ live_engine_reset_gt_engine_workarounds(void *arg)
 		return PTR_ERR(ctx);
 
 	igt_global_reset_lock(i915);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	for_each_engine(engine, i915, id) {
 		bool ok;
@@ -443,7 +448,7 @@ live_engine_reset_gt_engine_workarounds(void *arg)
 	}
 
 err:
-	intel_runtime_pm_put_unchecked(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	igt_global_reset_unlock(i915);
 	kernel_context_close(ctx);
 
-- 
2.20.1


* [PATCH 17/46] drm/i915: Syntactic sugar for using intel_runtime_pm
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (14 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 16/46] drm/i915/selftests: Mark up rpm wakerefs Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-09 14:30   ` Mika Kuoppala
  2019-01-10  0:24   ` John Harrison
  2019-01-07 11:54 ` [PATCH 18/46] drm/i915: Mark up paired operations on display power domains Chris Wilson
                   ` (34 subsequent siblings)
  50 siblings, 2 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Frequently, we use intel_runtime_pm_get/_put around a small block.
Formalise that usage by providing a macro to define such a block with an
automatic closure to scope the intel_runtime_pm wakeref to that block,
i.e. macro abuse smelling of Python.
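
As a sketch of the resulting style, this is the calc_residency()
conversion from the i915_sysfs.c hunk below:

	static u32 calc_residency(struct drm_i915_private *dev_priv,
				  i915_reg_t reg)
	{
		intel_wakeref_t wakeref;
		u64 res = 0;

		/* acquire rpm, run the block, release on exit */
		with_intel_runtime_pm(dev_priv, wakeref)
			res = intel_rc6_residency_us(dev_priv, reg);

		return DIV_ROUND_CLOSEST_ULL(res, 1000);
	}

Note that res is initialised before the block: the plain form always
executes its body once (the get functions return a non-zero cookie),
but with_intel_runtime_pm_if_in_use() skips the body entirely whenever
the device is already asleep, so results need a sane default.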

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c           | 162 ++++++++----------
 drivers/gpu/drm/i915/i915_gem.c               |  10 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  23 ++-
 drivers/gpu/drm/i915/i915_gem_shrinker.c      |  51 +++---
 drivers/gpu/drm/i915/i915_pmu.c               |   7 +-
 drivers/gpu/drm/i915/i915_sysfs.c             |   7 +-
 drivers/gpu/drm/i915/intel_drv.h              |   8 +
 drivers/gpu/drm/i915/intel_guc_log.c          |  26 ++-
 drivers/gpu/drm/i915/intel_huc.c              |   7 +-
 drivers/gpu/drm/i915/intel_panel.c            |  18 +-
 drivers/gpu/drm/i915/intel_uncore.c           |  30 ++--
 drivers/gpu/drm/i915/selftests/i915_gem.c     |  34 ++--
 .../gpu/drm/i915/selftests/i915_gem_context.c |  12 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  |  11 +-
 .../drm/i915/selftests/intel_workarounds.c    |  28 +--
 15 files changed, 203 insertions(+), 231 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d667b05e7ca4..1521e08861d1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -953,9 +953,9 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
 	struct i915_gpu_state *gpu;
 	intel_wakeref_t wakeref;
 
-	wakeref = intel_runtime_pm_get(i915);
-	gpu = i915_capture_gpu_state(i915);
-	intel_runtime_pm_put(i915, wakeref);
+	gpu = NULL;
+	with_intel_runtime_pm(i915, wakeref)
+		gpu = i915_capture_gpu_state(i915);
 	if (IS_ERR(gpu))
 		return PTR_ERR(gpu);
 
@@ -1287,17 +1287,15 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 		return 0;
 	}
 
-	wakeref = intel_runtime_pm_get(dev_priv);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		for_each_engine(engine, dev_priv, id) {
+			acthd[id] = intel_engine_get_active_head(engine);
+			seqno[id] = intel_engine_get_seqno(engine);
+		}
 
-	for_each_engine(engine, dev_priv, id) {
-		acthd[id] = intel_engine_get_active_head(engine);
-		seqno[id] = intel_engine_get_seqno(engine);
+		intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
 	}
 
-	intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
-
-	intel_runtime_pm_put(dev_priv, wakeref);
-
 	if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer))
 		seq_printf(m, "Hangcheck active, timer fires in %dms\n",
 			   jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
@@ -1573,18 +1571,16 @@ static int i915_drpc_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	intel_wakeref_t wakeref;
-	int err;
-
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		err = vlv_drpc_info(m);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		err = gen6_drpc_info(m);
-	else
-		err = ironlake_drpc_info(m);
+	int err = -ENODEV;
 
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+			err = vlv_drpc_info(m);
+		else if (INTEL_GEN(dev_priv) >= 6)
+			err = gen6_drpc_info(m);
+		else
+			err = ironlake_drpc_info(m);
+	}
 
 	return err;
 }
@@ -2068,8 +2064,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	intel_wakeref_t wakeref;
 	struct drm_file *file;
 
-	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
-	if (wakeref) {
+	with_intel_runtime_pm_if_in_use(dev_priv, wakeref) {
 		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
 			mutex_lock(&dev_priv->pcu_lock);
 			act_freq = vlv_punit_read(dev_priv,
@@ -2080,7 +2075,6 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 			act_freq = intel_get_cagf(dev_priv,
 						  I915_READ(GEN6_RPSTAT1));
 		}
-		intel_runtime_pm_put(dev_priv, wakeref);
 	}
 
 	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
@@ -2172,9 +2166,8 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data)
 	p = drm_seq_file_printer(m);
 	intel_uc_fw_dump(&dev_priv->huc.fw, &p);
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref)
+		seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
 
 	return 0;
 }
@@ -2184,7 +2177,6 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	intel_wakeref_t wakeref;
 	struct drm_printer p;
-	u32 tmp, i;
 
 	if (!HAS_GUC(dev_priv))
 		return -ENODEV;
@@ -2192,22 +2184,23 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
 	p = drm_seq_file_printer(m);
 	intel_uc_fw_dump(&dev_priv->guc.fw, &p);
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	tmp = I915_READ(GUC_STATUS);
-
-	seq_printf(m, "\nGuC status 0x%08x:\n", tmp);
-	seq_printf(m, "\tBootrom status = 0x%x\n",
-		(tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
-	seq_printf(m, "\tuKernel status = 0x%x\n",
-		(tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
-	seq_printf(m, "\tMIA Core status = 0x%x\n",
-		(tmp & GS_MIA_MASK) >> GS_MIA_SHIFT);
-	seq_puts(m, "\nScratch registers:\n");
-	for (i = 0; i < 16; i++)
-		seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i)));
-
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		u32 tmp = I915_READ(GUC_STATUS);
+		u32 i;
+
+		seq_printf(m, "\nGuC status 0x%08x:\n", tmp);
+		seq_printf(m, "\tBootrom status = 0x%x\n",
+			   (tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
+		seq_printf(m, "\tuKernel status = 0x%x\n",
+			   (tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
+		seq_printf(m, "\tMIA Core status = 0x%x\n",
+			   (tmp & GS_MIA_MASK) >> GS_MIA_SHIFT);
+		seq_puts(m, "\nScratch registers:\n");
+		for (i = 0; i < 16; i++) {
+			seq_printf(m, "\t%2d: \t0x%x\n",
+				   i, I915_READ(SOFT_SCRATCH(i)));
+		}
+	}
 
 	return 0;
 }
@@ -2680,19 +2673,14 @@ static int i915_energy_uJ(struct seq_file *m, void *data)
 	if (INTEL_GEN(dev_priv) < 6)
 		return -ENODEV;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) {
-		intel_runtime_pm_put(dev_priv, wakeref);
+	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power))
 		return -ENODEV;
-	}
 
 	units = (power & 0x1f00) >> 8;
-	power = I915_READ(MCH_SECP_NRG_STTS);
-	power = (1000000 * power) >> units; /* convert to uJ */
-
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref)
+		power = I915_READ(MCH_SECP_NRG_STTS);
 
+	power = (1000000 * power) >> units; /* convert to uJ */
 	seq_printf(m, "%llu", power);
 
 	return 0;
@@ -3275,22 +3263,20 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf,
 	struct seq_file *m = file->private_data;
 	struct drm_i915_private *dev_priv = m->private;
 	intel_wakeref_t wakeref;
-	int ret;
 	bool enable;
+	int ret;
 
 	ret = kstrtobool_from_user(ubuf, len, &enable);
 	if (ret < 0)
 		return ret;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	if (!dev_priv->ipc_enabled && enable)
-		DRM_INFO("Enabling IPC: WM will be proper only after next commit\n");
-	dev_priv->wm.distrust_bios_wm = true;
-	dev_priv->ipc_enabled = enable;
-	intel_enable_ipc(dev_priv);
-
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		if (!dev_priv->ipc_enabled && enable)
+			DRM_INFO("Enabling IPC: WM will be proper only after next commit\n");
+		dev_priv->wm.distrust_bios_wm = true;
+		dev_priv->ipc_enabled = enable;
+		intel_enable_ipc(dev_priv);
+	}
 
 	return len;
 }
@@ -4130,16 +4116,13 @@ i915_cache_sharing_get(void *data, u64 *val)
 {
 	struct drm_i915_private *dev_priv = data;
 	intel_wakeref_t wakeref;
-	u32 snpcr;
+	u32 snpcr = 0;
 
 	if (!(IS_GEN_RANGE(dev_priv, 6, 7)))
 		return -ENODEV;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
-
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref)
+		snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
 
 	*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
 
@@ -4151,7 +4134,6 @@ i915_cache_sharing_set(void *data, u64 val)
 {
 	struct drm_i915_private *dev_priv = data;
 	intel_wakeref_t wakeref;
-	u32 snpcr;
 
 	if (!(IS_GEN_RANGE(dev_priv, 6, 7)))
 		return -ENODEV;
@@ -4159,16 +4141,17 @@ i915_cache_sharing_set(void *data, u64 val)
 	if (val > 3)
 		return -EINVAL;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
 	DRM_DEBUG_DRIVER("Manually setting uncore sharing to %llu\n", val);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		u32 snpcr;
+
+		/* Update the cache sharing policy here as well */
+		snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
+		snpcr &= ~GEN6_MBC_SNPCR_MASK;
+		snpcr |= val << GEN6_MBC_SNPCR_SHIFT;
+		I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
+	}
 
-	/* Update the cache sharing policy here as well */
-	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
-	snpcr &= ~GEN6_MBC_SNPCR_MASK;
-	snpcr |= (val << GEN6_MBC_SNPCR_SHIFT);
-	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
-
-	intel_runtime_pm_put(dev_priv, wakeref);
 	return 0;
 }
 
@@ -4405,20 +4388,17 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 	sseu.max_eus_per_subslice =
 		RUNTIME_INFO(dev_priv)->sseu.max_eus_per_subslice;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	if (IS_CHERRYVIEW(dev_priv)) {
-		cherryview_sseu_device_status(dev_priv, &sseu);
-	} else if (IS_BROADWELL(dev_priv)) {
-		broadwell_sseu_device_status(dev_priv, &sseu);
-	} else if (IS_GEN(dev_priv, 9)) {
-		gen9_sseu_device_status(dev_priv, &sseu);
-	} else if (INTEL_GEN(dev_priv) >= 10) {
-		gen10_sseu_device_status(dev_priv, &sseu);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		if (IS_CHERRYVIEW(dev_priv))
+			cherryview_sseu_device_status(dev_priv, &sseu);
+		else if (IS_BROADWELL(dev_priv))
+			broadwell_sseu_device_status(dev_priv, &sseu);
+		else if (IS_GEN(dev_priv, 9))
+			gen9_sseu_device_status(dev_priv, &sseu);
+		else if (INTEL_GEN(dev_priv) >= 10)
+			gen10_sseu_device_status(dev_priv, &sseu);
 	}
 
-	intel_runtime_pm_put(dev_priv, wakeref);
-
 	i915_print_sseu_info(m, false, &sseu);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e04dadeca879..9dd31c3236fb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -814,13 +814,13 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 
 	i915_gem_chipset_flush(dev_priv);
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	spin_lock_irq(&dev_priv->uncore.lock);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		spin_lock_irq(&dev_priv->uncore.lock);
 
-	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
+		POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
 
-	spin_unlock_irq(&dev_priv->uncore.lock);
-	intel_runtime_pm_put(dev_priv, wakeref);
+		spin_unlock_irq(&dev_priv->uncore.lock);
+	}
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4bec10286487..9e9ce31142b1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2536,9 +2536,8 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	if (i915_gem_object_is_readonly(obj))
 		pte_flags |= PTE_READ_ONLY;
 
-	wakeref = intel_runtime_pm_get(i915);
-	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
-	intel_runtime_pm_put(i915, wakeref);
+	with_intel_runtime_pm(i915, wakeref)
+		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
 
 	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
 
@@ -2557,9 +2556,8 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
 	struct drm_i915_private *i915 = vma->vm->i915;
 	intel_wakeref_t wakeref;
 
-	wakeref = intel_runtime_pm_get(i915);
-	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
-	intel_runtime_pm_put(i915, wakeref);
+	with_intel_runtime_pm(i915, wakeref)
+		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
 }
 
 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
@@ -2593,9 +2591,10 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	if (flags & I915_VMA_GLOBAL_BIND) {
 		intel_wakeref_t wakeref;
 
-		wakeref = intel_runtime_pm_get(i915);
-		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
-		intel_runtime_pm_put(i915, wakeref);
+		with_intel_runtime_pm(i915, wakeref) {
+			vma->vm->insert_entries(vma->vm, vma,
+						cache_level, pte_flags);
+		}
 	}
 
 	return 0;
@@ -2606,11 +2605,11 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
 	struct drm_i915_private *i915 = vma->vm->i915;
 
 	if (vma->flags & I915_VMA_GLOBAL_BIND) {
+		struct i915_address_space *vm = vma->vm;
 		intel_wakeref_t wakeref;
 
-		wakeref = intel_runtime_pm_get(i915);
-		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
-		intel_runtime_pm_put(i915, wakeref);
+		with_intel_runtime_pm(i915, wakeref)
+			vm->clear_range(vm, vma->node.start, vma->size);
 	}
 
 	if (vma->flags & I915_VMA_LOCAL_BIND) {
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index bc230e43b98f..e9a79059bc43 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -297,14 +297,14 @@ i915_gem_shrink(struct drm_i915_private *i915,
 unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
-	unsigned long freed;
+	unsigned long freed = 0;
 
-	wakeref = intel_runtime_pm_get(i915);
-	freed = i915_gem_shrink(i915, -1UL, NULL,
-				I915_SHRINK_BOUND |
-				I915_SHRINK_UNBOUND |
-				I915_SHRINK_ACTIVE);
-	intel_runtime_pm_put(i915, wakeref);
+	with_intel_runtime_pm(i915, wakeref) {
+		freed = i915_gem_shrink(i915, -1UL, NULL,
+					I915_SHRINK_BOUND |
+					I915_SHRINK_UNBOUND |
+					I915_SHRINK_ACTIVE);
+	}
 
 	return freed;
 }
@@ -377,14 +377,14 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
 		intel_wakeref_t wakeref;
 
-		wakeref = intel_runtime_pm_get(i915);
-		freed += i915_gem_shrink(i915,
-					 sc->nr_to_scan - sc->nr_scanned,
-					 &sc->nr_scanned,
-					 I915_SHRINK_ACTIVE |
-					 I915_SHRINK_BOUND |
-					 I915_SHRINK_UNBOUND);
-		intel_runtime_pm_put(i915, wakeref);
+		with_intel_runtime_pm(i915, wakeref) {
+			freed += i915_gem_shrink(i915,
+						 sc->nr_to_scan - sc->nr_scanned,
+						 &sc->nr_scanned,
+						 I915_SHRINK_ACTIVE |
+						 I915_SHRINK_BOUND |
+						 I915_SHRINK_UNBOUND);
+		}
 	}
 
 	shrinker_unlock(i915, unlock);
@@ -426,11 +426,11 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 	unsigned long unevictable, bound, unbound, freed_pages;
 	intel_wakeref_t wakeref;
 
-	wakeref = intel_runtime_pm_get(i915);
-	freed_pages = i915_gem_shrink(i915, -1UL, NULL,
-				      I915_SHRINK_BOUND |
-				      I915_SHRINK_UNBOUND);
-	intel_runtime_pm_put(i915, wakeref);
+	freed_pages = 0;
+	with_intel_runtime_pm(i915, wakeref)
+		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
+					       I915_SHRINK_BOUND |
+					       I915_SHRINK_UNBOUND);
 
 	/* Because we may be allocating inside our own driver, we cannot
 	 * assert that there are no objects with pinned pages that are not
@@ -482,12 +482,11 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 	if (ret)
 		goto out;
 
-	wakeref = intel_runtime_pm_get(i915);
-	freed_pages += i915_gem_shrink(i915, -1UL, NULL,
-				       I915_SHRINK_BOUND |
-				       I915_SHRINK_UNBOUND |
-				       I915_SHRINK_VMAPS);
-	intel_runtime_pm_put(i915, wakeref);
+	with_intel_runtime_pm(i915, wakeref)
+		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
+					       I915_SHRINK_BOUND |
+					       I915_SHRINK_UNBOUND |
+					       I915_SHRINK_VMAPS);
 
 	/* We also want to clear any cached iomaps as they wrap vmap */
 	list_for_each_entry_safe(vma, next,
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 3d43fc9dd25d..b1cb2d3cae16 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -230,14 +230,11 @@ frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 
 		val = dev_priv->gt_pm.rps.cur_freq;
 		if (dev_priv->gt.awake) {
-			intel_wakeref_t wakeref =
-				intel_runtime_pm_get_if_in_use(dev_priv);
+			intel_wakeref_t wakeref;
 
-			if (wakeref) {
+			with_intel_runtime_pm_if_in_use(dev_priv, wakeref)
 				val = intel_get_cagf(dev_priv,
 						     I915_READ_NOTRACE(GEN6_RPSTAT1));
-				intel_runtime_pm_put(dev_priv, wakeref);
-			}
 		}
 
 		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 2cbbf165d179..41313005af42 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -43,11 +43,10 @@ static u32 calc_residency(struct drm_i915_private *dev_priv,
 			  i915_reg_t reg)
 {
 	intel_wakeref_t wakeref;
-	u64 res;
+	u64 res = 0;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	res = intel_rc6_residency_us(dev_priv, reg);
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref)
+		res = intel_rc6_residency_us(dev_priv, reg);
 
 	return DIV_ROUND_CLOSEST_ULL(res, 1000);
 }
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a1e4e1033289..4272c260b6e1 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -2187,6 +2187,14 @@ intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915);
 intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
 intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
 
+#define with_intel_runtime_pm(i915, wf) \
+	for (wf = intel_runtime_pm_get(i915); wf; \
+	     intel_runtime_pm_put(i915, wf), wf = 0)
+
+#define with_intel_runtime_pm_if_in_use(i915, wf) \
+	for (wf = intel_runtime_pm_get_if_in_use(i915); wf; \
+	     intel_runtime_pm_put(i915, wf), wf = 0)
+
 void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915);
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
 void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref);
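
(Aside on mechanics: the for-loop acquires the wakeref in its
initialiser, runs the body while wf is non-zero, and the continuation
expression releases the wakeref and zeroes wf, so the body executes at
most once. A rough straight-line equivalent:

	wf = intel_runtime_pm_get(i915);	/* acquire */
	if (wf) {				/* always true for plain get */
		/* ...block body... */
		intel_runtime_pm_put(i915, wf);	/* release */
		wf = 0;				/* terminate the loop */
	}

For with_intel_runtime_pm_if_in_use(), the get may return 0 and the
body is skipped.)
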
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 20c0b36d748e..b53582c0c6c1 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -444,9 +444,8 @@ static void guc_log_capture_logs(struct intel_guc_log *log)
 	 * Generally device is expected to be active only at this
 	 * time, so get/put should be really quick.
 	 */
-	wakeref = intel_runtime_pm_get(dev_priv);
-	guc_action_flush_log_complete(guc);
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref)
+		guc_action_flush_log_complete(guc);
 }
 
 int intel_guc_log_create(struct intel_guc_log *log)
@@ -507,7 +506,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
 	struct intel_guc *guc = log_to_guc(log);
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	intel_wakeref_t wakeref;
-	int ret;
+	int ret = 0;
 
 	BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN != 0);
 	GEM_BUG_ON(!log->vma);
@@ -521,16 +520,14 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
 
-	if (log->level == level) {
-		ret = 0;
+	if (log->level == level)
 		goto out_unlock;
-	}
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	ret = guc_action_control_log(guc, GUC_LOG_LEVEL_IS_VERBOSE(level),
-				     GUC_LOG_LEVEL_IS_ENABLED(level),
-				     GUC_LOG_LEVEL_TO_VERBOSITY(level));
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref)
+		ret = guc_action_control_log(guc,
+					     GUC_LOG_LEVEL_IS_VERBOSE(level),
+					     GUC_LOG_LEVEL_IS_ENABLED(level),
+					     GUC_LOG_LEVEL_TO_VERBOSITY(level));
 	if (ret) {
 		DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret);
 		goto out_unlock;
@@ -611,9 +608,8 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
 	 */
 	flush_work(&log->relay.flush_work);
 
-	wakeref = intel_runtime_pm_get(i915);
-	guc_action_flush_log(guc);
-	intel_runtime_pm_put(i915, wakeref);
+	with_intel_runtime_pm(i915, wakeref)
+		guc_action_flush_log(guc);
 
 	/* GuC would have updated log buffer by now, so capture it */
 	guc_log_capture_logs(log);
diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c
index 3e8c18b6a42d..9bd1c9002c2a 100644
--- a/drivers/gpu/drm/i915/intel_huc.c
+++ b/drivers/gpu/drm/i915/intel_huc.c
@@ -116,14 +116,13 @@ int intel_huc_check_status(struct intel_huc *huc)
 {
 	struct drm_i915_private *dev_priv = huc_to_i915(huc);
 	intel_wakeref_t wakeref;
-	bool status;
+	bool status = false;
 
 	if (!HAS_HUC(dev_priv))
 		return -ENODEV;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref)
+		status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
 
 	return status;
 }
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 93a2e4b5c54c..5a39a6347a7a 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -1204,17 +1204,19 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd)
 	struct drm_device *dev = connector->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	intel_wakeref_t wakeref;
-	u32 hw_level;
-	int ret;
+	int ret = 0;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		u32 hw_level;
 
-	hw_level = intel_panel_get_backlight(connector);
-	ret = scale_hw_to_user(connector, hw_level, bd->props.max_brightness);
+		drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 
-	drm_modeset_unlock(&dev->mode_config.connection_mutex);
-	intel_runtime_pm_put(dev_priv, wakeref);
+		hw_level = intel_panel_get_backlight(connector);
+		ret = scale_hw_to_user(connector,
+				       hw_level, bd->props.max_brightness);
+
+		drm_modeset_unlock(&dev->mode_config.connection_mutex);
+	}
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index d494d92da02c..681ea532585e 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1696,21 +1696,21 @@ int i915_reg_read_ioctl(struct drm_device *dev,
 
 	flags = reg->offset & (entry->size - 1);
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	if (entry->size == 8 && flags == I915_REG_READ_8B_WA)
-		reg->val = I915_READ64_2x32(entry->offset_ldw,
-					    entry->offset_udw);
-	else if (entry->size == 8 && flags == 0)
-		reg->val = I915_READ64(entry->offset_ldw);
-	else if (entry->size == 4 && flags == 0)
-		reg->val = I915_READ(entry->offset_ldw);
-	else if (entry->size == 2 && flags == 0)
-		reg->val = I915_READ16(entry->offset_ldw);
-	else if (entry->size == 1 && flags == 0)
-		reg->val = I915_READ8(entry->offset_ldw);
-	else
-		ret = -EINVAL;
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		if (entry->size == 8 && flags == I915_REG_READ_8B_WA)
+			reg->val = I915_READ64_2x32(entry->offset_ldw,
+						    entry->offset_udw);
+		else if (entry->size == 8 && flags == 0)
+			reg->val = I915_READ64(entry->offset_ldw);
+		else if (entry->size == 4 && flags == 0)
+			reg->val = I915_READ(entry->offset_ldw);
+		else if (entry->size == 2 && flags == 0)
+			reg->val = I915_READ16(entry->offset_ldw);
+		else if (entry->size == 1 && flags == 0)
+			reg->val = I915_READ8(entry->offset_ldw);
+		else
+			ret = -EINVAL;
+	}
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 01a46c46fe25..e77b7ed449ae 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -98,26 +98,22 @@ static void pm_suspend(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
 
-	wakeref = intel_runtime_pm_get(i915);
-
-	i915_gem_suspend_gtt_mappings(i915);
-	i915_gem_suspend_late(i915);
-
-	intel_runtime_pm_put(i915, wakeref);
+	with_intel_runtime_pm(i915, wakeref) {
+		i915_gem_suspend_gtt_mappings(i915);
+		i915_gem_suspend_late(i915);
+	}
 }
 
 static void pm_hibernate(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
 
-	wakeref = intel_runtime_pm_get(i915);
-
-	i915_gem_suspend_gtt_mappings(i915);
-
-	i915_gem_freeze(i915);
-	i915_gem_freeze_late(i915);
+	with_intel_runtime_pm(i915, wakeref) {
+		i915_gem_suspend_gtt_mappings(i915);
 
-	intel_runtime_pm_put(i915, wakeref);
+		i915_gem_freeze(i915);
+		i915_gem_freeze_late(i915);
+	}
 }
 
 static void pm_resume(struct drm_i915_private *i915)
@@ -128,13 +124,11 @@ static void pm_resume(struct drm_i915_private *i915)
 	 * Both suspend and hibernate follow the same wakeup path and assume
 	 * that runtime-pm just works.
 	 */
-	wakeref = intel_runtime_pm_get(i915);
-
-	intel_engines_sanitize(i915, false);
-	i915_gem_sanitize(i915);
-	i915_gem_resume(i915);
-
-	intel_runtime_pm_put(i915, wakeref);
+	with_intel_runtime_pm(i915, wakeref) {
+		intel_engines_sanitize(i915, false);
+		i915_gem_sanitize(i915);
+		i915_gem_resume(i915);
+	}
 }
 
 static int igt_gem_suspend(void *arg)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 7a9b1f20b019..4cba50679607 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -610,9 +610,9 @@ static int igt_ctx_exec(void *arg)
 				}
 			}
 
-			wakeref = intel_runtime_pm_get(i915);
-			err = gpu_fill(obj, ctx, engine, dw);
-			intel_runtime_pm_put(i915, wakeref);
+			err = 0;
+			with_intel_runtime_pm(i915, wakeref)
+				err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
@@ -718,9 +718,9 @@ static int igt_ctx_readonly(void *arg)
 					i915_gem_object_set_readonly(obj);
 			}
 
-			wakeref = intel_runtime_pm_get(i915);
-			err = gpu_fill(obj, ctx, engine, dw);
-			intel_runtime_pm_put(i915, wakeref);
+			err = 0;
+			with_intel_runtime_pm(i915, wakeref)
+				err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 3575e1387c3f..395ae878e0f7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -509,9 +509,8 @@ static void disable_retire_worker(struct drm_i915_private *i915)
 	if (!i915->gt.active_requests++) {
 		intel_wakeref_t wakeref;
 
-		wakeref = intel_runtime_pm_get(i915);
-		i915_gem_unpark(i915);
-		intel_runtime_pm_put(i915, wakeref);
+		with_intel_runtime_pm(i915, wakeref)
+			i915_gem_unpark(i915);
 	}
 	mutex_unlock(&i915->drm.struct_mutex);
 
@@ -593,10 +592,10 @@ static int igt_mmap_offset_exhaustion(void *arg)
 			goto out;
 		}
 
+		err = 0;
 		mutex_lock(&i915->drm.struct_mutex);
-		wakeref = intel_runtime_pm_get(i915);
-		err = make_obj_busy(obj);
-		intel_runtime_pm_put(i915, wakeref);
+		with_intel_runtime_pm(i915, wakeref)
+			err = make_obj_busy(obj);
 		mutex_unlock(&i915->drm.struct_mutex);
 		if (err) {
 			pr_err("[loop %d] Failed to busy the object\n", loop);
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index 47e62e1999a9..be2ffc9cd38d 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -48,9 +48,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	if (err)
 		goto err_obj;
 
-	wakeref = intel_runtime_pm_get(engine->i915);
-	rq = i915_request_alloc(engine, ctx);
-	intel_runtime_pm_put(engine->i915, wakeref);
+	rq = ERR_PTR(-ENODEV);
+	with_intel_runtime_pm(engine->i915, wakeref)
+		rq = i915_request_alloc(engine, ctx);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_pin;
@@ -191,14 +191,15 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	wakeref = intel_runtime_pm_get(engine->i915);
-
-	if (spin)
-		rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP);
-	else
-		rq = i915_request_alloc(engine, ctx);
-
-	intel_runtime_pm_put(engine->i915, wakeref);
+	rq = ERR_PTR(-ENODEV);
+	with_intel_runtime_pm(engine->i915, wakeref) {
+		if (spin)
+			rq = igt_spinner_create_request(spin,
+							ctx, engine,
+							MI_NOOP);
+		else
+			rq = i915_request_alloc(engine, ctx);
+	}
 
 	kernel_context_close(ctx);
 
@@ -256,9 +257,8 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
 	if (err)
 		goto out;
 
-	wakeref = intel_runtime_pm_get(i915);
-	err = reset(engine);
-	intel_runtime_pm_put(i915, wakeref);
+	with_intel_runtime_pm(i915, wakeref)
+		err = reset(engine);
 
 	if (want_spin) {
 		igt_spinner_end(&spin);
-- 
2.20.1


* [PATCH 18/46] drm/i915: Mark up paired operations on display power domains
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (15 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 17/46] drm/i915: Syntactic sugar for using intel_runtime_pm Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-10  0:55   ` John Harrison
  2019-01-07 11:54 ` [PATCH 19/46] drm/i915: Track the wakeref used to initialise " Chris Wilson
                   ` (33 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

The majority of runtime-pm operations are bounded and scoped within a
function; for these it is easy to verify that the wakerefs are handled
correctly. We can employ the compiler to help us, and reduce the number
of wakerefs tracked when debugging, by passing around the cookies
provided by the various rpm_get functions to their rpm_put counterparts.
This makes the pairing explicit, and given the required wakeref cookie
the compiler can verify that we pass an initialised value to the rpm_put
(quite handy for double-checking error paths).
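
For a bounded section, a condensed sketch in the spirit of the
i915_debugfs.c conversion below:

	enum intel_display_power_domain domain = POWER_DOMAIN_PIPE(pipe);
	intel_wakeref_t wakeref;

	wakeref = intel_display_power_get_if_enabled(dev_priv, domain);
	if (!wakeref)
		return; /* pipe powered down, nothing to read */

	/* ... read the pipe's registers ... */

	intel_display_power_put(dev_priv, domain, wakeref);

Where a reference outlives the function, the cookie is stashed alongside
its owner (e.g. i915->gt.power, intel_csr.wakeref) and passed to the
matching put later.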

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     | 35 ++++++------
 drivers/gpu/drm/i915/i915_drv.h         |  2 +
 drivers/gpu/drm/i915/i915_gem.c         |  4 +-
 drivers/gpu/drm/i915/i915_perf.c        |  4 +-
 drivers/gpu/drm/i915/icl_dsi.c          | 36 ++++++++-----
 drivers/gpu/drm/i915/intel_audio.c      |  3 +-
 drivers/gpu/drm/i915/intel_cdclk.c      | 10 ++--
 drivers/gpu/drm/i915/intel_crt.c        | 25 +++++----
 drivers/gpu/drm/i915/intel_csr.c        | 25 +++++++--
 drivers/gpu/drm/i915/intel_ddi.c        | 36 ++++++++-----
 drivers/gpu/drm/i915/intel_display.c    | 68 ++++++++++++++---------
 drivers/gpu/drm/i915/intel_dp.c         | 38 +++++++------
 drivers/gpu/drm/i915/intel_dpll_mgr.c   | 66 +++++++++++++++--------
 drivers/gpu/drm/i915/intel_drv.h        | 17 ++++--
 drivers/gpu/drm/i915/intel_dsi.h        |  1 +
 drivers/gpu/drm/i915/intel_hdmi.c       | 18 ++++---
 drivers/gpu/drm/i915/intel_i2c.c        | 20 +++----
 drivers/gpu/drm/i915/intel_lvds.c       |  8 +--
 drivers/gpu/drm/i915/intel_pipe_crc.c   |  6 ++-
 drivers/gpu/drm/i915/intel_pm.c         |  6 ++-
 drivers/gpu/drm/i915/intel_runtime_pm.c | 71 ++++++++++++++++---------
 drivers/gpu/drm/i915/intel_sprite.c     | 24 ++++++---
 drivers/gpu/drm/i915/intel_vdsc.c       |  4 +-
 drivers/gpu/drm/i915/vlv_dsi.c          | 14 +++--
 24 files changed, 349 insertions(+), 192 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 1521e08861d1..f11e5dc41f17 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -626,10 +626,12 @@ static void gen8_display_interrupt_info(struct seq_file *m)
 
 	for_each_pipe(dev_priv, pipe) {
 		enum intel_display_power_domain power_domain;
+		intel_wakeref_t wakeref;
 
 		power_domain = POWER_DOMAIN_PIPE(pipe);
-		if (!intel_display_power_get_if_enabled(dev_priv,
-							power_domain)) {
+		wakeref = intel_display_power_get_if_enabled(dev_priv,
+							     power_domain);
+		if (!wakeref) {
 			seq_printf(m, "Pipe %c power disabled\n",
 				   pipe_name(pipe));
 			continue;
@@ -644,7 +646,7 @@ static void gen8_display_interrupt_info(struct seq_file *m)
 			   pipe_name(pipe),
 			   I915_READ(GEN8_DE_PIPE_IER(pipe)));
 
-		intel_display_power_put(dev_priv, power_domain);
+		intel_display_power_put(dev_priv, power_domain, wakeref);
 	}
 
 	seq_printf(m, "Display Engine port interrupt mask:\t%08x\n",
@@ -680,6 +682,8 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 	wakeref = intel_runtime_pm_get(dev_priv);
 
 	if (IS_CHERRYVIEW(dev_priv)) {
+		intel_wakeref_t pref;
+
 		seq_printf(m, "Master Interrupt Control:\t%08x\n",
 			   I915_READ(GEN8_MASTER_IRQ));
 
@@ -695,8 +699,9 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 			enum intel_display_power_domain power_domain;
 
 			power_domain = POWER_DOMAIN_PIPE(pipe);
-			if (!intel_display_power_get_if_enabled(dev_priv,
-								power_domain)) {
+			pref = intel_display_power_get_if_enabled(dev_priv,
+								  power_domain);
+			if (!pref) {
 				seq_printf(m, "Pipe %c power disabled\n",
 					   pipe_name(pipe));
 				continue;
@@ -706,17 +711,17 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 				   pipe_name(pipe),
 				   I915_READ(PIPESTAT(pipe)));
 
-			intel_display_power_put(dev_priv, power_domain);
+			intel_display_power_put(dev_priv, power_domain, pref);
 		}
 
-		intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+		pref = intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
 		seq_printf(m, "Port hotplug:\t%08x\n",
 			   I915_READ(PORT_HOTPLUG_EN));
 		seq_printf(m, "DPFLIPSTAT:\t%08x\n",
 			   I915_READ(VLV_DPFLIPSTAT));
 		seq_printf(m, "DPINVGTT:\t%08x\n",
 			   I915_READ(DPINVGTT));
-		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, pref);
 
 		for (i = 0; i < 4; i++) {
 			seq_printf(m, "GT Interrupt IMR %d:\t%08x\n",
@@ -779,10 +784,12 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 			   I915_READ(VLV_IMR));
 		for_each_pipe(dev_priv, pipe) {
 			enum intel_display_power_domain power_domain;
+			intel_wakeref_t pref;
 
 			power_domain = POWER_DOMAIN_PIPE(pipe);
-			if (!intel_display_power_get_if_enabled(dev_priv,
-								power_domain)) {
+			pref = intel_display_power_get_if_enabled(dev_priv,
+								  power_domain);
+			if (!pref) {
 				seq_printf(m, "Pipe %c power disabled\n",
 					   pipe_name(pipe));
 				continue;
@@ -791,7 +798,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 			seq_printf(m, "Pipe %c stat:\t%08x\n",
 				   pipe_name(pipe),
 				   I915_READ(PIPESTAT(pipe)));
-			intel_display_power_put(dev_priv, power_domain);
+			intel_display_power_put(dev_priv, power_domain, pref);
 		}
 
 		seq_printf(m, "Master IER:\t%08x\n",
@@ -1709,8 +1716,7 @@ static int i915_sr_status(struct seq_file *m, void *unused)
 	intel_wakeref_t wakeref;
 	bool sr_enabled = false;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
 
 	if (INTEL_GEN(dev_priv) >= 9)
 		/* no global SR status; inspect per-plane WM */;
@@ -1726,8 +1732,7 @@ static int i915_sr_status(struct seq_file *m, void *unused)
 	else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, wakeref);
 
 	seq_printf(m, "self-refresh: %s\n", enableddisabled(sr_enabled));
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bf25ae92f5de..7f688f63cbaf 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -344,6 +344,7 @@ struct intel_csr {
 	uint32_t mmiodata[8];
 	uint32_t dc_state;
 	uint32_t allowed_dc_mask;
+	intel_wakeref_t wakeref;
 };
 
 enum i915_cache_level {
@@ -1969,6 +1970,7 @@ struct drm_i915_private {
 		 * is a slight delay before we do so.
 		 */
 		intel_wakeref_t awake;
+		intel_wakeref_t power;
 
 		/**
 		 * The number of times we have woken up.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9dd31c3236fb..7e4db2cb8501 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -177,7 +177,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
 	if (INTEL_GEN(i915) >= 6)
 		gen6_rps_idle(i915);
 
-	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ);
+	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, i915->gt.power);
 
 	intel_runtime_pm_put(i915, wakeref);
 
@@ -222,7 +222,7 @@ void i915_gem_unpark(struct drm_i915_private *i915)
 	 * Work around it by grabbing a GT IRQ power domain whilst there is any
 	 * GT activity, preventing any DC state transitions.
 	 */
-	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
+	i915->gt.power = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
 
 	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
 		i915->gt.epoch = 1;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index b0cbad2e83c5..faff6cf1aaa1 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1364,14 +1364,14 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 
 	free_oa_buffer(dev_priv);
 
-	put_oa_config(dev_priv, stream->oa_config);
-
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 	intel_runtime_pm_put(dev_priv, stream->wakeref);
 
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 
+	put_oa_config(dev_priv, stream->oa_config);
+
 	if (dev_priv->perf.oa.spurious_report_rs.missed) {
 		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
 			 dev_priv->perf.oa.spurious_report_rs.missed);
diff --git a/drivers/gpu/drm/i915/icl_dsi.c b/drivers/gpu/drm/i915/icl_dsi.c
index 4dd793b78996..f3a5f03646ce 100644
--- a/drivers/gpu/drm/i915/icl_dsi.c
+++ b/drivers/gpu/drm/i915/icl_dsi.c
@@ -337,9 +337,11 @@ static void gen11_dsi_enable_io_power(struct intel_encoder *encoder)
 	}
 
 	for_each_dsi_port(port, intel_dsi->ports) {
-		intel_display_power_get(dev_priv, port == PORT_A ?
-					POWER_DOMAIN_PORT_DDI_A_IO :
-					POWER_DOMAIN_PORT_DDI_B_IO);
+		intel_dsi->io_wakeref[port] =
+			intel_display_power_get(dev_priv,
+						port == PORT_A ?
+						POWER_DOMAIN_PORT_DDI_A_IO :
+						POWER_DOMAIN_PORT_DDI_B_IO);
 	}
 }
 
@@ -1125,10 +1127,18 @@ static void gen11_dsi_disable_io_power(struct intel_encoder *encoder)
 	enum port port;
 	u32 tmp;
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PORT_DDI_A_IO);
-
-	if (intel_dsi->dual_link)
-		intel_display_power_put(dev_priv, POWER_DOMAIN_PORT_DDI_B_IO);
+	for_each_dsi_port(port, intel_dsi->ports) {
+		intel_wakeref_t wakeref;
+
+		wakeref = fetch_and_zero(&intel_dsi->io_wakeref[port]);
+		if (wakeref) {
+			intel_display_power_put(dev_priv,
+						port == PORT_A ?
+						POWER_DOMAIN_PORT_DDI_A_IO :
+						POWER_DOMAIN_PORT_DDI_B_IO,
+						wakeref);
+		}
+	}
 
 	/* set mode to DDI */
 	for_each_dsi_port(port, intel_dsi->ports) {
@@ -1229,13 +1239,15 @@ static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
-	u32 tmp;
-	enum port port;
 	enum transcoder dsi_trans;
+	intel_wakeref_t wakeref;
+	enum port port;
 	bool ret = false;
+	u32 tmp;
 
-	if (!intel_display_power_get_if_enabled(dev_priv,
-						encoder->power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     encoder->power_domain);
+	if (!wakeref)
 		return false;
 
 	for_each_dsi_port(port, intel_dsi->ports) {
@@ -1260,7 +1272,7 @@ static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder,
 		ret = tmp & PIPECONF_ENABLE;
 	}
 out:
-	intel_display_power_put(dev_priv, encoder->power_domain);
+	intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 0571aa2846a7..d4c7bb43442a 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -749,7 +749,8 @@ static void i915_audio_component_get_power(struct device *kdev)
 
 static void i915_audio_component_put_power(struct device *kdev)
 {
-	intel_display_power_put(kdev_to_i915(kdev), POWER_DOMAIN_AUDIO);
+	intel_display_power_put_unchecked(kdev_to_i915(kdev),
+					  POWER_DOMAIN_AUDIO);
 }
 
 static void i915_audio_component_codec_wake_override(struct device *kdev,
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index 2021e484a287..73cb7250118e 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -520,6 +520,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 {
 	int cdclk = cdclk_state->cdclk;
 	u32 val, cmd = cdclk_state->voltage_level;
+	intel_wakeref_t wakeref;
 
 	switch (cdclk) {
 	case 400000:
@@ -539,7 +540,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 	 * a system suspend.  So grab the PIPE-A domain, which covers
 	 * the HW blocks needed for the following programming.
 	 */
-	intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
+	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
 
 	mutex_lock(&dev_priv->pcu_lock);
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
@@ -593,7 +594,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 
 	vlv_program_pfi_credits(dev_priv);
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A, wakeref);
 }
 
 static void chv_set_cdclk(struct drm_i915_private *dev_priv,
@@ -601,6 +602,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 {
 	int cdclk = cdclk_state->cdclk;
 	u32 val, cmd = cdclk_state->voltage_level;
+	intel_wakeref_t wakeref;
 
 	switch (cdclk) {
 	case 333333:
@@ -619,7 +621,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 	 * a system suspend.  So grab the PIPE-A domain, which covers
 	 * the HW blocks needed for the following programming.
 	 */
-	intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
+	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
 
 	mutex_lock(&dev_priv->pcu_lock);
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
@@ -637,7 +639,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 
 	vlv_program_pfi_credits(dev_priv);
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A, wakeref);
 }
 
 static int bdw_calc_cdclk(int min_cdclk)
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 0a41e58d61de..84054a377f4a 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -84,15 +84,17 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crt *crt = intel_encoder_to_crt(encoder);
+	intel_wakeref_t wakeref;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv,
-						encoder->power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     encoder->power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = intel_crt_port_enabled(dev_priv, crt->adpa_reg, pipe);
 
-	intel_display_power_put(dev_priv, encoder->power_domain);
+	intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
 
 	return ret;
 }
@@ -777,6 +779,7 @@ intel_crt_detect(struct drm_connector *connector,
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
 	struct intel_crt *crt = intel_attached_crt(connector);
 	struct intel_encoder *intel_encoder = &crt->base;
+	intel_wakeref_t wakeref;
 	int status, ret;
 	struct intel_load_detect_pipe tmp;
 
@@ -785,7 +788,8 @@ intel_crt_detect(struct drm_connector *connector,
 		      force);
 
 	if (i915_modparams.load_detect_test) {
-		intel_display_power_get(dev_priv, intel_encoder->power_domain);
+		wakeref = intel_display_power_get(dev_priv,
+						  intel_encoder->power_domain);
 		goto load_detect;
 	}
 
@@ -793,7 +797,8 @@ intel_crt_detect(struct drm_connector *connector,
 	if (dmi_check_system(intel_spurious_crt_detect))
 		return connector_status_disconnected;
 
-	intel_display_power_get(dev_priv, intel_encoder->power_domain);
+	wakeref = intel_display_power_get(dev_priv,
+					  intel_encoder->power_domain);
 
 	if (I915_HAS_HOTPLUG(dev_priv)) {
 		/* We can not rely on the HPD pin always being correctly wired
@@ -848,7 +853,7 @@ intel_crt_detect(struct drm_connector *connector,
 	}
 
 out:
-	intel_display_power_put(dev_priv, intel_encoder->power_domain);
+	intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref);
 	return status;
 }
 
@@ -858,10 +863,12 @@ static int intel_crt_get_modes(struct drm_connector *connector)
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_crt *crt = intel_attached_crt(connector);
 	struct intel_encoder *intel_encoder = &crt->base;
-	int ret;
+	intel_wakeref_t wakeref;
 	struct i2c_adapter *i2c;
+	int ret;
 
-	intel_display_power_get(dev_priv, intel_encoder->power_domain);
+	wakeref = intel_display_power_get(dev_priv,
+					  intel_encoder->power_domain);
 
 	i2c = intel_gmbus_get_adapter(dev_priv, dev_priv->vbt.crt_ddc_pin);
 	ret = intel_crt_ddc_get_modes(connector, i2c);
@@ -873,7 +880,7 @@ static int intel_crt_get_modes(struct drm_connector *connector)
 	ret = intel_crt_ddc_get_modes(connector, i2c);
 
 out:
-	intel_display_power_put(dev_priv, intel_encoder->power_domain);
+	intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index a516697bf57d..ea5fb64d33dd 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -409,6 +409,21 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv,
 	return memcpy(dmc_payload, &fw->data[readcount], nbytes);
 }
 
+static void intel_csr_runtime_pm_get(struct drm_i915_private *dev_priv)
+{
+	WARN_ON(dev_priv->csr.wakeref);
+	dev_priv->csr.wakeref =
+		intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+}
+
+static void intel_csr_runtime_pm_put(struct drm_i915_private *dev_priv)
+{
+	intel_wakeref_t wakeref __maybe_unused =
+		fetch_and_zero(&dev_priv->csr.wakeref);
+
+	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, wakeref);
+}
+
 static void csr_load_work_fn(struct work_struct *work)
 {
 	struct drm_i915_private *dev_priv;
@@ -424,8 +439,7 @@ static void csr_load_work_fn(struct work_struct *work)
 
 	if (dev_priv->csr.dmc_payload) {
 		intel_csr_load_program(dev_priv);
-
-		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+		intel_csr_runtime_pm_put(dev_priv);
 
 		DRM_INFO("Finished loading DMC firmware %s (v%u.%u)\n",
 			 dev_priv->csr.fw_path,
@@ -467,7 +481,7 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv)
 	 * suspend as runtime suspend *requires* a working CSR for whatever
 	 * reason.
 	 */
-	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+	intel_csr_runtime_pm_get(dev_priv);
 
 	if (INTEL_GEN(dev_priv) >= 12) {
 		/* Allow to load fw via parameter using the last known size */
@@ -538,7 +552,7 @@ void intel_csr_ucode_suspend(struct drm_i915_private *dev_priv)
 
 	/* Drop the reference held in case DMC isn't loaded. */
 	if (!dev_priv->csr.dmc_payload)
-		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+		intel_csr_runtime_pm_put(dev_priv);
 }
 
 /**
@@ -558,7 +572,7 @@ void intel_csr_ucode_resume(struct drm_i915_private *dev_priv)
 	 * loaded.
 	 */
 	if (!dev_priv->csr.dmc_payload)
-		intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+		intel_csr_runtime_pm_get(dev_priv);
 }
 
 /**
@@ -574,6 +588,7 @@ void intel_csr_ucode_fini(struct drm_i915_private *dev_priv)
 		return;
 
 	intel_csr_ucode_suspend(dev_priv);
+	WARN_ON(dev_priv->csr.wakeref);
 
 	kfree(dev_priv->csr.dmc_payload);
 }
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 2d6ed990a232..7f3cd055de50 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -1860,12 +1860,14 @@ int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder,
 {
 	struct drm_device *dev = intel_encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
+	intel_wakeref_t wakeref;
 	enum pipe pipe = 0;
 	int ret = 0;
 	uint32_t tmp;
 
-	if (WARN_ON(!intel_display_power_get_if_enabled(dev_priv,
-						intel_encoder->power_domain)))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     intel_encoder->power_domain);
+	if (WARN_ON(!wakeref))
 		return -ENXIO;
 
 	if (WARN_ON(!intel_encoder->get_hw_state(intel_encoder, &pipe))) {
@@ -1880,7 +1882,7 @@ int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder,
 		tmp &= ~TRANS_DDI_HDCP_SIGNALLING;
 	I915_WRITE(TRANS_DDI_FUNC_CTL(pipe), tmp);
 out:
-	intel_display_power_put(dev_priv, intel_encoder->power_domain);
+	intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref);
 	return ret;
 }
 
@@ -1891,13 +1893,15 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector)
 	struct intel_encoder *encoder = intel_connector->encoder;
 	int type = intel_connector->base.connector_type;
 	enum port port = encoder->port;
-	enum pipe pipe = 0;
 	enum transcoder cpu_transcoder;
+	intel_wakeref_t wakeref;
+	enum pipe pipe = 0;
 	uint32_t tmp;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv,
-						encoder->power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     encoder->power_domain);
+	if (!wakeref)
 		return false;
 
 	if (!encoder->get_hw_state(encoder, &pipe)) {
@@ -1939,7 +1943,7 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector)
 	}
 
 out:
-	intel_display_power_put(dev_priv, encoder->power_domain);
+	intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
 
 	return ret;
 }
@@ -1950,6 +1954,7 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder,
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	enum port port = encoder->port;
+	intel_wakeref_t wakeref;
 	enum pipe p;
 	u32 tmp;
 	u8 mst_pipe_mask;
@@ -1957,8 +1962,9 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder,
 	*pipe_mask = 0;
 	*is_dp_mst = false;
 
-	if (!intel_display_power_get_if_enabled(dev_priv,
-						encoder->power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     encoder->power_domain);
+	if (!wakeref)
 		return;
 
 	tmp = I915_READ(DDI_BUF_CTL(port));
@@ -2029,7 +2035,7 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder,
 				  "(PHY_CTL %08x)\n", port_name(port), tmp);
 	}
 
-	intel_display_power_put(dev_priv, encoder->power_domain);
+	intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
 }
 
 bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
@@ -3286,7 +3292,8 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder,
 	intel_edp_panel_vdd_on(intel_dp);
 	intel_edp_panel_off(intel_dp);
 
-	intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain);
+	intel_display_power_put_unchecked(dev_priv,
+					  dig_port->ddi_io_power_domain);
 
 	intel_ddi_clk_disable(encoder);
 }
@@ -3306,7 +3313,8 @@ static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder,
 
 	intel_disable_ddi_buf(encoder, old_crtc_state);
 
-	intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain);
+	intel_display_power_put_unchecked(dev_priv,
+					  dig_port->ddi_io_power_domain);
 
 	intel_ddi_clk_disable(encoder);
 
@@ -3626,8 +3634,8 @@ intel_ddi_post_pll_disable(struct intel_encoder *encoder,
 
 	if (intel_crtc_has_dp_encoder(crtc_state) ||
 	    intel_port_is_tc(dev_priv, encoder->port))
-		intel_display_power_put(dev_priv,
-					intel_ddi_main_link_aux_domain(dig_port));
+		intel_display_power_put_unchecked(dev_priv,
+						  intel_ddi_main_link_aux_domain(dig_port));
 }
 
 void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index ea70cb8cf50a..42fc362591a8 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1198,17 +1198,19 @@ void assert_pipe(struct drm_i915_private *dev_priv,
 	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
 								      pipe);
 	enum intel_display_power_domain power_domain;
+	intel_wakeref_t wakeref;
 
 	/* we keep both pipes enabled on 830 */
 	if (IS_I830(dev_priv))
 		state = true;
 
 	power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder);
-	if (intel_display_power_get_if_enabled(dev_priv, power_domain)) {
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (wakeref) {
 		u32 val = I915_READ(PIPECONF(cpu_transcoder));
 		cur_state = !!(val & PIPECONF_ENABLE);
 
-		intel_display_power_put(dev_priv, power_domain);
+		intel_display_power_put(dev_priv, power_domain, wakeref);
 	} else {
 		cur_state = false;
 	}
@@ -3413,6 +3415,7 @@ static bool i9xx_plane_get_hw_state(struct intel_plane *plane,
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum intel_display_power_domain power_domain;
 	enum i9xx_plane_id i9xx_plane = plane->i9xx_plane;
+	intel_wakeref_t wakeref;
 	bool ret;
 	u32 val;
 
@@ -3422,7 +3425,8 @@ static bool i9xx_plane_get_hw_state(struct intel_plane *plane,
 	 * display power wells.
 	 */
 	power_domain = POWER_DOMAIN_PIPE(plane->pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	val = I915_READ(DSPCNTR(i9xx_plane));
@@ -3435,7 +3439,7 @@ static bool i9xx_plane_get_hw_state(struct intel_plane *plane,
 		*pipe = (val & DISPPLANE_SEL_PIPE_MASK) >>
 			DISPPLANE_SEL_PIPE_SHIFT;
 
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
@@ -6108,7 +6112,7 @@ static void modeset_put_power_domains(struct drm_i915_private *dev_priv,
 	enum intel_display_power_domain domain;
 
 	for_each_power_domain(domain, domains)
-		intel_display_power_put(dev_priv, domain);
+		intel_display_power_put_unchecked(dev_priv, domain);
 }
 
 static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config,
@@ -6355,7 +6359,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
 
 	domains = intel_crtc->enabled_power_domains;
 	for_each_power_domain(domain, domains)
-		intel_display_power_put(dev_priv, domain);
+		intel_display_power_put_unchecked(dev_priv, domain);
 	intel_crtc->enabled_power_domains = 0;
 
 	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
@@ -7967,11 +7971,13 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum intel_display_power_domain power_domain;
+	intel_wakeref_t wakeref;
 	uint32_t tmp;
 	bool ret;
 
 	power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
@@ -8072,7 +8078,7 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
 	ret = true;
 
 out:
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
@@ -9039,11 +9045,13 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	enum intel_display_power_domain power_domain;
+	intel_wakeref_t wakeref;
 	uint32_t tmp;
 	bool ret;
 
 	power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
@@ -9126,7 +9134,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 	ret = true;
 
 out:
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
@@ -9735,7 +9743,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
 
 out:
 	for_each_power_domain(power_domain, power_domain_mask)
-		intel_display_power_put(dev_priv, power_domain);
+		intel_display_power_put_unchecked(dev_priv, power_domain);
 
 	return active;
 }
@@ -9985,17 +9993,19 @@ static bool i845_cursor_get_hw_state(struct intel_plane *plane,
 {
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum intel_display_power_domain power_domain;
+	intel_wakeref_t wakeref;
 	bool ret;
 
 	power_domain = POWER_DOMAIN_PIPE(PIPE_A);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE;
 
 	*pipe = PIPE_A;
 
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
@@ -10218,6 +10228,7 @@ static bool i9xx_cursor_get_hw_state(struct intel_plane *plane,
 {
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum intel_display_power_domain power_domain;
+	intel_wakeref_t wakeref;
 	bool ret;
 	u32 val;
 
@@ -10227,7 +10238,8 @@ static bool i9xx_cursor_get_hw_state(struct intel_plane *plane,
 	 * display power wells.
 	 */
 	power_domain = POWER_DOMAIN_PIPE(plane->pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	val = I915_READ(CURCNTR(plane->pipe));
@@ -10240,7 +10252,7 @@ static bool i9xx_cursor_get_hw_state(struct intel_plane *plane,
 		*pipe = (val & MCURSOR_PIPE_SELECT_MASK) >>
 			MCURSOR_PIPE_SELECT_SHIFT;
 
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
@@ -12951,6 +12963,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 	struct drm_crtc *crtc;
 	struct intel_crtc *intel_crtc;
 	u64 put_domains[I915_MAX_PIPES] = {};
+	intel_wakeref_t wakeref = 0;
 	int i;
 
 	intel_atomic_commit_fence_wait(intel_state);
@@ -12958,7 +12971,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 	drm_atomic_helper_wait_for_dependencies(state);
 
 	if (intel_state->modeset)
-		intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
+		wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
 
 	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
 		old_intel_crtc_state = to_intel_crtc_state(old_crtc_state);
@@ -13095,7 +13108,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 		 * the culprit.
 		 */
 		intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
-		intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET);
+		intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET, wakeref);
 	}
 
 	/*
@@ -15497,19 +15510,25 @@ void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv)
 
 void i915_redisable_vga(struct drm_i915_private *dev_priv)
 {
-	/* This function can be called both from intel_modeset_setup_hw_state or
+	intel_wakeref_t wakeref;
+
+	/*
+	 * This function can be called both from intel_modeset_setup_hw_state or
 	 * at a very early point in our resume sequence, where the power well
 	 * structures are not yet restored. Since this function is at a very
 	 * paranoid "someone might have enabled VGA while we were not looking"
 	 * level, just check if the power well is enabled instead of trying to
 	 * follow the "don't touch the power well if we don't need it" policy
-	 * the rest of the driver uses. */
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_VGA))
+	 * the rest of the driver uses.
+	 */
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_VGA);
+	if (!wakeref)
 		return;
 
 	i915_redisable_vga_power_on(dev_priv);
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_VGA);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_VGA, wakeref);
 }
 
 /* FIXME read out full plane state for all planes */
@@ -15809,12 +15828,13 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
 			     struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *crtc;
 	struct intel_crtc_state *crtc_state;
 	struct intel_encoder *encoder;
+	struct intel_crtc *crtc;
+	intel_wakeref_t wakeref;
 	int i;
 
-	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
 
 	intel_early_display_was(dev_priv);
 	intel_modeset_readout_hw_state(dev);
@@ -15884,7 +15904,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
 			modeset_put_power_domains(dev_priv, put_domains);
 	}
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, wakeref);
 
 	intel_fbc_init_pipe_state(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 62fd11540942..e8031daacee2 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -622,8 +622,8 @@ static void pps_unlock(struct intel_dp *intel_dp)
 
 	mutex_unlock(&dev_priv->pps_mutex);
 
-	intel_display_power_put(dev_priv,
-				intel_aux_power_domain(dp_to_dig_port(intel_dp)));
+	intel_display_power_put_unchecked(dev_priv,
+					  intel_aux_power_domain(dp_to_dig_port(intel_dp)));
 }
 
 static void
@@ -2512,8 +2512,8 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp)
 	if ((pp & PANEL_POWER_ON) == 0)
 		intel_dp->panel_power_off_time = ktime_get_boottime();
 
-	intel_display_power_put(dev_priv,
-				intel_aux_power_domain(intel_dig_port));
+	intel_display_power_put_unchecked(dev_priv,
+					  intel_aux_power_domain(intel_dig_port));
 }
 
 static void edp_panel_vdd_work(struct work_struct *__work)
@@ -2658,7 +2658,7 @@ static void edp_panel_off(struct intel_dp *intel_dp)
 	intel_dp->panel_power_off_time = ktime_get_boottime();
 
 	/* We got a reference when we enabled the VDD. */
-	intel_display_power_put(dev_priv, intel_aux_power_domain(dig_port));
+	intel_display_power_put_unchecked(dev_priv, intel_aux_power_domain(dig_port));
 }
 
 void intel_edp_panel_off(struct intel_dp *intel_dp)
@@ -2984,16 +2984,18 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	intel_wakeref_t wakeref;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv,
-						encoder->power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     encoder->power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = intel_dp_port_enabled(dev_priv, intel_dp->output_reg,
 				    encoder->port, pipe);
 
-	intel_display_power_put(dev_priv, encoder->power_domain);
+	intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
 
 	return ret;
 }
@@ -5366,12 +5368,13 @@ intel_dp_detect(struct drm_connector *connector,
 	enum drm_connector_status status;
 	enum intel_display_power_domain aux_domain =
 		intel_aux_power_domain(dig_port);
+	intel_wakeref_t wakeref;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 		      connector->base.id, connector->name);
 	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
 
-	intel_display_power_get(dev_priv, aux_domain);
+	wakeref = intel_display_power_get(dev_priv, aux_domain);
 
 	/* Can't disconnect eDP */
 	if (intel_dp_is_edp(intel_dp))
@@ -5437,7 +5440,7 @@ intel_dp_detect(struct drm_connector *connector,
 
 		ret = intel_dp_retrain_link(encoder, ctx);
 		if (ret) {
-			intel_display_power_put(dev_priv, aux_domain);
+			intel_display_power_put(dev_priv, aux_domain, wakeref);
 			return ret;
 		}
 	}
@@ -5461,7 +5464,7 @@ intel_dp_detect(struct drm_connector *connector,
 	if (status != connector_status_connected && !intel_dp->is_mst)
 		intel_dp_unset_edid(intel_dp);
 
-	intel_display_power_put(dev_priv, aux_domain);
+	intel_display_power_put(dev_priv, aux_domain, wakeref);
 	return status;
 }
 
@@ -5474,6 +5477,7 @@ intel_dp_force(struct drm_connector *connector)
 	struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev);
 	enum intel_display_power_domain aux_domain =
 		intel_aux_power_domain(dig_port);
+	intel_wakeref_t wakeref;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 		      connector->base.id, connector->name);
@@ -5482,11 +5486,11 @@ intel_dp_force(struct drm_connector *connector)
 	if (connector->status != connector_status_connected)
 		return;
 
-	intel_display_power_get(dev_priv, aux_domain);
+	wakeref = intel_display_power_get(dev_priv, aux_domain);
 
 	intel_dp_set_edid(intel_dp);
 
-	intel_display_power_put(dev_priv, aux_domain);
+	intel_display_power_put(dev_priv, aux_domain, wakeref);
 }
 
 static int intel_dp_get_modes(struct drm_connector *connector)
@@ -5932,6 +5936,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	enum irqreturn ret = IRQ_NONE;
+	intel_wakeref_t wakeref;
 
 	if (long_hpd && intel_dig_port->base.type == INTEL_OUTPUT_EDP) {
 		/*
@@ -5954,8 +5959,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 		return IRQ_NONE;
 	}
 
-	intel_display_power_get(dev_priv,
-				intel_aux_power_domain(intel_dig_port));
+	wakeref = intel_display_power_get(dev_priv,
+					  intel_aux_power_domain(intel_dig_port));
 
 	if (intel_dp->is_mst) {
 		if (intel_dp_check_mst_status(intel_dp) == -EINVAL) {
@@ -5985,7 +5990,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 
 put_power:
 	intel_display_power_put(dev_priv,
-				intel_aux_power_domain(intel_dig_port));
+				intel_aux_power_domain(intel_dig_port),
+				wakeref);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index d513ca875c67..04870e960537 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -345,9 +345,12 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv,
 				      struct intel_dpll_hw_state *hw_state)
 {
 	const enum intel_dpll_id id = pll->info->id;
+	intel_wakeref_t wakeref;
 	uint32_t val;
 
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_PLLS);
+	if (!wakeref)
 		return false;
 
 	val = I915_READ(PCH_DPLL(id));
@@ -355,7 +358,7 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv,
 	hw_state->fp0 = I915_READ(PCH_FP0(id));
 	hw_state->fp1 = I915_READ(PCH_FP1(id));
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
 
 	return val & DPLL_VCO_ENABLE;
 }
@@ -509,15 +512,18 @@ static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv,
 				       struct intel_dpll_hw_state *hw_state)
 {
 	const enum intel_dpll_id id = pll->info->id;
+	intel_wakeref_t wakeref;
 	uint32_t val;
 
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_PLLS);
+	if (!wakeref)
 		return false;
 
 	val = I915_READ(WRPLL_CTL(id));
 	hw_state->wrpll = val;
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
 
 	return val & WRPLL_PLL_ENABLE;
 }
@@ -526,15 +532,18 @@ static bool hsw_ddi_spll_get_hw_state(struct drm_i915_private *dev_priv,
 				      struct intel_shared_dpll *pll,
 				      struct intel_dpll_hw_state *hw_state)
 {
+	intel_wakeref_t wakeref;
 	uint32_t val;
 
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_PLLS);
+	if (!wakeref)
 		return false;
 
 	val = I915_READ(SPLL_CTL);
 	hw_state->spll = val;
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
 
 	return val & SPLL_PLL_ENABLE;
 }
@@ -989,9 +998,12 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	uint32_t val;
 	const struct skl_dpll_regs *regs = skl_dpll_regs;
 	const enum intel_dpll_id id = pll->info->id;
+	intel_wakeref_t wakeref;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_PLLS);
+	if (!wakeref)
 		return false;
 
 	ret = false;
@@ -1011,7 +1023,7 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	ret = true;
 
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
 
 	return ret;
 }
@@ -1020,12 +1032,15 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv,
 				       struct intel_shared_dpll *pll,
 				       struct intel_dpll_hw_state *hw_state)
 {
-	uint32_t val;
 	const struct skl_dpll_regs *regs = skl_dpll_regs;
 	const enum intel_dpll_id id = pll->info->id;
+	intel_wakeref_t wakeref;
+	uint32_t val;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_PLLS);
+	if (!wakeref)
 		return false;
 
 	ret = false;
@@ -1041,7 +1056,7 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv,
 	ret = true;
 
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
 
 	return ret;
 }
@@ -1579,14 +1594,17 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 					struct intel_dpll_hw_state *hw_state)
 {
 	enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */
-	uint32_t val;
-	bool ret;
+	intel_wakeref_t wakeref;
 	enum dpio_phy phy;
 	enum dpio_channel ch;
+	uint32_t val;
+	bool ret;
 
 	bxt_port_to_phy_channel(dev_priv, port, &phy, &ch);
 
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_PLLS);
+	if (!wakeref)
 		return false;
 
 	ret = false;
@@ -1643,7 +1661,7 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	ret = true;
 
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
 
 	return ret;
 }
@@ -2091,10 +2109,13 @@ static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 				     struct intel_dpll_hw_state *hw_state)
 {
 	const enum intel_dpll_id id = pll->info->id;
+	intel_wakeref_t wakeref;
 	uint32_t val;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_PLLS);
+	if (!wakeref)
 		return false;
 
 	ret = false;
@@ -2113,7 +2134,7 @@ static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	ret = true;
 
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
 
 	return ret;
 }
@@ -2950,11 +2971,14 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv,
 				 struct intel_dpll_hw_state *hw_state)
 {
 	const enum intel_dpll_id id = pll->info->id;
-	uint32_t val;
-	enum port port;
+	intel_wakeref_t wakeref;
 	bool ret = false;
+	enum port port;
+	uint32_t val;
 
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_PLLS);
+	if (!wakeref)
 		return false;
 
 	val = I915_READ(icl_pll_id_to_enable_reg(id));
@@ -3007,7 +3031,7 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv,
 
 	ret = true;
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 4272c260b6e1..9382c28fc37c 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -2118,12 +2118,21 @@ bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
 				    enum intel_display_power_domain domain);
 bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
 				      enum intel_display_power_domain domain);
-void intel_display_power_get(struct drm_i915_private *dev_priv,
-			     enum intel_display_power_domain domain);
-bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
+intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv,
 					enum intel_display_power_domain domain);
+intel_wakeref_t
+intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
+				   enum intel_display_power_domain domain);
+void intel_display_power_put_unchecked(struct drm_i915_private *dev_priv,
+				       enum intel_display_power_domain domain);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
 void intel_display_power_put(struct drm_i915_private *dev_priv,
-			     enum intel_display_power_domain domain);
+			     enum intel_display_power_domain domain,
+			     intel_wakeref_t wakeref);
+#else
+#define intel_display_power_put(i915, domain, wakeref) \
+	intel_display_power_put_unchecked(i915, domain)
+#endif
 void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
 			    u8 req_slices);
 
diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h
index d968f1f13e09..1c3f44e23e5d 100644
--- a/drivers/gpu/drm/i915/intel_dsi.h
+++ b/drivers/gpu/drm/i915/intel_dsi.h
@@ -40,6 +40,7 @@ struct intel_dsi {
 	struct intel_encoder base;
 
 	struct intel_dsi_host *dsi_hosts[I915_MAX_PORTS];
+	intel_wakeref_t io_wakeref[I915_MAX_PORTS];
 
 	/* GPIO Desc for CRC based Panel control */
 	struct gpio_desc *gpio_panel;
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 07e803a604bd..284a08a8bd2a 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1191,15 +1191,17 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	intel_wakeref_t wakeref;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv,
-						encoder->power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     encoder->power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = intel_sdvo_port_enabled(dev_priv, intel_hdmi->hdmi_reg, pipe);
 
-	intel_display_power_put(dev_priv, encoder->power_domain);
+	intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
 
 	return ret;
 }
@@ -1896,11 +1898,12 @@ intel_hdmi_set_edid(struct drm_connector *connector)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
 	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
+	intel_wakeref_t wakeref;
 	struct edid *edid;
 	bool connected = false;
 	struct i2c_adapter *i2c;
 
-	intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
+	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
 
 	i2c = intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus);
 
@@ -1915,7 +1918,7 @@ intel_hdmi_set_edid(struct drm_connector *connector)
 
 	intel_hdmi_dp_dual_mode_detect(connector, edid != NULL);
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS, wakeref);
 
 	to_intel_connector(connector)->detect_edid = edid;
 	if (edid && edid->input & DRM_EDID_INPUT_DIGITAL) {
@@ -1940,11 +1943,12 @@ intel_hdmi_detect(struct drm_connector *connector, bool force)
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
 	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
 	struct intel_encoder *encoder = &hdmi_to_dig_port(intel_hdmi)->base;
+	intel_wakeref_t wakeref;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 		      connector->base.id, connector->name);
 
-	intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
+	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
 
 	if (IS_ICELAKE(dev_priv) &&
 	    !intel_digital_port_connected(encoder))
@@ -1956,7 +1960,7 @@ intel_hdmi_detect(struct drm_connector *connector, bool force)
 		status = connector_status_connected;
 
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS, wakeref);
 
 	if (status != connector_status_connected)
 		cec_notifier_phys_addr_invalidate(intel_hdmi->cec_notifier);
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index 802d0394ccc4..012341a66431 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -698,12 +698,13 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num,
 static int
 gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num)
 {
-	struct intel_gmbus *bus = container_of(adapter, struct intel_gmbus,
-					       adapter);
+	struct intel_gmbus *bus =
+		container_of(adapter, struct intel_gmbus, adapter);
 	struct drm_i915_private *dev_priv = bus->dev_priv;
+	intel_wakeref_t wakeref;
 	int ret;
 
-	intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
+	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
 
 	if (bus->force_bit) {
 		ret = i2c_bit_algo.master_xfer(adapter, msgs, num);
@@ -715,17 +716,16 @@ gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num)
 			bus->force_bit |= GMBUS_FORCE_BIT_RETRY;
 	}
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS, wakeref);
 
 	return ret;
 }
 
 int intel_gmbus_output_aksv(struct i2c_adapter *adapter)
 {
-	struct intel_gmbus *bus = container_of(adapter, struct intel_gmbus,
-					       adapter);
+	struct intel_gmbus *bus =
+		container_of(adapter, struct intel_gmbus, adapter);
 	struct drm_i915_private *dev_priv = bus->dev_priv;
-	int ret;
 	u8 cmd = DRM_HDCP_DDC_AKSV;
 	u8 buf[DRM_HDCP_KSV_LEN] = { 0 };
 	struct i2c_msg msgs[] = {
@@ -742,8 +742,10 @@ int intel_gmbus_output_aksv(struct i2c_adapter *adapter)
 			.buf = buf,
 		}
 	};
+	intel_wakeref_t wakeref;
+	int ret;
 
-	intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
+	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
 	mutex_lock(&dev_priv->gmbus_mutex);
 
 	/*
@@ -754,7 +756,7 @@ int intel_gmbus_output_aksv(struct i2c_adapter *adapter)
 	ret = do_gmbus_xfer(adapter, msgs, ARRAY_SIZE(msgs), GMBUS_AKSV_SELECT);
 
 	mutex_unlock(&dev_priv->gmbus_mutex);
-	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS, wakeref);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index b85e195f7c8a..1f9832119f76 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -95,15 +95,17 @@ static bool intel_lvds_get_hw_state(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
+	intel_wakeref_t wakeref;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv,
-						encoder->power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     encoder->power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = intel_lvds_port_enabled(dev_priv, lvds_encoder->reg, pipe);
 
-	intel_display_power_put(dev_priv, encoder->power_domain);
+	intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c
index bdabcfab8090..56d614b02302 100644
--- a/drivers/gpu/drm/i915/intel_pipe_crc.c
+++ b/drivers/gpu/drm/i915/intel_pipe_crc.c
@@ -589,6 +589,7 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name)
 	struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[crtc->index];
 	enum intel_display_power_domain power_domain;
 	enum intel_pipe_crc_source source;
+	intel_wakeref_t wakeref;
 	u32 val = 0; /* shut up gcc */
 	int ret = 0;
 
@@ -598,7 +599,8 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name)
 	}
 
 	power_domain = POWER_DOMAIN_PIPE(crtc->index);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) {
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref) {
 		DRM_DEBUG_KMS("Trying to capture CRC while pipe is off\n");
 		return -EIO;
 	}
@@ -624,7 +626,7 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name)
 	pipe_crc->skipped = 0;
 
 out:
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 83b01cde8113..ab7257720c7e 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3989,10 +3989,12 @@ void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum intel_display_power_domain power_domain;
 	enum pipe pipe = crtc->pipe;
+	intel_wakeref_t wakeref;
 	enum plane_id plane_id;
 
 	power_domain = POWER_DOMAIN_PIPE(pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return;
 
 	for_each_plane_id_on_crtc(crtc, plane_id)
@@ -4001,7 +4003,7 @@ void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
 					   &ddb_y[plane_id],
 					   &ddb_uv[plane_id]);
 
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 }
 
 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 38c22fb7152e..60bd310f8358 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -1825,18 +1825,19 @@ __intel_display_power_get_domain(struct drm_i915_private *dev_priv,
  * Any power domain reference obtained by this function must have a symmetric
  * call to intel_display_power_put() to release the reference again.
  */
-void intel_display_power_get(struct drm_i915_private *dev_priv,
-			     enum intel_display_power_domain domain)
+intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv,
+					enum intel_display_power_domain domain)
 {
 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-
-	intel_runtime_pm_get(dev_priv);
+	intel_wakeref_t wakeref = intel_runtime_pm_get(dev_priv);
 
 	mutex_lock(&power_domains->lock);
 
 	__intel_display_power_get_domain(dev_priv, domain);
 
 	mutex_unlock(&power_domains->lock);
+
+	return wakeref;
 }
 
 /**
@@ -1851,13 +1852,16 @@ void intel_display_power_get(struct drm_i915_private *dev_priv,
  * Any power domain reference obtained by this function must have a symmetric
  * call to intel_display_power_put() to release the reference again.
  */
-bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
-					enum intel_display_power_domain domain)
+intel_wakeref_t
+intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
+				   enum intel_display_power_domain domain)
 {
 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	intel_wakeref_t wakeref;
 	bool is_enabled;
 
-	if (!intel_runtime_pm_get_if_in_use(dev_priv))
+	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
+	if (!wakeref)
 		return false;
 
 	mutex_lock(&power_domains->lock);
@@ -1871,23 +1875,16 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
 
 	mutex_unlock(&power_domains->lock);
 
-	if (!is_enabled)
-		intel_runtime_pm_put_unchecked(dev_priv);
+	if (!is_enabled) {
+		intel_runtime_pm_put(dev_priv, wakeref);
+		wakeref = 0;
+	}
 
-	return is_enabled;
+	return wakeref;
 }
 
-/**
- * intel_display_power_put - release a power domain reference
- * @dev_priv: i915 device instance
- * @domain: power domain to reference
- *
- * This function drops the power domain reference obtained by
- * intel_display_power_get() and might power down the corresponding hardware
- * block right away if this is the last reference.
- */
-void intel_display_power_put(struct drm_i915_private *dev_priv,
-			     enum intel_display_power_domain domain)
+static void __intel_display_power_put(struct drm_i915_private *dev_priv,
+				      enum intel_display_power_domain domain)
 {
 	struct i915_power_domains *power_domains;
 	struct i915_power_well *power_well;
@@ -1905,10 +1902,34 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
 		intel_power_well_put(dev_priv, power_well);
 
 	mutex_unlock(&power_domains->lock);
+}
 
+/**
+ * intel_display_power_put_unchecked - release a power domain reference
+ * @dev_priv: i915 device instance
+ * @domain: power domain to reference
+ *
+ * This function drops the power domain reference obtained by
+ * intel_display_power_get() and might power down the corresponding hardware
+ * block right away if this is the last reference.
+ */
+void intel_display_power_put_unchecked(struct drm_i915_private *dev_priv,
+				       enum intel_display_power_domain domain)
+{
+	__intel_display_power_put(dev_priv, domain);
 	intel_runtime_pm_put_unchecked(dev_priv);
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+void intel_display_power_put(struct drm_i915_private *dev_priv,
+			     enum intel_display_power_domain domain,
+			     intel_wakeref_t wakeref)
+{
+	__intel_display_power_put(dev_priv, domain);
+	intel_runtime_pm_put(dev_priv, wakeref);
+}
+#endif
+
 #define I830_PIPES_POWER_DOMAINS (		\
 	BIT_ULL(POWER_DOMAIN_PIPE_A) |		\
 	BIT_ULL(POWER_DOMAIN_PIPE_B) |		\
@@ -4018,7 +4039,7 @@ void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv)
 
 	/* Remove the refcount we took to keep power well support disabled. */
 	if (!i915_modparams.disable_power_well)
-		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+		intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT);
 
 	intel_power_domains_verify_state(dev_priv);
 }
@@ -4037,7 +4058,7 @@ void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv)
  */
 void intel_power_domains_enable(struct drm_i915_private *dev_priv)
 {
-	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+	intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT);
 
 	intel_power_domains_verify_state(dev_priv);
 }
@@ -4072,7 +4093,7 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv,
 {
 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+	intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT);
 
 	/*
 	 * In case of suspend-to-idle (aka S0ix) on a DMC platform without DC9
@@ -4093,7 +4114,7 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv,
 	 * power wells if power domains must be deinitialized for suspend.
 	 */
 	if (!i915_modparams.disable_power_well) {
-		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+		intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT);
 		intel_power_domains_verify_state(dev_priv);
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index f70d2c607902..d4c327390fdb 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -619,17 +619,19 @@ skl_plane_get_hw_state(struct intel_plane *plane,
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum intel_display_power_domain power_domain;
 	enum plane_id plane_id = plane->id;
+	intel_wakeref_t wakeref;
 	bool ret;
 
 	power_domain = POWER_DOMAIN_PIPE(plane->pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = I915_READ(PLANE_CTL(plane->pipe, plane_id)) & PLANE_CTL_ENABLE;
 
 	*pipe = plane->pipe;
 
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
@@ -883,17 +885,19 @@ vlv_plane_get_hw_state(struct intel_plane *plane,
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum intel_display_power_domain power_domain;
 	enum plane_id plane_id = plane->id;
+	intel_wakeref_t wakeref;
 	bool ret;
 
 	power_domain = POWER_DOMAIN_PIPE(plane->pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = I915_READ(SPCNTR(plane->pipe, plane_id)) & SP_ENABLE;
 
 	*pipe = plane->pipe;
 
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
@@ -1052,17 +1056,19 @@ ivb_plane_get_hw_state(struct intel_plane *plane,
 {
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum intel_display_power_domain power_domain;
+	intel_wakeref_t wakeref;
 	bool ret;
 
 	power_domain = POWER_DOMAIN_PIPE(plane->pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	ret =  I915_READ(SPRCTL(plane->pipe)) & SPRITE_ENABLE;
 
 	*pipe = plane->pipe;
 
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
@@ -1218,17 +1224,19 @@ g4x_plane_get_hw_state(struct intel_plane *plane,
 {
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum intel_display_power_domain power_domain;
+	intel_wakeref_t wakeref;
 	bool ret;
 
 	power_domain = POWER_DOMAIN_PIPE(plane->pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = I915_READ(DVSCNTR(plane->pipe)) & DVS_ENABLE;
 
 	*pipe = plane->pipe;
 
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_vdsc.c b/drivers/gpu/drm/i915/intel_vdsc.c
index c56ba0e04044..0754eb5f4ae7 100644
--- a/drivers/gpu/drm/i915/intel_vdsc.c
+++ b/drivers/gpu/drm/i915/intel_vdsc.c
@@ -1083,6 +1083,6 @@ void intel_dsc_disable(const struct intel_crtc_state *old_crtc_state)
 	I915_WRITE(dss_ctl2_reg, dss_ctl2_val);
 
 	/* Disable Power wells for VDSC/joining */
-	intel_display_power_put(dev_priv,
-				intel_dsc_power_domain(old_crtc_state));
+	intel_display_power_put_unchecked(dev_priv,
+					  intel_dsc_power_domain(old_crtc_state));
 }
diff --git a/drivers/gpu/drm/i915/vlv_dsi.c b/drivers/gpu/drm/i915/vlv_dsi.c
index 361e962a7969..397da55de28c 100644
--- a/drivers/gpu/drm/i915/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/vlv_dsi.c
@@ -960,13 +960,15 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	intel_wakeref_t wakeref;
 	enum port port;
 	bool active = false;
 
 	DRM_DEBUG_KMS("\n");
 
-	if (!intel_display_power_get_if_enabled(dev_priv,
-						encoder->power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     encoder->power_domain);
+	if (!wakeref)
 		return false;
 
 	/*
@@ -1022,7 +1024,7 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder,
 	}
 
 out_put_power:
-	intel_display_power_put(dev_priv, encoder->power_domain);
+	intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
 
 	return active;
 }
@@ -1575,6 +1577,7 @@ vlv_dsi_get_hw_panel_orientation(struct intel_connector *connector)
 	enum drm_panel_orientation orientation;
 	struct intel_plane *plane;
 	struct intel_crtc *crtc;
+	intel_wakeref_t wakeref;
 	enum pipe pipe;
 	u32 val;
 
@@ -1585,7 +1588,8 @@ vlv_dsi_get_hw_panel_orientation(struct intel_connector *connector)
 	plane = to_intel_plane(crtc->base.primary);
 
 	power_domain = POWER_DOMAIN_PIPE(pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return DRM_MODE_PANEL_ORIENTATION_UNKNOWN;
 
 	val = I915_READ(DSPCNTR(plane->i9xx_plane));
@@ -1597,7 +1601,7 @@ vlv_dsi_get_hw_panel_orientation(struct intel_connector *connector)
 	else
 		orientation = DRM_MODE_PANEL_ORIENTATION_NORMAL;
 
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return orientation;
 }
-- 
2.20.1


* [PATCH 19/46] drm/i915: Track the wakeref used to initialise display power domains
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (16 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 18/46] drm/i915: Markup paired operations on display power domains Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 11:54 ` [PATCH 20/46] drm/i915: Combined gt.awake/gt.power wakerefs Chris Wilson
                   ` (32 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

On module load and unload, we grab the POWER_DOMAIN_INIT power wells and
transfer them to the runtime-pm code. We can use our wakeref tracking to
verify that the wakeref is indeed passed from init to enable, and from
disable to fini, as well as across suspend.
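
As a minimal sketch of the handover pattern (mirroring the diff below,
with fetch_and_zero() being i915's helper that returns the old value of
a location and zeroes it): the cookie returned by
intel_display_power_get() is stashed in the new power_domains.wakeref
field and claimed exactly once on the paired path, so a double put hands
an invalid (zero) cookie to the tracker, while a double get trips the
WARN_ON:

	/* Sketch only: the wakeref field is the one added by this patch. */
	static void example_stash(struct drm_i915_private *i915)
	{
		struct i915_power_domains *power_domains = &i915->power_domains;

		WARN_ON(power_domains->wakeref); /* would leak the old cookie */
		power_domains->wakeref =
			intel_display_power_get(i915, POWER_DOMAIN_INIT);
	}

	static void example_claim(struct drm_i915_private *i915)
	{
		intel_wakeref_t wakeref =
			fetch_and_zero(&i915->power_domains.wakeref);

		/* a second claim yields 0 and is flagged by the tracker */
		intel_display_power_put(i915, POWER_DOMAIN_INIT, wakeref);
	}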

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   3 +
 drivers/gpu/drm/i915/i915_drv.h         |   2 +
 drivers/gpu/drm/i915/intel_runtime_pm.c | 151 +++++++++++++-----------
 3 files changed, 88 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f11e5dc41f17..b7be5eb5c062 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2699,6 +2699,9 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
 	if (!HAS_RUNTIME_PM(dev_priv))
 		seq_puts(m, "Runtime power management not supported\n");
 
+	seq_printf(m, "Runtime power management: %s\n",
+		   enableddisabled(!dev_priv->power_domains.wakeref));
+
 	seq_printf(m, "GPU idle: %s (epoch %u)\n",
 		   yesno(!dev_priv->gt.awake), dev_priv->gt.epoch);
 	seq_printf(m, "IRQs disabled: %s\n",
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7f688f63cbaf..81a5dad712d7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -822,6 +822,8 @@ struct i915_power_domains {
 	bool display_core_suspended;
 	int power_well_count;
 
+	intel_wakeref_t wakeref;
+
 	struct mutex lock;
 	int domain_use_count[POWER_DOMAIN_NUM];
 	struct i915_power_well *power_wells;
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 60bd310f8358..9c971feaa459 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -3967,7 +3967,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv);
 
 /**
  * intel_power_domains_init_hw - initialize hardware power domain state
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  * @resume: Called from resume code paths or not
  *
  * This function initializes the hardware power domain state and enables all
@@ -3981,30 +3981,31 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv);
  * intel_power_domains_enable()) and must be paired with
  * intel_power_domains_fini_hw().
  */
-void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume)
+void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume)
 {
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_domains *power_domains = &i915->power_domains;
 
 	power_domains->initializing = true;
 
-	if (IS_ICELAKE(dev_priv)) {
-		icl_display_core_init(dev_priv, resume);
-	} else if (IS_CANNONLAKE(dev_priv)) {
-		cnl_display_core_init(dev_priv, resume);
-	} else if (IS_GEN9_BC(dev_priv)) {
-		skl_display_core_init(dev_priv, resume);
-	} else if (IS_GEN9_LP(dev_priv)) {
-		bxt_display_core_init(dev_priv, resume);
-	} else if (IS_CHERRYVIEW(dev_priv)) {
+	if (IS_ICELAKE(i915)) {
+		icl_display_core_init(i915, resume);
+	} else if (IS_CANNONLAKE(i915)) {
+		cnl_display_core_init(i915, resume);
+	} else if (IS_GEN9_BC(i915)) {
+		skl_display_core_init(i915, resume);
+	} else if (IS_GEN9_LP(i915)) {
+		bxt_display_core_init(i915, resume);
+	} else if (IS_CHERRYVIEW(i915)) {
 		mutex_lock(&power_domains->lock);
-		chv_phy_control_init(dev_priv);
+		chv_phy_control_init(i915);
 		mutex_unlock(&power_domains->lock);
-	} else if (IS_VALLEYVIEW(dev_priv)) {
+	} else if (IS_VALLEYVIEW(i915)) {
 		mutex_lock(&power_domains->lock);
-		vlv_cmnlane_wa(dev_priv);
+		vlv_cmnlane_wa(i915);
 		mutex_unlock(&power_domains->lock);
-	} else if (IS_IVYBRIDGE(dev_priv) || INTEL_GEN(dev_priv) >= 7)
-		intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv));
+	} else if (IS_IVYBRIDGE(i915) || INTEL_GEN(i915) >= 7) {
+		intel_pch_reset_handshake(i915, !HAS_PCH_NOP(i915));
+	}
 
 	/*
 	 * Keep all power wells enabled for any dependent HW access during
@@ -4012,18 +4013,20 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume)
 	 * resources powered until display HW readout is complete. We drop
 	 * this reference in intel_power_domains_enable().
 	 */
-	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+	power_domains->wakeref =
+		intel_display_power_get(i915, POWER_DOMAIN_INIT);
+
 	/* Disable power support if the user asked so. */
 	if (!i915_modparams.disable_power_well)
-		intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
-	intel_power_domains_sync_hw(dev_priv);
+		intel_display_power_get(i915, POWER_DOMAIN_INIT);
+	intel_power_domains_sync_hw(i915);
 
 	power_domains->initializing = false;
 }
 
 /**
  * intel_power_domains_fini_hw - deinitialize hw power domain state
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * De-initializes the display power domain HW state. It also ensures that the
  * device stays powered up so that the driver can be reloaded.
@@ -4032,21 +4035,24 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume)
  * intel_power_domains_disable()) and must be paired with
  * intel_power_domains_init_hw().
  */
-void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv)
+void intel_power_domains_fini_hw(struct drm_i915_private *i915)
 {
-	/* Keep the power well enabled, but cancel its rpm wakeref. */
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_wakeref_t wakeref __maybe_unused =
+		fetch_and_zero(&i915->power_domains.wakeref);
 
 	/* Remove the refcount we took to keep power well support disabled. */
 	if (!i915_modparams.disable_power_well)
-		intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT);
+		intel_display_power_put_unchecked(i915, POWER_DOMAIN_INIT);
+
+	intel_power_domains_verify_state(i915);
 
-	intel_power_domains_verify_state(dev_priv);
+	/* Keep the power well enabled, but cancel its rpm wakeref. */
+	intel_runtime_pm_put(i915, wakeref);
 }
 
 /**
  * intel_power_domains_enable - enable toggling of display power wells
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * Enable the ondemand enabling/disabling of the display power wells. Note that
  * power wells not belonging to POWER_DOMAIN_INIT are allowed to be toggled
@@ -4056,30 +4062,36 @@ void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv)
  * of display HW readout (which will acquire the power references reflecting
  * the current HW state).
  */
-void intel_power_domains_enable(struct drm_i915_private *dev_priv)
+void intel_power_domains_enable(struct drm_i915_private *i915)
 {
-	intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT);
+	intel_wakeref_t wakeref __maybe_unused =
+		fetch_and_zero(&i915->power_domains.wakeref);
 
-	intel_power_domains_verify_state(dev_priv);
+	intel_display_power_put(i915, POWER_DOMAIN_INIT, wakeref);
+	intel_power_domains_verify_state(i915);
 }
 
 /**
  * intel_power_domains_disable - disable toggling of display power wells
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * Disable the ondemand enabling/disabling of the display power wells. See
  * intel_power_domains_enable() for which power wells this call controls.
  */
-void intel_power_domains_disable(struct drm_i915_private *dev_priv)
+void intel_power_domains_disable(struct drm_i915_private *i915)
 {
-	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+	struct i915_power_domains *power_domains = &i915->power_domains;
 
-	intel_power_domains_verify_state(dev_priv);
+	WARN_ON(power_domains->wakeref);
+	power_domains->wakeref =
+		intel_display_power_get(i915, POWER_DOMAIN_INIT);
+
+	intel_power_domains_verify_state(i915);
 }
 
 /**
  * intel_power_domains_suspend - suspend power domain state
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  * @suspend_mode: specifies the target suspend state (idle, mem, hibernation)
  *
  * This function prepares the hardware power domain state before entering
@@ -4088,12 +4100,14 @@ void intel_power_domains_disable(struct drm_i915_private *dev_priv)
  * It must be called with power domains already disabled (after a call to
  * intel_power_domains_disable()) and paired with intel_power_domains_resume().
  */
-void intel_power_domains_suspend(struct drm_i915_private *dev_priv,
+void intel_power_domains_suspend(struct drm_i915_private *i915,
 				 enum i915_drm_suspend_mode suspend_mode)
 {
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_domains *power_domains = &i915->power_domains;
+	intel_wakeref_t wakeref __maybe_unused =
+		fetch_and_zero(&power_domains->wakeref);
 
-	intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT);
+	intel_display_power_put(i915, POWER_DOMAIN_INIT, wakeref);
 
 	/*
 	 * In case of suspend-to-idle (aka S0ix) on a DMC platform without DC9
@@ -4102,10 +4116,10 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv,
 	 * resources as required and also enable deeper system power states
 	 * that would be blocked if the firmware was inactive.
 	 */
-	if (!(dev_priv->csr.allowed_dc_mask & DC_STATE_EN_DC9) &&
+	if (!(i915->csr.allowed_dc_mask & DC_STATE_EN_DC9) &&
 	    suspend_mode == I915_DRM_SUSPEND_IDLE &&
-	    dev_priv->csr.dmc_payload != NULL) {
-		intel_power_domains_verify_state(dev_priv);
+	    i915->csr.dmc_payload) {
+		intel_power_domains_verify_state(i915);
 		return;
 	}
 
@@ -4114,25 +4128,25 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv,
 	 * power wells if power domains must be deinitialized for suspend.
 	 */
 	if (!i915_modparams.disable_power_well) {
-		intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT);
-		intel_power_domains_verify_state(dev_priv);
+		intel_display_power_put_unchecked(i915, POWER_DOMAIN_INIT);
+		intel_power_domains_verify_state(i915);
 	}
 
-	if (IS_ICELAKE(dev_priv))
-		icl_display_core_uninit(dev_priv);
-	else if (IS_CANNONLAKE(dev_priv))
-		cnl_display_core_uninit(dev_priv);
-	else if (IS_GEN9_BC(dev_priv))
-		skl_display_core_uninit(dev_priv);
-	else if (IS_GEN9_LP(dev_priv))
-		bxt_display_core_uninit(dev_priv);
+	if (IS_ICELAKE(i915))
+		icl_display_core_uninit(i915);
+	else if (IS_CANNONLAKE(i915))
+		cnl_display_core_uninit(i915);
+	else if (IS_GEN9_BC(i915))
+		skl_display_core_uninit(i915);
+	else if (IS_GEN9_LP(i915))
+		bxt_display_core_uninit(i915);
 
 	power_domains->display_core_suspended = true;
 }
 
 /**
  * intel_power_domains_resume - resume power domain state
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function resume the hardware power domain state during system resume.
  *
@@ -4140,28 +4154,30 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv,
  * intel_power_domains_enable()) and must be paired with
  * intel_power_domains_suspend().
  */
-void intel_power_domains_resume(struct drm_i915_private *dev_priv)
+void intel_power_domains_resume(struct drm_i915_private *i915)
 {
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_domains *power_domains = &i915->power_domains;
 
 	if (power_domains->display_core_suspended) {
-		intel_power_domains_init_hw(dev_priv, true);
+		intel_power_domains_init_hw(i915, true);
 		power_domains->display_core_suspended = false;
 	} else {
-		intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+		WARN_ON(power_domains->wakeref);
+		power_domains->wakeref =
+			intel_display_power_get(i915, POWER_DOMAIN_INIT);
 	}
 
-	intel_power_domains_verify_state(dev_priv);
+	intel_power_domains_verify_state(i915);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
 
-static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv)
+static void intel_power_domains_dump_info(struct drm_i915_private *i915)
 {
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_domains *power_domains = &i915->power_domains;
 	struct i915_power_well *power_well;
 
-	for_each_power_well(dev_priv, power_well) {
+	for_each_power_well(i915, power_well) {
 		enum intel_display_power_domain domain;
 
 		DRM_DEBUG_DRIVER("%-25s %d\n",
@@ -4176,7 +4192,7 @@ static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv)
 
 /**
  * intel_power_domains_verify_state - verify the HW/SW state for all power wells
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * Verify if the reference count of each power well matches its HW enabled
  * state and the total refcount of the domains it belongs to. This must be
@@ -4184,22 +4200,21 @@ static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv)
  * acquiring reference counts for any power wells in use and disabling the
  * ones left on by BIOS but not required by any active output.
  */
-static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
+static void intel_power_domains_verify_state(struct drm_i915_private *i915)
 {
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_domains *power_domains = &i915->power_domains;
 	struct i915_power_well *power_well;
 	bool dump_domain_info;
 
 	mutex_lock(&power_domains->lock);
 
 	dump_domain_info = false;
-	for_each_power_well(dev_priv, power_well) {
+	for_each_power_well(i915, power_well) {
 		enum intel_display_power_domain domain;
 		int domains_count;
 		bool enabled;
 
-		enabled = power_well->desc->ops->is_enabled(dev_priv,
-							    power_well);
+		enabled = power_well->desc->ops->is_enabled(i915, power_well);
 		if ((power_well->count || power_well->desc->always_on) !=
 		    enabled)
 			DRM_ERROR("power well %s state mismatch (refcount %d/enabled %d)",
@@ -4223,7 +4238,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
 		static bool dumped;
 
 		if (!dumped) {
-			intel_power_domains_dump_info(dev_priv);
+			intel_power_domains_dump_info(i915);
 			dumped = true;
 		}
 	}
@@ -4233,7 +4248,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
 
 #else
 
-static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
+static void intel_power_domains_verify_state(struct drm_i915_private *i915)
 {
 }
 
-- 
2.20.1


* [PATCH 20/46] drm/i915: Combined gt.awake/gt.power wakerefs
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (17 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 19/46] drm/i915: Track the wakeref used to initialise " Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 11:54 ` [PATCH 21/46] drm/i915/dp: Markup pps lock power well Chris Wilson
                   ` (31 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

As the GT_IRQ power domain implies a wakeref, we can use it in place of
our existing, now redundant, rpm grab.
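
Sketched from the diff below: the unpark path goes from holding two
references (an rpm wakeref plus a separate GT_IRQ display power cookie)
to just the one cookie, since the display power get internally holds
runtime-pm for as long as the domain is in use:

	/* Before: two wakerefs held for the lifetime of GT activity. */
	i915->gt.awake = intel_runtime_pm_get_noresume(i915);
	i915->gt.power = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);

	/* After: the GT_IRQ domain's implicit rpm wakeref suffices. */
	i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
	GEM_BUG_ON(!i915->gt.awake);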

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h                  |  1 -
 drivers/gpu/drm/i915/i915_gem.c                  | 11 ++++-------
 drivers/gpu/drm/i915/intel_lrc.c                 |  2 +-
 drivers/gpu/drm/i915/selftests/mock_gem_device.c |  1 +
 4 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 81a5dad712d7..e3c51c40dc9d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1972,7 +1972,6 @@ struct drm_i915_private {
 		 * is a slight delay before we do so.
 		 */
 		intel_wakeref_t awake;
-		intel_wakeref_t power;
 
 		/**
 		 * The number of times we have woken up.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7e4db2cb8501..a3dd5bbd6700 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -177,9 +177,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
 	if (INTEL_GEN(i915) >= 6)
 		gen6_rps_idle(i915);
 
-	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, i915->gt.power);
-
-	intel_runtime_pm_put(i915, wakeref);
+	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
 
 	return i915->gt.epoch;
 }
@@ -204,13 +202,11 @@ void i915_gem_unpark(struct drm_i915_private *i915)
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915->gt.active_requests);
+	assert_rpm_wakelock_held(i915);
 
 	if (i915->gt.awake)
 		return;
 
-	i915->gt.awake = intel_runtime_pm_get_noresume(i915);
-	GEM_BUG_ON(!i915->gt.awake);
-
 	/*
 	 * It seems that the DMC likes to transition between the DC states a lot
 	 * when there are no connected displays (no active power domains) during
@@ -222,7 +218,8 @@ void i915_gem_unpark(struct drm_i915_private *i915)
 	 * Work around it by grabbing a GT IRQ power domain whilst there is any
 	 * GT activity, preventing any DC state transitions.
 	 */
-	i915->gt.power = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
+	i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
+	GEM_BUG_ON(!i915->gt.awake);
 
 	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
 		i915->gt.epoch = 1;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6c98fb7cebf2..644aa9251307 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1046,7 +1046,7 @@ static void execlists_submission_tasklet(unsigned long data)
 
 	GEM_TRACE("%s awake?=%d, active=%x\n",
 		  engine->name,
-		  engine->i915->gt.awake,
+		  !!engine->i915->gt.awake,
 		  engine->execlists.active);
 
 	spin_lock_irqsave(&engine->timeline.lock, flags);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 0eb283e7fc96..aa4ddae94aca 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -164,6 +164,7 @@ struct drm_i915_private *mock_gem_device(void)
 	pm_runtime_dont_use_autosuspend(&pdev->dev);
 	if (pm_runtime_enabled(&pdev->dev))
 		WARN_ON(pm_runtime_get_sync(&pdev->dev));
+	disable_rpm_wakeref_asserts(i915);
 
 	err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev);
 	if (err) {
-- 
2.20.1


* [PATCH 21/46] drm/i915/dp: Markup pps lock power well
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (18 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 20/46] drm/i915: Combined gt.awake/gt.power wakerefs Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 11:54 ` [PATCH 22/46] drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice Chris Wilson
                   ` (30 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Track where and when we acquire and release the power well for pps
access along the dp aux link, with a view to detecting if we leak any
wakerefs.
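
The conversion is built around a new with_pps_lock() helper, a for-loop
macro in the same style as with_intel_runtime_pm(): the loop initialiser
takes the pps mutex and the aux power well, the body runs exactly once
while the wakeref cookie is non-zero, and the "increment" step unlocks
and returns 0, terminating the loop. From the diff below:

	#define with_pps_lock(dp, wf) \
		for (wf = pps_lock(dp); wf; wf = pps_unlock(dp, wf))

so that the open-coded sequence

	pps_lock(intel_dp);
	edp_panel_vdd_off_sync(intel_dp);
	pps_unlock(intel_dp);

becomes (with wakeref declared as intel_wakeref_t in the caller)

	with_pps_lock(intel_dp, wakeref)
		edp_panel_vdd_off_sync(intel_dp);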

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_dp.c | 231 +++++++++++++++++---------------
 1 file changed, 121 insertions(+), 110 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index e8031daacee2..25c101e7f992 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -602,30 +602,39 @@ intel_dp_init_panel_power_sequencer_registers(struct intel_dp *intel_dp,
 static void
 intel_dp_pps_init(struct intel_dp *intel_dp);
 
-static void pps_lock(struct intel_dp *intel_dp)
+static intel_wakeref_t
+pps_lock(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+	intel_wakeref_t wakeref;
 
 	/*
 	 * See intel_power_sequencer_reset() why we need
 	 * a power domain reference here.
 	 */
-	intel_display_power_get(dev_priv,
-				intel_aux_power_domain(dp_to_dig_port(intel_dp)));
+	wakeref = intel_display_power_get(dev_priv,
+					  intel_aux_power_domain(dp_to_dig_port(intel_dp)));
 
 	mutex_lock(&dev_priv->pps_mutex);
+
+	return wakeref;
 }
 
-static void pps_unlock(struct intel_dp *intel_dp)
+static intel_wakeref_t
+pps_unlock(struct intel_dp *intel_dp, intel_wakeref_t wakeref)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 
 	mutex_unlock(&dev_priv->pps_mutex);
-
-	intel_display_power_put_unchecked(dev_priv,
-					  intel_aux_power_domain(dp_to_dig_port(intel_dp)));
+	intel_display_power_put(dev_priv,
+				intel_aux_power_domain(dp_to_dig_port(intel_dp)),
+				wakeref);
+	return 0;
 }
 
+#define with_pps_lock(dp, wf) \
+	for (wf = pps_lock(dp); wf; wf = pps_unlock(dp, wf))
+
 static void
 vlv_power_sequencer_kick(struct intel_dp *intel_dp)
 {
@@ -974,30 +983,30 @@ static int edp_notify_handler(struct notifier_block *this, unsigned long code,
 	struct intel_dp *intel_dp = container_of(this, typeof(* intel_dp),
 						 edp_notifier);
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+	intel_wakeref_t wakeref;
 
 	if (!intel_dp_is_edp(intel_dp) || code != SYS_RESTART)
 		return 0;
 
-	pps_lock(intel_dp);
-
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-		enum pipe pipe = vlv_power_sequencer_pipe(intel_dp);
-		i915_reg_t pp_ctrl_reg, pp_div_reg;
-		u32 pp_div;
-
-		pp_ctrl_reg = PP_CONTROL(pipe);
-		pp_div_reg  = PP_DIVISOR(pipe);
-		pp_div = I915_READ(pp_div_reg);
-		pp_div &= PP_REFERENCE_DIVIDER_MASK;
-
-		/* 0x1F write to PP_DIV_REG sets max cycle delay */
-		I915_WRITE(pp_div_reg, pp_div | 0x1F);
-		I915_WRITE(pp_ctrl_reg, PANEL_UNLOCK_REGS | PANEL_POWER_OFF);
-		msleep(intel_dp->panel_power_cycle_delay);
+	with_pps_lock(intel_dp, wakeref) {
+		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
+			enum pipe pipe = vlv_power_sequencer_pipe(intel_dp);
+			i915_reg_t pp_ctrl_reg, pp_div_reg;
+			u32 pp_div;
+
+			pp_ctrl_reg = PP_CONTROL(pipe);
+			pp_div_reg  = PP_DIVISOR(pipe);
+			pp_div = I915_READ(pp_div_reg);
+			pp_div &= PP_REFERENCE_DIVIDER_MASK;
+
+			/* 0x1F write to PP_DIV_REG sets max cycle delay */
+			I915_WRITE(pp_div_reg, pp_div | 0x1F);
+			I915_WRITE(pp_ctrl_reg,
+				   PANEL_UNLOCK_REGS | PANEL_POWER_OFF);
+			msleep(intel_dp->panel_power_cycle_delay);
+		}
 	}
 
-	pps_unlock(intel_dp);
-
 	return 0;
 }
 
@@ -1185,16 +1194,17 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp,
 			to_i915(intel_dig_port->base.base.dev);
 	i915_reg_t ch_ctl, ch_data[5];
 	uint32_t aux_clock_divider;
+	intel_wakeref_t wakeref;
 	int i, ret, recv_bytes;
-	uint32_t status;
 	int try, clock = 0;
+	uint32_t status;
 	bool vdd;
 
 	ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp);
 	for (i = 0; i < ARRAY_SIZE(ch_data); i++)
 		ch_data[i] = intel_dp->aux_ch_data_reg(intel_dp, i);
 
-	pps_lock(intel_dp);
+	wakeref = pps_lock(intel_dp);
 
 	/*
 	 * We will be called with VDD already enabled for dpcd/edid/oui reads.
@@ -1338,7 +1348,7 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp,
 	if (vdd)
 		edp_panel_vdd_off(intel_dp, false);
 
-	pps_unlock(intel_dp);
+	pps_unlock(intel_dp, wakeref);
 
 	return ret;
 }
@@ -2465,15 +2475,15 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp)
  */
 void intel_edp_panel_vdd_on(struct intel_dp *intel_dp)
 {
+	intel_wakeref_t wakeref;
 	bool vdd;
 
 	if (!intel_dp_is_edp(intel_dp))
 		return;
 
-	pps_lock(intel_dp);
-	vdd = edp_panel_vdd_on(intel_dp);
-	pps_unlock(intel_dp);
-
+	vdd = false;
+	with_pps_lock(intel_dp, wakeref)
+		vdd = edp_panel_vdd_on(intel_dp);
 	I915_STATE_WARN(!vdd, "eDP port %c VDD already requested on\n",
 	     port_name(dp_to_dig_port(intel_dp)->base.port));
 }
@@ -2518,13 +2528,15 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp)
 
 static void edp_panel_vdd_work(struct work_struct *__work)
 {
-	struct intel_dp *intel_dp = container_of(to_delayed_work(__work),
-						 struct intel_dp, panel_vdd_work);
+	struct intel_dp *intel_dp =
+		container_of(to_delayed_work(__work),
+			     struct intel_dp, panel_vdd_work);
+	intel_wakeref_t wakeref;
 
-	pps_lock(intel_dp);
-	if (!intel_dp->want_panel_vdd)
-		edp_panel_vdd_off_sync(intel_dp);
-	pps_unlock(intel_dp);
+	with_pps_lock(intel_dp, wakeref) {
+		if (!intel_dp->want_panel_vdd)
+			edp_panel_vdd_off_sync(intel_dp);
+	}
 }
 
 static void edp_panel_vdd_schedule_off(struct intel_dp *intel_dp)
@@ -2614,12 +2626,13 @@ static void edp_panel_on(struct intel_dp *intel_dp)
 
 void intel_edp_panel_on(struct intel_dp *intel_dp)
 {
+	intel_wakeref_t wakeref;
+
 	if (!intel_dp_is_edp(intel_dp))
 		return;
 
-	pps_lock(intel_dp);
-	edp_panel_on(intel_dp);
-	pps_unlock(intel_dp);
+	with_pps_lock(intel_dp, wakeref)
+		edp_panel_on(intel_dp);
 }
 
 
@@ -2663,20 +2676,20 @@ static void edp_panel_off(struct intel_dp *intel_dp)
 
 void intel_edp_panel_off(struct intel_dp *intel_dp)
 {
+	intel_wakeref_t wakeref;
+
 	if (!intel_dp_is_edp(intel_dp))
 		return;
 
-	pps_lock(intel_dp);
-	edp_panel_off(intel_dp);
-	pps_unlock(intel_dp);
+	with_pps_lock(intel_dp, wakeref)
+		edp_panel_off(intel_dp);
 }
 
 /* Enable backlight in the panel power control. */
 static void _intel_edp_backlight_on(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
-	u32 pp;
-	i915_reg_t pp_ctrl_reg;
+	intel_wakeref_t wakeref;
 
 	/*
 	 * If we enable the backlight right away following a panel power
@@ -2686,17 +2699,16 @@ static void _intel_edp_backlight_on(struct intel_dp *intel_dp)
 	 */
 	wait_backlight_on(intel_dp);
 
-	pps_lock(intel_dp);
+	with_pps_lock(intel_dp, wakeref) {
+		i915_reg_t pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
+		u32 pp;
 
-	pp = ironlake_get_pp_control(intel_dp);
-	pp |= EDP_BLC_ENABLE;
-
-	pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
-
-	I915_WRITE(pp_ctrl_reg, pp);
-	POSTING_READ(pp_ctrl_reg);
+		pp = ironlake_get_pp_control(intel_dp);
+		pp |= EDP_BLC_ENABLE;
 
-	pps_unlock(intel_dp);
+		I915_WRITE(pp_ctrl_reg, pp);
+		POSTING_READ(pp_ctrl_reg);
+	}
 }
 
 /* Enable backlight PWM and backlight PP control. */
@@ -2718,23 +2730,21 @@ void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state,
 static void _intel_edp_backlight_off(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
-	u32 pp;
-	i915_reg_t pp_ctrl_reg;
+	intel_wakeref_t wakeref;
 
 	if (!intel_dp_is_edp(intel_dp))
 		return;
 
-	pps_lock(intel_dp);
-
-	pp = ironlake_get_pp_control(intel_dp);
-	pp &= ~EDP_BLC_ENABLE;
-
-	pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
+	with_pps_lock(intel_dp, wakeref) {
+		i915_reg_t pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
+		u32 pp;
 
-	I915_WRITE(pp_ctrl_reg, pp);
-	POSTING_READ(pp_ctrl_reg);
+		pp = ironlake_get_pp_control(intel_dp);
+		pp &= ~EDP_BLC_ENABLE;
 
-	pps_unlock(intel_dp);
+		I915_WRITE(pp_ctrl_reg, pp);
+		POSTING_READ(pp_ctrl_reg);
+	}
 
 	intel_dp->last_backlight_off = jiffies;
 	edp_wait_backlight_off(intel_dp);
@@ -2762,12 +2772,12 @@ static void intel_edp_backlight_power(struct intel_connector *connector,
 				      bool enable)
 {
 	struct intel_dp *intel_dp = intel_attached_dp(&connector->base);
+	intel_wakeref_t wakeref;
 	bool is_enabled;
 
-	pps_lock(intel_dp);
-	is_enabled = ironlake_get_pp_control(intel_dp) & EDP_BLC_ENABLE;
-	pps_unlock(intel_dp);
-
+	is_enabled = false;
+	with_pps_lock(intel_dp, wakeref)
+		is_enabled = ironlake_get_pp_control(intel_dp) & EDP_BLC_ENABLE;
 	if (is_enabled == enable)
 		return;
 
@@ -3277,22 +3287,21 @@ static void intel_enable_dp(struct intel_encoder *encoder,
 	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
 	uint32_t dp_reg = I915_READ(intel_dp->output_reg);
 	enum pipe pipe = crtc->pipe;
+	intel_wakeref_t wakeref;
 
 	if (WARN_ON(dp_reg & DP_PORT_EN))
 		return;
 
-	pps_lock(intel_dp);
-
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		vlv_init_panel_power_sequencer(encoder, pipe_config);
-
-	intel_dp_enable_port(intel_dp, pipe_config);
+	with_pps_lock(intel_dp, wakeref) {
+		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+			vlv_init_panel_power_sequencer(encoder, pipe_config);
 
-	edp_panel_vdd_on(intel_dp);
-	edp_panel_on(intel_dp);
-	edp_panel_vdd_off(intel_dp, true);
+		intel_dp_enable_port(intel_dp, pipe_config);
 
-	pps_unlock(intel_dp);
+		edp_panel_vdd_on(intel_dp);
+		edp_panel_on(intel_dp);
+		edp_panel_vdd_off(intel_dp, true);
+	}
 
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
 		unsigned int lane_mask = 0x0;
@@ -3990,9 +3999,10 @@ intel_dp_link_down(struct intel_encoder *encoder,
 	intel_dp->DP = DP;
 
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-		pps_lock(intel_dp);
-		intel_dp->active_pipe = INVALID_PIPE;
-		pps_unlock(intel_dp);
+		intel_wakeref_t wakeref;
+
+		with_pps_lock(intel_dp, wakeref)
+			intel_dp->active_pipe = INVALID_PIPE;
 	}
 }
 
@@ -5562,14 +5572,15 @@ void intel_dp_encoder_flush_work(struct drm_encoder *encoder)
 
 	intel_dp_mst_encoder_cleanup(intel_dig_port);
 	if (intel_dp_is_edp(intel_dp)) {
+		intel_wakeref_t wakeref;
+
 		cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
 		/*
 		 * vdd might still be enabled do to the delayed vdd off.
 		 * Make sure vdd is actually turned off here.
 		 */
-		pps_lock(intel_dp);
-		edp_panel_vdd_off_sync(intel_dp);
-		pps_unlock(intel_dp);
+		with_pps_lock(intel_dp, wakeref)
+			edp_panel_vdd_off_sync(intel_dp);
 
 		if (intel_dp->edp_notifier.notifier_call) {
 			unregister_reboot_notifier(&intel_dp->edp_notifier);
@@ -5591,6 +5602,7 @@ static void intel_dp_encoder_destroy(struct drm_encoder *encoder)
 void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder)
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base);
+	intel_wakeref_t wakeref;
 
 	if (!intel_dp_is_edp(intel_dp))
 		return;
@@ -5600,9 +5612,8 @@ void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder)
 	 * Make sure vdd is actually turned off here.
 	 */
 	cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
-	pps_lock(intel_dp);
-	edp_panel_vdd_off_sync(intel_dp);
-	pps_unlock(intel_dp);
+	with_pps_lock(intel_dp, wakeref)
+		edp_panel_vdd_off_sync(intel_dp);
 }
 
 static
@@ -5883,6 +5894,7 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder)
 	struct drm_i915_private *dev_priv = to_i915(encoder->dev);
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp);
+	intel_wakeref_t wakeref;
 
 	if (!HAS_DDI(dev_priv))
 		intel_dp->DP = I915_READ(intel_dp->output_reg);
@@ -5892,18 +5904,19 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder)
 
 	intel_dp->reset_link_params = true;
 
-	pps_lock(intel_dp);
-
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		intel_dp->active_pipe = vlv_active_pipe(intel_dp);
+	with_pps_lock(intel_dp, wakeref) {
+		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+			intel_dp->active_pipe = vlv_active_pipe(intel_dp);
 
-	if (intel_dp_is_edp(intel_dp)) {
-		/* Reinit the power sequencer, in case BIOS did something with it. */
-		intel_dp_pps_init(intel_dp);
-		intel_edp_panel_vdd_sanitize(intel_dp);
+		if (intel_dp_is_edp(intel_dp)) {
+			/*
+			 * Reinit the power sequencer, in case BIOS did
+			 * something nasty with it.
+			 */
+			intel_dp_pps_init(intel_dp);
+			intel_edp_panel_vdd_sanitize(intel_dp);
+		}
 	}
-
-	pps_unlock(intel_dp);
 }
 
 static const struct drm_connector_funcs intel_dp_connector_funcs = {
@@ -6699,8 +6712,9 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
 	struct drm_display_mode *downclock_mode = NULL;
 	bool has_dpcd;
 	struct drm_display_mode *scan;
-	struct edid *edid;
 	enum pipe pipe = INVALID_PIPE;
+	intel_wakeref_t wakeref;
+	struct edid *edid;
 
 	if (!intel_dp_is_edp(intel_dp))
 		return true;
@@ -6720,13 +6734,11 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
 		return false;
 	}
 
-	pps_lock(intel_dp);
-
-	intel_dp_init_panel_power_timestamps(intel_dp);
-	intel_dp_pps_init(intel_dp);
-	intel_edp_panel_vdd_sanitize(intel_dp);
-
-	pps_unlock(intel_dp);
+	with_pps_lock(intel_dp, wakeref) {
+		intel_dp_init_panel_power_timestamps(intel_dp);
+		intel_dp_pps_init(intel_dp);
+		intel_edp_panel_vdd_sanitize(intel_dp);
+	}
 
 	/* Cache DPCD and EDID for edp. */
 	has_dpcd = intel_edp_init_dpcd(intel_dp);
@@ -6811,9 +6823,8 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
 	 * vdd might still be enabled do to the delayed vdd off.
 	 * Make sure vdd is actually turned off here.
 	 */
-	pps_lock(intel_dp);
-	edp_panel_vdd_off_sync(intel_dp);
-	pps_unlock(intel_dp);
+	with_pps_lock(intel_dp, wakeref)
+		edp_panel_vdd_off_sync(intel_dp);
 
 	return false;
 }
-- 
2.20.1


* [PATCH 22/46] drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (19 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 21/46] drm/i915/dp: Markup pps lock power well Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 11:54 ` [PATCH 23/46] drm/i915: Mark up Ironlake ips with rpm wakerefs Chris Wilson
                   ` (29 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

As we only release each power well once, we assume that each transcoder
maps to a different domain. Complain if this is not so.
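
A sketch of the check, assuming the for_each_power_domain() iterator
used on the release side: every reference taken during readout is
recorded as a bit in a u64 mask and each set bit is put exactly once
afterwards, so acquiring the same domain twice would later underflow its
refcount; the new WARN_ON catches the overlap at acquire time:

	if (intel_display_power_get_if_enabled(dev_priv, power_domain)) {
		WARN_ON(power_domain_mask & BIT_ULL(power_domain));
		power_domain_mask |= BIT_ULL(power_domain);
	}

	/* ... and on release, each recorded domain is put exactly once ... */
	for_each_power_domain(power_domain, power_domain_mask)
		intel_display_power_put_unchecked(dev_priv, power_domain);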

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 42fc362591a8..ff9d404a33c8 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9570,6 +9570,8 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc,
 	power_domain = POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder);
 	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
+
+	WARN_ON(*power_domain_mask & BIT_ULL(power_domain));
 	*power_domain_mask |= BIT_ULL(power_domain);
 
 	tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder));
@@ -9597,6 +9599,8 @@ static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc,
 		power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder);
 		if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 			continue;
+
+		WARN_ON(*power_domain_mask & BIT_ULL(power_domain));
 		*power_domain_mask |= BIT_ULL(power_domain);
 
 		/*
@@ -9713,7 +9717,9 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
 
 	power_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe);
 	if (intel_display_power_get_if_enabled(dev_priv, power_domain)) {
+		WARN_ON(power_domain_mask & BIT_ULL(power_domain));
 		power_domain_mask |= BIT_ULL(power_domain);
+
 		if (INTEL_GEN(dev_priv) >= 9)
 			skylake_get_pfit_config(crtc, pipe_config);
 		else
-- 
2.20.1


* [PATCH 23/46] drm/i915: Mark up Ironlake ips with rpm wakerefs
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (20 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 22/46] drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 11:54 ` [PATCH 24/46] drm/i915: Serialise concurrent calls to i915_gem_set_wedged() Chris Wilson
                   ` (28 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Currently Ironlake operates under the assumption that rpm is always
awake (and so its error checking is disabled). As such, we have missed a
few places where we access registers without taking the rpm wakeref and
thus trigger warnings, intel_ips being one culprit.

As this involved adding a potentially sleeping rpm_get, we have to
rearrange the spinlocks slightly and so switch to acquiring a device-ref
under the spinlock rather than holding the spinlock for the whole
operation. To be consistent, we apply the same pattern across the whole
intel_ips interface, even though this adds a few more atomic operations
than strictly necessary in a few cases.

v2: Sagar noted that the memory barriers around setting mch_dev were
overkill as we only need ordering there, and that i915_emon_status was
still using struct_mutex for no reason while lacking the rpm wakeref it
did need.
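
The replacement for the globals-under-spinlock scheme is sketched below
(a defensive variant with an explicit NULL test on the shared pointer):
the device is published with rcu_assign_pointer(), and each exported IPS
hook takes a full device reference under rcu_read_lock() before touching
it, declining a device whose last reference has already gone:

	static struct drm_i915_private *example_mchdev_get(void)
	{
		struct drm_i915_private *i915;

		rcu_read_lock();
		i915 = i915_mch_dev;
		if (i915 && !kref_get_unless_zero(&i915->drm.ref))
			i915 = NULL; /* device already being torn down */
		rcu_read_unlock();

		return i915; /* caller must drm_dev_put(&i915->drm) */
	}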

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c |  32 ++----
 drivers/gpu/drm/i915/i915_drv.c     |   3 +
 drivers/gpu/drm/i915/intel_pm.c     | 172 ++++++++++++++--------------
 3 files changed, 102 insertions(+), 105 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index b7be5eb5c062..4257c8f78e27 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1741,32 +1741,24 @@ static int i915_sr_status(struct seq_file *m, void *unused)
 
 static int i915_emon_status(struct seq_file *m, void *unused)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	unsigned long temp, chipset, gfx;
+	struct drm_i915_private *i915 = node_to_i915(m->private);
 	intel_wakeref_t wakeref;
-	int ret;
 
-	if (!IS_GEN(dev_priv, 5))
+	if (!IS_GEN(i915, 5))
 		return -ENODEV;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
+	with_intel_runtime_pm(i915, wakeref) {
+		unsigned long temp, chipset, gfx;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	temp = i915_mch_val(dev_priv);
-	chipset = i915_chipset_val(dev_priv);
-	gfx = i915_gfx_val(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
+		temp = i915_mch_val(i915);
+		chipset = i915_chipset_val(i915);
+		gfx = i915_gfx_val(i915);
 
-	intel_runtime_pm_put(dev_priv, wakeref);
-
-	seq_printf(m, "GMCH temp: %ld\n", temp);
-	seq_printf(m, "Chipset power: %ld\n", chipset);
-	seq_printf(m, "GFX power: %ld\n", gfx);
-	seq_printf(m, "Total power: %ld\n", chipset + gfx);
+		seq_printf(m, "GMCH temp: %ld\n", temp);
+		seq_printf(m, "Chipset power: %ld\n", chipset);
+		seq_printf(m, "GFX power: %ld\n", gfx);
+		seq_printf(m, "Total power: %ld\n", chipset + gfx);
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index e2f4753ca21f..4d0a4f028882 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1781,6 +1781,9 @@ void i915_driver_unload(struct drm_device *dev)
 
 	i915_driver_unregister(dev_priv);
 
+	/* Flush any external code that still may be under the RCU lock */
+	synchronize_rcu();
+
 	if (i915_gem_suspend(dev_priv))
 		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index ab7257720c7e..7613ae72df3d 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6203,10 +6203,6 @@ void intel_init_ipc(struct drm_i915_private *dev_priv)
  */
 DEFINE_SPINLOCK(mchdev_lock);
 
-/* Global for IPS driver to get at the current i915 device. Protected by
- * mchdev_lock. */
-static struct drm_i915_private *i915_mch_dev;
-
 bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
 {
 	u16 rgvswctl;
@@ -7849,16 +7845,17 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
 
 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
 {
-	unsigned long val;
+	intel_wakeref_t wakeref;
+	unsigned long val = 0;
 
 	if (!IS_GEN(dev_priv, 5))
 		return 0;
 
-	spin_lock_irq(&mchdev_lock);
-
-	val = __i915_chipset_val(dev_priv);
-
-	spin_unlock_irq(&mchdev_lock);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		spin_lock_irq(&mchdev_lock);
+		val = __i915_chipset_val(dev_priv);
+		spin_unlock_irq(&mchdev_lock);
+	}
 
 	return val;
 }
@@ -7935,14 +7932,16 @@ static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
 
 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
 {
+	intel_wakeref_t wakeref;
+
 	if (!IS_GEN(dev_priv, 5))
 		return;
 
-	spin_lock_irq(&mchdev_lock);
-
-	__i915_update_gfx_val(dev_priv);
-
-	spin_unlock_irq(&mchdev_lock);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		spin_lock_irq(&mchdev_lock);
+		__i915_update_gfx_val(dev_priv);
+		spin_unlock_irq(&mchdev_lock);
+	}
 }
 
 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
@@ -7984,18 +7983,34 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
 
 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
 {
-	unsigned long val;
+	intel_wakeref_t wakeref;
+	unsigned long val = 0;
 
 	if (!IS_GEN(dev_priv, 5))
 		return 0;
 
-	spin_lock_irq(&mchdev_lock);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		spin_lock_irq(&mchdev_lock);
+		val = __i915_gfx_val(dev_priv);
+		spin_unlock_irq(&mchdev_lock);
+	}
 
-	val = __i915_gfx_val(dev_priv);
+	return val;
+}
 
-	spin_unlock_irq(&mchdev_lock);
+static struct drm_i915_private *i915_mch_dev;
 
-	return val;
+static struct drm_i915_private *mchdev_get(void)
+{
+	struct drm_i915_private *i915;
+
+	rcu_read_lock();
+	i915 = i915_mch_dev;
+	if (!kref_get_unless_zero(&i915->drm.ref))
+		i915 = NULL;
+	rcu_read_unlock();
+
+	return i915;
 }
 
 /**
@@ -8006,23 +8021,24 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
  */
 unsigned long i915_read_mch_val(void)
 {
-	struct drm_i915_private *dev_priv;
-	unsigned long chipset_val, graphics_val, ret = 0;
-
-	spin_lock_irq(&mchdev_lock);
-	if (!i915_mch_dev)
-		goto out_unlock;
-	dev_priv = i915_mch_dev;
-
-	chipset_val = __i915_chipset_val(dev_priv);
-	graphics_val = __i915_gfx_val(dev_priv);
+	struct drm_i915_private *i915;
+	unsigned long chipset_val = 0;
+	unsigned long graphics_val = 0;
+	intel_wakeref_t wakeref;
 
-	ret = chipset_val + graphics_val;
+	i915 = mchdev_get();
+	if (!i915)
+		return 0;
 
-out_unlock:
-	spin_unlock_irq(&mchdev_lock);
+	with_intel_runtime_pm(i915, wakeref) {
+		spin_lock_irq(&mchdev_lock);
+		chipset_val = __i915_chipset_val(i915);
+		graphics_val = __i915_gfx_val(i915);
+		spin_unlock_irq(&mchdev_lock);
+	}
 
-	return ret;
+	drm_dev_put(&i915->drm);
+	return chipset_val + graphics_val;
 }
 EXPORT_SYMBOL_GPL(i915_read_mch_val);
 
@@ -8033,23 +8049,19 @@ EXPORT_SYMBOL_GPL(i915_read_mch_val);
  */
 bool i915_gpu_raise(void)
 {
-	struct drm_i915_private *dev_priv;
-	bool ret = true;
-
-	spin_lock_irq(&mchdev_lock);
-	if (!i915_mch_dev) {
-		ret = false;
-		goto out_unlock;
-	}
-	dev_priv = i915_mch_dev;
+	struct drm_i915_private *i915;
 
-	if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
-		dev_priv->ips.max_delay--;
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
 
-out_unlock:
+	spin_lock_irq(&mchdev_lock);
+	if (i915->ips.max_delay > i915->ips.fmax)
+		i915->ips.max_delay--;
 	spin_unlock_irq(&mchdev_lock);
 
-	return ret;
+	drm_dev_put(&i915->drm);
+	return true;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_raise);
 
@@ -8061,23 +8073,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_raise);
  */
 bool i915_gpu_lower(void)
 {
-	struct drm_i915_private *dev_priv;
-	bool ret = true;
-
-	spin_lock_irq(&mchdev_lock);
-	if (!i915_mch_dev) {
-		ret = false;
-		goto out_unlock;
-	}
-	dev_priv = i915_mch_dev;
+	struct drm_i915_private *i915;
 
-	if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
-		dev_priv->ips.max_delay++;
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
 
-out_unlock:
+	spin_lock_irq(&mchdev_lock);
+	if (i915->ips.max_delay < i915->ips.min_delay)
+		i915->ips.max_delay++;
 	spin_unlock_irq(&mchdev_lock);
 
-	return ret;
+	drm_dev_put(&i915->drm);
+	return true;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_lower);
 
@@ -8088,13 +8096,16 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower);
  */
 bool i915_gpu_busy(void)
 {
-	bool ret = false;
+	struct drm_i915_private *i915;
+	bool ret;
 
-	spin_lock_irq(&mchdev_lock);
-	if (i915_mch_dev)
-		ret = i915_mch_dev->gt.awake;
-	spin_unlock_irq(&mchdev_lock);
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
 
+	ret = i915->gt.awake;
+
+	drm_dev_put(&i915->drm);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_busy);
@@ -8107,24 +8118,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_busy);
  */
 bool i915_gpu_turbo_disable(void)
 {
-	struct drm_i915_private *dev_priv;
-	bool ret = true;
-
-	spin_lock_irq(&mchdev_lock);
-	if (!i915_mch_dev) {
-		ret = false;
-		goto out_unlock;
-	}
-	dev_priv = i915_mch_dev;
-
-	dev_priv->ips.max_delay = dev_priv->ips.fstart;
+	struct drm_i915_private *i915;
+	bool ret;
 
-	if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart))
-		ret = false;
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
 
-out_unlock:
+	spin_lock_irq(&mchdev_lock);
+	i915->ips.max_delay = i915->ips.fstart;
+	ret = ironlake_set_drps(i915, i915->ips.fstart);
 	spin_unlock_irq(&mchdev_lock);
 
+	drm_dev_put(&i915->drm);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
@@ -8153,18 +8159,14 @@ void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
 {
 	/* We only register the i915 ips part with intel-ips once everything is
 	 * set up, to avoid intel-ips sneaking in and reading bogus values. */
-	spin_lock_irq(&mchdev_lock);
-	i915_mch_dev = dev_priv;
-	spin_unlock_irq(&mchdev_lock);
+	rcu_assign_pointer(i915_mch_dev, dev_priv);
 
 	ips_ping_for_i915_load();
 }
 
 void intel_gpu_ips_teardown(void)
 {
-	spin_lock_irq(&mchdev_lock);
-	i915_mch_dev = NULL;
-	spin_unlock_irq(&mchdev_lock);
+	rcu_assign_pointer(i915_mch_dev, NULL);
 }
 
 static void intel_init_emon(struct drm_i915_private *dev_priv)
-- 
2.20.1


* [PATCH 24/46] drm/i915: Serialise concurrent calls to i915_gem_set_wedged()
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (21 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 23/46] drm/i915: Mark up Ironlake ips with rpm wakerefs Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 11:54 ` [PATCH 25/46] drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex Chris Wilson
                   ` (27 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx

Make i915_gem_set_wedged() and i915_gem_unset_wedged() behave more
consistently when called concurrently.
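
A sketch of the serialisation, using the wedge_mutex introduced below:
the first caller performs the transition under the mutex and sets
I915_WEDGED last, so latecomers observe the bit already set and return
without repeating the teardown:

	mutex_lock(&error->wedge_mutex);
	if (!test_bit(I915_WEDGED, &error->flags)) {
		/* ... stop submission, cancel in-flight requests ... */
		smp_mb__before_atomic(); /* publish teardown before the bit */
		set_bit(I915_WEDGED, &error->flags);
	}
	mutex_unlock(&error->wedge_mutex);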

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c               | 32 ++++++++++++++-----
 drivers/gpu/drm/i915/i915_gpu_error.h         |  4 ++-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a3dd5bbd6700..6e5546075f17 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3174,10 +3174,15 @@ static void nop_submit_request(struct i915_request *request)
 
 void i915_gem_set_wedged(struct drm_i915_private *i915)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	GEM_TRACE("start\n");
+	mutex_lock(&error->wedge_mutex);
+	if (test_bit(I915_WEDGED, &error->flags)) {
+		mutex_unlock(&error->wedge_mutex);
+		return;
+	}
 
 	if (GEM_SHOW_DEBUG()) {
 		struct drm_printer p = drm_debug_printer(__func__);
@@ -3186,8 +3191,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 			intel_engine_dump(engine, &p, "%s\n", engine->name);
 	}
 
-	if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags))
-		goto out;
+	GEM_TRACE("start\n");
 
 	/*
 	 * First, stop submission to hw, but do not yet complete requests by
@@ -3223,23 +3227,31 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 		intel_engine_wakeup(engine);
 	}
 
-out:
+	smp_mb__before_atomic();
+	set_bit(I915_WEDGED, &error->flags);
+
 	GEM_TRACE("end\n");
+	mutex_unlock(&error->wedge_mutex);
 
-	wake_up_all(&i915->gpu_error.reset_queue);
+	wake_up_all(&error->reset_queue);
 }
 
 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct i915_timeline *tl;
+	bool ret = false;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
-	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+
+	if (!test_bit(I915_WEDGED, &error->flags))
 		return true;
 
 	if (!i915->gt.scratch) /* Never full initialised, recovery impossible */
 		return false;
 
+	mutex_lock(&error->wedge_mutex);
+
 	GEM_TRACE("start\n");
 
 	/*
@@ -3273,7 +3285,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 		 */
 		if (dma_fence_default_wait(&rq->fence, true,
 					   MAX_SCHEDULE_TIMEOUT) < 0)
-			return false;
+			goto unlock;
 	}
 	i915_retire_requests(i915);
 	GEM_BUG_ON(i915->gt.active_requests);
@@ -3296,8 +3308,11 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 
 	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
 	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+	ret = true;
+unlock:
+	mutex_unlock(&i915->gpu_error.wedge_mutex);
 
-	return true;
+	return ret;
 }
 
 static void
@@ -5693,6 +5708,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 			  i915_gem_idle_work_handler);
 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
+	mutex_init(&dev_priv->gpu_error.wedge_mutex);
 
 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 6d9f45468ac1..604291f7762d 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -271,8 +271,8 @@ struct i915_gpu_error {
 #define I915_RESET_BACKOFF	0
 #define I915_RESET_HANDOFF	1
 #define I915_RESET_MODESET	2
+#define I915_RESET_ENGINE	3
 #define I915_WEDGED		(BITS_PER_LONG - 1)
-#define I915_RESET_ENGINE	(I915_WEDGED - I915_NUM_ENGINES)
 
 	/** Number of times an engine has been reset */
 	u32 reset_engine_count[I915_NUM_ENGINES];
@@ -283,6 +283,8 @@ struct i915_gpu_error {
 	/** Reason for the current *global* reset */
 	const char *reason;
 
+	struct mutex wedge_mutex; /* serialises wedging/unwedging */
+
 	/**
 	 * Waitqueue to signal when a hang is detected. Used to for waiters
 	 * to release the struct_mutex for the reset to procede.
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index aa4ddae94aca..4a25d2a344f2 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -189,6 +189,7 @@ struct drm_i915_private *mock_gem_device(void)
 
 	init_waitqueue_head(&i915->gpu_error.wait_queue);
 	init_waitqueue_head(&i915->gpu_error.reset_queue);
+	mutex_init(&i915->gpu_error.wedge_mutex);
 
 	i915->wq = alloc_ordered_workqueue("mock", 0);
 	if (!i915->wq)
-- 
2.20.1


* [PATCH 25/46] drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (22 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 24/46] drm/i915: Serialise concurrent calls to i915_gem_set_wedged() Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-08  9:00   ` Tvrtko Ursulin
  2019-01-07 11:54 ` [PATCH 26/46] drm/i915: Pull all the reset functionality together into i915_reset.c Chris Wilson
                   ` (26 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx

We have two classes of VM, the global GTT and the per-process GTT. In
order to allow ourselves the freedom to mix both along call chains,
distinguish the two classes with regard to their mutexes and lockdep
maps.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
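A minimal sketch of the subclass trick, for illustration only (the
helper name below is hypothetical, not part of this patch): lockdep
keys every mutex initialised from the same callsite into one lock
class, so all the vm->mutex instances created in
i915_address_space_init() would share a class, and nesting a ppgtt
mutex under the ggtt mutex would be reported as recursive locking.
Tagging each instance with a distinct subclass splits the class in two:

	/* hypothetical reduced form of i915_address_space_init() */
	static void vm_init_mutex(struct i915_address_space *vm, int subclass)
	{
		mutex_init(&vm->mutex); /* one lockdep class for all VMs... */
		lockdep_set_subclass(&vm->mutex, subclass); /* ...until split */
	}

	vm_init_mutex(&ggtt->vm, VM_CLASS_GGTT);	/* subclass 0 */
	vm_init_mutex(&ppgtt->vm, VM_CLASS_PPGTT);	/* subclass 1 */

	/* lockdep now accepts taking both along one call chain */
	mutex_lock(&ggtt->vm.mutex);
	mutex_lock(&ppgtt->vm.mutex);
	/* ... manipulate both address spaces ... */
	mutex_unlock(&ppgtt->vm.mutex);
	mutex_unlock(&ggtt->vm.mutex);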
 drivers/gpu/drm/i915/i915_gem_gtt.c       | 10 +++++-----
 drivers/gpu/drm/i915/i915_gem_gtt.h       |  2 ++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  6 +++---
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9e9ce31142b1..2f35f13d177d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -474,8 +474,7 @@ static void vm_free_page(struct i915_address_space *vm, struct page *page)
 	spin_unlock(&vm->free_pages.lock);
 }
 
-static void i915_address_space_init(struct i915_address_space *vm,
-				    struct drm_i915_private *dev_priv)
+static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 {
 	/*
 	 * The vm->mutex must be reclaim safe (for use in the shrinker).
@@ -483,6 +482,7 @@ static void i915_address_space_init(struct i915_address_space *vm,
 	 * attempt holding the lock is immediately reported by lockdep.
 	 */
 	mutex_init(&vm->mutex);
+	lockdep_set_subclass(&vm->mutex, subclass);
 	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
 
 	GEM_BUG_ON(!vm->total);
@@ -1548,7 +1548,7 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 	/* From bdw, there is support for read-only pages in the PPGTT. */
 	ppgtt->vm.has_read_only = true;
 
-	i915_address_space_init(&ppgtt->vm, i915);
+	i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
 
 	/* There are only few exceptions for gen >=6. chv and bxt.
 	 * And we are not sure about the latter so play safe for now.
@@ -1997,7 +1997,7 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 
 	ppgtt->base.vm.total = I915_PDES * GEN6_PTES * I915_GTT_PAGE_SIZE;
 
-	i915_address_space_init(&ppgtt->base.vm, i915);
+	i915_address_space_init(&ppgtt->base.vm, VM_CLASS_PPGTT);
 
 	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
 	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
@@ -3434,7 +3434,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
 	 * and beyond the end of the GTT if we do not provide a guard.
 	 */
 	mutex_lock(&dev_priv->drm.struct_mutex);
-	i915_address_space_init(&ggtt->vm, dev_priv);
+	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
 
 	ggtt->vm.is_ggtt = true;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index e2360f16427a..9229b03d629b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -288,6 +288,8 @@ struct i915_address_space {
 	bool closed;
 
 	struct mutex mutex; /* protects vma and our lists */
+#define VM_CLASS_GGTT 0
+#define VM_CLASS_PPGTT 1
 
 	u64 scratch_pte;
 	struct i915_page_dma scratch_page;
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index 6ae418c76015..976c862b3842 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -70,7 +70,7 @@ mock_ppgtt(struct drm_i915_private *i915,
 	ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE);
 	ppgtt->vm.file = ERR_PTR(-ENODEV);
 
-	i915_address_space_init(&ppgtt->vm, i915);
+	i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
 
 	ppgtt->vm.clear_range = nop_clear_range;
 	ppgtt->vm.insert_page = mock_insert_page;
@@ -102,6 +102,7 @@ void mock_init_ggtt(struct drm_i915_private *i915)
 	struct i915_ggtt *ggtt = &i915->ggtt;
 
 	ggtt->vm.i915 = i915;
+	ggtt->vm.is_ggtt = true;
 
 	ggtt->gmadr = (struct resource) DEFINE_RES_MEM(0, 2048 * PAGE_SIZE);
 	ggtt->mappable_end = resource_size(&ggtt->gmadr);
@@ -117,9 +118,8 @@ void mock_init_ggtt(struct drm_i915_private *i915)
 	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
 	ggtt->vm.vma_ops.clear_pages = clear_pages;
 
-	i915_address_space_init(&ggtt->vm, i915);
 
-	ggtt->vm.is_ggtt = true;
+	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
 }
 
 void mock_fini_ggtt(struct drm_i915_private *i915)
-- 
2.20.1

* [PATCH 26/46] drm/i915: Pull all the reset functionality together into i915_reset.c
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (23 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 25/46] drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 11:54 ` [PATCH 27/46] drm/i915: Make all GPU resets atomic Chris Wilson
                   ` (25 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx

Currently the code to reset the GPU and recover our state is spread
across several files. Pull the logic together into a common file.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |    3 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |    2 +
 drivers/gpu/drm/i915/i915_drv.c               |  206 +--
 drivers/gpu/drm/i915/i915_drv.h               |   33 +-
 drivers/gpu/drm/i915/i915_gem.c               |  446 +-----
 drivers/gpu/drm/i915/i915_gem_gtt.c           |    1 +
 drivers/gpu/drm/i915/i915_irq.c               |  238 ---
 drivers/gpu/drm/i915/i915_request.c           |    1 +
 drivers/gpu/drm/i915/i915_reset.c             | 1389 +++++++++++++++++
 drivers/gpu/drm/i915/i915_reset.h             |   56 +
 drivers/gpu/drm/i915/intel_display.c          |   15 +-
 drivers/gpu/drm/i915/intel_engine_cs.c        |    1 +
 drivers/gpu/drm/i915/intel_guc.h              |    3 +
 drivers/gpu/drm/i915/intel_hangcheck.c        |    1 +
 drivers/gpu/drm/i915/intel_uc.c               |    1 +
 drivers/gpu/drm/i915/intel_uncore.c           |  556 -------
 drivers/gpu/drm/i915/selftests/intel_lrc.c    |    2 +
 .../drm/i915/selftests/intel_workarounds.c    |    1 +
 18 files changed, 1483 insertions(+), 1472 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_reset.c
 create mode 100644 drivers/gpu/drm/i915/i915_reset.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index c34bee16730d..611115ed00db 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -40,9 +40,10 @@ i915-y := i915_drv.o \
 	  i915_mm.o \
 	  i915_params.o \
 	  i915_pci.o \
+	  i915_reset.o \
 	  i915_suspend.o \
-	  i915_syncmap.o \
 	  i915_sw_fence.o \
+	  i915_syncmap.o \
 	  i915_sysfs.o \
 	  intel_csr.o \
 	  intel_device_info.o \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 4257c8f78e27..9c3664fb39de 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -32,6 +32,8 @@
 #include "intel_drv.h"
 #include "intel_guc_submission.h"
 
+#include "i915_reset.h"
+
 static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
 {
 	return to_i915(node->minor->dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 4d0a4f028882..643c12ebf5a4 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -49,6 +49,7 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "i915_pmu.h"
+#include "i915_reset.h"
 #include "i915_query.h"
 #include "i915_vgpu.h"
 #include "intel_drv.h"
@@ -2206,211 +2207,6 @@ static int i915_resume_switcheroo(struct drm_device *dev)
 	return i915_drm_resume(dev);
 }
 
-/**
- * i915_reset - reset chip after a hang
- * @i915: #drm_i915_private to reset
- * @stalled_mask: mask of the stalled engines with the guilty requests
- * @reason: user error message for why we are resetting
- *
- * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
- * on failure.
- *
- * Caller must hold the struct_mutex.
- *
- * Procedure is fairly simple:
- *   - reset the chip using the reset reg
- *   - re-init context state
- *   - re-init hardware status page
- *   - re-init ring buffer
- *   - re-init interrupt state
- *   - re-init display
- */
-void i915_reset(struct drm_i915_private *i915,
-		unsigned int stalled_mask,
-		const char *reason)
-{
-	struct i915_gpu_error *error = &i915->gpu_error;
-	int ret;
-	int i;
-
-	GEM_TRACE("flags=%lx\n", error->flags);
-
-	might_sleep();
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	assert_rpm_wakelock_held(i915);
-	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
-
-	if (!test_bit(I915_RESET_HANDOFF, &error->flags))
-		return;
-
-	/* Clear any previous failed attempts at recovery. Time to try again. */
-	if (!i915_gem_unset_wedged(i915))
-		goto wakeup;
-
-	if (reason)
-		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
-	error->reset_count++;
-
-	ret = i915_gem_reset_prepare(i915);
-	if (ret) {
-		dev_err(i915->drm.dev, "GPU recovery failed\n");
-		goto taint;
-	}
-
-	if (!intel_has_gpu_reset(i915)) {
-		if (i915_modparams.reset)
-			dev_err(i915->drm.dev, "GPU reset not supported\n");
-		else
-			DRM_DEBUG_DRIVER("GPU reset disabled\n");
-		goto error;
-	}
-
-	for (i = 0; i < 3; i++) {
-		ret = intel_gpu_reset(i915, ALL_ENGINES);
-		if (ret == 0)
-			break;
-
-		msleep(100);
-	}
-	if (ret) {
-		dev_err(i915->drm.dev, "Failed to reset chip\n");
-		goto taint;
-	}
-
-	/* Ok, now get things going again... */
-
-	/*
-	 * Everything depends on having the GTT running, so we need to start
-	 * there.
-	 */
-	ret = i915_ggtt_enable_hw(i915);
-	if (ret) {
-		DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n",
-			  ret);
-		goto error;
-	}
-
-	i915_gem_reset(i915, stalled_mask);
-	intel_overlay_reset(i915);
-
-	/*
-	 * Next we need to restore the context, but we don't use those
-	 * yet either...
-	 *
-	 * Ring buffer needs to be re-initialized in the KMS case, or if X
-	 * was running at the time of the reset (i.e. we weren't VT
-	 * switched away).
-	 */
-	ret = i915_gem_init_hw(i915);
-	if (ret) {
-		DRM_ERROR("Failed to initialise HW following reset (%d)\n",
-			  ret);
-		goto error;
-	}
-
-	i915_queue_hangcheck(i915);
-
-finish:
-	i915_gem_reset_finish(i915);
-wakeup:
-	clear_bit(I915_RESET_HANDOFF, &error->flags);
-	wake_up_bit(&error->flags, I915_RESET_HANDOFF);
-	return;
-
-taint:
-	/*
-	 * History tells us that if we cannot reset the GPU now, we
-	 * never will. This then impacts everything that is run
-	 * subsequently. On failing the reset, we mark the driver
-	 * as wedged, preventing further execution on the GPU.
-	 * We also want to go one step further and add a taint to the
-	 * kernel so that any subsequent faults can be traced back to
-	 * this failure. This is important for CI, where if the
-	 * GPU/driver fails we would like to reboot and restart testing
-	 * rather than continue on into oblivion. For everyone else,
-	 * the system should still plod along, but they have been warned!
-	 */
-	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
-error:
-	i915_gem_set_wedged(i915);
-	i915_retire_requests(i915);
-	goto finish;
-}
-
-static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv,
-					struct intel_engine_cs *engine)
-{
-	return intel_gpu_reset(dev_priv, intel_engine_flag(engine));
-}
-
-/**
- * i915_reset_engine - reset GPU engine to recover from a hang
- * @engine: engine to reset
- * @msg: reason for GPU reset; or NULL for no dev_notice()
- *
- * Reset a specific GPU engine. Useful if a hang is detected.
- * Returns zero on successful reset or otherwise an error code.
- *
- * Procedure is:
- *  - identifies the request that caused the hang and it is dropped
- *  - reset engine (which will force the engine to idle)
- *  - re-init/configure engine
- */
-int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
-{
-	struct i915_gpu_error *error = &engine->i915->gpu_error;
-	struct i915_request *active_request;
-	int ret;
-
-	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
-	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
-
-	active_request = i915_gem_reset_prepare_engine(engine);
-	if (IS_ERR_OR_NULL(active_request)) {
-		/* Either the previous reset failed, or we pardon the reset. */
-		ret = PTR_ERR(active_request);
-		goto out;
-	}
-
-	if (msg)
-		dev_notice(engine->i915->drm.dev,
-			   "Resetting %s for %s\n", engine->name, msg);
-	error->reset_engine_count[engine->id]++;
-
-	if (!engine->i915->guc.execbuf_client)
-		ret = intel_gt_reset_engine(engine->i915, engine);
-	else
-		ret = intel_guc_reset_engine(&engine->i915->guc, engine);
-	if (ret) {
-		/* If we fail here, we expect to fallback to a global reset */
-		DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
-				 engine->i915->guc.execbuf_client ? "GuC " : "",
-				 engine->name, ret);
-		goto out;
-	}
-
-	/*
-	 * The request that caused the hang is stuck on elsp, we know the
-	 * active request and can drop it, adjust head to skip the offending
-	 * request to resume executing remaining requests in the queue.
-	 */
-	i915_gem_reset_engine(engine, active_request, true);
-
-	/*
-	 * The engine and its registers (and workarounds in case of render)
-	 * have been reset to their default values. Follow the init_ring
-	 * process to program RING_MODE, HWSP and re-enable submission.
-	 */
-	ret = engine->init_hw(engine);
-	if (ret)
-		goto out;
-
-out:
-	intel_engine_cancel_stop_cs(engine);
-	i915_gem_reset_finish_engine(engine);
-	return ret;
-}
-
 static int i915_pm_prepare(struct device *kdev)
 {
 	struct pci_dev *pdev = to_pci_dev(kdev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e3c51c40dc9d..29c810533d03 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2598,19 +2598,7 @@ extern const struct dev_pm_ops i915_pm_ops;
 extern int i915_driver_load(struct pci_dev *pdev,
 			    const struct pci_device_id *ent);
 extern void i915_driver_unload(struct drm_device *dev);
-extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
-extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
-
-extern void i915_reset(struct drm_i915_private *i915,
-		       unsigned int stalled_mask,
-		       const char *reason);
-extern int i915_reset_engine(struct intel_engine_cs *engine,
-			     const char *reason);
-
-extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv);
-extern int intel_reset_guc(struct drm_i915_private *dev_priv);
-extern int intel_guc_reset_engine(struct intel_guc *guc,
-				  struct intel_engine_cs *engine);
+
 extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
 extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
 extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
@@ -2653,20 +2641,11 @@ static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv)
 			   &dev_priv->gpu_error.hangcheck_work, delay);
 }
 
-__printf(4, 5)
-void i915_handle_error(struct drm_i915_private *dev_priv,
-		       u32 engine_mask,
-		       unsigned long flags,
-		       const char *fmt, ...);
-#define I915_ERROR_CAPTURE BIT(0)
-
 extern void intel_irq_init(struct drm_i915_private *dev_priv);
 extern void intel_irq_fini(struct drm_i915_private *dev_priv);
 int intel_irq_install(struct drm_i915_private *dev_priv);
 void intel_irq_uninstall(struct drm_i915_private *dev_priv);
 
-void i915_clear_error_registers(struct drm_i915_private *dev_priv);
-
 static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
 {
 	return dev_priv->gvt;
@@ -3031,18 +3010,8 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
 	return READ_ONCE(error->reset_engine_count[engine->id]);
 }
 
-struct i915_request *
-i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
-int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
-void i915_gem_reset(struct drm_i915_private *dev_priv,
-		    unsigned int stalled_mask);
-void i915_gem_reset_finish_engine(struct intel_engine_cs *engine);
-void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
-void i915_gem_reset_engine(struct intel_engine_cs *engine,
-			   struct i915_request *request,
-			   bool stalled);
 
 void i915_gem_init_mmio(struct drm_i915_private *i915);
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6e5546075f17..747a08b8961a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -28,15 +28,6 @@
 #include <drm/drmP.h>
 #include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "i915_vgpu.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
-#include "intel_mocs.h"
-#include "intel_workarounds.h"
-#include "i915_gemfs.h"
 #include <linux/dma-fence-array.h>
 #include <linux/kthread.h>
 #include <linux/reservation.h>
@@ -47,6 +38,18 @@
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
 
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+#include "i915_gemfs.h"
+#include "i915_reset.h"
+#include "i915_trace.h"
+#include "i915_vgpu.h"
+
+#include "intel_drv.h"
+#include "intel_frontbuffer.h"
+#include "intel_mocs.h"
+#include "intel_workarounds.h"
+
 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 
 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
@@ -2860,61 +2863,6 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
-static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv,
-					const struct i915_gem_context *ctx)
-{
-	unsigned int score;
-	unsigned long prev_hang;
-
-	if (i915_gem_context_is_banned(ctx))
-		score = I915_CLIENT_SCORE_CONTEXT_BAN;
-	else
-		score = 0;
-
-	prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
-	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
-		score += I915_CLIENT_SCORE_HANG_FAST;
-
-	if (score) {
-		atomic_add(score, &file_priv->ban_score);
-
-		DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
-				 ctx->name, score,
-				 atomic_read(&file_priv->ban_score));
-	}
-}
-
-static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
-{
-	unsigned int score;
-	bool banned, bannable;
-
-	atomic_inc(&ctx->guilty_count);
-
-	bannable = i915_gem_context_is_bannable(ctx);
-	score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
-	banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
-
-	/* Cool contexts don't accumulate client ban score */
-	if (!bannable)
-		return;
-
-	if (banned) {
-		DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
-				 ctx->name, atomic_read(&ctx->guilty_count),
-				 score);
-		i915_gem_context_set_banned(ctx);
-	}
-
-	if (!IS_ERR_OR_NULL(ctx->file_priv))
-		i915_gem_client_mark_guilty(ctx->file_priv, ctx);
-}
-
-static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
-{
-	atomic_inc(&ctx->active_count);
-}
-
 struct i915_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine)
 {
@@ -2945,376 +2893,6 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 	return active;
 }
 
-/*
- * Ensure irq handler finishes, and not run again.
- * Also return the active request so that we only search for it once.
- */
-struct i915_request *
-i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
-{
-	struct i915_request *request;
-
-	/*
-	 * During the reset sequence, we must prevent the engine from
-	 * entering RC6. As the context state is undefined until we restart
-	 * the engine, if it does enter RC6 during the reset, the state
-	 * written to the powercontext is undefined and so we may lose
-	 * GPU state upon resume, i.e. fail to restart after a reset.
-	 */
-	intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
-
-	request = engine->reset.prepare(engine);
-	if (request && request->fence.error == -EIO)
-		request = ERR_PTR(-EIO); /* Previous reset failed! */
-
-	return request;
-}
-
-int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	struct i915_request *request;
-	enum intel_engine_id id;
-	int err = 0;
-
-	for_each_engine(engine, dev_priv, id) {
-		request = i915_gem_reset_prepare_engine(engine);
-		if (IS_ERR(request)) {
-			err = PTR_ERR(request);
-			continue;
-		}
-
-		engine->hangcheck.active_request = request;
-	}
-
-	i915_gem_revoke_fences(dev_priv);
-	intel_uc_sanitize(dev_priv);
-
-	return err;
-}
-
-static void engine_skip_context(struct i915_request *request)
-{
-	struct intel_engine_cs *engine = request->engine;
-	struct i915_gem_context *hung_ctx = request->gem_context;
-	struct i915_timeline *timeline = request->timeline;
-	unsigned long flags;
-
-	GEM_BUG_ON(timeline == &engine->timeline);
-
-	spin_lock_irqsave(&engine->timeline.lock, flags);
-	spin_lock(&timeline->lock);
-
-	list_for_each_entry_continue(request, &engine->timeline.requests, link)
-		if (request->gem_context == hung_ctx)
-			i915_request_skip(request, -EIO);
-
-	list_for_each_entry(request, &timeline->requests, link)
-		i915_request_skip(request, -EIO);
-
-	spin_unlock(&timeline->lock);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
-}
-
-/* Returns the request if it was guilty of the hang */
-static struct i915_request *
-i915_gem_reset_request(struct intel_engine_cs *engine,
-		       struct i915_request *request,
-		       bool stalled)
-{
-	/* The guilty request will get skipped on a hung engine.
-	 *
-	 * Users of client default contexts do not rely on logical
-	 * state preserved between batches so it is safe to execute
-	 * queued requests following the hang. Non default contexts
-	 * rely on preserved state, so skipping a batch loses the
-	 * evolution of the state and it needs to be considered corrupted.
-	 * Executing more queued batches on top of corrupted state is
-	 * risky. But we take the risk by trying to advance through
-	 * the queued requests in order to make the client behaviour
-	 * more predictable around resets, by not throwing away random
-	 * amount of batches it has prepared for execution. Sophisticated
-	 * clients can use gem_reset_stats_ioctl and dma fence status
-	 * (exported via sync_file info ioctl on explicit fences) to observe
-	 * when it loses the context state and should rebuild accordingly.
-	 *
-	 * The context ban, and ultimately the client ban, mechanism are safety
-	 * valves if client submission ends up resulting in nothing more than
-	 * subsequent hangs.
-	 */
-
-	if (i915_request_completed(request)) {
-		GEM_TRACE("%s pardoned global=%d (fence %llx:%lld), current %d\n",
-			  engine->name, request->global_seqno,
-			  request->fence.context, request->fence.seqno,
-			  intel_engine_get_seqno(engine));
-		stalled = false;
-	}
-
-	if (stalled) {
-		i915_gem_context_mark_guilty(request->gem_context);
-		i915_request_skip(request, -EIO);
-
-		/* If this context is now banned, skip all pending requests. */
-		if (i915_gem_context_is_banned(request->gem_context))
-			engine_skip_context(request);
-	} else {
-		/*
-		 * Since this is not the hung engine, it may have advanced
-		 * since the hang declaration. Double check by refinding
-		 * the active request at the time of the reset.
-		 */
-		request = i915_gem_find_active_request(engine);
-		if (request) {
-			unsigned long flags;
-
-			i915_gem_context_mark_innocent(request->gem_context);
-			dma_fence_set_error(&request->fence, -EAGAIN);
-
-			/* Rewind the engine to replay the incomplete rq */
-			spin_lock_irqsave(&engine->timeline.lock, flags);
-			request = list_prev_entry(request, link);
-			if (&request->link == &engine->timeline.requests)
-				request = NULL;
-			spin_unlock_irqrestore(&engine->timeline.lock, flags);
-		}
-	}
-
-	return request;
-}
-
-void i915_gem_reset_engine(struct intel_engine_cs *engine,
-			   struct i915_request *request,
-			   bool stalled)
-{
-	if (request)
-		request = i915_gem_reset_request(engine, request, stalled);
-
-	/* Setup the CS to resume from the breadcrumb of the hung request */
-	engine->reset.reset(engine, request);
-}
-
-void i915_gem_reset(struct drm_i915_private *dev_priv,
-		    unsigned int stalled_mask)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	i915_retire_requests(dev_priv);
-
-	for_each_engine(engine, dev_priv, id) {
-		struct intel_context *ce;
-
-		i915_gem_reset_engine(engine,
-				      engine->hangcheck.active_request,
-				      stalled_mask & ENGINE_MASK(id));
-		ce = fetch_and_zero(&engine->last_retired_context);
-		if (ce)
-			intel_context_unpin(ce);
-
-		/*
-		 * Ostensibily, we always want a context loaded for powersaving,
-		 * so if the engine is idle after the reset, send a request
-		 * to load our scratch kernel_context.
-		 *
-		 * More mysteriously, if we leave the engine idle after a reset,
-		 * the next userspace batch may hang, with what appears to be
-		 * an incoherent read by the CS (presumably stale TLB). An
-		 * empty request appears sufficient to paper over the glitch.
-		 */
-		if (intel_engine_is_idle(engine)) {
-			struct i915_request *rq;
-
-			rq = i915_request_alloc(engine,
-						dev_priv->kernel_context);
-			if (!IS_ERR(rq))
-				i915_request_add(rq);
-		}
-	}
-
-	i915_gem_restore_fences(dev_priv);
-}
-
-void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
-{
-	engine->reset.finish(engine);
-
-	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
-}
-
-void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	for_each_engine(engine, dev_priv, id) {
-		engine->hangcheck.active_request = NULL;
-		i915_gem_reset_finish_engine(engine);
-	}
-}
-
-static void nop_submit_request(struct i915_request *request)
-{
-	unsigned long flags;
-
-	GEM_TRACE("%s fence %llx:%lld -> -EIO\n",
-		  request->engine->name,
-		  request->fence.context, request->fence.seqno);
-	dma_fence_set_error(&request->fence, -EIO);
-
-	spin_lock_irqsave(&request->engine->timeline.lock, flags);
-	__i915_request_submit(request);
-	intel_engine_write_global_seqno(request->engine, request->global_seqno);
-	spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
-}
-
-void i915_gem_set_wedged(struct drm_i915_private *i915)
-{
-	struct i915_gpu_error *error = &i915->gpu_error;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	mutex_lock(&error->wedge_mutex);
-	if (test_bit(I915_WEDGED, &error->flags)) {
-		mutex_unlock(&error->wedge_mutex);
-		return;
-	}
-
-	if (GEM_SHOW_DEBUG()) {
-		struct drm_printer p = drm_debug_printer(__func__);
-
-		for_each_engine(engine, i915, id)
-			intel_engine_dump(engine, &p, "%s\n", engine->name);
-	}
-
-	GEM_TRACE("start\n");
-
-	/*
-	 * First, stop submission to hw, but do not yet complete requests by
-	 * rolling the global seqno forward (since this would complete requests
-	 * for which we haven't set the fence error to EIO yet).
-	 */
-	for_each_engine(engine, i915, id)
-		i915_gem_reset_prepare_engine(engine);
-
-	/* Even if the GPU reset fails, it should still stop the engines */
-	if (INTEL_GEN(i915) >= 5)
-		intel_gpu_reset(i915, ALL_ENGINES);
-
-	for_each_engine(engine, i915, id) {
-		engine->submit_request = nop_submit_request;
-		engine->schedule = NULL;
-	}
-	i915->caps.scheduler = 0;
-
-	/*
-	 * Make sure no request can slip through without getting completed by
-	 * either this call here to intel_engine_write_global_seqno, or the one
-	 * in nop_submit_request.
-	 */
-	synchronize_rcu();
-
-	/* Mark all executing requests as skipped */
-	for_each_engine(engine, i915, id)
-		engine->cancel_requests(engine);
-
-	for_each_engine(engine, i915, id) {
-		i915_gem_reset_finish_engine(engine);
-		intel_engine_wakeup(engine);
-	}
-
-	smp_mb__before_atomic();
-	set_bit(I915_WEDGED, &error->flags);
-
-	GEM_TRACE("end\n");
-	mutex_unlock(&error->wedge_mutex);
-
-	wake_up_all(&error->reset_queue);
-}
-
-bool i915_gem_unset_wedged(struct drm_i915_private *i915)
-{
-	struct i915_gpu_error *error = &i915->gpu_error;
-	struct i915_timeline *tl;
-	bool ret = false;
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	if (!test_bit(I915_WEDGED, &error->flags))
-		return true;
-
-	if (!i915->gt.scratch) /* Never full initialised, recovery impossible */
-		return false;
-
-	mutex_lock(&error->wedge_mutex);
-
-	GEM_TRACE("start\n");
-
-	/*
-	 * Before unwedging, make sure that all pending operations
-	 * are flushed and errored out - we may have requests waiting upon
-	 * third party fences. We marked all inflight requests as EIO, and
-	 * every execbuf since returned EIO, for consistency we want all
-	 * the currently pending requests to also be marked as EIO, which
-	 * is done inside our nop_submit_request - and so we must wait.
-	 *
-	 * No more can be submitted until we reset the wedged bit.
-	 */
-	list_for_each_entry(tl, &i915->gt.timelines, link) {
-		struct i915_request *rq;
-
-		rq = i915_gem_active_peek(&tl->last_request,
-					  &i915->drm.struct_mutex);
-		if (!rq)
-			continue;
-
-		/*
-		 * We can't use our normal waiter as we want to
-		 * avoid recursively trying to handle the current
-		 * reset. The basic dma_fence_default_wait() installs
-		 * a callback for dma_fence_signal(), which is
-		 * triggered by our nop handler (indirectly, the
-		 * callback enables the signaler thread which is
-		 * woken by the nop_submit_request() advancing the seqno
-		 * and when the seqno passes the fence, the signaler
-		 * then signals the fence waking us up).
-		 */
-		if (dma_fence_default_wait(&rq->fence, true,
-					   MAX_SCHEDULE_TIMEOUT) < 0)
-			goto unlock;
-	}
-	i915_retire_requests(i915);
-	GEM_BUG_ON(i915->gt.active_requests);
-
-	intel_engines_sanitize(i915, false);
-
-	/*
-	 * Undo nop_submit_request. We prevent all new i915 requests from
-	 * being queued (by disallowing execbuf whilst wedged) so having
-	 * waited for all active requests above, we know the system is idle
-	 * and do not have to worry about a thread being inside
-	 * engine->submit_request() as we swap over. So unlike installing
-	 * the nop_submit_request on reset, we can do this from normal
-	 * context and do not require stop_machine().
-	 */
-	intel_engines_reset_default_submission(i915);
-	i915_gem_contexts_lost(i915);
-
-	GEM_TRACE("end\n");
-
-	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
-	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
-	ret = true;
-unlock:
-	mutex_unlock(&i915->gpu_error.wedge_mutex);
-
-	return ret;
-}
-
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2f35f13d177d..45c7c8b6c7c8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -38,6 +38,7 @@
 
 #include "i915_drv.h"
 #include "i915_vgpu.h"
+#include "i915_reset.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 3272bd02c3cf..d1727bcac776 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2931,46 +2931,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
-struct wedge_me {
-	struct delayed_work work;
-	struct drm_i915_private *i915;
-	const char *name;
-};
-
-static void wedge_me(struct work_struct *work)
-{
-	struct wedge_me *w = container_of(work, typeof(*w), work.work);
-
-	dev_err(w->i915->drm.dev,
-		"%s timed out, cancelling all in-flight rendering.\n",
-		w->name);
-	i915_gem_set_wedged(w->i915);
-}
-
-static void __init_wedge(struct wedge_me *w,
-			 struct drm_i915_private *i915,
-			 long timeout,
-			 const char *name)
-{
-	w->i915 = i915;
-	w->name = name;
-
-	INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me);
-	schedule_delayed_work(&w->work, timeout);
-}
-
-static void __fini_wedge(struct wedge_me *w)
-{
-	cancel_delayed_work_sync(&w->work);
-	destroy_delayed_work_on_stack(&w->work);
-	w->i915 = NULL;
-}
-
-#define i915_wedge_on_timeout(W, DEV, TIMEOUT)				\
-	for (__init_wedge((W), (DEV), (TIMEOUT), __func__);		\
-	     (W)->i915;							\
-	     __fini_wedge((W)))
-
 static u32
 gen11_gt_engine_identity(struct drm_i915_private * const i915,
 			 const unsigned int bank, const unsigned int bit)
@@ -3181,204 +3141,6 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
-static void i915_reset_device(struct drm_i915_private *dev_priv,
-			      u32 engine_mask,
-			      const char *reason)
-{
-	struct i915_gpu_error *error = &dev_priv->gpu_error;
-	struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj;
-	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
-	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
-	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
-	struct wedge_me w;
-
-	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
-
-	DRM_DEBUG_DRIVER("resetting chip\n");
-	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
-
-	/* Use a watchdog to ensure that our reset completes */
-	i915_wedge_on_timeout(&w, dev_priv, 5*HZ) {
-		intel_prepare_reset(dev_priv);
-
-		error->reason = reason;
-		error->stalled_mask = engine_mask;
-
-		/* Signal that locked waiters should reset the GPU */
-		smp_mb__before_atomic();
-		set_bit(I915_RESET_HANDOFF, &error->flags);
-		wake_up_all(&error->wait_queue);
-
-		/* Wait for anyone holding the lock to wakeup, without
-		 * blocking indefinitely on struct_mutex.
-		 */
-		do {
-			if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
-				i915_reset(dev_priv, engine_mask, reason);
-				mutex_unlock(&dev_priv->drm.struct_mutex);
-			}
-		} while (wait_on_bit_timeout(&error->flags,
-					     I915_RESET_HANDOFF,
-					     TASK_UNINTERRUPTIBLE,
-					     1));
-
-		error->stalled_mask = 0;
-		error->reason = NULL;
-
-		intel_finish_reset(dev_priv);
-	}
-
-	if (!test_bit(I915_WEDGED, &error->flags))
-		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
-}
-
-void i915_clear_error_registers(struct drm_i915_private *dev_priv)
-{
-	u32 eir;
-
-	if (!IS_GEN(dev_priv, 2))
-		I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER));
-
-	if (INTEL_GEN(dev_priv) < 4)
-		I915_WRITE(IPEIR, I915_READ(IPEIR));
-	else
-		I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965));
-
-	I915_WRITE(EIR, I915_READ(EIR));
-	eir = I915_READ(EIR);
-	if (eir) {
-		/*
-		 * some errors might have become stuck,
-		 * mask them.
-		 */
-		DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
-		I915_WRITE(EMR, I915_READ(EMR) | eir);
-		I915_WRITE(IIR, I915_MASTER_ERROR_INTERRUPT);
-	}
-
-	if (INTEL_GEN(dev_priv) >= 8) {
-		I915_WRITE(GEN8_RING_FAULT_REG,
-			   I915_READ(GEN8_RING_FAULT_REG) & ~RING_FAULT_VALID);
-		POSTING_READ(GEN8_RING_FAULT_REG);
-	} else if (INTEL_GEN(dev_priv) >= 6) {
-		struct intel_engine_cs *engine;
-		enum intel_engine_id id;
-
-		for_each_engine(engine, dev_priv, id) {
-			I915_WRITE(RING_FAULT_REG(engine),
-				   I915_READ(RING_FAULT_REG(engine)) &
-				   ~RING_FAULT_VALID);
-		}
-		POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS]));
-	}
-}
-
-/**
- * i915_handle_error - handle a gpu error
- * @dev_priv: i915 device private
- * @engine_mask: mask representing engines that are hung
- * @flags: control flags
- * @fmt: Error message format string
- *
- * Do some basic checking of register state at error time and
- * dump it to the syslog.  Also call i915_capture_error_state() to make
- * sure we get a record and make it available in debugfs.  Fire a uevent
- * so userspace knows something bad happened (should trigger collection
- * of a ring dump etc.).
- */
-void i915_handle_error(struct drm_i915_private *dev_priv,
-		       u32 engine_mask,
-		       unsigned long flags,
-		       const char *fmt, ...)
-{
-	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
-	unsigned int tmp;
-	char error_msg[80];
-	char *msg = NULL;
-
-	if (fmt) {
-		va_list args;
-
-		va_start(args, fmt);
-		vscnprintf(error_msg, sizeof(error_msg), fmt, args);
-		va_end(args);
-
-		msg = error_msg;
-	}
-
-	/*
-	 * In most cases it's guaranteed that we get here with an RPM
-	 * reference held, for example because there is a pending GPU
-	 * request that won't finish until the reset is done. This
-	 * isn't the case at least when we get here by doing a
-	 * simulated reset via debugfs, so get an RPM reference.
-	 */
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	engine_mask &= INTEL_INFO(dev_priv)->ring_mask;
-
-	if (flags & I915_ERROR_CAPTURE) {
-		i915_capture_error_state(dev_priv, engine_mask, msg);
-		i915_clear_error_registers(dev_priv);
-	}
-
-	/*
-	 * Try engine reset when available. We fall back to full reset if
-	 * single reset fails.
-	 */
-	if (intel_has_reset_engine(dev_priv) &&
-	    !i915_terminally_wedged(&dev_priv->gpu_error)) {
-		for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
-			BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
-			if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
-					     &dev_priv->gpu_error.flags))
-				continue;
-
-			if (i915_reset_engine(engine, msg) == 0)
-				engine_mask &= ~intel_engine_flag(engine);
-
-			clear_bit(I915_RESET_ENGINE + engine->id,
-				  &dev_priv->gpu_error.flags);
-			wake_up_bit(&dev_priv->gpu_error.flags,
-				    I915_RESET_ENGINE + engine->id);
-		}
-	}
-
-	if (!engine_mask)
-		goto out;
-
-	/* Full reset needs the mutex, stop any other user trying to do so. */
-	if (test_and_set_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) {
-		wait_event(dev_priv->gpu_error.reset_queue,
-			   !test_bit(I915_RESET_BACKOFF,
-				     &dev_priv->gpu_error.flags));
-		goto out;
-	}
-
-	/* Prevent any other reset-engine attempt. */
-	for_each_engine(engine, dev_priv, tmp) {
-		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
-					&dev_priv->gpu_error.flags))
-			wait_on_bit(&dev_priv->gpu_error.flags,
-				    I915_RESET_ENGINE + engine->id,
-				    TASK_UNINTERRUPTIBLE);
-	}
-
-	i915_reset_device(dev_priv, engine_mask, msg);
-
-	for_each_engine(engine, dev_priv, tmp) {
-		clear_bit(I915_RESET_ENGINE + engine->id,
-			  &dev_priv->gpu_error.flags);
-	}
-
-	clear_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags);
-	wake_up_all(&dev_priv->gpu_error.reset_queue);
-
-out:
-	intel_runtime_pm_put(dev_priv, wakeref);
-}
-
 /* Called from drm generic code, passed 'crtc' which
  * we use as a pipe index
  */
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 1e158eb8cb97..e23611433695 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -29,6 +29,7 @@
 #include <linux/sched/signal.h>
 
 #include "i915_drv.h"
+#include "i915_reset.h"
 
 static const char *i915_fence_get_driver_name(struct dma_fence *fence)
 {
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
new file mode 100644
index 000000000000..e2e40b44a9a8
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -0,0 +1,1389 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008-2018 Intel Corporation
+ */
+
+#include <linux/sched/mm.h>
+
+#include "i915_drv.h"
+#include "i915_gpu_error.h"
+#include "i915_reset.h"
+
+#include "intel_guc.h"
+
+static void engine_skip_context(struct i915_request *rq)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	struct i915_gem_context *hung_ctx = rq->gem_context;
+	struct i915_timeline *timeline = rq->timeline;
+	unsigned long flags;
+
+	GEM_BUG_ON(timeline == &engine->timeline);
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock(&timeline->lock);
+
+	list_for_each_entry_continue(rq, &engine->timeline.requests, link)
+		if (rq->gem_context == hung_ctx)
+			i915_request_skip(rq, -EIO);
+
+	list_for_each_entry(rq, &timeline->requests, link)
+		i915_request_skip(rq, -EIO);
+
+	spin_unlock(&timeline->lock);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+}
+
+static void client_mark_guilty(struct drm_i915_file_private *file_priv,
+			       const struct i915_gem_context *ctx)
+{
+	unsigned int score;
+	unsigned long prev_hang;
+
+	if (i915_gem_context_is_banned(ctx))
+		score = I915_CLIENT_SCORE_CONTEXT_BAN;
+	else
+		score = 0;
+
+	prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
+	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
+		score += I915_CLIENT_SCORE_HANG_FAST;
+
+	if (score) {
+		atomic_add(score, &file_priv->ban_score);
+
+		DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
+				 ctx->name, score,
+				 atomic_read(&file_priv->ban_score));
+	}
+}
+
+static void context_mark_guilty(struct i915_gem_context *ctx)
+{
+	unsigned int score;
+	bool banned, bannable;
+
+	atomic_inc(&ctx->guilty_count);
+
+	bannable = i915_gem_context_is_bannable(ctx);
+	score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
+	banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
+
+	/* Cool contexts don't accumulate client ban score */
+	if (!bannable)
+		return;
+
+	if (banned) {
+		DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
+				 ctx->name, atomic_read(&ctx->guilty_count),
+				 score);
+		i915_gem_context_set_banned(ctx);
+	}
+
+	if (!IS_ERR_OR_NULL(ctx->file_priv))
+		client_mark_guilty(ctx->file_priv, ctx);
+}
+
+static void context_mark_innocent(struct i915_gem_context *ctx)
+{
+	atomic_inc(&ctx->active_count);
+}
+
+static void gen3_stop_engine(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	const u32 base = engine->mmio_base;
+
+	if (intel_engine_stop_cs(engine))
+		DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", engine->name);
+
+	I915_WRITE_FW(RING_HEAD(base), I915_READ_FW(RING_TAIL(base)));
+	POSTING_READ_FW(RING_HEAD(base)); /* paranoia */
+
+	I915_WRITE_FW(RING_HEAD(base), 0);
+	I915_WRITE_FW(RING_TAIL(base), 0);
+	POSTING_READ_FW(RING_TAIL(base));
+
+	/* The ring must be empty before it is disabled */
+	I915_WRITE_FW(RING_CTL(base), 0);
+
+	/* Check acts as a post */
+	if (I915_READ_FW(RING_HEAD(base)) != 0)
+		DRM_DEBUG_DRIVER("%s: ring head not parked\n",
+				 engine->name);
+}
+
+static void i915_stop_engines(struct drm_i915_private *i915,
+			      unsigned int engine_mask)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	if (INTEL_GEN(i915) < 3)
+		return;
+
+	for_each_engine_masked(engine, i915, engine_mask, id)
+		gen3_stop_engine(engine);
+}
+
+static bool i915_in_reset(struct pci_dev *pdev)
+{
+	u8 gdrst;
+
+	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
+	return gdrst & GRDOM_RESET_STATUS;
+}
+
+static int i915_do_reset(struct drm_i915_private *i915,
+			 unsigned int engine_mask,
+			 unsigned int retry)
+{
+	struct pci_dev *pdev = i915->drm.pdev;
+	int err;
+
+	/* Assert reset for at least 20 usec, and wait for acknowledgement. */
+	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
+	usleep_range(50, 200);
+	err = wait_for(i915_in_reset(pdev), 500);
+
+	/* Clear the reset request. */
+	pci_write_config_byte(pdev, I915_GDRST, 0);
+	usleep_range(50, 200);
+	if (!err)
+		err = wait_for(!i915_in_reset(pdev), 500);
+
+	return err;
+}
+
+static bool g4x_reset_complete(struct pci_dev *pdev)
+{
+	u8 gdrst;
+
+	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
+	return (gdrst & GRDOM_RESET_ENABLE) == 0;
+}
+
+static int g33_do_reset(struct drm_i915_private *i915,
+			unsigned int engine_mask,
+			unsigned int retry)
+{
+	struct pci_dev *pdev = i915->drm.pdev;
+
+	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
+	return wait_for(g4x_reset_complete(pdev), 500);
+}
+
+static int g4x_do_reset(struct drm_i915_private *dev_priv,
+			unsigned int engine_mask,
+			unsigned int retry)
+{
+	struct pci_dev *pdev = dev_priv->drm.pdev;
+	int ret;
+
+	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
+	I915_WRITE(VDECCLK_GATE_D,
+		   I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
+	POSTING_READ(VDECCLK_GATE_D);
+
+	pci_write_config_byte(pdev, I915_GDRST,
+			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
+	ret =  wait_for(g4x_reset_complete(pdev), 500);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
+		goto out;
+	}
+
+	pci_write_config_byte(pdev, I915_GDRST,
+			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
+	ret =  wait_for(g4x_reset_complete(pdev), 500);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
+		goto out;
+	}
+
+out:
+	pci_write_config_byte(pdev, I915_GDRST, 0);
+
+	I915_WRITE(VDECCLK_GATE_D,
+		   I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
+	POSTING_READ(VDECCLK_GATE_D);
+
+	return ret;
+}
+
+static int ironlake_do_reset(struct drm_i915_private *dev_priv,
+			     unsigned int engine_mask,
+			     unsigned int retry)
+{
+	int ret;
+
+	I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
+	ret = intel_wait_for_register(dev_priv,
+				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
+				      500);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
+		goto out;
+	}
+
+	I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
+	ret = intel_wait_for_register(dev_priv,
+				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
+				      500);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
+		goto out;
+	}
+
+out:
+	I915_WRITE(ILK_GDSR, 0);
+	POSTING_READ(ILK_GDSR);
+	return ret;
+}
+
+/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
+static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv,
+				u32 hw_domain_mask)
+{
+	int err;
+
+	/*
+	 * GEN6_GDRST is not in the gt power well, no need to check
+	 * for fifo space for the write or forcewake the chip for
+	 * the read
+	 */
+	I915_WRITE_FW(GEN6_GDRST, hw_domain_mask);
+
+	/* Wait for the device to ack the reset requests */
+	err = __intel_wait_for_register_fw(dev_priv,
+					   GEN6_GDRST, hw_domain_mask, 0,
+					   500, 0,
+					   NULL);
+	if (err)
+		DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
+				 hw_domain_mask);
+
+	return err;
+}
+
+static int gen6_reset_engines(struct drm_i915_private *i915,
+			      unsigned int engine_mask,
+			      unsigned int retry)
+{
+	struct intel_engine_cs *engine;
+	const u32 hw_engine_mask[I915_NUM_ENGINES] = {
+		[RCS] = GEN6_GRDOM_RENDER,
+		[BCS] = GEN6_GRDOM_BLT,
+		[VCS] = GEN6_GRDOM_MEDIA,
+		[VCS2] = GEN8_GRDOM_MEDIA2,
+		[VECS] = GEN6_GRDOM_VECS,
+	};
+	u32 hw_mask;
+
+	if (engine_mask == ALL_ENGINES) {
+		hw_mask = GEN6_GRDOM_FULL;
+	} else {
+		unsigned int tmp;
+
+		hw_mask = 0;
+		for_each_engine_masked(engine, i915, engine_mask, tmp)
+			hw_mask |= hw_engine_mask[engine->id];
+	}
+
+	return gen6_hw_domain_reset(i915, hw_mask);
+}
+
+static u32 gen11_lock_sfc(struct drm_i915_private *dev_priv,
+			  struct intel_engine_cs *engine)
+{
+	u8 vdbox_sfc_access = RUNTIME_INFO(dev_priv)->vdbox_sfc_access;
+	i915_reg_t sfc_forced_lock, sfc_forced_lock_ack;
+	u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit;
+	i915_reg_t sfc_usage;
+	u32 sfc_usage_bit;
+	u32 sfc_reset_bit;
+
+	switch (engine->class) {
+	case VIDEO_DECODE_CLASS:
+		if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
+			return 0;
+
+		sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
+		sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
+
+		sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine);
+		sfc_forced_lock_ack_bit  = GEN11_VCS_SFC_LOCK_ACK_BIT;
+
+		sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine);
+		sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT;
+		sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance);
+		break;
+
+	case VIDEO_ENHANCEMENT_CLASS:
+		sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
+		sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
+
+		sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine);
+		sfc_forced_lock_ack_bit  = GEN11_VECS_SFC_LOCK_ACK_BIT;
+
+		sfc_usage = GEN11_VECS_SFC_USAGE(engine);
+		sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT;
+		sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance);
+		break;
+
+	default:
+		return 0;
+	}
+
+	/*
+	 * Tell the engine that a software reset is going to happen. The engine
+	 * will then try to force lock the SFC (if currently locked, it will
+	 * remain so until we tell the engine it is safe to unlock; if currently
+	 * unlocked, it will ignore this and all new lock requests). If SFC
+	 * ends up being locked to the engine we want to reset, we have to reset
+	 * it as well (we will unlock it once the reset sequence is completed).
+	 */
+	I915_WRITE_FW(sfc_forced_lock,
+		      I915_READ_FW(sfc_forced_lock) | sfc_forced_lock_bit);
+
+	if (__intel_wait_for_register_fw(dev_priv,
+					 sfc_forced_lock_ack,
+					 sfc_forced_lock_ack_bit,
+					 sfc_forced_lock_ack_bit,
+					 1000, 0, NULL)) {
+		DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
+		return 0;
+	}
+
+	if (I915_READ_FW(sfc_usage) & sfc_usage_bit)
+		return sfc_reset_bit;
+
+	return 0;
+}
+
+static void gen11_unlock_sfc(struct drm_i915_private *dev_priv,
+			     struct intel_engine_cs *engine)
+{
+	u8 vdbox_sfc_access = RUNTIME_INFO(dev_priv)->vdbox_sfc_access;
+	i915_reg_t sfc_forced_lock;
+	u32 sfc_forced_lock_bit;
+
+	switch (engine->class) {
+	case VIDEO_DECODE_CLASS:
+		if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
+			return;
+
+		sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
+		sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
+		break;
+
+	case VIDEO_ENHANCEMENT_CLASS:
+		sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
+		sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
+		break;
+
+	default:
+		return;
+	}
+
+	I915_WRITE_FW(sfc_forced_lock,
+		      I915_READ_FW(sfc_forced_lock) & ~sfc_forced_lock_bit);
+}
+
+static int gen11_reset_engines(struct drm_i915_private *i915,
+			       unsigned int engine_mask,
+			       unsigned int retry)
+{
+	const u32 hw_engine_mask[I915_NUM_ENGINES] = {
+		[RCS] = GEN11_GRDOM_RENDER,
+		[BCS] = GEN11_GRDOM_BLT,
+		[VCS] = GEN11_GRDOM_MEDIA,
+		[VCS2] = GEN11_GRDOM_MEDIA2,
+		[VCS3] = GEN11_GRDOM_MEDIA3,
+		[VCS4] = GEN11_GRDOM_MEDIA4,
+		[VECS] = GEN11_GRDOM_VECS,
+		[VECS2] = GEN11_GRDOM_VECS2,
+	};
+	struct intel_engine_cs *engine;
+	unsigned int tmp;
+	u32 hw_mask;
+	int ret;
+
+	BUILD_BUG_ON(VECS2 + 1 != I915_NUM_ENGINES);
+
+	if (engine_mask == ALL_ENGINES) {
+		hw_mask = GEN11_GRDOM_FULL;
+	} else {
+		hw_mask = 0;
+		for_each_engine_masked(engine, i915, engine_mask, tmp) {
+			hw_mask |= hw_engine_mask[engine->id];
+			hw_mask |= gen11_lock_sfc(i915, engine);
+		}
+	}
+
+	ret = gen6_hw_domain_reset(i915, hw_mask);
+
+	if (engine_mask != ALL_ENGINES)
+		for_each_engine_masked(engine, i915, engine_mask, tmp)
+			gen11_unlock_sfc(i915, engine);
+
+	return ret;
+}
+
+static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
+		      _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
+
+	ret = __intel_wait_for_register_fw(dev_priv,
+					   RING_RESET_CTL(engine->mmio_base),
+					   RESET_CTL_READY_TO_RESET,
+					   RESET_CTL_READY_TO_RESET,
+					   700, 0,
+					   NULL);
+	if (ret)
+		DRM_ERROR("%s: reset request timeout\n", engine->name);
+
+	return ret;
+}
+
+static void gen8_engine_reset_cancel(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
+		      _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
+}
+
+static int gen8_reset_engines(struct drm_i915_private *i915,
+			      unsigned int engine_mask,
+			      unsigned int retry)
+{
+	struct intel_engine_cs *engine;
+	const bool reset_non_ready = retry >= 1;
+	unsigned int tmp;
+	int ret;
+
+	for_each_engine_masked(engine, i915, engine_mask, tmp) {
+		ret = gen8_engine_reset_prepare(engine);
+		if (ret && !reset_non_ready)
+			goto skip_reset;
+
+		/*
+		 * If this is not the first failed attempt to prepare,
+		 * we decide to proceed anyway.
+		 *
+		 * By doing so we risk context corruption and with
+		 * some gens (kbl), possible system hang if reset
+		 * happens during active bb execution.
+		 *
+		 * We rather take context corruption instead of
+		 * failed reset with a wedged driver/gpu. And
+		 * active bb execution case should be covered by
+		 * i915_stop_engines we have before the reset.
+		 */
+	}
+
+	if (INTEL_GEN(i915) >= 11)
+		ret = gen11_reset_engines(i915, engine_mask, retry);
+	else
+		ret = gen6_reset_engines(i915, engine_mask, retry);
+
+skip_reset:
+	for_each_engine_masked(engine, i915, engine_mask, tmp)
+		gen8_engine_reset_cancel(engine);
+
+	return ret;
+}
+
+typedef int (*reset_func)(struct drm_i915_private *,
+			  unsigned int engine_mask,
+			  unsigned int retry);
+
+static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)
+{
+	if (!i915_modparams.reset)
+		return NULL;
+
+	if (INTEL_GEN(i915) >= 8)
+		return gen8_reset_engines;
+	else if (INTEL_GEN(i915) >= 6)
+		return gen6_reset_engines;
+	else if (INTEL_GEN(i915) >= 5)
+		return ironlake_do_reset;
+	else if (IS_G4X(i915))
+		return g4x_do_reset;
+	else if (IS_G33(i915) || IS_PINEVIEW(i915))
+		return g33_do_reset;
+	else if (INTEL_GEN(i915) >= 3)
+		return i915_do_reset;
+	else
+		return NULL;
+}
+
+int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)
+{
+	reset_func reset = intel_get_gpu_reset(i915);
+	int retry;
+	int ret;
+
+	/*
+	 * We want to perform per-engine reset from atomic context (e.g.
+	 * softirq), which imposes the constraint that we cannot sleep.
+	 * However, experience suggests that spending a bit of time waiting
+	 * for a reset helps in various cases, so for a full-device reset
+	 * we apply the opposite rule and wait if we want to. As we should
+	 * always follow up a failed per-engine reset with a full device reset,
+	 * being a little faster, stricter and more error prone for the
+	 * atomic case seems an acceptable compromise.
+	 *
+	 * Unfortunately this leads to a bimodal routine, when the goal was
+	 * to have a single reset function that worked for resetting any
+	 * number of engines simultaneously.
+	 */
+	might_sleep_if(engine_mask == ALL_ENGINES);
+
+	/*
+	 * If the power well sleeps during the reset, the reset
+	 * request may be dropped and never completes (causing -EIO).
+	 */
+	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
+	for (retry = 0; retry < 3; retry++) {
+		/*
+		 * We stop engines, otherwise we might get failed reset and a
+		 * dead gpu (on elk). Also as modern gpu as kbl can suffer
+		 * from system hang if batchbuffer is progressing when
+		 * the reset is issued, regardless of READY_TO_RESET ack.
+		 * Thus assume it is best to stop engines on all gens
+		 * where we have a gpu reset.
+		 *
+		 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
+		 *
+		 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
+		 *
+		 * FIXME: Wa for more modern gens needs to be validated
+		 */
+		i915_stop_engines(i915, engine_mask);
+
+		ret = -ENODEV;
+		if (reset) {
+			GEM_TRACE("engine_mask=%x\n", engine_mask);
+			ret = reset(i915, engine_mask, retry);
+		}
+		if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES)
+			break;
+
+		cond_resched();
+	}
+	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
+
+	return ret;
+}
+
+bool intel_has_gpu_reset(struct drm_i915_private *i915)
+{
+	return intel_get_gpu_reset(i915);
+}
+
+bool intel_has_reset_engine(struct drm_i915_private *i915)
+{
+	return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2;
+}
+
+int intel_reset_guc(struct drm_i915_private *i915)
+{
+	u32 guc_domain =
+		INTEL_GEN(i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
+	int ret;
+
+	GEM_BUG_ON(!HAS_GUC(i915));
+
+	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
+	ret = gen6_hw_domain_reset(i915, guc_domain);
+	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
+
+	return ret;
+}
+
+/*
+ * Ensure the irq handler finishes, and is not run again.
+ * Also return the active request so that we only search for it once.
+ */
+static struct i915_request *
+reset_prepare_engine(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	/*
+	 * During the reset sequence, we must prevent the engine from
+	 * entering RC6. As the context state is undefined until we restart
+	 * the engine, if it does enter RC6 during the reset, the state
+	 * written to the powercontext is undefined and so we may lose
+	 * GPU state upon resume, i.e. fail to restart after a reset.
+	 */
+	intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
+
+	rq = engine->reset.prepare(engine);
+	if (rq && rq->fence.error == -EIO)
+		rq = ERR_PTR(-EIO); /* Previous reset failed! */
+
+	return rq;
+}
+
+static int reset_prepare(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	struct i915_request *rq;
+	enum intel_engine_id id;
+	int err = 0;
+
+	for_each_engine(engine, i915, id) {
+		rq = reset_prepare_engine(engine);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			continue;
+		}
+
+		engine->hangcheck.active_request = rq;
+	}
+
+	i915_gem_revoke_fences(i915);
+	intel_uc_sanitize(i915);
+
+	return err;
+}
+
+/* Returns the request if it was guilty of the hang */
+static struct i915_request *
+reset_request(struct intel_engine_cs *engine,
+	      struct i915_request *rq,
+	      bool stalled)
+{
+	/*
+	 * The guilty request will get skipped on a hung engine.
+	 *
+	 * Users of client default contexts do not rely on logical
+	 * state preserved between batches, so it is safe to execute
+	 * queued requests following the hang. Non-default contexts
+	 * rely on preserved state, so skipping a batch loses the
+	 * evolution of the state and it must be considered corrupted.
+	 * Executing more queued batches on top of corrupted state is
+	 * risky. But we take the risk by trying to advance through
+	 * the queued requests in order to make the client behaviour
+	 * more predictable around resets, by not throwing away a random
+	 * number of batches it has prepared for execution. Sophisticated
+	 * clients can use gem_reset_stats_ioctl and dma fence status
+	 * (exported via the sync_file info ioctl on explicit fences) to
+	 * observe when they lose the context state and should rebuild
+	 * accordingly.
+	 *
+	 * The context ban, and ultimately the client ban, mechanisms are safety
+	 * valves if client submission ends up resulting in nothing more than
+	 * subsequent hangs.
+	 */
+
+	if (i915_request_completed(rq)) {
+		GEM_TRACE("%s pardoned global=%d (fence %llx:%lld), current %d\n",
+			  engine->name, rq->global_seqno,
+			  rq->fence.context, rq->fence.seqno,
+			  intel_engine_get_seqno(engine));
+		stalled = false;
+	}
+
+	if (stalled) {
+		context_mark_guilty(rq->gem_context);
+		i915_request_skip(rq, -EIO);
+
+		/* If this context is now banned, skip all pending requests. */
+		if (i915_gem_context_is_banned(rq->gem_context))
+			engine_skip_context(rq);
+	} else {
+		/*
+		 * Since this is not the hung engine, it may have advanced
+		 * since the hang declaration. Double check by refinding
+		 * the active request at the time of the reset.
+		 */
+		rq = i915_gem_find_active_request(engine);
+		if (rq) {
+			unsigned long flags;
+
+			context_mark_innocent(rq->gem_context);
+			dma_fence_set_error(&rq->fence, -EAGAIN);
+
+			/* Rewind the engine to replay the incomplete rq */
+			spin_lock_irqsave(&engine->timeline.lock, flags);
+			rq = list_prev_entry(rq, link);
+			if (&rq->link == &engine->timeline.requests)
+				rq = NULL;
+			spin_unlock_irqrestore(&engine->timeline.lock, flags);
+		}
+	}
+
+	return rq;
+}
+
+static void reset_engine(struct intel_engine_cs *engine,
+			 struct i915_request *rq,
+			 bool stalled)
+{
+	if (rq)
+		rq = reset_request(engine, rq, stalled);
+
+	/* Setup the CS to resume from the breadcrumb of the hung request */
+	engine->reset.reset(engine, rq);
+}
+
+static void gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	i915_retire_requests(i915);
+
+	for_each_engine(engine, i915, id) {
+		struct intel_context *ce;
+
+		reset_engine(engine,
+			     engine->hangcheck.active_request,
+			     stalled_mask & ENGINE_MASK(id));
+		ce = fetch_and_zero(&engine->last_retired_context);
+		if (ce)
+			intel_context_unpin(ce);
+
+		/*
+		 * Ostensibly, we always want a context loaded for powersaving,
+		 * so if the engine is idle after the reset, send a request
+		 * to load our scratch kernel_context.
+		 *
+		 * More mysteriously, if we leave the engine idle after a reset,
+		 * the next userspace batch may hang, with what appears to be
+		 * an incoherent read by the CS (presumably stale TLB). An
+		 * empty request appears sufficient to paper over the glitch.
+		 */
+		if (intel_engine_is_idle(engine)) {
+			struct i915_request *rq;
+
+			rq = i915_request_alloc(engine, i915->kernel_context);
+			if (!IS_ERR(rq))
+				i915_request_add(rq);
+		}
+	}
+
+	i915_gem_restore_fences(i915);
+}
+
+static void reset_finish_engine(struct intel_engine_cs *engine)
+{
+	engine->reset.finish(engine);
+
+	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
+}
+
+static void reset_finish(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	for_each_engine(engine, i915, id) {
+		engine->hangcheck.active_request = NULL;
+		reset_finish_engine(engine);
+	}
+}
+
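+/*
+ * Once the device is wedged, every submission lands here: the fence is
+ * marked -EIO and the request is completed immediately, unblocking any
+ * waiters rather than leaving them stuck on a dead GPU.
+ */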
+static void nop_submit_request(struct i915_request *request)
+{
+	unsigned long flags;
+
+	GEM_TRACE("%s fence %llx:%lld -> -EIO\n",
+		  request->engine->name,
+		  request->fence.context, request->fence.seqno);
+	dma_fence_set_error(&request->fence, -EIO);
+
+	spin_lock_irqsave(&request->engine->timeline.lock, flags);
+	__i915_request_submit(request);
+	intel_engine_write_global_seqno(request->engine, request->global_seqno);
+	spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
+}
+
+void i915_gem_set_wedged(struct drm_i915_private *i915)
+{
+	struct i915_gpu_error *error = &i915->gpu_error;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	mutex_lock(&error->wedge_mutex);
+	if (test_bit(I915_WEDGED, &error->flags)) {
+		mutex_unlock(&error->wedge_mutex);
+		return;
+	}
+
+	if (GEM_SHOW_DEBUG()) {
+		struct drm_printer p = drm_debug_printer(__func__);
+
+		for_each_engine(engine, i915, id)
+			intel_engine_dump(engine, &p, "%s\n", engine->name);
+	}
+
+	GEM_TRACE("start\n");
+
+	/*
+	 * First, stop submission to hw, but do not yet complete requests by
+	 * rolling the global seqno forward (since this would complete requests
+	 * for which we haven't set the fence error to EIO yet).
+	 */
+	for_each_engine(engine, i915, id)
+		reset_prepare_engine(engine);
+
+	/* Even if the GPU reset fails, it should still stop the engines */
+	if (INTEL_GEN(i915) >= 5)
+		intel_gpu_reset(i915, ALL_ENGINES);
+
+	for_each_engine(engine, i915, id) {
+		engine->submit_request = nop_submit_request;
+		engine->schedule = NULL;
+	}
+	i915->caps.scheduler = 0;
+
+	/*
+	 * Make sure no request can slip through without getting completed by
+	 * either this call here to intel_engine_write_global_seqno, or the one
+	 * in nop_submit_request.
+	 */
+	synchronize_rcu();
+
+	/* Mark all executing requests as skipped */
+	for_each_engine(engine, i915, id)
+		engine->cancel_requests(engine);
+
+	for_each_engine(engine, i915, id) {
+		reset_finish_engine(engine);
+		intel_engine_wakeup(engine);
+	}
+
+	smp_mb__before_atomic();
+	set_bit(I915_WEDGED, &error->flags);
+
+	GEM_TRACE("end\n");
+	mutex_unlock(&error->wedge_mutex);
+
+	wake_up_all(&error->reset_queue);
+}
+
+bool i915_gem_unset_wedged(struct drm_i915_private *i915)
+{
+	struct i915_gpu_error *error = &i915->gpu_error;
+	struct i915_timeline *tl;
+	bool ret = false;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	if (!test_bit(I915_WEDGED, &error->flags))
+		return true;
+
+	if (!i915->gt.scratch) /* Never fully initialised, recovery impossible */
+		return false;
+
+	mutex_lock(&error->wedge_mutex);
+
+	GEM_TRACE("start\n");
+
+	/*
+	 * Before unwedging, make sure that all pending operations
+	 * are flushed and errored out - we may have requests waiting upon
+	 * third party fences. We marked all inflight requests as -EIO, and
+	 * every execbuf since then has returned -EIO; for consistency we
+	 * want all the currently pending requests to also be marked as
+	 * -EIO, which is done inside our nop_submit_request - and so we
+	 * must wait.
+	 *
+	 * No more can be submitted until we reset the wedged bit.
+	 */
+	list_for_each_entry(tl, &i915->gt.timelines, link) {
+		struct i915_request *rq;
+
+		rq = i915_gem_active_peek(&tl->last_request,
+					  &i915->drm.struct_mutex);
+		if (!rq)
+			continue;
+
+		/*
+		 * We can't use our normal waiter as we want to
+		 * avoid recursively trying to handle the current
+		 * reset. The basic dma_fence_default_wait() installs
+		 * a callback for dma_fence_signal(), which is
+		 * triggered by our nop handler (indirectly, the
+		 * callback enables the signaler thread which is
+		 * woken by the nop_submit_request() advancing the seqno
+		 * and when the seqno passes the fence, the signaler
+		 * then signals the fence waking us up).
+		 */
+		if (dma_fence_default_wait(&rq->fence, true,
+					   MAX_SCHEDULE_TIMEOUT) < 0)
+			goto unlock;
+	}
+	i915_retire_requests(i915);
+	GEM_BUG_ON(i915->gt.active_requests);
+
+	intel_engines_sanitize(i915, false);
+
+	/*
+	 * Undo nop_submit_request. We prevent all new i915 requests from
+	 * being queued (by disallowing execbuf whilst wedged) so having
+	 * waited for all active requests above, we know the system is idle
+	 * and do not have to worry about a thread being inside
+	 * engine->submit_request() as we swap over. So unlike installing
+	 * the nop_submit_request on reset, we can do this from normal
+	 * context and do not require stop_machine().
+	 */
+	intel_engines_reset_default_submission(i915);
+	i915_gem_contexts_lost(i915);
+
+	GEM_TRACE("end\n");
+
+	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
+	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+	ret = true;
+unlock:
+	mutex_unlock(&i915->gpu_error.wedge_mutex);
+
+	return ret;
+}
+
+/**
+ * i915_reset - reset chip after a hang
+ * @i915: #drm_i915_private to reset
+ * @stalled_mask: mask of the stalled engines with the guilty requests
+ * @reason: user error message for why we are resetting
+ *
+ * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
+ * on failure.
+ *
+ * Caller must hold the struct_mutex.
+ *
+ * Procedure is fairly simple:
+ *   - reset the chip using the reset reg
+ *   - re-init context state
+ *   - re-init hardware status page
+ *   - re-init ring buffer
+ *   - re-init interrupt state
+ *   - re-init display
+ */
+void i915_reset(struct drm_i915_private *i915,
+		unsigned int stalled_mask,
+		const char *reason)
+{
+	struct i915_gpu_error *error = &i915->gpu_error;
+	int ret;
+	int i;
+
+	GEM_TRACE("flags=%lx\n", error->flags);
+
+	might_sleep();
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	assert_rpm_wakelock_held(i915);
+	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
+
+	if (!test_bit(I915_RESET_HANDOFF, &error->flags))
+		return;
+
+	/* Clear any previous failed attempts at recovery. Time to try again. */
+	if (!i915_gem_unset_wedged(i915))
+		goto wakeup;
+
+	if (reason)
+		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
+	error->reset_count++;
+
+	ret = reset_prepare(i915);
+	if (ret) {
+		dev_err(i915->drm.dev, "GPU recovery failed\n");
+		goto taint;
+	}
+
+	if (!intel_has_gpu_reset(i915)) {
+		if (i915_modparams.reset)
+			dev_err(i915->drm.dev, "GPU reset not supported\n");
+		else
+			DRM_DEBUG_DRIVER("GPU reset disabled\n");
+		goto error;
+	}
+
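+	/* Retry the full-device reset a few times before giving up */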
+	for (i = 0; i < 3; i++) {
+		ret = intel_gpu_reset(i915, ALL_ENGINES);
+		if (ret == 0)
+			break;
+
+		msleep(100);
+	}
+	if (ret) {
+		dev_err(i915->drm.dev, "Failed to reset chip\n");
+		goto taint;
+	}
+
+	/* Ok, now get things going again... */
+
+	/*
+	 * Everything depends on having the GTT running, so we need to start
+	 * there.
+	 */
+	ret = i915_ggtt_enable_hw(i915);
+	if (ret) {
+		DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n",
+			  ret);
+		goto error;
+	}
+
+	gt_reset(i915, stalled_mask);
+	intel_overlay_reset(i915);
+
+	/*
+	 * Next we need to restore the context, but we don't use those
+	 * yet either...
+	 *
+	 * Ring buffer needs to be re-initialized in the KMS case, or if X
+	 * was running at the time of the reset (i.e. we weren't VT
+	 * switched away).
+	 */
+	ret = i915_gem_init_hw(i915);
+	if (ret) {
+		DRM_ERROR("Failed to initialise HW following reset (%d)\n",
+			  ret);
+		goto error;
+	}
+
+	i915_queue_hangcheck(i915);
+
+finish:
+	reset_finish(i915);
+wakeup:
+	clear_bit(I915_RESET_HANDOFF, &error->flags);
+	wake_up_bit(&error->flags, I915_RESET_HANDOFF);
+	return;
+
+taint:
+	/*
+	 * History tells us that if we cannot reset the GPU now, we
+	 * never will. This then impacts everything that is run
+	 * subsequently. On failing the reset, we mark the driver
+	 * as wedged, preventing further execution on the GPU.
+	 * We also want to go one step further and add a taint to the
+	 * kernel so that any subsequent faults can be traced back to
+	 * this failure. This is important for CI, where if the
+	 * GPU/driver fails we would like to reboot and restart testing
+	 * rather than continue on into oblivion. For everyone else,
+	 * the system should still plod along, but they have been warned!
+	 */
+	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+error:
+	i915_gem_set_wedged(i915);
+	i915_retire_requests(i915);
+	goto finish;
+}
+
+static inline int intel_gt_reset_engine(struct drm_i915_private *i915,
+					struct intel_engine_cs *engine)
+{
+	return intel_gpu_reset(i915, intel_engine_flag(engine));
+}
+
+/**
+ * i915_reset_engine - reset GPU engine to recover from a hang
+ * @engine: engine to reset
+ * @msg: reason for GPU reset; or NULL for no dev_notice()
+ *
+ * Reset a specific GPU engine. Useful if a hang is detected.
+ * Returns zero on successful reset or otherwise an error code.
+ *
+ * Procedure is:
+ *  - identify the request that caused the hang and drop it
+ *  - reset engine (which will force the engine to idle)
+ *  - re-init/configure engine
+ */
+int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
+{
+	struct i915_gpu_error *error = &engine->i915->gpu_error;
+	struct i915_request *active_request;
+	int ret;
+
+	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
+	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
+
+	active_request = reset_prepare_engine(engine);
+	if (IS_ERR_OR_NULL(active_request)) {
+		/* Either the previous reset failed, or we pardon the reset. */
+		ret = PTR_ERR(active_request);
+		goto out;
+	}
+
+	if (msg)
+		dev_notice(engine->i915->drm.dev,
+			   "Resetting %s for %s\n", engine->name, msg);
+	error->reset_engine_count[engine->id]++;
+
+	if (!engine->i915->guc.execbuf_client)
+		ret = intel_gt_reset_engine(engine->i915, engine);
+	else
+		ret = intel_guc_reset_engine(&engine->i915->guc, engine);
+	if (ret) {
+		/* If we fail here, we expect to fallback to a global reset */
+		DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
+				 engine->i915->guc.execbuf_client ? "GuC " : "",
+				 engine->name, ret);
+		goto out;
+	}
+
+	/*
+	 * The request that caused the hang is stuck on elsp, we know the
+	 * active request and can drop it, adjust head to skip the offending
+	 * request to resume executing remaining requests in the queue.
+	 */
+	reset_engine(engine, active_request, true);
+
+	/*
+	 * The engine and its registers (and workarounds in case of render)
+	 * have been reset to their default values. Follow the init_ring
+	 * process to program RING_MODE, HWSP and re-enable submission.
+	 */
+	ret = engine->init_hw(engine);
+	if (ret)
+		goto out;
+
+out:
+	intel_engine_cancel_stop_cs(engine);
+	reset_finish_engine(engine);
+	return ret;
+}
+
+static void i915_reset_device(struct drm_i915_private *i915,
+			      u32 engine_mask,
+			      const char *reason)
+{
+	struct i915_gpu_error *error = &i915->gpu_error;
+	struct kobject *kobj = &i915->drm.primary->kdev->kobj;
+	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
+	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
+	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
+	struct i915_wedge_me w;
+
+	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
+
+	DRM_DEBUG_DRIVER("resetting chip\n");
+	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
+
+	/* Use a watchdog to ensure that our reset completes */
+	i915_wedge_on_timeout(&w, i915, 5 * HZ) {
+		intel_prepare_reset(i915);
+
+		error->reason = reason;
+		error->stalled_mask = engine_mask;
+
+		/* Signal that locked waiters should reset the GPU */
+		smp_mb__before_atomic();
+		set_bit(I915_RESET_HANDOFF, &error->flags);
+		wake_up_all(&error->wait_queue);
+
+		/*
+		 * Wait for anyone holding the lock to wakeup, without
+		 * blocking indefinitely on struct_mutex.
+		 */
+		do {
+			if (mutex_trylock(&i915->drm.struct_mutex)) {
+				i915_reset(i915, engine_mask, reason);
+				mutex_unlock(&i915->drm.struct_mutex);
+			}
+		} while (wait_on_bit_timeout(&error->flags,
+					     I915_RESET_HANDOFF,
+					     TASK_UNINTERRUPTIBLE,
+					     1));
+
+		error->stalled_mask = 0;
+		error->reason = NULL;
+
+		intel_finish_reset(i915);
+	}
+
+	if (!test_bit(I915_WEDGED, &error->flags))
+		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
+}
+
+void i915_clear_error_registers(struct drm_i915_private *dev_priv)
+{
+	u32 eir;
+
+	if (!IS_GEN(dev_priv, 2))
+		I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER));
+
+	if (INTEL_GEN(dev_priv) < 4)
+		I915_WRITE(IPEIR, I915_READ(IPEIR));
+	else
+		I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965));
+
+	I915_WRITE(EIR, I915_READ(EIR));
+	eir = I915_READ(EIR);
+	if (eir) {
+		/*
+		 * some errors might have become stuck,
+		 * mask them.
+		 */
+		DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
+		I915_WRITE(EMR, I915_READ(EMR) | eir);
+		I915_WRITE(IIR, I915_MASTER_ERROR_INTERRUPT);
+	}
+
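+	/* Clear the RING_FAULT_VALID bit on any faults recorded by the hang */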
+	if (INTEL_GEN(dev_priv) >= 8) {
+		I915_WRITE(GEN8_RING_FAULT_REG,
+			   I915_READ(GEN8_RING_FAULT_REG) & ~RING_FAULT_VALID);
+		POSTING_READ(GEN8_RING_FAULT_REG);
+	} else if (INTEL_GEN(dev_priv) >= 6) {
+		struct intel_engine_cs *engine;
+		enum intel_engine_id id;
+
+		for_each_engine(engine, dev_priv, id) {
+			I915_WRITE(RING_FAULT_REG(engine),
+				   I915_READ(RING_FAULT_REG(engine)) &
+				   ~RING_FAULT_VALID);
+		}
+		POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS]));
+	}
+}
+
+/**
+ * i915_handle_error - handle a gpu error
+ * @i915: i915 device private
+ * @engine_mask: mask representing engines that are hung
+ * @flags: control flags
+ * @fmt: Error message format string
+ *
+ * Do some basic checking of register state at error time and
+ * dump it to the syslog.  Also call i915_capture_error_state() to make
+ * sure we get a record and make it available in debugfs.  Fire a uevent
+ * so userspace knows something bad happened (should trigger collection
+ * of a ring dump etc.).
+ */
+void i915_handle_error(struct drm_i915_private *i915,
+		       u32 engine_mask,
+		       unsigned long flags,
+		       const char *fmt, ...)
+{
+	struct intel_engine_cs *engine;
+	intel_wakeref_t wakeref;
+	unsigned int tmp;
+	char error_msg[80];
+	char *msg = NULL;
+
+	if (fmt) {
+		va_list args;
+
+		va_start(args, fmt);
+		vscnprintf(error_msg, sizeof(error_msg), fmt, args);
+		va_end(args);
+
+		msg = error_msg;
+	}
+
+	/*
+	 * In most cases it's guaranteed that we get here with an RPM
+	 * reference held, for example because there is a pending GPU
+	 * request that won't finish until the reset is done. This
+	 * isn't the case at least when we get here by doing a
+	 * simulated reset via debugfs, so get an RPM reference.
+	 */
+	wakeref = intel_runtime_pm_get(i915);
+
+	engine_mask &= INTEL_INFO(i915)->ring_mask;
+
+	if (flags & I915_ERROR_CAPTURE) {
+		i915_capture_error_state(i915, engine_mask, msg);
+		i915_clear_error_registers(i915);
+	}
+
+	/*
+	 * Try engine reset when available. We fall back to full reset if
+	 * single reset fails.
+	 */
+	if (intel_has_reset_engine(i915) &&
+	    !i915_terminally_wedged(&i915->gpu_error)) {
+		for_each_engine_masked(engine, i915, engine_mask, tmp) {
+			BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
+			if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+					     &i915->gpu_error.flags))
+				continue;
+
+			if (i915_reset_engine(engine, msg) == 0)
+				engine_mask &= ~intel_engine_flag(engine);
+
+			clear_bit(I915_RESET_ENGINE + engine->id,
+				  &i915->gpu_error.flags);
+			wake_up_bit(&i915->gpu_error.flags,
+				    I915_RESET_ENGINE + engine->id);
+		}
+	}
+
+	if (!engine_mask)
+		goto out;
+
+	/* Full reset needs the mutex, so prevent any other user attempting one. */
+	if (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) {
+		wait_event(i915->gpu_error.reset_queue,
+			   !test_bit(I915_RESET_BACKOFF,
+				     &i915->gpu_error.flags));
+		goto out;
+	}
+
+	/* Prevent any other reset-engine attempt. */
+	for_each_engine(engine, i915, tmp) {
+		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+					&i915->gpu_error.flags))
+			wait_on_bit(&i915->gpu_error.flags,
+				    I915_RESET_ENGINE + engine->id,
+				    TASK_UNINTERRUPTIBLE);
+	}
+
+	i915_reset_device(i915, engine_mask, msg);
+
+	for_each_engine(engine, i915, tmp) {
+		clear_bit(I915_RESET_ENGINE + engine->id,
+			  &i915->gpu_error.flags);
+	}
+
+	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
+	wake_up_all(&i915->gpu_error.reset_queue);
+
+out:
+	intel_runtime_pm_put(i915, wakeref);
+}
+
+static void i915_wedge_me(struct work_struct *work)
+{
+	struct i915_wedge_me *w = container_of(work, typeof(*w), work.work);
+
+	dev_err(w->i915->drm.dev,
+		"%s timed out, cancelling all in-flight rendering.\n",
+		w->name);
+	i915_gem_set_wedged(w->i915);
+}
+
+void __i915_init_wedge(struct i915_wedge_me *w,
+		       struct drm_i915_private *i915,
+		       long timeout,
+		       const char *name)
+{
+	w->i915 = i915;
+	w->name = name;
+
+	INIT_DELAYED_WORK_ONSTACK(&w->work, i915_wedge_me);
+	schedule_delayed_work(&w->work, timeout);
+}
+
+void __i915_fini_wedge(struct i915_wedge_me *w)
+{
+	cancel_delayed_work_sync(&w->work);
+	destroy_delayed_work_on_stack(&w->work);
+	w->i915 = NULL;
+}
diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h
new file mode 100644
index 000000000000..b6a519bde67d
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_reset.h
@@ -0,0 +1,56 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008-2018 Intel Corporation
+ */
+
+#ifndef I915_RESET_H
+#define I915_RESET_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct intel_engine_cs;
+struct intel_guc;
+
+__printf(4, 5)
+void i915_handle_error(struct drm_i915_private *i915,
+		       u32 engine_mask,
+		       unsigned long flags,
+		       const char *fmt, ...);
+#define I915_ERROR_CAPTURE BIT(0)
+
+void i915_clear_error_registers(struct drm_i915_private *i915);
+
+void i915_reset(struct drm_i915_private *i915,
+		unsigned int stalled_mask,
+		const char *reason);
+int i915_reset_engine(struct intel_engine_cs *engine,
+		      const char *reason);
+
+bool intel_has_gpu_reset(struct drm_i915_private *i915);
+bool intel_has_reset_engine(struct drm_i915_private *i915);
+
+int intel_gpu_reset(struct drm_i915_private *i915, u32 engine_mask);
+
+int intel_reset_guc(struct drm_i915_private *i915);
+
+struct i915_wedge_me {
+	struct delayed_work work;
+	struct drm_i915_private *i915;
+	const char *name;
+};
+
+void __i915_init_wedge(struct i915_wedge_me *w,
+		       struct drm_i915_private *i915,
+		       long timeout,
+		       const char *name);
+void __i915_fini_wedge(struct i915_wedge_me *w);
+
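+/*
+ * Run the body exactly once between __i915_init_wedge() and
+ * __i915_fini_wedge(); fini clears (W)->i915, terminating the loop.
+ */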
+#define i915_wedge_on_timeout(W, DEV, TIMEOUT)				\
+	for (__i915_init_wedge((W), (DEV), (TIMEOUT), __func__);	\
+	     (W)->i915;							\
+	     __i915_fini_wedge((W)))
+
+#endif /* I915_RESET_H */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index ff9d404a33c8..db52e28fdf8f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -32,13 +32,7 @@
 #include <linux/vgaarb.h>
 #include <drm/drm_edid.h>
 #include <drm/drmP.h>
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "intel_dsi.h"
-#include "i915_trace.h"
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_dp_helper.h>
@@ -49,6 +43,15 @@
 #include <linux/intel-iommu.h>
 #include <linux/reservation.h>
 
+#include "intel_drv.h"
+#include "intel_dsi.h"
+#include "intel_frontbuffer.h"
+
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+#include "i915_reset.h"
+#include "i915_trace.h"
+
 /* Primary plane formats for gen <= 3 */
 static const uint32_t i8xx_primary_formats[] = {
 	DRM_FORMAT_C8,
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index bf4dae2649ab..9c943bb95cb9 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -25,6 +25,7 @@
 #include <drm/drm_print.h>
 
 #include "i915_drv.h"
+#include "i915_reset.h"
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
 
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 0f1c4f9ebfd8..744220296653 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -192,4 +192,7 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask)
 	spin_unlock_irq(&guc->irq_lock);
 }
 
+int intel_guc_reset_engine(struct intel_guc *guc,
+			   struct intel_engine_cs *engine);
+
 #endif
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index 51e9efec5116..7dc11fcb13de 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -23,6 +23,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_reset.h"
 
 static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
 {
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 731b82afe636..e711eb3268bc 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -26,6 +26,7 @@
 #include "intel_guc_submission.h"
 #include "intel_guc.h"
 #include "i915_drv.h"
+#include "i915_reset.h"
 
 static void guc_free_load_err_log(struct intel_guc *guc);
 
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 681ea532585e..e88f0252d77e 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1715,372 +1715,6 @@ int i915_reg_read_ioctl(struct drm_device *dev,
 	return ret;
 }
 
-static void gen3_stop_engine(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	const u32 base = engine->mmio_base;
-
-	if (intel_engine_stop_cs(engine))
-		DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", engine->name);
-
-	I915_WRITE_FW(RING_HEAD(base), I915_READ_FW(RING_TAIL(base)));
-	POSTING_READ_FW(RING_HEAD(base)); /* paranoia */
-
-	I915_WRITE_FW(RING_HEAD(base), 0);
-	I915_WRITE_FW(RING_TAIL(base), 0);
-	POSTING_READ_FW(RING_TAIL(base));
-
-	/* The ring must be empty before it is disabled */
-	I915_WRITE_FW(RING_CTL(base), 0);
-
-	/* Check acts as a post */
-	if (I915_READ_FW(RING_HEAD(base)) != 0)
-		DRM_DEBUG_DRIVER("%s: ring head not parked\n",
-				 engine->name);
-}
-
-static void i915_stop_engines(struct drm_i915_private *dev_priv,
-			      unsigned int engine_mask)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	if (INTEL_GEN(dev_priv) < 3)
-		return;
-
-	for_each_engine_masked(engine, dev_priv, engine_mask, id)
-		gen3_stop_engine(engine);
-}
-
-static bool i915_in_reset(struct pci_dev *pdev)
-{
-	u8 gdrst;
-
-	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
-	return gdrst & GRDOM_RESET_STATUS;
-}
-
-static int i915_do_reset(struct drm_i915_private *dev_priv,
-			 unsigned int engine_mask,
-			 unsigned int retry)
-{
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	int err;
-
-	/* Assert reset for at least 20 usec, and wait for acknowledgement. */
-	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
-	usleep_range(50, 200);
-	err = wait_for(i915_in_reset(pdev), 500);
-
-	/* Clear the reset request. */
-	pci_write_config_byte(pdev, I915_GDRST, 0);
-	usleep_range(50, 200);
-	if (!err)
-		err = wait_for(!i915_in_reset(pdev), 500);
-
-	return err;
-}
-
-static bool g4x_reset_complete(struct pci_dev *pdev)
-{
-	u8 gdrst;
-
-	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
-	return (gdrst & GRDOM_RESET_ENABLE) == 0;
-}
-
-static int g33_do_reset(struct drm_i915_private *dev_priv,
-			unsigned int engine_mask,
-			unsigned int retry)
-{
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-
-	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
-	return wait_for(g4x_reset_complete(pdev), 500);
-}
-
-static int g4x_do_reset(struct drm_i915_private *dev_priv,
-			unsigned int engine_mask,
-			unsigned int retry)
-{
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	int ret;
-
-	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
-	I915_WRITE(VDECCLK_GATE_D,
-		   I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
-	POSTING_READ(VDECCLK_GATE_D);
-
-	pci_write_config_byte(pdev, I915_GDRST,
-			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
-	ret =  wait_for(g4x_reset_complete(pdev), 500);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
-		goto out;
-	}
-
-	pci_write_config_byte(pdev, I915_GDRST,
-			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
-	ret =  wait_for(g4x_reset_complete(pdev), 500);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
-		goto out;
-	}
-
-out:
-	pci_write_config_byte(pdev, I915_GDRST, 0);
-
-	I915_WRITE(VDECCLK_GATE_D,
-		   I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
-	POSTING_READ(VDECCLK_GATE_D);
-
-	return ret;
-}
-
-static int ironlake_do_reset(struct drm_i915_private *dev_priv,
-			     unsigned int engine_mask,
-			     unsigned int retry)
-{
-	int ret;
-
-	I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
-	ret = intel_wait_for_register(dev_priv,
-				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
-				      500);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
-		goto out;
-	}
-
-	I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
-	ret = intel_wait_for_register(dev_priv,
-				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
-				      500);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
-		goto out;
-	}
-
-out:
-	I915_WRITE(ILK_GDSR, 0);
-	POSTING_READ(ILK_GDSR);
-	return ret;
-}
-
-/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
-static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv,
-				u32 hw_domain_mask)
-{
-	int err;
-
-	/* GEN6_GDRST is not in the gt power well, no need to check
-	 * for fifo space for the write or forcewake the chip for
-	 * the read
-	 */
-	__raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask);
-
-	/* Wait for the device to ack the reset requests */
-	err = __intel_wait_for_register_fw(dev_priv,
-					   GEN6_GDRST, hw_domain_mask, 0,
-					   500, 0,
-					   NULL);
-	if (err)
-		DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
-				 hw_domain_mask);
-
-	return err;
-}
-
-/**
- * gen6_reset_engines - reset individual engines
- * @dev_priv: i915 device
- * @engine_mask: mask of intel_ring_flag() engines or ALL_ENGINES for full reset
- * @retry: the count of of previous attempts to reset.
- *
- * This function will reset the individual engines that are set in engine_mask.
- * If you provide ALL_ENGINES as mask, full global domain reset will be issued.
- *
- * Note: It is responsibility of the caller to handle the difference between
- * asking full domain reset versus reset for all available individual engines.
- *
- * Returns 0 on success, nonzero on error.
- */
-static int gen6_reset_engines(struct drm_i915_private *dev_priv,
-			      unsigned int engine_mask,
-			      unsigned int retry)
-{
-	struct intel_engine_cs *engine;
-	const u32 hw_engine_mask[I915_NUM_ENGINES] = {
-		[RCS] = GEN6_GRDOM_RENDER,
-		[BCS] = GEN6_GRDOM_BLT,
-		[VCS] = GEN6_GRDOM_MEDIA,
-		[VCS2] = GEN8_GRDOM_MEDIA2,
-		[VECS] = GEN6_GRDOM_VECS,
-	};
-	u32 hw_mask;
-
-	if (engine_mask == ALL_ENGINES) {
-		hw_mask = GEN6_GRDOM_FULL;
-	} else {
-		unsigned int tmp;
-
-		hw_mask = 0;
-		for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
-			hw_mask |= hw_engine_mask[engine->id];
-	}
-
-	return gen6_hw_domain_reset(dev_priv, hw_mask);
-}
-
-static u32 gen11_lock_sfc(struct drm_i915_private *dev_priv,
-			  struct intel_engine_cs *engine)
-{
-	u8 vdbox_sfc_access = RUNTIME_INFO(dev_priv)->vdbox_sfc_access;
-	i915_reg_t sfc_forced_lock, sfc_forced_lock_ack;
-	u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit;
-	i915_reg_t sfc_usage;
-	u32 sfc_usage_bit;
-	u32 sfc_reset_bit;
-
-	switch (engine->class) {
-	case VIDEO_DECODE_CLASS:
-		if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
-			return 0;
-
-		sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
-		sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
-
-		sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine);
-		sfc_forced_lock_ack_bit  = GEN11_VCS_SFC_LOCK_ACK_BIT;
-
-		sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine);
-		sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT;
-		sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance);
-		break;
-
-	case VIDEO_ENHANCEMENT_CLASS:
-		sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
-		sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
-
-		sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine);
-		sfc_forced_lock_ack_bit  = GEN11_VECS_SFC_LOCK_ACK_BIT;
-
-		sfc_usage = GEN11_VECS_SFC_USAGE(engine);
-		sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT;
-		sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance);
-		break;
-
-	default:
-		return 0;
-	}
-
-	/*
-	 * Tell the engine that a software reset is going to happen. The engine
-	 * will then try to force lock the SFC (if currently locked, it will
-	 * remain so until we tell the engine it is safe to unlock; if currently
-	 * unlocked, it will ignore this and all new lock requests). If SFC
-	 * ends up being locked to the engine we want to reset, we have to reset
-	 * it as well (we will unlock it once the reset sequence is completed).
-	 */
-	I915_WRITE_FW(sfc_forced_lock,
-		      I915_READ_FW(sfc_forced_lock) | sfc_forced_lock_bit);
-
-	if (__intel_wait_for_register_fw(dev_priv,
-					 sfc_forced_lock_ack,
-					 sfc_forced_lock_ack_bit,
-					 sfc_forced_lock_ack_bit,
-					 1000, 0, NULL)) {
-		DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
-		return 0;
-	}
-
-	if (I915_READ_FW(sfc_usage) & sfc_usage_bit)
-		return sfc_reset_bit;
-
-	return 0;
-}
-
-static void gen11_unlock_sfc(struct drm_i915_private *dev_priv,
-			     struct intel_engine_cs *engine)
-{
-	u8 vdbox_sfc_access = RUNTIME_INFO(dev_priv)->vdbox_sfc_access;
-	i915_reg_t sfc_forced_lock;
-	u32 sfc_forced_lock_bit;
-
-	switch (engine->class) {
-	case VIDEO_DECODE_CLASS:
-		if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
-			return;
-
-		sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
-		sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
-		break;
-
-	case VIDEO_ENHANCEMENT_CLASS:
-		sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
-		sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
-		break;
-
-	default:
-		return;
-	}
-
-	I915_WRITE_FW(sfc_forced_lock,
-		      I915_READ_FW(sfc_forced_lock) & ~sfc_forced_lock_bit);
-}
-
-/**
- * gen11_reset_engines - reset individual engines
- * @dev_priv: i915 device
- * @engine_mask: mask of intel_ring_flag() engines or ALL_ENGINES for full reset
- *
- * This function will reset the individual engines that are set in engine_mask.
- * If you provide ALL_ENGINES as mask, full global domain reset will be issued.
- *
- * Note: It is responsibility of the caller to handle the difference between
- * asking full domain reset versus reset for all available individual engines.
- *
- * Returns 0 on success, nonzero on error.
- */
-static int gen11_reset_engines(struct drm_i915_private *dev_priv,
-			       unsigned int engine_mask)
-{
-	const u32 hw_engine_mask[I915_NUM_ENGINES] = {
-		[RCS] = GEN11_GRDOM_RENDER,
-		[BCS] = GEN11_GRDOM_BLT,
-		[VCS] = GEN11_GRDOM_MEDIA,
-		[VCS2] = GEN11_GRDOM_MEDIA2,
-		[VCS3] = GEN11_GRDOM_MEDIA3,
-		[VCS4] = GEN11_GRDOM_MEDIA4,
-		[VECS] = GEN11_GRDOM_VECS,
-		[VECS2] = GEN11_GRDOM_VECS2,
-	};
-	struct intel_engine_cs *engine;
-	unsigned int tmp;
-	u32 hw_mask;
-	int ret;
-
-	BUILD_BUG_ON(VECS2 + 1 != I915_NUM_ENGINES);
-
-	if (engine_mask == ALL_ENGINES) {
-		hw_mask = GEN11_GRDOM_FULL;
-	} else {
-		hw_mask = 0;
-		for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
-			hw_mask |= hw_engine_mask[engine->id];
-			hw_mask |= gen11_lock_sfc(dev_priv, engine);
-		}
-	}
-
-	ret = gen6_hw_domain_reset(dev_priv, hw_mask);
-
-	if (engine_mask != ALL_ENGINES)
-		for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
-			gen11_unlock_sfc(dev_priv, engine);
-
-	return ret;
-}
-
 /**
  * __intel_wait_for_register_fw - wait until register matches expected state
  * @dev_priv: the i915 device
@@ -2191,196 +1825,6 @@ int __intel_wait_for_register(struct drm_i915_private *dev_priv,
 	return ret;
 }
 
-static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
-		      _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
-
-	ret = __intel_wait_for_register_fw(dev_priv,
-					   RING_RESET_CTL(engine->mmio_base),
-					   RESET_CTL_READY_TO_RESET,
-					   RESET_CTL_READY_TO_RESET,
-					   700, 0,
-					   NULL);
-	if (ret)
-		DRM_ERROR("%s: reset request timeout\n", engine->name);
-
-	return ret;
-}
-
-static void gen8_engine_reset_cancel(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
-		      _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
-}
-
-static int reset_engines(struct drm_i915_private *i915,
-			 unsigned int engine_mask,
-			 unsigned int retry)
-{
-	if (INTEL_GEN(i915) >= 11)
-		return gen11_reset_engines(i915, engine_mask);
-	else
-		return gen6_reset_engines(i915, engine_mask, retry);
-}
-
-static int gen8_reset_engines(struct drm_i915_private *dev_priv,
-			      unsigned int engine_mask,
-			      unsigned int retry)
-{
-	struct intel_engine_cs *engine;
-	const bool reset_non_ready = retry >= 1;
-	unsigned int tmp;
-	int ret;
-
-	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
-		ret = gen8_engine_reset_prepare(engine);
-		if (ret && !reset_non_ready)
-			goto skip_reset;
-
-		/*
-		 * If this is not the first failed attempt to prepare,
-		 * we decide to proceed anyway.
-		 *
-		 * By doing so we risk context corruption and with
-		 * some gens (kbl), possible system hang if reset
-		 * happens during active bb execution.
-		 *
-		 * We rather take context corruption instead of
-		 * failed reset with a wedged driver/gpu. And
-		 * active bb execution case should be covered by
-		 * i915_stop_engines we have before the reset.
-		 */
-	}
-
-	ret = reset_engines(dev_priv, engine_mask, retry);
-
-skip_reset:
-	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
-		gen8_engine_reset_cancel(engine);
-
-	return ret;
-}
-
-typedef int (*reset_func)(struct drm_i915_private *,
-			  unsigned int engine_mask, unsigned int retry);
-
-static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv)
-{
-	if (!i915_modparams.reset)
-		return NULL;
-
-	if (INTEL_GEN(dev_priv) >= 8)
-		return gen8_reset_engines;
-	else if (INTEL_GEN(dev_priv) >= 6)
-		return gen6_reset_engines;
-	else if (IS_GEN(dev_priv, 5))
-		return ironlake_do_reset;
-	else if (IS_G4X(dev_priv))
-		return g4x_do_reset;
-	else if (IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
-		return g33_do_reset;
-	else if (INTEL_GEN(dev_priv) >= 3)
-		return i915_do_reset;
-	else
-		return NULL;
-}
-
-int intel_gpu_reset(struct drm_i915_private *dev_priv,
-		    const unsigned int engine_mask)
-{
-	reset_func reset = intel_get_gpu_reset(dev_priv);
-	unsigned int retry;
-	int ret;
-
-	GEM_BUG_ON(!engine_mask);
-
-	/*
-	 * We want to perform per-engine reset from atomic context (e.g.
-	 * softirq), which imposes the constraint that we cannot sleep.
-	 * However, experience suggests that spending a bit of time waiting
-	 * for a reset helps in various cases, so for a full-device reset
-	 * we apply the opposite rule and wait if we want to. As we should
-	 * always follow up a failed per-engine reset with a full device reset,
-	 * being a little faster, stricter and more error prone for the
-	 * atomic case seems an acceptable compromise.
-	 *
-	 * Unfortunately this leads to a bimodal routine, when the goal was
-	 * to have a single reset function that worked for resetting any
-	 * number of engines simultaneously.
-	 */
-	might_sleep_if(engine_mask == ALL_ENGINES);
-
-	/*
-	 * If the power well sleeps during the reset, the reset
-	 * request may be dropped and never completes (causing -EIO).
-	 */
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-	for (retry = 0; retry < 3; retry++) {
-
-		/*
-		 * We stop engines, otherwise we might get failed reset and a
-		 * dead gpu (on elk). Also as modern gpu as kbl can suffer
-		 * from system hang if batchbuffer is progressing when
-		 * the reset is issued, regardless of READY_TO_RESET ack.
-		 * Thus assume it is best to stop engines on all gens
-		 * where we have a gpu reset.
-		 *
-		 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
-		 *
-		 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
-		 *
-		 * FIXME: Wa for more modern gens needs to be validated
-		 */
-		i915_stop_engines(dev_priv, engine_mask);
-
-		ret = -ENODEV;
-		if (reset) {
-			ret = reset(dev_priv, engine_mask, retry);
-			GEM_TRACE("engine_mask=%x, ret=%d, retry=%d\n",
-				  engine_mask, ret, retry);
-		}
-		if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES)
-			break;
-
-		cond_resched();
-	}
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-
-	return ret;
-}
-
-bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)
-{
-	return intel_get_gpu_reset(dev_priv) != NULL;
-}
-
-bool intel_has_reset_engine(struct drm_i915_private *dev_priv)
-{
-	return (INTEL_INFO(dev_priv)->has_reset_engine &&
-		i915_modparams.reset >= 2);
-}
-
-int intel_reset_guc(struct drm_i915_private *dev_priv)
-{
-	u32 guc_domain = INTEL_GEN(dev_priv) >= 11 ? GEN11_GRDOM_GUC :
-						     GEN9_GRDOM_GUC;
-	int ret;
-
-	GEM_BUG_ON(!HAS_GUC(dev_priv));
-
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-	ret = gen6_hw_domain_reset(dev_priv, guc_domain);
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-
-	return ret;
-}
-
 bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv)
 {
 	return check_for_unclaimed_mmio(dev_priv);
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index e6073cd4719c..2b2ecd76c2ac 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -4,6 +4,8 @@
  * Copyright © 2018 Intel Corporation
  */
 
+#include "../i915_reset.h"
+
 #include "../i915_selftest.h"
 #include "igt_flush_test.h"
 #include "igt_spinner.h"
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index be2ffc9cd38d..321784b6fbf7 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -5,6 +5,7 @@
  */
 
 #include "../i915_selftest.h"
+#include "../i915_reset.h"
 
 #include "igt_flush_test.h"
 #include "igt_reset.h"
-- 
2.20.1

* [PATCH 27/46] drm/i915: Make all GPU resets atomic
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (24 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 26/46] drm/i915: Pull all the reset functionality together into i915_reset.c Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 11:54 ` [PATCH 28/46] drm/i915/guc: Disable global reset Chris Wilson
                   ` (24 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx

In preparation for the next few commits, make resetting the GPU atomic.
Currently, we have prepared gen6+ for atomic resetting of individual
engines, but now there is a requirement to perform the whole
device-level reset (just the register poking) from inside an atomic
context.
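
As a rough illustration only (this sketch is not part of the patch,
and poll_until_done() is a made-up name), "atomic" here means the
register polling must busy-wait rather than sleep:

static int poll_until_done(bool (*done)(void *ctx), void *ctx,
			   unsigned int timeout_us)
{
	/* Busy-wait; schedule() is forbidden in atomic context */
	while (timeout_us--) {
		if (done(ctx))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

The patch itself achieves this by switching the helpers below from
wait_for()/usleep_range() to wait_for_atomic()/udelay().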

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_reset.c             | 50 ++++++++++---------
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  4 +-
 2 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index e2e40b44a9a8..f9512e07646d 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -144,14 +144,14 @@ static int i915_do_reset(struct drm_i915_private *i915,
 
 	/* Assert reset for at least 20 usec, and wait for acknowledgement. */
 	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
-	usleep_range(50, 200);
-	err = wait_for(i915_in_reset(pdev), 500);
+	udelay(50);
+	err = wait_for_atomic(i915_in_reset(pdev), 50);
 
 	/* Clear the reset request. */
 	pci_write_config_byte(pdev, I915_GDRST, 0);
-	usleep_range(50, 200);
+	udelay(50);
 	if (!err)
-		err = wait_for(!i915_in_reset(pdev), 500);
+		err = wait_for_atomic(!i915_in_reset(pdev), 50);
 
 	return err;
 }
@@ -171,7 +171,7 @@ static int g33_do_reset(struct drm_i915_private *i915,
 	struct pci_dev *pdev = i915->drm.pdev;
 
 	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
-	return wait_for(g4x_reset_complete(pdev), 500);
+	return wait_for_atomic(g4x_reset_complete(pdev), 50);
 }
 
 static int g4x_do_reset(struct drm_i915_private *dev_priv,
@@ -182,13 +182,13 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv,
 	int ret;
 
 	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
-	I915_WRITE(VDECCLK_GATE_D,
-		   I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
-	POSTING_READ(VDECCLK_GATE_D);
+	I915_WRITE_FW(VDECCLK_GATE_D,
+		      I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
+	POSTING_READ_FW(VDECCLK_GATE_D);
 
 	pci_write_config_byte(pdev, I915_GDRST,
 			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
-	ret =  wait_for(g4x_reset_complete(pdev), 500);
+	ret =  wait_for_atomic(g4x_reset_complete(pdev), 50);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
 		goto out;
@@ -196,7 +196,7 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv,
 
 	pci_write_config_byte(pdev, I915_GDRST,
 			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
-	ret =  wait_for(g4x_reset_complete(pdev), 500);
+	ret =  wait_for_atomic(g4x_reset_complete(pdev), 50);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
 		goto out;
@@ -205,9 +205,9 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv,
 out:
 	pci_write_config_byte(pdev, I915_GDRST, 0);
 
-	I915_WRITE(VDECCLK_GATE_D,
-		   I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
-	POSTING_READ(VDECCLK_GATE_D);
+	I915_WRITE_FW(VDECCLK_GATE_D,
+		      I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
+	POSTING_READ_FW(VDECCLK_GATE_D);
 
 	return ret;
 }
@@ -218,27 +218,29 @@ static int ironlake_do_reset(struct drm_i915_private *dev_priv,
 {
 	int ret;
 
-	I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
-	ret = intel_wait_for_register(dev_priv,
-				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
-				      500);
+	I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
+	ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR,
+					   ILK_GRDOM_RESET_ENABLE, 0,
+					   5000, 0,
+					   NULL);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
 		goto out;
 	}
 
-	I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
-	ret = intel_wait_for_register(dev_priv,
-				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
-				      500);
+	I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
+	ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR,
+					   ILK_GRDOM_RESET_ENABLE, 0,
+					   5000, 0,
+					   NULL);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
 		goto out;
 	}
 
 out:
-	I915_WRITE(ILK_GDSR, 0);
-	POSTING_READ(ILK_GDSR);
+	I915_WRITE_FW(ILK_GDSR, 0);
+	POSTING_READ_FW(ILK_GDSR);
 	return ret;
 }
 
@@ -572,7 +574,9 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)
 		ret = -ENODEV;
 		if (reset) {
 			GEM_TRACE("engine_mask=%x\n", engine_mask);
+			preempt_disable();
 			ret = reset(i915, engine_mask, retry);
+			preempt_enable();
 		}
 		if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES)
 			break;
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 4a25d2a344f2..e756cbd0b1f4 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -58,8 +58,8 @@ static void mock_device_release(struct drm_device *dev)
 	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
-	cancel_delayed_work_sync(&i915->gt.retire_work);
-	cancel_delayed_work_sync(&i915->gt.idle_work);
+	drain_delayed_work(&i915->gt.retire_work);
+	drain_delayed_work(&i915->gt.idle_work);
 	i915_gem_drain_workqueue(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
-- 
2.20.1

* [PATCH 28/46] drm/i915/guc: Disable global reset
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (25 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 27/46] drm/i915: Make all GPU resets atomic Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 11:54 ` [PATCH 29/46] drm/i915: Remove GPU reset dependence on struct_mutex Chris Wilson
                   ` (23 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx

The guc (and huc) currently inextricably depend on struct_mutex for
device reinitialisation from inside the reset, and indeed taking any
mutex here is verboten (as we must be able to reset from underneath any
of our mutexes). That makes recovering the guc unviable without, for
example, reserving contiguous vma space and pages for it to use.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_reset.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index f9512e07646d..c9a844d2626f 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -590,6 +590,9 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)
 
 bool intel_has_gpu_reset(struct drm_i915_private *i915)
 {
+	if (USES_GUC(i915))
+		return false;
+
 	return intel_get_gpu_reset(i915);
 }
 
-- 
2.20.1

* [PATCH 29/46] drm/i915: Remove GPU reset dependence on struct_mutex
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (26 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 28/46] drm/i915/guc: Disable global reset Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 11:54 ` [PATCH 30/46] drm/i915: Issue engine resets onto idle engines Chris Wilson
                   ` (22 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx

Now that the submission backends are controlled via their own spinlocks,
with a wave of a magic wand we can lift the struct_mutex requirement
around GPU reset. That is, we allow the submission frontend (userspace)
to keep on submitting while we process the GPU reset as we can suspend
the backend independently.

The major change is around the backoff/handoff strategy for performing
the reset. With no mutex deadlock, we no longer have to coordinate with
any waiter, and just perform the reset immediately.
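
For illustration, a minimal sketch (not the exact code from this
patch) of the simplified flow once the handoff is gone - the context
that detects the hang owns the reset under the BACKOFF bit and
performs it directly:

	if (!test_and_set_bit(I915_RESET_BACKOFF, &error->flags)) {
		i915_reset(i915, engine_mask, msg); /* reset immediately */
		clear_bit(I915_RESET_BACKOFF, &error->flags);
		wake_up_all(&error->reset_queue);
	}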

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c           |  14 +-
 drivers/gpu/drm/i915/i915_drv.h               |   5 -
 drivers/gpu/drm/i915/i915_gem.c               |  18 +-
 drivers/gpu/drm/i915/i915_gem_fence_reg.h     |   1 -
 drivers/gpu/drm/i915/i915_gem_gtt.h           |   1 +
 drivers/gpu/drm/i915/i915_gpu_error.h         |  24 +-
 drivers/gpu/drm/i915/i915_request.c           |  47 ---
 drivers/gpu/drm/i915/i915_reset.c             | 397 ++++++++----------
 drivers/gpu/drm/i915/i915_reset.h             |   3 +
 drivers/gpu/drm/i915/intel_engine_cs.c        |   6 +-
 drivers/gpu/drm/i915/intel_guc_submission.c   |   5 +-
 drivers/gpu/drm/i915/intel_lrc.c              |  92 ++--
 drivers/gpu/drm/i915/intel_overlay.c          |   2 -
 drivers/gpu/drm/i915/intel_ringbuffer.c       |  91 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.h       |  13 +-
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |  57 +--
 .../drm/i915/selftests/intel_workarounds.c    |   3 -
 17 files changed, 317 insertions(+), 462 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 9c3664fb39de..a9bc7752da49 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1284,8 +1284,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 		seq_puts(m, "Wedged\n");
 	if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))
 		seq_puts(m, "Reset in progress: struct_mutex backoff\n");
-	if (test_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags))
-		seq_puts(m, "Reset in progress: reset handoff to waiter\n");
 	if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
 		seq_puts(m, "Waiter holding struct mutex\n");
 	if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
@@ -3914,11 +3912,6 @@ i915_wedged_set(void *data, u64 val)
 
 	i915_handle_error(i915, val, I915_ERROR_CAPTURE,
 			  "Manually set wedged engine mask = %llx", val);
-
-	wait_on_bit(&i915->gpu_error.flags,
-		    I915_RESET_HANDOFF,
-		    TASK_UNINTERRUPTIBLE);
-
 	return 0;
 }
 
@@ -4073,13 +4066,8 @@ i915_drop_caches_set(void *data, u64 val)
 		mutex_unlock(&i915->drm.struct_mutex);
 	}
 
-	if (val & DROP_RESET_ACTIVE &&
-	    i915_terminally_wedged(&i915->gpu_error)) {
+	if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(&i915->gpu_error))
 		i915_handle_error(i915, ALL_ENGINES, 0, NULL);
-		wait_on_bit(&i915->gpu_error.flags,
-			    I915_RESET_HANDOFF,
-			    TASK_UNINTERRUPTIBLE);
-	}
 
 	fs_reclaim_acquire(GFP_KERNEL);
 	if (val & DROP_BOUND)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 29c810533d03..5ef7499166b4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2984,11 +2984,6 @@ static inline bool i915_reset_backoff(struct i915_gpu_error *error)
 	return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags));
 }
 
-static inline bool i915_reset_handoff(struct i915_gpu_error *error)
-{
-	return unlikely(test_bit(I915_RESET_HANDOFF, &error->flags));
-}
-
 static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
 {
 	return unlikely(test_bit(I915_WEDGED, &error->flags));
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 747a08b8961a..83fb02dab18c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -658,11 +658,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj,
 		     struct intel_rps_client *rps_client)
 {
 	might_sleep();
-#if IS_ENABLED(CONFIG_LOCKDEP)
-	GEM_BUG_ON(debug_locks &&
-		   !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
-		   !!(flags & I915_WAIT_LOCKED));
-#endif
 	GEM_BUG_ON(timeout < 0);
 
 	timeout = i915_gem_object_wait_reservation(obj->resv,
@@ -4480,8 +4475,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 
 	GEM_TRACE("\n");
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	wakeref = intel_runtime_pm_get(i915);
 	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
 
@@ -4507,6 +4500,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
 	intel_runtime_pm_put(i915, wakeref);
 
+	mutex_lock(&i915->drm.struct_mutex);
 	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 }
@@ -4521,6 +4515,8 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	wakeref = intel_runtime_pm_get(i915);
 	intel_suspend_gt_powersave(i915);
 
+	flush_workqueue(i915->wq);
+
 	mutex_lock(&i915->drm.struct_mutex);
 
 	/*
@@ -4550,11 +4546,9 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	i915_retire_requests(i915); /* ensure we flush after wedging */
 
 	mutex_unlock(&i915->drm.struct_mutex);
+	i915_reset_flush(i915);
 
-	intel_uc_suspend(i915);
-
-	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
-	cancel_delayed_work_sync(&i915->gt.retire_work);
+	drain_delayed_work(&i915->gt.retire_work);
 
 	/*
 	 * As the idle_work is rearming if it detects a race, play safe and
@@ -4562,6 +4556,8 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	 */
 	drain_delayed_work(&i915->gt.idle_work);
 
+	intel_uc_suspend(i915);
+
 	/*
 	 * Assert that we successfully flushed all the work and
 	 * reset the GPU back to its idle, low power state.
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
index 99a31ded4dfd..09dcaf14121b 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
@@ -50,4 +50,3 @@ struct drm_i915_fence_reg {
 };
 
 #endif
-
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 9229b03d629b..a0039ea97cdc 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -39,6 +39,7 @@
 #include <linux/pagevec.h>
 
 #include "i915_request.h"
+#include "i915_reset.h"
 #include "i915_selftest.h"
 #include "i915_timeline.h"
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 604291f7762d..733723e1ea03 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -197,6 +197,8 @@ struct i915_gpu_state {
 	struct scatterlist *sgl, *fit;
 };
 
+struct i915_gpu_restart;
+
 struct i915_gpu_error {
 	/* For hangcheck timer */
 #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
@@ -247,15 +249,6 @@ struct i915_gpu_error {
 	 * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a
 	 * secondary role in preventing two concurrent global reset attempts.
 	 *
-	 * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the
-	 * struct_mutex. We try to acquire the struct_mutex in the reset worker,
-	 * but it may be held by some long running waiter (that we cannot
-	 * interrupt without causing trouble). Once we are ready to do the GPU
-	 * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If
-	 * they already hold the struct_mutex and want to participate they can
-	 * inspect the bit and do the reset directly, otherwise the worker
-	 * waits for the struct_mutex.
-	 *
 	 * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
 	 * acquire the struct_mutex to reset an engine, we need an explicit
 	 * flag to prevent two concurrent reset attempts in the same engine.
@@ -269,20 +262,13 @@ struct i915_gpu_error {
 	 */
 	unsigned long flags;
 #define I915_RESET_BACKOFF	0
-#define I915_RESET_HANDOFF	1
-#define I915_RESET_MODESET	2
-#define I915_RESET_ENGINE	3
+#define I915_RESET_MODESET	1
+#define I915_RESET_ENGINE	2
 #define I915_WEDGED		(BITS_PER_LONG - 1)
 
 	/** Number of times an engine has been reset */
 	u32 reset_engine_count[I915_NUM_ENGINES];
 
-	/** Set of stalled engines with guilty requests, in the current reset */
-	u32 stalled_mask;
-
-	/** Reason for the current *global* reset */
-	const char *reason;
-
 	struct mutex wedge_mutex; /* serialises wedging/unwedging */
 
 	/**
@@ -299,6 +285,8 @@ struct i915_gpu_error {
 
 	/* For missed irq/seqno simulation. */
 	unsigned long test_irq_rings;
+
+	struct i915_gpu_restart *restart;
 };
 
 struct drm_i915_error_state_buf {
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e23611433695..c467392f62d7 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1052,18 +1052,6 @@ static bool __i915_spin_request(const struct i915_request *rq,
 	return false;
 }
 
-static bool __i915_wait_request_check_and_reset(struct i915_request *request)
-{
-	struct i915_gpu_error *error = &request->i915->gpu_error;
-
-	if (likely(!i915_reset_handoff(error)))
-		return false;
-
-	__set_current_state(TASK_RUNNING);
-	i915_reset(request->i915, error->stalled_mask, error->reason);
-	return true;
-}
-
 /**
  * i915_request_wait - wait until execution of request has finished
  * @rq: the request to wait upon
@@ -1089,17 +1077,10 @@ long i915_request_wait(struct i915_request *rq,
 {
 	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
 		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
-	wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue;
-	DEFINE_WAIT_FUNC(reset, default_wake_function);
 	DEFINE_WAIT_FUNC(exec, default_wake_function);
 	struct intel_wait wait;
 
 	might_sleep();
-#if IS_ENABLED(CONFIG_LOCKDEP)
-	GEM_BUG_ON(debug_locks &&
-		   !!lockdep_is_held(&rq->i915->drm.struct_mutex) !=
-		   !!(flags & I915_WAIT_LOCKED));
-#endif
 	GEM_BUG_ON(timeout < 0);
 
 	if (i915_request_completed(rq))
@@ -1109,11 +1090,7 @@ long i915_request_wait(struct i915_request *rq,
 		return -ETIME;
 
 	trace_i915_request_wait_begin(rq, flags);
-
 	add_wait_queue(&rq->execute, &exec);
-	if (flags & I915_WAIT_LOCKED)
-		add_wait_queue(errq, &reset);
-
 	intel_wait_init(&wait);
 	if (flags & I915_WAIT_PRIORITY)
 		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
@@ -1124,10 +1101,6 @@ long i915_request_wait(struct i915_request *rq,
 		if (intel_wait_update_request(&wait, rq))
 			break;
 
-		if (flags & I915_WAIT_LOCKED &&
-		    __i915_wait_request_check_and_reset(rq))
-			continue;
-
 		if (signal_pending_state(state, current)) {
 			timeout = -ERESTARTSYS;
 			goto complete;
@@ -1157,9 +1130,6 @@ long i915_request_wait(struct i915_request *rq,
 		 */
 		goto wakeup;
 
-	if (flags & I915_WAIT_LOCKED)
-		__i915_wait_request_check_and_reset(rq);
-
 	for (;;) {
 		if (signal_pending_state(state, current)) {
 			timeout = -ERESTARTSYS;
@@ -1183,21 +1153,6 @@ long i915_request_wait(struct i915_request *rq,
 		if (i915_request_completed(rq))
 			break;
 
-		/*
-		 * If the GPU is hung, and we hold the lock, reset the GPU
-		 * and then check for completion. On a full reset, the engine's
-		 * HW seqno will be advanced passed us and we are complete.
-		 * If we do a partial reset, we have to wait for the GPU to
-		 * resume and update the breadcrumb.
-		 *
-		 * If we don't hold the mutex, we can just wait for the worker
-		 * to come along and update the breadcrumb (either directly
-		 * itself, or indirectly by recovering the GPU).
-		 */
-		if (flags & I915_WAIT_LOCKED &&
-		    __i915_wait_request_check_and_reset(rq))
-			continue;
-
 		/* Only spin if we know the GPU is processing this request */
 		if (__i915_spin_request(rq, wait.seqno, state, 2))
 			break;
@@ -1211,8 +1166,6 @@ long i915_request_wait(struct i915_request *rq,
 	intel_engine_remove_wait(rq->engine, &wait);
 complete:
 	__set_current_state(TASK_RUNNING);
-	if (flags & I915_WAIT_LOCKED)
-		remove_wait_queue(errq, &reset);
 	remove_wait_queue(&rq->execute, &exec);
 	trace_i915_request_wait_end(rq);
 
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index c9a844d2626f..30f669aa526a 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -5,6 +5,7 @@
  */
 
 #include <linux/sched/mm.h>
+#include <linux/stop_machine.h>
 
 #include "i915_drv.h"
 #include "i915_gpu_error.h"
@@ -17,22 +18,23 @@ static void engine_skip_context(struct i915_request *rq)
 	struct intel_engine_cs *engine = rq->engine;
 	struct i915_gem_context *hung_ctx = rq->gem_context;
 	struct i915_timeline *timeline = rq->timeline;
-	unsigned long flags;
 
+	lockdep_assert_held(&engine->timeline.lock);
 	GEM_BUG_ON(timeline == &engine->timeline);
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
 	spin_lock(&timeline->lock);
 
-	list_for_each_entry_continue(rq, &engine->timeline.requests, link)
-		if (rq->gem_context == hung_ctx)
-			i915_request_skip(rq, -EIO);
+	if (rq->global_seqno) {
+		list_for_each_entry_continue(rq,
+					     &engine->timeline.requests, link)
+			if (rq->gem_context == hung_ctx)
+				i915_request_skip(rq, -EIO);
+	}
 
 	list_for_each_entry(rq, &timeline->requests, link)
 		i915_request_skip(rq, -EIO);
 
 	spin_unlock(&timeline->lock);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static void client_mark_guilty(struct drm_i915_file_private *file_priv,
@@ -59,7 +61,7 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv,
 	}
 }
 
-static void context_mark_guilty(struct i915_gem_context *ctx)
+static bool context_mark_guilty(struct i915_gem_context *ctx)
 {
 	unsigned int score;
 	bool banned, bannable;
@@ -72,7 +74,7 @@ static void context_mark_guilty(struct i915_gem_context *ctx)
 
 	/* Cool contexts don't accumulate client ban score */
 	if (!bannable)
-		return;
+		return false;
 
 	if (banned) {
 		DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
@@ -83,6 +85,8 @@ static void context_mark_guilty(struct i915_gem_context *ctx)
 
 	if (!IS_ERR_OR_NULL(ctx->file_priv))
 		client_mark_guilty(ctx->file_priv, ctx);
+
+	return banned;
 }
 
 static void context_mark_innocent(struct i915_gem_context *ctx)
@@ -90,6 +94,21 @@ static void context_mark_innocent(struct i915_gem_context *ctx)
 	atomic_inc(&ctx->active_count);
 }
 
+void i915_reset_request(struct i915_request *rq, bool guilty)
+{
+	lockdep_assert_held(&rq->engine->timeline.lock);
+	GEM_BUG_ON(i915_request_completed(rq));
+
+	if (guilty) {
+		i915_request_skip(rq, -EIO);
+		if (context_mark_guilty(rq->gem_context))
+			engine_skip_context(rq);
+	} else {
+		dma_fence_set_error(&rq->fence, -EAGAIN);
+		context_mark_innocent(rq->gem_context);
+	}
+}
+
 static void gen3_stop_engine(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -533,22 +552,6 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)
 	int retry;
 	int ret;
 
-	/*
-	 * We want to perform per-engine reset from atomic context (e.g.
-	 * softirq), which imposes the constraint that we cannot sleep.
-	 * However, experience suggests that spending a bit of time waiting
-	 * for a reset helps in various cases, so for a full-device reset
-	 * we apply the opposite rule and wait if we want to. As we should
-	 * always follow up a failed per-engine reset with a full device reset,
-	 * being a little faster, stricter and more error prone for the
-	 * atomic case seems an acceptable compromise.
-	 *
-	 * Unfortunately this leads to a bimodal routine, when the goal was
-	 * to have a single reset function that worked for resetting any
-	 * number of engines simultaneously.
-	 */
-	might_sleep_if(engine_mask == ALL_ENGINES);
-
 	/*
 	 * If the power well sleeps during the reset, the reset
 	 * request may be dropped and never completes (causing -EIO).
@@ -580,8 +583,6 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)
 		}
 		if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES)
 			break;
-
-		cond_resched();
 	}
 	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
 
@@ -620,11 +621,8 @@ int intel_reset_guc(struct drm_i915_private *i915)
  * Ensure irq handler finishes, and not run again.
  * Also return the active request so that we only search for it once.
  */
-static struct i915_request *
-reset_prepare_engine(struct intel_engine_cs *engine)
+static void reset_prepare_engine(struct intel_engine_cs *engine)
 {
-	struct i915_request *rq;
-
 	/*
 	 * During the reset sequence, we must prevent the engine from
 	 * entering RC6. As the context state is undefined until we restart
@@ -633,162 +631,86 @@ reset_prepare_engine(struct intel_engine_cs *engine)
 	 * GPU state upon resume, i.e. fail to restart after a reset.
 	 */
 	intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
-
-	rq = engine->reset.prepare(engine);
-	if (rq && rq->fence.error == -EIO)
-		rq = ERR_PTR(-EIO); /* Previous reset failed! */
-
-	return rq;
+	engine->reset.prepare(engine);
 }
 
-static int reset_prepare(struct drm_i915_private *i915)
+static void reset_prepare(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
-	struct i915_request *rq;
 	enum intel_engine_id id;
-	int err = 0;
 
-	for_each_engine(engine, i915, id) {
-		rq = reset_prepare_engine(engine);
-		if (IS_ERR(rq)) {
-			err = PTR_ERR(rq);
-			continue;
-		}
-
-		engine->hangcheck.active_request = rq;
-	}
+	for_each_engine(engine, i915, id)
+		reset_prepare_engine(engine);
 
-	i915_gem_revoke_fences(i915);
 	intel_uc_sanitize(i915);
-
-	return err;
 }
 
-/* Returns the request if it was guilty of the hang */
-static struct i915_request *
-reset_request(struct intel_engine_cs *engine,
-	      struct i915_request *rq,
-	      bool stalled)
+static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
 {
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err;
+
 	/*
-	 * The guilty request will get skipped on a hung engine.
-	 *
-	 * Users of client default contexts do not rely on logical
-	 * state preserved between batches so it is safe to execute
-	 * queued requests following the hang. Non default contexts
-	 * rely on preserved state, so skipping a batch loses the
-	 * evolution of the state and it needs to be considered corrupted.
-	 * Executing more queued batches on top of corrupted state is
-	 * risky. But we take the risk by trying to advance through
-	 * the queued requests in order to make the client behaviour
-	 * more predictable around resets, by not throwing away random
-	 * amount of batches it has prepared for execution. Sophisticated
-	 * clients can use gem_reset_stats_ioctl and dma fence status
-	 * (exported via sync_file info ioctl on explicit fences) to observe
-	 * when it loses the context state and should rebuild accordingly.
-	 *
-	 * The context ban, and ultimately the client ban, mechanism are safety
-	 * valves if client submission ends up resulting in nothing more than
-	 * subsequent hangs.
+	 * Everything depends on having the GTT running, so we need to start
+	 * there.
 	 */
+	err = i915_ggtt_enable_hw(i915);
+	if (err)
+		return err;
 
-	if (i915_request_completed(rq)) {
-		GEM_TRACE("%s pardoned global=%d (fence %llx:%lld), current %d\n",
-			  engine->name, rq->global_seqno,
-			  rq->fence.context, rq->fence.seqno,
-			  intel_engine_get_seqno(engine));
-		stalled = false;
-	}
-
-	if (stalled) {
-		context_mark_guilty(rq->gem_context);
-		i915_request_skip(rq, -EIO);
+	for_each_engine(engine, i915, id)
+		intel_engine_reset(engine, stalled_mask & ENGINE_MASK(id));
 
-		/* If this context is now banned, skip all pending requests. */
-		if (i915_gem_context_is_banned(rq->gem_context))
-			engine_skip_context(rq);
-	} else {
-		/*
-		 * Since this is not the hung engine, it may have advanced
-		 * since the hang declaration. Double check by refinding
-		 * the active request at the time of the reset.
-		 */
-		rq = i915_gem_find_active_request(engine);
-		if (rq) {
-			unsigned long flags;
-
-			context_mark_innocent(rq->gem_context);
-			dma_fence_set_error(&rq->fence, -EAGAIN);
-
-			/* Rewind the engine to replay the incomplete rq */
-			spin_lock_irqsave(&engine->timeline.lock, flags);
-			rq = list_prev_entry(rq, link);
-			if (&rq->link == &engine->timeline.requests)
-				rq = NULL;
-			spin_unlock_irqrestore(&engine->timeline.lock, flags);
-		}
-	}
+	i915_gem_restore_fences(i915);
 
-	return rq;
+	return err;
 }
 
-static void reset_engine(struct intel_engine_cs *engine,
-			 struct i915_request *rq,
-			 bool stalled)
+static void reset_finish_engine(struct intel_engine_cs *engine)
 {
-	if (rq)
-		rq = reset_request(engine, rq, stalled);
-
-	/* Setup the CS to resume from the breadcrumb of the hung request */
-	engine->reset.reset(engine, rq);
+	engine->reset.finish(engine);
+	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
 }
 
-static void gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
+struct i915_gpu_restart {
+	struct work_struct work;
+	struct drm_i915_private *i915;
+};
+
+static void restart_work(struct work_struct *work)
 {
+	struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work);
+	struct drm_i915_private *i915 = arg->i915;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+	mutex_lock(&i915->drm.struct_mutex);
 
-	i915_retire_requests(i915);
+	smp_store_mb(i915->gpu_error.restart, NULL);
 
 	for_each_engine(engine, i915, id) {
-		struct intel_context *ce;
-
-		reset_engine(engine,
-			     engine->hangcheck.active_request,
-			     stalled_mask & ENGINE_MASK(id));
-		ce = fetch_and_zero(&engine->last_retired_context);
-		if (ce)
-			intel_context_unpin(ce);
+		struct i915_request *rq;
 
 		/*
 		 * Ostensibily, we always want a context loaded for powersaving,
 		 * so if the engine is idle after the reset, send a request
 		 * to load our scratch kernel_context.
-		 *
-		 * More mysteriously, if we leave the engine idle after a reset,
-		 * the next userspace batch may hang, with what appears to be
-		 * an incoherent read by the CS (presumably stale TLB). An
-		 * empty request appears sufficient to paper over the glitch.
 		 */
-		if (intel_engine_is_idle(engine)) {
-			struct i915_request *rq;
+		if (!intel_engine_is_idle(engine))
+			continue;
 
-			rq = i915_request_alloc(engine, i915->kernel_context);
-			if (!IS_ERR(rq))
-				i915_request_add(rq);
-		}
+		rq = i915_request_alloc(engine, i915->kernel_context);
+		if (!IS_ERR(rq))
+			i915_request_add(rq);
 	}
 
-	i915_gem_restore_fences(i915);
-}
-
-static void reset_finish_engine(struct intel_engine_cs *engine)
-{
-	engine->reset.finish(engine);
+	mutex_unlock(&i915->drm.struct_mutex);
+	intel_runtime_pm_put(i915, wakeref);
 
-	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
+	kfree(arg);
 }
 
 static void reset_finish(struct drm_i915_private *i915)
@@ -796,11 +718,30 @@ static void reset_finish(struct drm_i915_private *i915)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	for_each_engine(engine, i915, id) {
-		engine->hangcheck.active_request = NULL;
+	for_each_engine(engine, i915, id)
 		reset_finish_engine(engine);
+}
+
+static void reset_restart(struct drm_i915_private *i915)
+{
+	struct i915_gpu_restart *arg;
+
+	/*
+	 * Following the reset, ensure that we always reload context for
+	 * powersaving, and to correct engine->last_retired_context. Since
+	 * this requires us to submit a request, queue a worker to do that
+	 * task for us to evade any locking here.
+	 */
+	if (READ_ONCE(i915->gpu_error.restart))
+		return;
+
+	arg = kmalloc(sizeof(*arg), GFP_KERNEL);
+	if (arg) {
+		arg->i915 = i915;
+		INIT_WORK(&arg->work, restart_work);
+
+		WRITE_ONCE(i915->gpu_error.restart, arg);
+		queue_work(i915->wq, &arg->work);
 	}
 }
 
@@ -889,8 +830,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	struct i915_timeline *tl;
 	bool ret = false;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
 	if (!test_bit(I915_WEDGED, &error->flags))
 		return true;
 
@@ -913,9 +852,9 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	 */
 	list_for_each_entry(tl, &i915->gt.timelines, link) {
 		struct i915_request *rq;
+		long timeout;
 
-		rq = i915_gem_active_peek(&tl->last_request,
-					  &i915->drm.struct_mutex);
+		rq = i915_gem_active_get_unlocked(&tl->last_request);
 		if (!rq)
 			continue;
 
@@ -930,12 +869,12 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 		 * and when the seqno passes the fence, the signaler
 		 * then signals the fence waking us up).
 		 */
-		if (dma_fence_default_wait(&rq->fence, true,
-					   MAX_SCHEDULE_TIMEOUT) < 0)
+		timeout = dma_fence_default_wait(&rq->fence, true,
+						 MAX_SCHEDULE_TIMEOUT);
+		i915_request_put(rq);
+		if (timeout < 0)
 			goto unlock;
 	}
-	i915_retire_requests(i915);
-	GEM_BUG_ON(i915->gt.active_requests);
 
 	intel_engines_sanitize(i915, false);
 
@@ -949,7 +888,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	 * context and do not require stop_machine().
 	 */
 	intel_engines_reset_default_submission(i915);
-	i915_gem_contexts_lost(i915);
 
 	GEM_TRACE("end\n");
 
@@ -962,6 +900,43 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	return ret;
 }
 
+struct __i915_reset {
+	struct drm_i915_private *i915;
+	unsigned int stalled_mask;
+};
+
+static int __i915_reset__BKL(void *data)
+{
+	struct __i915_reset *arg = data;
+	int err;
+
+	err = intel_gpu_reset(arg->i915, ALL_ENGINES);
+	if (err)
+		return err;
+
+	return gt_reset(arg->i915, arg->stalled_mask);
+}
+
+#if 0
+#define __do_reset(fn, arg) stop_machine(fn, arg, NULL)
+#else
+#define __do_reset(fn, arg) fn(arg)
+#endif
+
+static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
+{
+	struct __i915_reset arg = { i915, stalled_mask };
+	int err, i;
+
+	err = __do_reset(__i915_reset__BKL, &arg);
+	for (i = 0; err && i < 3; i++) {
+		msleep(100);
+		err = __do_reset(__i915_reset__BKL, &arg);
+	}
+
+	return err;
+}
+
 /**
  * i915_reset - reset chip after a hang
  * @i915: #drm_i915_private to reset
@@ -987,31 +962,22 @@ void i915_reset(struct drm_i915_private *i915,
 {
 	struct i915_gpu_error *error = &i915->gpu_error;
 	int ret;
-	int i;
 
 	GEM_TRACE("flags=%lx\n", error->flags);
 
 	might_sleep();
-	lockdep_assert_held(&i915->drm.struct_mutex);
 	assert_rpm_wakelock_held(i915);
 	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
 
-	if (!test_bit(I915_RESET_HANDOFF, &error->flags))
-		return;
-
 	/* Clear any previous failed attempts at recovery. Time to try again. */
 	if (!i915_gem_unset_wedged(i915))
-		goto wakeup;
+		return;
 
 	if (reason)
 		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
 	error->reset_count++;
 
-	ret = reset_prepare(i915);
-	if (ret) {
-		dev_err(i915->drm.dev, "GPU recovery failed\n");
-		goto taint;
-	}
+	reset_prepare(i915);
 
 	if (!intel_has_gpu_reset(i915)) {
 		if (i915_modparams.reset)
@@ -1021,32 +987,11 @@ void i915_reset(struct drm_i915_private *i915,
 		goto error;
 	}
 
-	for (i = 0; i < 3; i++) {
-		ret = intel_gpu_reset(i915, ALL_ENGINES);
-		if (ret == 0)
-			break;
-
-		msleep(100);
-	}
-	if (ret) {
+	if (do_reset(i915, stalled_mask)) {
 		dev_err(i915->drm.dev, "Failed to reset chip\n");
 		goto taint;
 	}
 
-	/* Ok, now get things going again... */
-
-	/*
-	 * Everything depends on having the GTT running, so we need to start
-	 * there.
-	 */
-	ret = i915_ggtt_enable_hw(i915);
-	if (ret) {
-		DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n",
-			  ret);
-		goto error;
-	}
-
-	gt_reset(i915, stalled_mask);
 	intel_overlay_reset(i915);
 
 	/*
@@ -1068,9 +1013,8 @@ void i915_reset(struct drm_i915_private *i915,
 
 finish:
 	reset_finish(i915);
-wakeup:
-	clear_bit(I915_RESET_HANDOFF, &error->flags);
-	wake_up_bit(&error->flags, I915_RESET_HANDOFF);
+	if (!i915_terminally_wedged(error))
+		reset_restart(i915);
 	return;
 
 taint:
@@ -1089,7 +1033,6 @@ void i915_reset(struct drm_i915_private *i915,
 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 error:
 	i915_gem_set_wedged(i915);
-	i915_retire_requests(i915);
 	goto finish;
 }
 
@@ -1115,18 +1058,16 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *i915,
 int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 {
 	struct i915_gpu_error *error = &engine->i915->gpu_error;
-	struct i915_request *active_request;
 	int ret;
 
 	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
 	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
 
-	active_request = reset_prepare_engine(engine);
-	if (IS_ERR_OR_NULL(active_request)) {
-		/* Either the previous reset failed, or we pardon the reset. */
-		ret = PTR_ERR(active_request);
-		goto out;
-	}
+	if (i915_seqno_passed(intel_engine_get_seqno(engine),
+			      intel_engine_last_submit(engine)))
+		return 0;
+
+	reset_prepare_engine(engine);
 
 	if (msg)
 		dev_notice(engine->i915->drm.dev,
@@ -1150,7 +1091,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 	 * active request and can drop it, adjust head to skip the offending
 	 * request to resume executing remaining requests in the queue.
 	 */
-	reset_engine(engine, active_request, true);
+	intel_engine_reset(engine, true);
 
 	/*
 	 * The engine and its registers (and workarounds in case of render)
@@ -1187,30 +1128,7 @@ static void i915_reset_device(struct drm_i915_private *i915,
 	i915_wedge_on_timeout(&w, i915, 5 * HZ) {
 		intel_prepare_reset(i915);
 
-		error->reason = reason;
-		error->stalled_mask = engine_mask;
-
-		/* Signal that locked waiters should reset the GPU */
-		smp_mb__before_atomic();
-		set_bit(I915_RESET_HANDOFF, &error->flags);
-		wake_up_all(&error->wait_queue);
-
-		/*
-		 * Wait for anyone holding the lock to wakeup, without
-		 * blocking indefinitely on struct_mutex.
-		 */
-		do {
-			if (mutex_trylock(&i915->drm.struct_mutex)) {
-				i915_reset(i915, engine_mask, reason);
-				mutex_unlock(&i915->drm.struct_mutex);
-			}
-		} while (wait_on_bit_timeout(&error->flags,
-					     I915_RESET_HANDOFF,
-					     TASK_UNINTERRUPTIBLE,
-					     1));
-
-		error->stalled_mask = 0;
-		error->reason = NULL;
+		i915_reset(i915, engine_mask, reason);
 
 		intel_finish_reset(i915);
 	}
@@ -1366,6 +1284,25 @@ void i915_handle_error(struct drm_i915_private *i915,
 	intel_runtime_pm_put(i915, wakeref);
 }
 
+bool i915_reset_flush(struct drm_i915_private *i915)
+{
+	int err;
+
+	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
+
+	flush_workqueue(i915->wq);
+	GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart));
+
+	mutex_lock(&i915->drm.struct_mutex);
+	err = i915_gem_wait_for_idle(i915,
+				     I915_WAIT_LOCKED |
+				     I915_WAIT_FOR_IDLE_BOOST,
+				     MAX_SCHEDULE_TIMEOUT);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	return !err;
+}
+
 static void i915_wedge_me(struct work_struct *work)
 {
 	struct i915_wedge_me *w = container_of(work, typeof(*w), work.work);
diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h
index b6a519bde67d..f2d347f319df 100644
--- a/drivers/gpu/drm/i915/i915_reset.h
+++ b/drivers/gpu/drm/i915/i915_reset.h
@@ -29,6 +29,9 @@ void i915_reset(struct drm_i915_private *i915,
 int i915_reset_engine(struct intel_engine_cs *engine,
 		      const char *reason);
 
+void i915_reset_request(struct i915_request *rq, bool guilty);
+bool i915_reset_flush(struct drm_i915_private *i915);
+
 bool intel_has_gpu_reset(struct drm_i915_private *i915);
 bool intel_has_reset_engine(struct drm_i915_private *i915);
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 9c943bb95cb9..3a0b8036f173 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1074,10 +1074,8 @@ void intel_engines_sanitize(struct drm_i915_private *i915, bool force)
 	if (!reset_engines(i915) && !force)
 		return;
 
-	for_each_engine(engine, i915, id) {
-		if (engine->reset.reset)
-			engine->reset.reset(engine, NULL);
-	}
+	for_each_engine(engine, i915, id)
+		intel_engine_reset(engine, false);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index ab1c49b106f2..7217c7e3ee8d 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -834,8 +834,7 @@ static void guc_submission_tasklet(unsigned long data)
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
-static struct i915_request *
-guc_reset_prepare(struct intel_engine_cs *engine)
+static void guc_reset_prepare(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 
@@ -861,8 +860,6 @@ guc_reset_prepare(struct intel_engine_cs *engine)
 	 */
 	if (engine->i915->guc.preempt_wq)
 		flush_workqueue(engine->i915->guc.preempt_wq);
-
-	return i915_gem_find_active_request(engine);
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 644aa9251307..eb37e2f7f835 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -137,6 +137,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
+#include "i915_reset.h"
 #include "i915_vgpu.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
@@ -265,7 +266,8 @@ static void unwind_wa_tail(struct i915_request *rq)
 	assert_ring_tail_valid(rq->ring, rq->tail);
 }
 
-static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
+static struct i915_request *
+__unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq, *rn, *active = NULL;
 	struct list_head *uninitialized_var(pl);
@@ -306,6 +308,8 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		list_move_tail(&active->sched.link,
 			       i915_sched_lookup_priolist(engine, prio));
 	}
+
+	return active;
 }
 
 void
@@ -1713,11 +1717,9 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	return 0;
 }
 
-static struct i915_request *
-execlists_reset_prepare(struct intel_engine_cs *engine)
+static void execlists_reset_prepare(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct i915_request *request, *active;
 	unsigned long flags;
 
 	GEM_TRACE("%s: depth<-%d\n", engine->name,
@@ -1733,59 +1735,21 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
 	 * prevents the race.
 	 */
 	__tasklet_disable_sync_once(&execlists->tasklet);
+	GEM_BUG_ON(!reset_in_progress(execlists));
 
+	/* And flush any current direct submission. */
 	spin_lock_irqsave(&engine->timeline.lock, flags);
-
-	/*
-	 * We want to flush the pending context switches, having disabled
-	 * the tasklet above, we can assume exclusive access to the execlists.
-	 * For this allows us to catch up with an inflight preemption event,
-	 * and avoid blaming an innocent request if the stall was due to the
-	 * preemption itself.
-	 */
-	process_csb(engine);
-
-	/*
-	 * The last active request can then be no later than the last request
-	 * now in ELSP[0]. So search backwards from there, so that if the GPU
-	 * has advanced beyond the last CSB update, it will be pardoned.
-	 */
-	active = NULL;
-	request = port_request(execlists->port);
-	if (request) {
-		/*
-		 * Prevent the breadcrumb from advancing before we decide
-		 * which request is currently active.
-		 */
-		intel_engine_stop_cs(engine);
-
-		list_for_each_entry_from_reverse(request,
-						 &engine->timeline.requests,
-						 link) {
-			if (__i915_request_completed(request,
-						     request->global_seqno))
-				break;
-
-			active = request;
-		}
-	}
-
+	process_csb(engine); /* drain preemption events */
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
-	return active;
 }
 
-static void execlists_reset(struct intel_engine_cs *engine,
-			    struct i915_request *request)
+static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct i915_request *rq;
 	unsigned long flags;
 	u32 *regs;
 
-	GEM_TRACE("%s request global=%d, current=%d\n",
-		  engine->name, request ? request->global_seqno : 0,
-		  intel_engine_get_seqno(engine));
-
 	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	/*
@@ -1800,12 +1764,18 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	execlists_cancel_port_requests(execlists);
 
 	/* Push back any incomplete requests for replay after the reset. */
-	__unwind_incomplete_requests(engine);
+	rq = __unwind_incomplete_requests(engine);
 
 	/* Following the reset, we need to reload the CSB read/write pointers */
 	reset_csb_pointers(&engine->execlists);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n",
+		  engine->name,
+		  rq ? lower_32_bits(rq->global_seqno) : 0,
+		  intel_engine_get_seqno(engine),
+		  yesno(stalled));
+	if (!rq)
+		goto out_unlock;
 
 	/*
 	 * If the request was innocent, we leave the request in the ELSP
@@ -1818,8 +1788,9 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	 * and have to at least restore the RING register in the context
 	 * image back to the expected values to skip over the guilty request.
 	 */
-	if (!request || request->fence.error != -EIO)
-		return;
+	i915_reset_request(rq, stalled);
+	if (!stalled)
+		goto out_unlock;
 
 	/*
 	 * We want a simple context + ring to execute the breadcrumb update.
@@ -1829,25 +1800,23 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	 * future request will be after userspace has had the opportunity
 	 * to recreate its own state.
 	 */
-	regs = request->hw_context->lrc_reg_state;
+	regs = rq->hw_context->lrc_reg_state;
 	if (engine->pinned_default_state) {
 		memcpy(regs, /* skip restoring the vanilla PPHWSP */
 		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
 		       engine->context_size - PAGE_SIZE);
 	}
-	execlists_init_reg_state(regs,
-				 request->gem_context, engine, request->ring);
+	execlists_init_reg_state(regs, rq->gem_context, engine, rq->ring);
 
 	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
-	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma);
-
-	request->ring->head = intel_ring_wrap(request->ring, request->postfix);
-	regs[CTX_RING_HEAD + 1] = request->ring->head;
+	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(rq->ring->vma);
 
-	intel_ring_update_space(request->ring);
+	rq->ring->head = intel_ring_wrap(rq->ring, rq->postfix);
+	regs[CTX_RING_HEAD + 1] = rq->ring->head;
+	intel_ring_update_space(rq->ring);
 
-	/* Reset WaIdleLiteRestore:bdw,skl as well */
-	unwind_wa_tail(request);
+out_unlock:
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static void execlists_reset_finish(struct intel_engine_cs *engine)
@@ -1860,6 +1829,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
 	 * to sleep before we restart and reload a context.
 	 *
 	 */
+	GEM_BUG_ON(!reset_in_progress(execlists));
 	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
 		execlists->tasklet.func(execlists->tasklet.data);
 
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index c153be043078..94a87eba39f9 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -479,8 +479,6 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv)
 	if (!overlay)
 		return;
 
-	intel_overlay_release_old_vid(overlay);
-
 	overlay->old_xscale = 0;
 	overlay->old_yscale = 0;
 	overlay->crtc = NULL;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6e2661e86d18..9c664d910c02 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -34,6 +34,7 @@
 
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
+#include "i915_reset.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_workarounds.h"
@@ -708,52 +709,80 @@ static int init_ring_common(struct intel_engine_cs *engine)
 	return ret;
 }
 
-static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
+static void reset_prepare(struct intel_engine_cs *engine)
 {
 	intel_engine_stop_cs(engine);
-	return i915_gem_find_active_request(engine);
 }
 
-static void skip_request(struct i915_request *rq)
+static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 {
-	void *vaddr = rq->ring->vaddr;
+	struct i915_timeline *tl = &engine->timeline;
+	struct i915_request *pos, *rq;
+	unsigned long flags;
 	u32 head;
 
-	head = rq->infix;
-	if (rq->postfix < head) {
-		memset32(vaddr + head, MI_NOOP,
-			 (rq->ring->size - head) / sizeof(u32));
-		head = 0;
+	rq = NULL;
+	spin_lock_irqsave(&tl->lock, flags);
+	list_for_each_entry(pos, &tl->requests, link) {
+		if (!__i915_request_completed(pos, pos->global_seqno)) {
+			rq = pos;
+			break;
+		}
 	}
-	memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32));
-}
-
-static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq)
-{
-	GEM_TRACE("%s request global=%d, current=%d\n",
-		  engine->name, rq ? rq->global_seqno : 0,
-		  intel_engine_get_seqno(engine));
 
+	GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n",
+		  engine->name,
+		  rq ? lower_32_bits(rq->global_seqno) : 0,
+		  intel_engine_get_seqno(engine),
+		  yesno(stalled));
 	/*
-	 * Try to restore the logical GPU state to match the continuation
-	 * of the request queue. If we skip the context/PD restore, then
-	 * the next request may try to execute assuming that its context
-	 * is valid and loaded on the GPU and so may try to access invalid
-	 * memory, prompting repeated GPU hangs.
+	 * The guilty request will get skipped on a hung engine.
 	 *
-	 * If the request was guilty, we still restore the logical state
-	 * in case the next request requires it (e.g. the aliasing ppgtt),
-	 * but skip over the hung batch.
+	 * Users of client default contexts do not rely on logical
+	 * state preserved between batches so it is safe to execute
+	 * queued requests following the hang. Non default contexts
+	 * rely on preserved state, so skipping a batch loses the
+	 * evolution of the state and it needs to be considered corrupted.
+	 * Executing more queued batches on top of corrupted state is
+	 * risky. But we take the risk by trying to advance through
+	 * the queued requests in order to make the client behaviour
+	 * more predictable around resets, by not throwing away random
+	 * amount of batches it has prepared for execution. Sophisticated
+	 * clients can use gem_reset_stats_ioctl and dma fence status
+	 * (exported via sync_file info ioctl on explicit fences) to observe
+	 * when it loses the context state and should rebuild accordingly.
 	 *
-	 * If the request was innocent, we try to replay the request with
-	 * the restored context.
+	 * The context ban, and ultimately the client ban, mechanism are safety
+	 * valves if client submission ends up resulting in nothing more than
+	 * subsequent hangs.
 	 */
+
 	if (rq) {
-		/* If the rq hung, jump to its breadcrumb and skip the batch */
-		rq->ring->head = intel_ring_wrap(rq->ring, rq->head);
-		if (rq->fence.error == -EIO)
-			skip_request(rq);
+		/*
+		 * Try to restore the logical GPU state to match the
+		 * continuation of the request queue. If we skip the
+		 * context/PD restore, then the next request may try to execute
+		 * assuming that its context is valid and loaded on the GPU and
+		 * so may try to access invalid memory, prompting repeated GPU
+		 * hangs.
+		 *
+		 * If the request was guilty, we still restore the logical
+		 * state in case the next request requires it (e.g. the
+		 * aliasing ppgtt), but skip over the hung batch.
+		 *
+		 * If the request was innocent, we try to replay the request
+		 * with the restored context.
+		 */
+		i915_reset_request(rq, stalled);
+
+		GEM_BUG_ON(rq->ring != engine->buffer);
+		head = rq->head;
+	} else {
+		head = engine->buffer->tail;
 	}
+	engine->buffer->head = intel_ring_wrap(engine->buffer, head);
+
+	spin_unlock_irqrestore(&tl->lock, flags);
 }
 
 static void reset_finish(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 3c1366c58cf3..06850ee17087 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -124,7 +124,6 @@ struct intel_engine_hangcheck {
 	unsigned long action_timestamp;
 	int deadlock;
 	struct intel_instdone instdone;
-	struct i915_request *active_request;
 	bool stalled:1;
 	bool wedged:1;
 };
@@ -445,9 +444,8 @@ struct intel_engine_cs {
 	int		(*init_hw)(struct intel_engine_cs *engine);
 
 	struct {
-		struct i915_request *(*prepare)(struct intel_engine_cs *engine);
-		void (*reset)(struct intel_engine_cs *engine,
-			      struct i915_request *rq);
+		void (*prepare)(struct intel_engine_cs *engine);
+		void (*reset)(struct intel_engine_cs *engine, bool stalled);
 		void (*finish)(struct intel_engine_cs *engine);
 	} reset;
 
@@ -1019,6 +1017,13 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
 	return cs;
 }
 
+static inline void intel_engine_reset(struct intel_engine_cs *engine,
+				      bool stalled)
+{
+	if (engine->reset.reset)
+		engine->reset.reset(engine, stalled);
+}
+
 void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
 
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 12550b55c42f..67431355cd6e 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -363,9 +363,7 @@ static int igt_global_reset(void *arg)
 	/* Check that we can issue a global GPU reset */
 
 	igt_global_reset_lock(i915);
-	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	reset_count = i915_reset_count(&i915->gpu_error);
 
 	i915_reset(i915, ALL_ENGINES, NULL);
@@ -374,9 +372,7 @@ static int igt_global_reset(void *arg)
 		pr_err("No GPU reset recorded!\n");
 		err = -EINVAL;
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 
-	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
 	igt_global_reset_unlock(i915);
 
 	if (i915_terminally_wedged(&i915->gpu_error))
@@ -399,9 +395,7 @@ static int igt_wedged_reset(void *arg)
 	i915_gem_set_wedged(i915);
 	GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error));
 
-	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
 	i915_reset(i915, ALL_ENGINES, NULL);
-	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
 
 	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -511,7 +505,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
 				break;
 			}
 
-			if (!wait_for_idle(engine)) {
+			if (!i915_reset_flush(i915)) {
 				struct drm_printer p =
 					drm_info_printer(i915->drm.dev);
 
@@ -903,20 +897,13 @@ static int igt_reset_engines(void *arg)
 	return 0;
 }
 
-static u32 fake_hangcheck(struct i915_request *rq, u32 mask)
+static u32 fake_hangcheck(struct drm_i915_private *i915, u32 mask)
 {
-	struct i915_gpu_error *error = &rq->i915->gpu_error;
-	u32 reset_count = i915_reset_count(error);
-
-	error->stalled_mask = mask;
-
-	/* set_bit() must be after we have setup the backchannel (mask) */
-	smp_mb__before_atomic();
-	set_bit(I915_RESET_HANDOFF, &error->flags);
+	u32 count = i915_reset_count(&i915->gpu_error);
 
-	wake_up_all(&error->wait_queue);
+	i915_reset(i915, mask, NULL);
 
-	return reset_count;
+	return count;
 }
 
 static int igt_reset_wait(void *arg)
@@ -962,7 +949,7 @@ static int igt_reset_wait(void *arg)
 		goto out_rq;
 	}
 
-	reset_count = fake_hangcheck(rq, ALL_ENGINES);
+	reset_count = fake_hangcheck(i915, ALL_ENGINES);
 
 	timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10);
 	if (timeout < 0) {
@@ -972,7 +959,6 @@ static int igt_reset_wait(void *arg)
 		goto out_rq;
 	}
 
-	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
 	if (i915_reset_count(&i915->gpu_error) == reset_count) {
 		pr_err("No GPU reset recorded!\n");
 		err = -EINVAL;
@@ -1162,7 +1148,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
 	}
 
 out_reset:
-	fake_hangcheck(rq, intel_engine_flag(rq->engine));
+	fake_hangcheck(rq->i915, intel_engine_flag(rq->engine));
 
 	if (tsk) {
 		struct igt_wedge_me w;
@@ -1341,12 +1327,7 @@ static int igt_reset_queue(void *arg)
 				goto fini;
 			}
 
-			reset_count = fake_hangcheck(prev, ENGINE_MASK(id));
-
-			i915_reset(i915, ENGINE_MASK(id), NULL);
-
-			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
-					    &i915->gpu_error.flags));
+			reset_count = fake_hangcheck(i915, ENGINE_MASK(id));
 
 			if (prev->fence.error != -EIO) {
 				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
@@ -1565,6 +1546,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
 		pr_err("%s(%s): Failed to start request %llx, at %x\n",
 		       __func__, engine->name,
 		       rq->fence.seqno, hws_seqno(&h, rq));
+		i915_gem_set_wedged(i915);
 		err = -EIO;
 	}
 
@@ -1588,7 +1570,6 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
 static void force_reset(struct drm_i915_private *i915)
 {
 	i915_gem_set_wedged(i915);
-	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
 	i915_reset(i915, 0, NULL);
 }
 
@@ -1618,6 +1599,26 @@ static int igt_atomic_reset(void *arg)
 	if (i915_terminally_wedged(&i915->gpu_error))
 		goto unlock;
 
+	if (intel_has_gpu_reset(i915)) {
+		const typeof(*phases) *p;
+
+		for (p = phases; p->name; p++) {
+			GEM_TRACE("intel_gpu_reset under %s\n", p->name);
+
+			p->critical_section_begin();
+			err = intel_gpu_reset(i915, ALL_ENGINES);
+			p->critical_section_end();
+
+			if (err) {
+				pr_err("intel_gpu_reset failed under %s\n",
+				       p->name);
+				goto out;
+			}
+		}
+
+		force_reset(i915);
+	}
+
 	if (intel_has_reset_engine(i915)) {
 		struct intel_engine_cs *engine;
 		enum intel_engine_id id;
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index 321784b6fbf7..0e43a876151f 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -169,7 +169,6 @@ static int check_whitelist(struct i915_gem_context *ctx,
 
 static int do_device_reset(struct intel_engine_cs *engine)
 {
-	set_bit(I915_RESET_HANDOFF, &engine->i915->gpu_error.flags);
 	i915_reset(engine->i915, ENGINE_MASK(engine->id), "live_workarounds");
 	return 0;
 }
@@ -347,7 +346,6 @@ static int
 live_gpu_reset_gt_engine_workarounds(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
-	struct i915_gpu_error *error = &i915->gpu_error;
 	intel_wakeref_t wakeref;
 	bool ok;
 
@@ -363,7 +361,6 @@ live_gpu_reset_gt_engine_workarounds(void *arg)
 	if (!ok)
 		goto out;
 
-	set_bit(I915_RESET_HANDOFF, &error->flags);
 	i915_reset(i915, ALL_ENGINES, "live_workarounds");
 
 	ok = verify_gt_engine_wa(i915, "after reset");
-- 
2.20.1


* [PATCH 30/46] drm/i915: Issue engine resets onto idle engines
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (27 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 29/46] drm/i915: Remove GPU reset dependence on struct_mutex Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 11:54 ` [PATCH 31/46] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
                   ` (21 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx

Always perform the requested reset, even if we believe the engine is
idle. Presumably there was a reason the caller wanted the reset, and in
the near future we will lose the easy tracking for whether the engine
is idle.
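
For reference, the early exit being dropped (the first hunk below) was:

	if (i915_seqno_passed(intel_engine_get_seqno(engine),
			      intel_engine_last_submit(engine)))
		return 0;	/* engine looked idle: reset skipped */

so i915_reset_engine() now always runs the full prepare/reset/finish
sequence, and the selftests stop compensating for skipped resets.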

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_reset.c             |  4 ----
 .../gpu/drm/i915/selftests/intel_hangcheck.c  | 22 +++++--------------
 2 files changed, 6 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 30f669aa526a..3e0833221c3a 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -1063,10 +1063,6 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
 	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
 
-	if (i915_seqno_passed(intel_engine_get_seqno(engine),
-			      intel_engine_last_submit(engine)))
-		return 0;
-
 	reset_prepare_engine(engine);
 
 	if (msg)
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 67431355cd6e..4809874ab28c 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -449,8 +449,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
 
 		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
 		do {
-			u32 seqno = intel_engine_get_seqno(engine);
-
 			if (active) {
 				struct i915_request *rq;
 
@@ -479,8 +477,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
 					break;
 				}
 
-				GEM_BUG_ON(!rq->global_seqno);
-				seqno = rq->global_seqno - 1;
 				i915_request_put(rq);
 			}
 
@@ -496,11 +492,10 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
 				break;
 			}
 
-			reset_engine_count += active;
 			if (i915_reset_engine_count(&i915->gpu_error, engine) !=
-			    reset_engine_count) {
-				pr_err("%s engine reset %srecorded!\n",
-				       engine->name, active ? "not " : "");
+			    ++reset_engine_count) {
+				pr_err("%s engine reset not recorded!\n",
+				       engine->name);
 				err = -EINVAL;
 				break;
 			}
@@ -728,7 +723,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
 
 		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
 		do {
-			u32 seqno = intel_engine_get_seqno(engine);
 			struct i915_request *rq = NULL;
 
 			if (flags & TEST_ACTIVE) {
@@ -756,9 +750,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
 					err = -EIO;
 					break;
 				}
-
-				GEM_BUG_ON(!rq->global_seqno);
-				seqno = rq->global_seqno - 1;
 			}
 
 			err = i915_reset_engine(engine, NULL);
@@ -795,10 +786,9 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
 
 		reported = i915_reset_engine_count(&i915->gpu_error, engine);
 		reported -= threads[engine->id].resets;
-		if (reported != (flags & TEST_ACTIVE ? count : 0)) {
-			pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu, expected %lu reported\n",
-			       engine->name, test_name, count, reported,
-			       (flags & TEST_ACTIVE ? count : 0));
+		if (reported != count) {
+			pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n",
+			       engine->name, test_name, count, reported);
 			if (!err)
 				err = -EINVAL;
 		}
-- 
2.20.1


* [PATCH 31/46] drm/i915: Stop tracking MRU activity on VMA
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (28 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 30/46] drm/i915: Issue engine resets onto idle engines Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-16 16:27   ` Tvrtko Ursulin
  2019-01-07 11:54 ` [PATCH 32/46] drm/i915: Pull VM lists under the VM mutex Chris Wilson
                   ` (20 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx

Our goal is to remove struct_mutex and replace it with fine-grained
locking. One of the thorny issues is our eviction logic for reclaiming
space for an execbuffer (or GTT mmapping, among a few other examples).
While eviction itself is easy to move under a per-VM mutex, performing
the activity tracking is less agreeable. One solution is to do no MRU
tracking at all and instead make a simple, coarse active/inactive
evaluation during eviction, with a loose temporal ordering based on
last insertion/evaluation. That keeps all the locking constrained to
when we are manipulating the VM itself, neatly avoiding the tricky
handling of possible recursive locking during execbuf and elsewhere.

Note that discarding the MRU is unlikely to impact our efficiency at
reclaiming VM space (where we think an LRU model is best) as our
current strategy is to use random idle replacement first before doing
a search, and over time the use of softpinned 48b per-ppGTT is growing
(thereby eliminating any need to perform eviction searches, in theory
at least).
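
The scan then collapses to a single walk of vm->bound_list, rotating
entries that are still active to the tail. A condensed sketch of the
loop in i915_gem_evict_something() below (keeping only the PIN_NONBLOCK
behaviour and dropping the second -EAGAIN pass over active vmas):

	active = NULL;
	list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
		if (i915_vma_is_active(vma)) {
			if (vma == active)
				break;		/* scanned one full cycle */
			if (!active)
				active = vma;	/* note where we started */
			list_move_tail(&vma->vm_link, &vm->bound_list);
			continue;
		}
		if (mark_free(&scan, vma, flags, &eviction_list))
			goto found;
	}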

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c               | 10 +--
 drivers/gpu/drm/i915/i915_gem_evict.c         | 71 ++++++++++++-------
 drivers/gpu/drm/i915/i915_gem_gtt.c           | 15 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.h           | 26 +------
 drivers/gpu/drm/i915/i915_gem_shrinker.c      |  8 ++-
 drivers/gpu/drm/i915/i915_gem_stolen.c        |  3 +-
 drivers/gpu/drm/i915/i915_gpu_error.c         | 37 +++++-----
 drivers/gpu/drm/i915/i915_vma.c               |  9 +--
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  2 +-
 10 files changed, 84 insertions(+), 101 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 83fb02dab18c..6ed44aeee583 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -254,10 +254,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 
 	pinned = ggtt->vm.reserved;
 	mutex_lock(&dev->struct_mutex);
-	list_for_each_entry(vma, &ggtt->vm.active_list, vm_link)
-		if (i915_vma_is_pinned(vma))
-			pinned += vma->node.size;
-	list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link)
+	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
 		if (i915_vma_is_pinned(vma))
 			pinned += vma->node.size;
 	mutex_unlock(&dev->struct_mutex);
@@ -1540,13 +1537,10 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 
 	for_each_ggtt_vma(vma, obj) {
-		if (i915_vma_is_active(vma))
-			continue;
-
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
 
-		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 	}
 
 	i915 = to_i915(obj->base.dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 02b83a5ed96c..a76f65fe86be 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -127,14 +127,10 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	struct drm_i915_private *dev_priv = vm->i915;
 	struct drm_mm_scan scan;
 	struct list_head eviction_list;
-	struct list_head *phases[] = {
-		&vm->inactive_list,
-		&vm->active_list,
-		NULL,
-	}, **phase;
 	struct i915_vma *vma, *next;
 	struct drm_mm_node *node;
 	enum drm_mm_insert_mode mode;
+	struct i915_vma *active;
 	int ret;
 
 	lockdep_assert_held(&vm->i915->drm.struct_mutex);
@@ -170,17 +166,46 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	 */
 	if (!(flags & PIN_NONBLOCK))
 		i915_retire_requests(dev_priv);
-	else
-		phases[1] = NULL;
 
 search_again:
+	active = NULL;
 	INIT_LIST_HEAD(&eviction_list);
-	phase = phases;
-	do {
-		list_for_each_entry(vma, *phase, vm_link)
-			if (mark_free(&scan, vma, flags, &eviction_list))
-				goto found;
-	} while (*++phase);
+	list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
+		/*
+		 * We keep this list in a rough least-recently scanned order
+		 * of active elements (inactive elements are cheap to reap).
+		 * New entries are added to the end, and we move anything we
+		 * scan to the end. The assumption is that the working set
+		 * of applications is either steady state (and thanks to the
+		 * userspace bo cache it almost always is) or volatile and
+		 * frequently replaced after a frame, which are self-evicting!
+		 * Given that assumption, the MRU order of the scan list is
+		 * fairly static, and keeping it in least-recently scan order
+		 * is suitable.
+		 *
+		 * To notice when we complete one full cycle, we record the
+		 * first active element seen, before moving it to the tail.
+		 */
+		if (i915_vma_is_active(vma)) {
+			if (vma == active) {
+				if (flags & PIN_NONBLOCK)
+					break;
+
+				active = ERR_PTR(-EAGAIN);
+			}
+
+			if (active != ERR_PTR(-EAGAIN)) {
+				if (!active)
+					active = vma;
+
+				list_move_tail(&vma->vm_link, &vm->bound_list);
+				continue;
+			}
+		}
+
+		if (mark_free(&scan, vma, flags, &eviction_list))
+			goto found;
+	}
 
 	/* Nothing found, clean up and bail out! */
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
@@ -389,11 +414,6 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
  */
 int i915_gem_evict_vm(struct i915_address_space *vm)
 {
-	struct list_head *phases[] = {
-		&vm->inactive_list,
-		&vm->active_list,
-		NULL
-	}, **phase;
 	struct list_head eviction_list;
 	struct i915_vma *vma, *next;
 	int ret;
@@ -413,16 +433,13 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	}
 
 	INIT_LIST_HEAD(&eviction_list);
-	phase = phases;
-	do {
-		list_for_each_entry(vma, *phase, vm_link) {
-			if (i915_vma_is_pinned(vma))
-				continue;
+	list_for_each_entry(vma, &vm->bound_list, vm_link) {
+		if (i915_vma_is_pinned(vma))
+			continue;
 
-			__i915_vma_pin(vma);
-			list_add(&vma->evict_link, &eviction_list);
-		}
-	} while (*++phase);
+		__i915_vma_pin(vma);
+		list_add(&vma->evict_link, &eviction_list);
+	}
 
 	ret = 0;
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 45c7c8b6c7c8..ad4ef8980b97 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -492,9 +492,8 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 
 	stash_init(&vm->free_pages);
 
-	INIT_LIST_HEAD(&vm->active_list);
-	INIT_LIST_HEAD(&vm->inactive_list);
 	INIT_LIST_HEAD(&vm->unbound_list);
+	INIT_LIST_HEAD(&vm->bound_list);
 }
 
 static void i915_address_space_fini(struct i915_address_space *vm)
@@ -2112,8 +2111,7 @@ void i915_ppgtt_close(struct i915_address_space *vm)
 static void ppgtt_destroy_vma(struct i915_address_space *vm)
 {
 	struct list_head *phases[] = {
-		&vm->active_list,
-		&vm->inactive_list,
+		&vm->bound_list,
 		&vm->unbound_list,
 		NULL,
 	}, **phase;
@@ -2136,8 +2134,7 @@ void i915_ppgtt_release(struct kref *kref)
 
 	ppgtt_destroy_vma(&ppgtt->vm);
 
-	GEM_BUG_ON(!list_empty(&ppgtt->vm.active_list));
-	GEM_BUG_ON(!list_empty(&ppgtt->vm.inactive_list));
+	GEM_BUG_ON(!list_empty(&ppgtt->vm.bound_list));
 	GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list));
 
 	ppgtt->vm.cleanup(&ppgtt->vm);
@@ -2802,8 +2799,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	i915_gem_fini_aliasing_ppgtt(dev_priv);
 
-	GEM_BUG_ON(!list_empty(&ggtt->vm.active_list));
-	list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link)
+	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
 		WARN_ON(i915_vma_unbind(vma));
 
 	if (drm_mm_node_allocated(&ggtt->error_capture))
@@ -3514,8 +3510,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 	ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
 
 	/* clflush objects bound into the GGTT and rebind them. */
-	GEM_BUG_ON(!list_empty(&ggtt->vm.active_list));
-	list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) {
+	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		if (!(vma->flags & I915_VMA_GLOBAL_BIND))
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index a0039ea97cdc..bd679c8c56dd 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -299,32 +299,12 @@ struct i915_address_space {
 	struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */
 
 	/**
-	 * List of objects currently involved in rendering.
-	 *
-	 * Includes buffers having the contents of their GPU caches
-	 * flushed, not necessarily primitives. last_read_req
-	 * represents when the rendering involved will be completed.
-	 *
-	 * A reference is held on the buffer while on this list.
+	 * List of vma currently bound.
 	 */
-	struct list_head active_list;
+	struct list_head bound_list;
 
 	/**
-	 * LRU list of objects which are not in the ringbuffer and
-	 * are ready to unbind, but are still in the GTT.
-	 *
-	 * last_read_req is NULL while an object is in this list.
-	 *
-	 * A reference is not held on the buffer while on this list,
-	 * as merely being GTT-bound shouldn't prevent its being
-	 * freed, and we'll pull it off the list in the free path.
-	 */
-	struct list_head inactive_list;
-
-	/**
-	 * List of vma that have been unbound.
-	 *
-	 * A reference is not held on the buffer while on this list.
+	 * List of vma that are not bound.
 	 */
 	struct list_head unbound_list;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index e9a79059bc43..1531534eea02 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -490,9 +490,13 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 
 	/* We also want to clear any cached iomaps as they wrap vmap */
 	list_for_each_entry_safe(vma, next,
-				 &i915->ggtt.vm.inactive_list, vm_link) {
+				 &i915->ggtt.vm.bound_list, vm_link) {
 		unsigned long count = vma->node.size >> PAGE_SHIFT;
-		if (vma->iomap && i915_vma_unbind(vma) == 0)
+
+		if (!vma->iomap || i915_vma_is_active(vma))
+			continue;
+
+		if (i915_vma_unbind(vma) == 0)
 			freed_pages += count;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 2f756a97689a..75b97d71f072 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -702,7 +702,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	vma->pages = obj->mm.pages;
 	vma->flags |= I915_VMA_GLOBAL_BIND;
 	__i915_vma_set_map_and_fenceable(vma);
-	list_move_tail(&vma->vm_link, &ggtt->vm.inactive_list);
+
+	list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
 
 	spin_lock(&dev_priv->mm.obj_lock);
 	list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 5533a741abeb..6e975c43dae9 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1124,7 +1124,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 
 static u32 capture_error_bo(struct drm_i915_error_buffer *err,
 			    int count, struct list_head *head,
-			    bool pinned_only)
+			    bool active_only, bool pinned_only)
 {
 	struct i915_vma *vma;
 	int i = 0;
@@ -1133,6 +1133,9 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err,
 		if (!vma->obj)
 			continue;
 
+		if (active_only && !i915_vma_is_active(vma))
+			continue;
+
 		if (pinned_only && !i915_vma_is_pinned(vma))
 			continue;
 
@@ -1610,14 +1613,16 @@ static void gem_capture_vm(struct i915_gpu_state *error,
 	int count;
 
 	count = 0;
-	list_for_each_entry(vma, &vm->active_list, vm_link)
-		count++;
+	list_for_each_entry(vma, &vm->bound_list, vm_link)
+		if (i915_vma_is_active(vma))
+			count++;
 
 	active_bo = NULL;
 	if (count)
 		active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);
 	if (active_bo)
-		count = capture_error_bo(active_bo, count, &vm->active_list, false);
+		count = capture_error_bo(active_bo, count, &vm->bound_list,
+					 true, false);
 	else
 		count = 0;
 
@@ -1655,28 +1660,20 @@ static void capture_pinned_buffers(struct i915_gpu_state *error)
 	struct i915_address_space *vm = &error->i915->ggtt.vm;
 	struct drm_i915_error_buffer *bo;
 	struct i915_vma *vma;
-	int count_inactive, count_active;
-
-	count_inactive = 0;
-	list_for_each_entry(vma, &vm->inactive_list, vm_link)
-		count_inactive++;
+	int count;
 
-	count_active = 0;
-	list_for_each_entry(vma, &vm->active_list, vm_link)
-		count_active++;
+	count = 0;
+	list_for_each_entry(vma, &vm->bound_list, vm_link)
+		count++;
 
 	bo = NULL;
-	if (count_inactive + count_active)
-		bo = kcalloc(count_inactive + count_active,
-			     sizeof(*bo), GFP_ATOMIC);
+	if (count)
+		bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
 	if (!bo)
 		return;
 
-	count_inactive = capture_error_bo(bo, count_inactive,
-					  &vm->active_list, true);
-	count_active = capture_error_bo(bo + count_inactive, count_active,
-					&vm->inactive_list, true);
-	error->pinned_bo_count = count_inactive + count_active;
+	error->pinned_bo_count =
+		capture_error_bo(bo, count, &vm->bound_list, false, true);
 	error->pinned_bo = bo;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 5b4d78cdb4ca..7de28baffb8f 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -79,9 +79,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
 	if (--vma->active_count)
 		return;
 
-	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
-
 	GEM_BUG_ON(!i915_gem_object_is_active(obj));
 	if (--obj->active_count)
 		return;
@@ -659,7 +656,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level));
 
-	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 
 	if (vma->obj) {
 		struct drm_i915_gem_object *obj = vma->obj;
@@ -1003,10 +1000,8 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (!i915_gem_active_isset(active) && !vma->active_count++) {
-		list_move_tail(&vma->vm_link, &vma->vm->active_list);
+	if (!i915_gem_active_isset(active) && !vma->active_count++)
 		obj->active_count++;
-	}
 	i915_gem_active_set(active, rq);
 	GEM_BUG_ON(!i915_vma_is_active(vma));
 	GEM_BUG_ON(!obj->active_count);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index e1ff6a1c2cb0..9d0fe8aac219 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -57,7 +57,7 @@ static int populate_ggtt(struct drm_i915_private *i915)
 		return -EINVAL;
 	}
 
-	if (list_empty(&i915->ggtt.vm.inactive_list)) {
+	if (list_empty(&i915->ggtt.vm.bound_list)) {
 		pr_err("No objects on the GGTT inactive list!\n");
 		return -EINVAL;
 	}
@@ -69,7 +69,7 @@ static void unpin_ggtt(struct drm_i915_private *i915)
 {
 	struct i915_vma *vma;
 
-	list_for_each_entry(vma, &i915->ggtt.vm.inactive_list, vm_link)
+	list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)
 		i915_vma_unpin(vma);
 }
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index fea8ab14e79d..852b06cb50a0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1237,7 +1237,7 @@ static void track_vma_bind(struct i915_vma *vma)
 	__i915_gem_object_pin_pages(obj);
 
 	vma->pages = obj->mm.pages;
-	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 }
 
 static int exercise_mock(struct drm_i915_private *i915,
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread

* [PATCH 32/46] drm/i915: Pull VM lists under the VM mutex.
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (29 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 31/46] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-16 16:47   ` Tvrtko Ursulin
  2019-01-07 11:54 ` [PATCH 33/46] drm/i915: Move vma lookup to its own lock Chris Wilson
                   ` (19 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx

A starting point to counter the pervasive struct_mutex. For the goal of
avoiding (or at least not blocking under!) global locks during user
request submission, a simple but important step is being able to manage
each client's GTT separately. To that end, we want to stop using the
struct_mutex as the guard for all things GTT/VM and switch instead to a
specific mutex inside i915_address_space.
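
The pattern applied throughout is to wrap every manipulation of the VM
lists in the VM-local mutex; for example, from the i915_vma_insert()
hunk below:

	mutex_lock(&vma->vm->mutex);
	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
	mutex_unlock(&vma->vm->mutex);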

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c                 | 14 ++++++++------
 drivers/gpu/drm/i915/i915_gem_evict.c           |  2 ++
 drivers/gpu/drm/i915/i915_gem_gtt.c             | 15 +++++++++++++--
 drivers/gpu/drm/i915/i915_gem_shrinker.c        |  4 ++++
 drivers/gpu/drm/i915/i915_gem_stolen.c          |  2 ++
 drivers/gpu/drm/i915/i915_vma.c                 | 11 +++++++++++
 drivers/gpu/drm/i915/selftests/i915_gem_evict.c |  3 +++
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c   |  3 +++
 8 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6ed44aeee583..5141a8ba4836 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -246,18 +246,19 @@ int
 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
 	struct drm_i915_gem_get_aperture *args = data;
 	struct i915_vma *vma;
 	u64 pinned;
 
+	mutex_lock(&ggtt->vm.mutex);
+
 	pinned = ggtt->vm.reserved;
-	mutex_lock(&dev->struct_mutex);
 	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
 		if (i915_vma_is_pinned(vma))
 			pinned += vma->node.size;
-	mutex_unlock(&dev->struct_mutex);
+
+	mutex_unlock(&ggtt->vm.mutex);
 
 	args->aper_size = ggtt->vm.total;
 	args->aper_available_size = args->aper_size - pinned;
@@ -1530,20 +1531,21 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 
 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 {
-	struct drm_i915_private *i915;
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct list_head *list;
 	struct i915_vma *vma;
 
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 
+	mutex_lock(&i915->ggtt.vm.mutex);
 	for_each_ggtt_vma(vma, obj) {
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
 
 		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 	}
+	mutex_unlock(&i915->ggtt.vm.mutex);
 
-	i915 = to_i915(obj->base.dev);
 	spin_lock(&i915->mm.obj_lock);
 	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
 	list_move_tail(&obj->mm.link, list);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index a76f65fe86be..4a0c6830659d 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -433,6 +433,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	}
 
 	INIT_LIST_HEAD(&eviction_list);
+	mutex_lock(&vm->mutex);
 	list_for_each_entry(vma, &vm->bound_list, vm_link) {
 		if (i915_vma_is_pinned(vma))
 			continue;
@@ -440,6 +441,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 		__i915_vma_pin(vma);
 		list_add(&vma->evict_link, &eviction_list);
 	}
+	mutex_unlock(&vm->mutex);
 
 	ret = 0;
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ad4ef8980b97..c3363a9b586b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1932,7 +1932,10 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
 
 	INIT_LIST_HEAD(&vma->obj_link);
+
+	mutex_lock(&vma->vm->mutex);
 	list_add(&vma->vm_link, &vma->vm->unbound_list);
+	mutex_unlock(&vma->vm->mutex);
 
 	return vma;
 }
@@ -3504,9 +3507,10 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 
 	i915_check_and_clear_faults(dev_priv);
 
+	mutex_lock(&ggtt->vm.mutex);
+
 	/* First fill our portion of the GTT with scratch pages */
 	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
-
 	ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
 
 	/* clflush objects bound into the GGTT and rebind them. */
@@ -3516,19 +3520,26 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 		if (!(vma->flags & I915_VMA_GLOBAL_BIND))
 			continue;
 
+		mutex_unlock(&ggtt->vm.mutex);
+
 		if (!i915_vma_unbind(vma))
-			continue;
+			goto lock;
 
 		WARN_ON(i915_vma_bind(vma,
 				      obj ? obj->cache_level : 0,
 				      PIN_UPDATE));
 		if (obj)
 			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
+
+lock:
+		mutex_lock(&ggtt->vm.mutex);
 	}
 
 	ggtt->vm.closed = false;
 	i915_ggtt_invalidate(dev_priv);
 
+	mutex_unlock(&ggtt->vm.mutex);
+
 	if (INTEL_GEN(dev_priv) >= 8) {
 		struct intel_ppat *ppat = &dev_priv->ppat;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 1531534eea02..786121609016 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -489,6 +489,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 					       I915_SHRINK_VMAPS);
 
 	/* We also want to clear any cached iomaps as they wrap vmap */
+	mutex_lock(&i915->ggtt.vm.mutex);
 	list_for_each_entry_safe(vma, next,
 				 &i915->ggtt.vm.bound_list, vm_link) {
 		unsigned long count = vma->node.size >> PAGE_SHIFT;
@@ -496,9 +497,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 		if (!vma->iomap || i915_vma_is_active(vma))
 			continue;
 
+		mutex_unlock(&i915->ggtt.vm.mutex);
 		if (i915_vma_unbind(vma) == 0)
 			freed_pages += count;
+		mutex_lock(&i915->ggtt.vm.mutex);
 	}
+	mutex_unlock(&i915->ggtt.vm.mutex);
 
 out:
 	shrinker_unlock(i915, unlock);
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 75b97d71f072..21de3a5e9910 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -703,7 +703,9 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	vma->flags |= I915_VMA_GLOBAL_BIND;
 	__i915_vma_set_map_and_fenceable(vma);
 
+	mutex_lock(&ggtt->vm.mutex);
 	list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
+	mutex_unlock(&ggtt->vm.mutex);
 
 	spin_lock(&dev_priv->mm.obj_lock);
 	list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 7de28baffb8f..dcbd0d345c72 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -213,7 +213,10 @@ vma_create(struct drm_i915_gem_object *obj,
 	}
 	rb_link_node(&vma->obj_node, rb, p);
 	rb_insert_color(&vma->obj_node, &obj->vma_tree);
+
+	mutex_lock(&vm->mutex);
 	list_add(&vma->vm_link, &vm->unbound_list);
+	mutex_unlock(&vm->mutex);
 
 	return vma;
 
@@ -656,7 +659,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level));
 
+	mutex_lock(&vma->vm->mutex);
 	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+	mutex_unlock(&vma->vm->mutex);
 
 	if (vma->obj) {
 		struct drm_i915_gem_object *obj = vma->obj;
@@ -689,8 +694,10 @@ i915_vma_remove(struct i915_vma *vma)
 
 	vma->ops->clear_pages(vma);
 
+	mutex_lock(&vma->vm->mutex);
 	drm_mm_remove_node(&vma->node);
 	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
+	mutex_unlock(&vma->vm->mutex);
 
 	/*
 	 * Since the unbound list is global, only move to that list if
@@ -802,7 +809,11 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
 
 	list_del(&vma->obj_link);
+
+	mutex_lock(&vma->vm->mutex);
 	list_del(&vma->vm_link);
+	mutex_unlock(&vma->vm->mutex);
+
 	if (vma->obj)
 		rb_erase(&vma->obj_node, &vma->obj->vma_tree);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 9d0fe8aac219..eaefba7470f7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -67,10 +67,13 @@ static int populate_ggtt(struct drm_i915_private *i915)
 
 static void unpin_ggtt(struct drm_i915_private *i915)
 {
+	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct i915_vma *vma;
 
+	mutex_lock(&ggtt->vm.mutex);
 	list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)
 		i915_vma_unpin(vma);
+	mutex_unlock(&ggtt->vm.mutex);
 }
 
 static void cleanup_objects(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 852b06cb50a0..35eb40e5de91 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1237,7 +1237,10 @@ static void track_vma_bind(struct i915_vma *vma)
 	__i915_gem_object_pin_pages(obj);
 
 	vma->pages = obj->mm.pages;
+
+	mutex_lock(&vma->vm->mutex);
 	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+	mutex_unlock(&vma->vm->mutex);
 }
 
 static int exercise_mock(struct drm_i915_private *i915,
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread

* [PATCH 33/46] drm/i915: Move vma lookup to its own lock
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (30 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 32/46] drm/i915: Pull VM lists under the VM mutex Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 11:54 ` [PATCH 34/46] drm/i915: Move intel_execlists_show_requests() aside Chris Wilson
                   ` (18 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx

Remove the struct_mutex requirement for looking up the vma for an
object.
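
With the new obj->vma.lock, the lookup fast path takes only the
spinlock and drops it before any allocation; vma_create() then rechecks
the tree under the same lock and returns the already-inserted vma if it
lost the race. From the i915_vma_instance() hunk below:

	spin_lock(&obj->vma.lock);
	vma = vma_lookup(obj, vm, view);
	spin_unlock(&obj->vma.lock);

	if (unlikely(!vma))
		vma = vma_create(obj, vm, view);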

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c       |  6 +--
 drivers/gpu/drm/i915/i915_gem.c           | 33 +++++++------
 drivers/gpu/drm/i915/i915_gem_object.h    | 45 ++++++++++-------
 drivers/gpu/drm/i915/i915_vma.c           | 60 +++++++++++++++--------
 drivers/gpu/drm/i915/i915_vma.h           |  2 +-
 drivers/gpu/drm/i915/selftests/i915_vma.c |  4 +-
 6 files changed, 92 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a9bc7752da49..42590a0a634f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -159,14 +159,14 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
 	if (obj->base.name)
 		seq_printf(m, " (name: %d)", obj->base.name);
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
 		if (i915_vma_is_pinned(vma))
 			pin_count++;
 	}
 	seq_printf(m, " (pinned x %d)", pin_count);
 	if (obj->pin_global)
 		seq_printf(m, " (global)");
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
 
@@ -322,7 +322,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 	if (obj->base.name || obj->base.dma_buf)
 		stats->shared += obj->base.size;
 
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5141a8ba4836..37270ee360c8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -438,15 +438,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	if (ret)
 		return ret;
 
-	while ((vma = list_first_entry_or_null(&obj->vma_list,
-					       struct i915_vma,
-					       obj_link))) {
+	spin_lock(&obj->vma.lock);
+	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
+						       struct i915_vma,
+						       obj_link))) {
 		list_move_tail(&vma->obj_link, &still_in_list);
+		spin_unlock(&obj->vma.lock);
+
 		ret = i915_vma_unbind(vma);
-		if (ret)
-			break;
+
+		spin_lock(&obj->vma.lock);
 	}
-	list_splice(&still_in_list, &obj->vma_list);
+	list_splice(&still_in_list, &obj->vma.list);
+	spin_unlock(&obj->vma.lock);
 
 	return ret;
 }
@@ -3476,7 +3480,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	 * reading an invalid PTE on older architectures.
 	 */
 restart:
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
 
@@ -3554,7 +3558,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 			 */
 		}
 
-		list_for_each_entry(vma, &obj->vma_list, obj_link) {
+		list_for_each_entry(vma, &obj->vma.list, obj_link) {
 			if (!drm_mm_node_allocated(&vma->node))
 				continue;
 
@@ -3564,7 +3568,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		}
 	}
 
-	list_for_each_entry(vma, &obj->vma_list, obj_link)
+	list_for_each_entry(vma, &obj->vma.list, obj_link)
 		vma->node.color = cache_level;
 	i915_gem_object_set_cache_coherency(obj, cache_level);
 	obj->cache_dirty = true; /* Always invalidate stale cachelines */
@@ -4140,7 +4144,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 {
 	mutex_init(&obj->mm.lock);
 
-	INIT_LIST_HEAD(&obj->vma_list);
+	spin_lock_init(&obj->vma.lock);
+	INIT_LIST_HEAD(&obj->vma.list);
+
 	INIT_LIST_HEAD(&obj->lut_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
 
@@ -4306,14 +4312,13 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 		mutex_lock(&i915->drm.struct_mutex);
 
 		GEM_BUG_ON(i915_gem_object_is_active(obj));
-		list_for_each_entry_safe(vma, vn,
-					 &obj->vma_list, obj_link) {
+		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
 			GEM_BUG_ON(i915_vma_is_active(vma));
 			vma->flags &= ~I915_VMA_PIN_MASK;
 			i915_vma_destroy(vma);
 		}
-		GEM_BUG_ON(!list_empty(&obj->vma_list));
-		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
+		GEM_BUG_ON(!list_empty(&obj->vma.list));
+		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
 
 		/* This serializes freeing with the shrinker. Since the free
 		 * is delayed, first by RCU then by the workqueue, we want the
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index a6dd7c46de0d..35de9e739104 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -85,24 +85,33 @@ struct drm_i915_gem_object {
 
 	const struct drm_i915_gem_object_ops *ops;
 
-	/**
-	 * @vma_list: List of VMAs backed by this object
-	 *
-	 * The VMA on this list are ordered by type, all GGTT vma are placed
-	 * at the head and all ppGTT vma are placed at the tail. The different
-	 * types of GGTT vma are unordered between themselves, use the
-	 * @vma_tree (which has a defined order between all VMA) to find an
-	 * exact match.
-	 */
-	struct list_head vma_list;
-	/**
-	 * @vma_tree: Ordered tree of VMAs backed by this object
-	 *
-	 * All VMA created for this object are placed in the @vma_tree for
-	 * fast retrieval via a binary search in i915_vma_instance().
-	 * They are also added to @vma_list for easy iteration.
-	 */
-	struct rb_root vma_tree;
+	struct {
+		/**
+		 * @vma.lock: protect the list/tree of vmas
+		 */
+		struct spinlock lock;
+
+		/**
+		 * @vma.list: List of VMAs backed by this object
+		 *
+		 * The VMA on this list are ordered by type, all GGTT vma are
+		 * placed at the head and all ppGTT vma are placed at the tail.
+		 * The different types of GGTT vma are unordered between
+		 * themselves, use the @vma.tree (which has a defined order
+		 * between all VMA) to quickly find an exact match.
+		 */
+		struct list_head list;
+
+		/**
+		 * @vma.tree: Ordered tree of VMAs backed by this object
+		 *
+		 * All VMA created for this object are placed in the @vma.tree
+		 * for fast retrieval via a binary search in
+		 * i915_vma_instance(). They are also added to @vma.list for
+		 * easy iteration.
+		 */
+		struct rb_root tree;
+	} vma;
 
 	/**
 	 * @lut_list: List of vma lookup entries in use for this object.
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index dcbd0d345c72..3a680fe2bb8b 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -187,32 +187,47 @@ vma_create(struct drm_i915_gem_object *obj,
 								i915_gem_object_get_stride(obj));
 		GEM_BUG_ON(!is_power_of_2(vma->fence_alignment));
 
-		/*
-		 * We put the GGTT vma at the start of the vma-list, followed
-		 * by the ppGGTT vma. This allows us to break early when
-		 * iterating over only the GGTT vma for an object, see
-		 * for_each_ggtt_vma()
-		 */
 		vma->flags |= I915_VMA_GGTT;
-		list_add(&vma->obj_link, &obj->vma_list);
-	} else {
-		list_add_tail(&vma->obj_link, &obj->vma_list);
 	}
 
+	spin_lock(&obj->vma.lock);
+
 	rb = NULL;
-	p = &obj->vma_tree.rb_node;
+	p = &obj->vma.tree.rb_node;
 	while (*p) {
 		struct i915_vma *pos;
+		long cmp;
 
 		rb = *p;
 		pos = rb_entry(rb, struct i915_vma, obj_node);
-		if (i915_vma_compare(pos, vm, view) < 0)
+
+		cmp = i915_vma_compare(pos, vm, view);
+		if (cmp == 0) {
+			spin_unlock(&obj->vma.lock);
+			kmem_cache_free(vm->i915->vmas, vma);
+			return pos;
+		}
+
+		if (cmp < 0)
 			p = &rb->rb_right;
 		else
 			p = &rb->rb_left;
 	}
 	rb_link_node(&vma->obj_node, rb, p);
-	rb_insert_color(&vma->obj_node, &obj->vma_tree);
+	rb_insert_color(&vma->obj_node, &obj->vma.tree);
+
+	if (i915_vma_is_ggtt(vma))
+		/*
+		 * We put the GGTT vma at the start of the vma-list, followed
+		 * by the ppGGTT vma. This allows us to break early when
+		 * iterating over only the GGTT vma for an object, see
+		 * for_each_ggtt_vma()
+		 */
+		list_add(&vma->obj_link, &obj->vma.list);
+	else
+		list_add_tail(&vma->obj_link, &obj->vma.list);
+
+	spin_unlock(&obj->vma.lock);
 
 	mutex_lock(&vm->mutex);
 	list_add(&vma->vm_link, &vm->unbound_list);
@@ -232,7 +247,7 @@ vma_lookup(struct drm_i915_gem_object *obj,
 {
 	struct rb_node *rb;
 
-	rb = obj->vma_tree.rb_node;
+	rb = obj->vma.tree.rb_node;
 	while (rb) {
 		struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node);
 		long cmp;
@@ -272,16 +287,17 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 {
 	struct i915_vma *vma;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
 	GEM_BUG_ON(view && !i915_is_ggtt(vm));
 	GEM_BUG_ON(vm->closed);
 
+	spin_lock(&obj->vma.lock);
 	vma = vma_lookup(obj, vm, view);
-	if (!vma)
+	spin_unlock(&obj->vma.lock);
+
+	if (unlikely(!vma))
 		vma = vma_create(obj, vm, view);
 
 	GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
-	GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma);
 	return vma;
 }
 
@@ -808,14 +824,18 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 
 	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
 
-	list_del(&vma->obj_link);
-
 	mutex_lock(&vma->vm->mutex);
 	list_del(&vma->vm_link);
 	mutex_unlock(&vma->vm->mutex);
 
-	if (vma->obj)
-		rb_erase(&vma->obj_node, &vma->obj->vma_tree);
+	if (vma->obj) {
+		struct drm_i915_gem_object *obj = vma->obj;
+
+		spin_lock(&obj->vma.lock);
+		list_del(&vma->obj_link);
+		rb_erase(&vma->obj_node, &vma->obj->vma.tree);
+		spin_unlock(&obj->vma.lock);
+	}
 
 	rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
 		GEM_BUG_ON(i915_gem_active_isset(&iter->base));
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 4f7c1c7599f4..7252abc73d3e 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -425,7 +425,7 @@ void i915_vma_parked(struct drm_i915_private *i915);
  * or the list is empty ofc.
  */
 #define for_each_ggtt_vma(V, OBJ) \
-	list_for_each_entry(V, &(OBJ)->vma_list, obj_link)		\
+	list_for_each_entry(V, &(OBJ)->vma.list, obj_link)		\
 		for_each_until(!i915_vma_is_ggtt(V))
 
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index ffa74290e054..f1008b07dfd2 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -670,7 +670,7 @@ static int igt_vma_partial(void *arg)
 		}
 
 		count = 0;
-		list_for_each_entry(vma, &obj->vma_list, obj_link)
+		list_for_each_entry(vma, &obj->vma.list, obj_link)
 			count++;
 		if (count != nvma) {
 			pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n",
@@ -699,7 +699,7 @@ static int igt_vma_partial(void *arg)
 		i915_vma_unpin(vma);
 
 		count = 0;
-		list_for_each_entry(vma, &obj->vma_list, obj_link)
+		list_for_each_entry(vma, &obj->vma.list, obj_link)
 			count++;
 		if (count != nvma) {
 			pr_err("(%s) allocated an extra full vma!\n", p->name);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread

* [PATCH 34/46] drm/i915: Move intel_execlists_show_requests() aside
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (31 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 33/46] drm/i915: Move vma lookup to its own lock Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 11:54 ` [PATCH 35/46] drm/i915: Use b->irq_enable() as predicate for mock engine Chris Wilson
                   ` (17 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx

Move the debug pretty printer into a standalone routine prior to
extending it in upcoming feature work.
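
The caller in intel_engine_dump() then reduces to a single call,
passing its own pretty printer and the previous limit of 8 requests:

	intel_execlists_show_requests(engine, m, print_request, 8);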

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_engine_cs.c | 55 ++----------------------
 drivers/gpu/drm/i915/intel_lrc.c       | 58 ++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.h       | 10 ++++-
 3 files changed, 71 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 3a0b8036f173..53096edb8547 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1421,15 +1421,12 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		       struct drm_printer *m,
 		       const char *header, ...)
 {
-	const int MAX_REQUESTS_TO_SHOW = 8;
 	struct intel_breadcrumbs * const b = &engine->breadcrumbs;
-	const struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
-	struct i915_request *rq, *last;
+	struct i915_request *rq;
 	intel_wakeref_t wakeref;
 	unsigned long flags;
 	struct rb_node *rb;
-	int count;
 
 	if (header) {
 		va_list ap;
@@ -1493,52 +1490,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
 	}
 
-	local_irq_save(flags);
-	spin_lock(&engine->timeline.lock);
-
-	last = NULL;
-	count = 0;
-	list_for_each_entry(rq, &engine->timeline.requests, link) {
-		if (count++ < MAX_REQUESTS_TO_SHOW - 1)
-			print_request(m, rq, "\t\tE ");
-		else
-			last = rq;
-	}
-	if (last) {
-		if (count > MAX_REQUESTS_TO_SHOW) {
-			drm_printf(m,
-				   "\t\t...skipping %d executing requests...\n",
-				   count - MAX_REQUESTS_TO_SHOW);
-		}
-		print_request(m, last, "\t\tE ");
-	}
-
-	last = NULL;
-	count = 0;
-	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
-	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
-		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
-		int i;
-
-		priolist_for_each_request(rq, p, i) {
-			if (count++ < MAX_REQUESTS_TO_SHOW - 1)
-				print_request(m, rq, "\t\tQ ");
-			else
-				last = rq;
-		}
-	}
-	if (last) {
-		if (count > MAX_REQUESTS_TO_SHOW) {
-			drm_printf(m,
-				   "\t\t...skipping %d queued requests...\n",
-				   count - MAX_REQUESTS_TO_SHOW);
-		}
-		print_request(m, last, "\t\tQ ");
-	}
-
-	spin_unlock(&engine->timeline.lock);
+	intel_execlists_show_requests(engine, m, print_request, 8);
 
-	spin_lock(&b->rb_lock);
+	spin_lock_irqsave(&b->rb_lock, flags);
 	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
 		struct intel_wait *w = rb_entry(rb, typeof(*w), node);
 
@@ -1547,8 +1501,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 			   task_state_to_char(w->tsk),
 			   w->seqno);
 	}
-	spin_unlock(&b->rb_lock);
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&b->rb_lock, flags);
 
 	drm_printf(m, "HWSP:\n");
 	hexdump(m, engine->status_page.page_addr, PAGE_SIZE);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index eb37e2f7f835..4e3415b2daa0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2676,6 +2676,64 @@ void intel_lr_context_resume(struct drm_i915_private *i915)
 	}
 }
 
+void intel_execlists_show_requests(struct intel_engine_cs *engine,
+				   struct drm_printer *m,
+				   void (*show_request)(struct drm_printer *m,
+							struct i915_request *rq,
+							const char *prefix),
+				   unsigned int max)
+{
+	const struct intel_engine_execlists *execlists = &engine->execlists;
+	struct i915_request *rq, *last;
+	unsigned long flags;
+	unsigned int count;
+	struct rb_node *rb;
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+
+	last = NULL;
+	count = 0;
+	list_for_each_entry(rq, &engine->timeline.requests, link) {
+		if (count++ < max - 1)
+			show_request(m, rq, "\t\tE ");
+		else
+			last = rq;
+	}
+	if (last) {
+		if (count > max) {
+			drm_printf(m,
+				   "\t\t...skipping %d executing requests...\n",
+				   count - max);
+		}
+		show_request(m, last, "\t\tE ");
+	}
+
+	last = NULL;
+	count = 0;
+	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
+	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		int i;
+
+		priolist_for_each_request(rq, p, i) {
+			if (count++ < max - 1)
+				show_request(m, rq, "\t\tQ ");
+			else
+				last = rq;
+		}
+	}
+	if (last) {
+		if (count > max) {
+			drm_printf(m,
+				   "\t\t...skipping %d queued requests...\n",
+				   count - max);
+		}
+		show_request(m, last, "\t\tQ ");
+	}
+
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/intel_lrc.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index f5a5502ecf70..3d86c27c6b32 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -97,11 +97,19 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
  */
 #define LRC_HEADER_PAGES LRC_PPHWSP_PN
 
+struct drm_printer;
+
 struct drm_i915_private;
 struct i915_gem_context;
 
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
-
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
 
+void intel_execlists_show_requests(struct intel_engine_cs *engine,
+				   struct drm_printer *m,
+				   void (*show_request)(struct drm_printer *m,
+							struct i915_request *rq,
+							const char *prefix),
+				   unsigned int max);
+
 #endif /* _INTEL_LRC_H_ */
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread

* [PATCH 35/46] drm/i915: Use b->irq_enable() as predicate for mock engine
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (32 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 34/46] drm/i915: Move intel_execlists_show_requests() aside Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 11:54 ` [PATCH 36/46] drm/i915/selftests: Allocate mock ring/timeline per context Chris Wilson
                   ` (16 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx

Since commit d4ccceb05591 ("drm/i915/icl: Ringbuffer interrupt handling")
we have had a mechanism to avoid touching the interrupt hardware for
breadcrumbs (an engine may simply have no irq_enable callback),
superseding our mock interface for selftests.
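
The mock engines simply leave engine->irq_enable unset, so the NULL
callback itself becomes the "no irq hardware" predicate; the resulting
irq_enable() (condensed from the diff below):

	static void irq_enable(struct intel_engine_cs *engine)
	{
		if (!engine->irq_enable)
			return;

		/* Caller disables interrupts */
		spin_lock(&engine->i915->irq_lock);
		engine->irq_enable(engine);
		spin_unlock(&engine->i915->irq_lock);
	}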

References: d4ccceb05591 ("drm/i915/icl: Ringbuffer interrupt handling")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_breadcrumbs.c     | 39 ++++++++------------
 drivers/gpu/drm/i915/intel_engine_cs.c       | 11 ++----
 drivers/gpu/drm/i915/intel_ringbuffer.h      |  1 -
 drivers/gpu/drm/i915/selftests/mock_engine.c |  1 -
 4 files changed, 20 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 4ed7105d7ff5..7b517bf83507 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -158,6 +158,9 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t)
 
 static void irq_enable(struct intel_engine_cs *engine)
 {
+	if (!engine->irq_enable)
+		return;
+
 	/*
 	 * FIXME: Ideally we want this on the API boundary, but for the
 	 * sake of testing with mock breadcrumbs (no HW so unable to
@@ -167,21 +170,20 @@ static void irq_enable(struct intel_engine_cs *engine)
 	GEM_BUG_ON(!intel_irqs_enabled(engine->i915));
 
 	/* Caller disables interrupts */
-	if (engine->irq_enable) {
-		spin_lock(&engine->i915->irq_lock);
-		engine->irq_enable(engine);
-		spin_unlock(&engine->i915->irq_lock);
-	}
+	spin_lock(&engine->i915->irq_lock);
+	engine->irq_enable(engine);
+	spin_unlock(&engine->i915->irq_lock);
 }
 
 static void irq_disable(struct intel_engine_cs *engine)
 {
+	if (!engine->irq_disable)
+		return;
+
 	/* Caller disables interrupts */
-	if (engine->irq_disable) {
-		spin_lock(&engine->i915->irq_lock);
-		engine->irq_disable(engine);
-		spin_unlock(&engine->i915->irq_lock);
-	}
+	spin_lock(&engine->i915->irq_lock);
+	engine->irq_disable(engine);
+	spin_unlock(&engine->i915->irq_lock);
 }
 
 void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
@@ -293,25 +295,16 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
 	if (b->irq_armed)
 		return false;
 
-	/* The breadcrumb irq will be disarmed on the interrupt after the
+	/*
+	 * The breadcrumb irq will be disarmed on the interrupt after the
 	 * waiters are signaled. This gives us a single interrupt window in
 	 * which we can add a new waiter and avoid the cost of re-enabling
 	 * the irq.
 	 */
 	b->irq_armed = true;
 
-	if (I915_SELFTEST_ONLY(b->mock)) {
-		/* For our mock objects we want to avoid interaction
-		 * with the real hardware (which is not set up). So
-		 * we simply pretend we have enabled the powerwell
-		 * and the irq, and leave it up to the mock
-		 * implementation to call intel_engine_wakeup()
-		 * itself when it wants to simulate a user interrupt,
-		 */
-		return true;
-	}
-
-	/* Since we are waiting on a request, the GPU should be busy
+	/*
+	 * Since we are waiting on a request, the GPU should be busy
 	 * and should have its own rpm reference. This is tracked
 	 * by i915->gt.awake, we can forgo holding our own wakref
 	 * for the interrupt as before i915->gt.awake is released (when
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 53096edb8547..1a9de4a01b9d 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -917,6 +917,9 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
 	intel_wakeref_t wakeref;
 	bool idle = true;
 
+	if (I915_SELFTEST_ONLY(!engine->mmio_base))
+		return true;
+
 	/* If the whole device is asleep, the engine must be idle */
 	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
 	if (!wakeref)
@@ -955,9 +958,6 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 	if (!intel_engine_signaled(engine, intel_engine_last_submit(engine)))
 		return false;
 
-	if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock))
-		return true;
-
 	/* Waiting to drain ELSP? */
 	if (READ_ONCE(engine->execlists.active)) {
 		struct tasklet_struct *t = &engine->execlists.tasklet;
@@ -983,10 +983,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 		return false;
 
 	/* Ring stopped? */
-	if (!ring_is_idle(engine))
-		return false;
-
-	return true;
+	return ring_is_idle(engine);
 }
 
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 06850ee17087..88dda43aae35 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -397,7 +397,6 @@ struct intel_engine_cs {
 		unsigned int irq_count;
 
 		bool irq_armed : 1;
-		I915_SELFTEST_DECLARE(bool mock : 1);
 	} breadcrumbs;
 
 	struct {
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 50e1a0b1af7e..9fe5b2c8f8d4 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -201,7 +201,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE);
 
 	intel_engine_init_breadcrumbs(&engine->base);
-	engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */
 
 	/* fake hw queue */
 	spin_lock_init(&engine->hw_lock);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread

* [PATCH 36/46] drm/i915/selftests: Allocate mock ring/timeline per context
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (33 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 35/46] drm/i915: Use b->irq_enable() as predicate for mock engine Chris Wilson
@ 2019-01-07 11:54 ` Chris Wilson
  2019-01-07 11:55 ` [PATCH 37/46] drm/i915/selftests: Make evict tolerant of foreign objects Chris Wilson
                   ` (15 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:54 UTC (permalink / raw)
  To: intel-gfx

To correctly simulate preemption between contexts, we need an
independent timeline for each context. Make it so.
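
As an illustration (ctx_a/ctx_b are hypothetical contexts), pinning two
contexts on the same mock engine now yields two distinct rings and
timelines, so their requests can be tracked independently:

	ce_a = intel_context_pin(ctx_a, engine); /* allocates ring/timeline A */
	ce_b = intel_context_pin(ctx_b, engine); /* allocates ring/timeline B */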

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/selftests/mock_engine.c | 90 ++++++++++----------
 1 file changed, 47 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 9fe5b2c8f8d4..8b8d51af7d6a 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -30,6 +30,36 @@ struct mock_ring {
 	struct i915_timeline timeline;
 };
 
+static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
+{
+	const unsigned long sz = PAGE_SIZE / 2;
+	struct mock_ring *ring;
+
+	ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
+	if (!ring)
+		return NULL;
+
+	i915_timeline_init(engine->i915, &ring->timeline, engine->name);
+
+	ring->base.size = sz;
+	ring->base.effective_size = sz;
+	ring->base.vaddr = (void *)(ring + 1);
+	ring->base.timeline = &ring->timeline;
+
+	INIT_LIST_HEAD(&ring->base.request_list);
+	intel_ring_update_space(&ring->base);
+
+	return &ring->base;
+}
+
+static void mock_ring_free(struct intel_ring *base)
+{
+	struct mock_ring *ring = container_of(base, typeof(*ring), base);
+
+	i915_timeline_fini(&ring->timeline);
+	kfree(ring);
+}
+
 static struct mock_request *first_request(struct mock_engine *engine)
 {
 	return list_first_entry_or_null(&engine->hw_queue,
@@ -80,6 +110,9 @@ static void mock_context_unpin(struct intel_context *ce)
 static void mock_context_destroy(struct intel_context *ce)
 {
 	GEM_BUG_ON(ce->pin_count);
+
+	if (ce->ring)
+		mock_ring_free(ce->ring);
 }
 
 static const struct intel_context_ops mock_context_ops = {
@@ -93,13 +126,22 @@ mock_context_pin(struct intel_engine_cs *engine,
 {
 	struct intel_context *ce = to_intel_context(ctx, engine);
 
-	if (!ce->pin_count++) {
-		i915_gem_context_get(ctx);
-		ce->ring = engine->buffer;
-		ce->ops = &mock_context_ops;
+	if (ce->pin_count++)
+		return ce;
+
+	if (!ce->ring) {
+		ce->ring = mock_ring(engine);
+		if (!ce->ring)
+			goto err;
 	}
 
+	ce->ops = &mock_context_ops;
+	i915_gem_context_get(ctx);
 	return ce;
+
+err:
+	ce->pin_count = 0;
+	return ERR_PTR(-ENOMEM);
 }
 
 static int mock_request_alloc(struct i915_request *request)
@@ -143,36 +185,6 @@ static void mock_submit_request(struct i915_request *request)
 	spin_unlock_irq(&engine->hw_lock);
 }
 
-static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
-{
-	const unsigned long sz = PAGE_SIZE / 2;
-	struct mock_ring *ring;
-
-	ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
-	if (!ring)
-		return NULL;
-
-	i915_timeline_init(engine->i915, &ring->timeline, engine->name);
-
-	ring->base.size = sz;
-	ring->base.effective_size = sz;
-	ring->base.vaddr = (void *)(ring + 1);
-	ring->base.timeline = &ring->timeline;
-
-	INIT_LIST_HEAD(&ring->base.request_list);
-	intel_ring_update_space(&ring->base);
-
-	return &ring->base;
-}
-
-static void mock_ring_free(struct intel_ring *base)
-{
-	struct mock_ring *ring = container_of(base, typeof(*ring), base);
-
-	i915_timeline_fini(&ring->timeline);
-	kfree(ring);
-}
-
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 				    const char *name,
 				    int id)
@@ -207,17 +219,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	timer_setup(&engine->hw_delay, hw_delay_complete, 0);
 	INIT_LIST_HEAD(&engine->hw_queue);
 
-	engine->base.buffer = mock_ring(&engine->base);
-	if (!engine->base.buffer)
-		goto err_breadcrumbs;
-
 	if (IS_ERR(intel_context_pin(i915->kernel_context, &engine->base)))
-		goto err_ring;
+		goto err_breadcrumbs;
 
 	return &engine->base;
 
-err_ring:
-	mock_ring_free(engine->base.buffer);
 err_breadcrumbs:
 	intel_engine_fini_breadcrumbs(&engine->base);
 	i915_timeline_fini(&engine->base.timeline);
@@ -260,8 +266,6 @@ void mock_engine_free(struct intel_engine_cs *engine)
 
 	__intel_context_unpin(engine->i915->kernel_context, engine);
 
-	mock_ring_free(engine->buffer);
-
 	intel_engine_fini_breadcrumbs(engine);
 	i915_timeline_fini(&engine->timeline);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread

* [PATCH 37/46] drm/i915/selftests: Make evict tolerant of foreign objects
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (34 preceding siblings ...)
  2019-01-07 11:54 ` [PATCH 36/46] drm/i915/selftests: Allocate mock ring/timeline per context Chris Wilson
@ 2019-01-07 11:55 ` Chris Wilson
  2019-01-07 11:55 ` [PATCH 38/46] drm/i915: Remove the intel_engine_notify tracepoint Chris Wilson
                   ` (14 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:55 UTC (permalink / raw)
  To: intel-gfx

The evict selftests presumed that all objects in use had been allocated
by themselves. This is a dubious claim, so instead of asserting complete
control over the object lists, take (temporary) ownership of them.
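
Taking ownership here just means grabbing an extra reference on every
object already present on the mm lists for the duration of the test, as
the reworked populate_ggtt() below does:

	list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) {
		i915_gem_object_get(obj); /* keep foreign objects alive */
		expected_unbound++;
	}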

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/selftests/i915_gem_evict.c   | 64 +++++++++++++++----
 1 file changed, 53 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index eaefba7470f7..55cd2154676b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -31,30 +31,63 @@
 
 static int populate_ggtt(struct drm_i915_private *i915)
 {
-	struct drm_i915_gem_object *obj;
+	struct drm_i915_gem_object *obj, *on;
+	unsigned long expected_unbound, expected_bound;
+	unsigned long unbound, bound, count;
 	u64 size;
+	int err;
+
+	expected_unbound = 0;
+	list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) {
+		i915_gem_object_get(obj);
+		expected_unbound++;
+	}
+
+	expected_bound = 0;
+	list_for_each_entry(obj, &i915->mm.bound_list, mm.link) {
+		i915_gem_object_get(obj);
+		expected_bound++;
+	}
 
+	count = 0;
 	for (size = 0;
 	     size + I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total;
 	     size += I915_GTT_PAGE_SIZE) {
 		struct i915_vma *vma;
 
 		obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE);
-		if (IS_ERR(obj))
-			return PTR_ERR(obj);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			goto cleanup;
+		}
 
 		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
-		if (IS_ERR(vma))
-			return PTR_ERR(vma);
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			goto cleanup;
+		}
+
+		count++;
 	}
 
-	if (!list_empty(&i915->mm.unbound_list)) {
-		size = 0;
-		list_for_each_entry(obj, &i915->mm.unbound_list, mm.link)
-			size++;
+	unbound = 0;
+	list_for_each_entry(obj, &i915->mm.unbound_list, mm.link)
+		unbound++;
+	if (unbound != expected_unbound) {
+		pr_err("%s: Found %lu objects unbound, expected %lu!\n",
+		       __func__, unbound, expected_unbound);
+		err = -EINVAL;
+		goto cleanup;
+	}
 
-		pr_err("Found %lld objects unbound!\n", size);
-		return -EINVAL;
+	bound = 0;
+	list_for_each_entry(obj, &i915->mm.bound_list, mm.link)
+		bound++;
+	if (bound != expected_bound + count) {
+		pr_err("%s: Found %lu objects bound, expected %lu!\n",
+		       __func__, bound, expected_bound + count);
+		err = -EINVAL;
+		goto cleanup;
 	}
 
 	if (list_empty(&i915->ggtt.vm.bound_list)) {
@@ -63,6 +96,15 @@ static int populate_ggtt(struct drm_i915_private *i915)
 	}
 
 	return 0;
+
+cleanup:
+	list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, mm.link)
+		i915_gem_object_put(obj);
+
+	list_for_each_entry_safe(obj, on, &i915->mm.bound_list, mm.link)
+		i915_gem_object_put(obj);
+
+	return err;
 }
 
 static void unpin_ggtt(struct drm_i915_private *i915)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread

* [PATCH 38/46] drm/i915: Remove the intel_engine_notify tracepoint
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (35 preceding siblings ...)
  2019-01-07 11:55 ` [PATCH 37/46] drm/i915/selftests: Make evict tolerant of foreign objects Chris Wilson
@ 2019-01-07 11:55 ` Chris Wilson
  2019-01-07 11:55 ` [PATCH 39/46] drm/i915: Always allocate an object/vma for the HWSP Chris Wilson
                   ` (13 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:55 UTC (permalink / raw)
  To: intel-gfx

The global seqno is defunct, so this tracepoint no longer gives a
meaningful indicator of forward progress for an engine. Listen to the
request signaling tracepoints instead.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c   |  2 --
 drivers/gpu/drm/i915/i915_trace.h | 25 -------------------------
 2 files changed, 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d1727bcac776..46c742d71610 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1211,8 +1211,6 @@ static void notify_ring(struct intel_engine_cs *engine)
 		wake_up_process(tsk);
 
 	rcu_read_unlock();
-
-	trace_intel_engine_notify(engine, wait);
 }
 
 static void vlv_c0_read(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 5cf378936b05..4b35b0b9462c 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -751,31 +751,6 @@ trace_i915_request_out(struct i915_request *rq)
 #endif
 #endif
 
-TRACE_EVENT(intel_engine_notify,
-	    TP_PROTO(struct intel_engine_cs *engine, bool waiters),
-	    TP_ARGS(engine, waiters),
-
-	    TP_STRUCT__entry(
-			     __field(u32, dev)
-			     __field(u16, class)
-			     __field(u16, instance)
-			     __field(u32, seqno)
-			     __field(bool, waiters)
-			     ),
-
-	    TP_fast_assign(
-			   __entry->dev = engine->i915->drm.primary->index;
-			   __entry->class = engine->uabi_class;
-			   __entry->instance = engine->instance;
-			   __entry->seqno = intel_engine_get_seqno(engine);
-			   __entry->waiters = waiters;
-			   ),
-
-	    TP_printk("dev=%u, engine=%u:%u, seqno=%u, waiters=%u",
-		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->seqno, __entry->waiters)
-);
-
 DEFINE_EVENT(i915_request, i915_request_retire,
 	    TP_PROTO(struct i915_request *rq),
 	    TP_ARGS(rq)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread

* [PATCH 39/46] drm/i915: Always allocate an object/vma for the HWSP
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (36 preceding siblings ...)
  2019-01-07 11:55 ` [PATCH 38/46] drm/i915: Remove the intel_engine_notify tracepoint Chris Wilson
@ 2019-01-07 11:55 ` Chris Wilson
  2019-01-10 10:52   ` Matthew Auld
  2019-01-10 11:24   ` Matthew Auld
  2019-01-07 11:55 ` [PATCH 40/46] drm/i915: Move list of timelines under its own lock Chris Wilson
                   ` (12 subsequent siblings)
  50 siblings, 2 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:55 UTC (permalink / raw)
  To: intel-gfx

Currently we allocate an object and vma only when using a GGTT virtual
HWSP, and fall back to a plain struct page for a physical HWSP. For
convenience later on with global timelines, it will be useful to always
have the status page tracked by a struct i915_vma. Make it so.
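
A condensed sketch of the resulting init_status_page() flow (error
paths elided; not a verbatim extract from the patch below): the object,
vma and CPU mapping are always created, and only the GGTT binding stays
conditional:

	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);

	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
	engine->status_page.vma = vma;

	/* The physical HWSP keeps using the backing page directly */
	if (!HWS_NEEDS_PHYSICAL(engine->i915))
		ret = pin_ggtt_status_page(engine, vma);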

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c       | 109 ++++++++++---------
 drivers/gpu/drm/i915/intel_guc_submission.c  |   5 +
 drivers/gpu/drm/i915/intel_lrc.c             |  11 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c      |  20 +++-
 drivers/gpu/drm/i915/intel_ringbuffer.h      |  23 +---
 drivers/gpu/drm/i915/selftests/mock_engine.c |   2 +-
 6 files changed, 90 insertions(+), 80 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 1a9de4a01b9d..ffef7f43fda3 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -506,27 +506,61 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
 
 static void cleanup_status_page(struct intel_engine_cs *engine)
 {
+	struct i915_vma *vma;
+
 	/* Prevent writes into HWSP after returning the page to the system */
 	intel_engine_set_hwsp_writemask(engine, ~0u);
 
-	if (HWS_NEEDS_PHYSICAL(engine->i915)) {
-		void *addr = fetch_and_zero(&engine->status_page.page_addr);
+	vma = fetch_and_zero(&engine->status_page.vma);
+	if (!vma)
+		return;
 
-		__free_page(virt_to_page(addr));
-	}
+	if (!HWS_NEEDS_PHYSICAL(engine->i915))
+		i915_vma_unpin(vma);
+
+	i915_gem_object_unpin_map(vma->obj);
+	__i915_gem_object_release_unless_active(vma->obj);
+}
+
+static int pin_ggtt_status_page(struct intel_engine_cs *engine,
+				struct i915_vma *vma)
+{
+	unsigned int flags;
+
+	flags = PIN_GLOBAL;
+	if (!HAS_LLC(engine->i915))
+		/*
+		 * On g33, we cannot place HWS above 256MiB, so
+		 * restrict its pinning to the low mappable arena.
+		 * Though this restriction is not documented for
+		 * gen4, gen5, or byt, they also behave similarly
+		 * and hang if the HWS is placed at the top of the
+		 * GTT. To generalise, it appears that all !llc
+		 * platforms have issues with us placing the HWS
+		 * above the mappable region (even though we never
+		 * actually map it).
+		 */
+		flags |= PIN_MAPPABLE;
+	else
+		flags |= PIN_HIGH;
 
-	i915_vma_unpin_and_release(&engine->status_page.vma,
-				   I915_VMA_RELEASE_MAP);
+	return i915_vma_pin(vma, 0, 0, flags);
 }
 
 static int init_status_page(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
-	unsigned int flags;
 	void *vaddr;
 	int ret;
 
+	/*
+	 * Though the HWS register does support 36bit addresses, historically
+	 * we have had hangs and corruption reported due to wild writes if
+	 * the HWS is placed above 4G. We only allow objects to be allocated
+	 * in GFP_DMA32 for i965, and no earlier physical address users had
+	 * access to more than 4G.
+	 */
 	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		DRM_ERROR("Failed to allocate status page\n");
@@ -543,61 +577,30 @@ static int init_status_page(struct intel_engine_cs *engine)
 		goto err;
 	}
 
-	flags = PIN_GLOBAL;
-	if (!HAS_LLC(engine->i915))
-		/* On g33, we cannot place HWS above 256MiB, so
-		 * restrict its pinning to the low mappable arena.
-		 * Though this restriction is not documented for
-		 * gen4, gen5, or byt, they also behave similarly
-		 * and hang if the HWS is placed at the top of the
-		 * GTT. To generalise, it appears that all !llc
-		 * platforms have issues with us placing the HWS
-		 * above the mappable region (even though we never
-		 * actually map it).
-		 */
-		flags |= PIN_MAPPABLE;
-	else
-		flags |= PIN_HIGH;
-	ret = i915_vma_pin(vma, 0, 0, flags);
-	if (ret)
-		goto err;
-
 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 	if (IS_ERR(vaddr)) {
 		ret = PTR_ERR(vaddr);
-		goto err_unpin;
+		goto err;
 	}
 
+	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
 	engine->status_page.vma = vma;
-	engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
-	engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
+
+	if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
+		ret = pin_ggtt_status_page(engine, vma);
+		if (ret)
+			goto err_unpin;
+	}
+
 	return 0;
 
 err_unpin:
-	i915_vma_unpin(vma);
+	i915_gem_object_unpin_map(obj);
 err:
 	i915_gem_object_put(obj);
 	return ret;
 }
 
-static int init_phys_status_page(struct intel_engine_cs *engine)
-{
-	struct page *page;
-
-	/*
-	 * Though the HWS register does support 36bit addresses, historically
-	 * we have had hangs and corruption reported due to wild writes if
-	 * the HWS is placed above 4G.
-	 */
-	page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO);
-	if (!page)
-		return -ENOMEM;
-
-	engine->status_page.page_addr = page_address(page);
-
-	return 0;
-}
-
 static void __intel_context_unpin(struct i915_gem_context *ctx,
 				  struct intel_engine_cs *engine)
 {
@@ -650,10 +653,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	if (ret)
 		goto err_unpin_preempt;
 
-	if (HWS_NEEDS_PHYSICAL(i915))
-		ret = init_phys_status_page(engine);
-	else
-		ret = init_status_page(engine);
+	ret = init_status_page(engine);
 	if (ret)
 		goto err_breadcrumbs;
 
@@ -1318,7 +1318,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 	}
 
 	if (HAS_EXECLISTS(dev_priv)) {
-		const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+		const u32 *hws =
+			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 		unsigned int idx;
 		u8 read, write;
 
@@ -1501,7 +1502,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	spin_unlock_irqrestore(&b->rb_lock, flags);
 
 	drm_printf(m, "HWSP:\n");
-	hexdump(m, engine->status_page.page_addr, PAGE_SIZE);
+	hexdump(m, engine->status_page.addr, PAGE_SIZE);
 
 	drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
 }
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 7217c7e3ee8d..b044162a41d3 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -81,6 +81,11 @@
  *
  */
 
+static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
+{
+	return i915_ggtt_offset(engine->status_page.vma) + I915_GEM_HWS_PREEMPT_ADDR;
+}
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
 	return rb_entry(rb, struct i915_priolist, node);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4e3415b2daa0..3b512a54aacb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -173,6 +173,11 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring);
 
+static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
+{
+	return i915_ggtt_offset(engine->status_page.vma) + I915_GEM_HWS_INDEX_ADDR;
+}
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
 	return rb_entry(rb, struct i915_priolist, node);
@@ -1680,7 +1685,7 @@ static void enable_execlists(struct intel_engine_cs *engine)
 		   _MASKED_BIT_DISABLE(STOP_RING));
 
 	I915_WRITE(RING_HWS_PGA(engine->mmio_base),
-		   engine->status_page.ggtt_offset);
+		   i915_ggtt_offset(engine->status_page.vma));
 	POSTING_READ(RING_HWS_PGA(engine->mmio_base));
 }
 
@@ -2225,10 +2230,10 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 	}
 
 	execlists->csb_status =
-		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 
 	execlists->csb_write =
-		&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
+		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
 
 	reset_csb_pointers(execlists);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 9c664d910c02..8700f102f669 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -44,6 +44,11 @@
  */
 #define LEGACY_REQUEST_SIZE 200
 
+static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
+{
+	return i915_ggtt_offset(engine->status_page.vma) + I915_GEM_HWS_INDEX_ADDR;
+}
+
 static unsigned int __intel_ring_space(unsigned int head,
 				       unsigned int tail,
 				       unsigned int size)
@@ -500,12 +505,17 @@ static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
 	I915_WRITE(HWS_PGA, addr);
 }
 
-static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
+static struct page *status_page(struct intel_engine_cs *engine)
 {
-	struct page *page = virt_to_page(engine->status_page.page_addr);
-	phys_addr_t phys = PFN_PHYS(page_to_pfn(page));
+	struct drm_i915_gem_object *obj = engine->status_page.vma->obj;
 
-	set_hws_pga(engine, phys);
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+	return sg_page(obj->mm.pages->sgl);
+}
+
+static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
+{
+	set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
 	set_hwstam(engine, ~0u);
 }
 
@@ -572,7 +582,7 @@ static void flush_cs_tlb(struct intel_engine_cs *engine)
 
 static void ring_setup_status_page(struct intel_engine_cs *engine)
 {
-	set_hwsp(engine, engine->status_page.ggtt_offset);
+	set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
 	set_hwstam(engine, ~0u);
 
 	flush_cs_tlb(engine);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 88dda43aae35..c232549c188e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -32,8 +32,7 @@ struct i915_sched_attr;
 
 struct intel_hw_status_page {
 	struct i915_vma *vma;
-	u32 *page_addr;
-	u32 ggtt_offset;
+	u32 *addr;
 };
 
 #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
@@ -675,7 +674,7 @@ static inline u32
 intel_read_status_page(const struct intel_engine_cs *engine, int reg)
 {
 	/* Ensure that the compiler doesn't optimize away the load. */
-	return READ_ONCE(engine->status_page.page_addr[reg]);
+	return READ_ONCE(engine->status_page.addr[reg]);
 }
 
 static inline void
@@ -688,12 +687,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 	 */
 	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		mb();
-		clflush(&engine->status_page.page_addr[reg]);
-		engine->status_page.page_addr[reg] = value;
-		clflush(&engine->status_page.page_addr[reg]);
+		clflush(&engine->status_page.addr[reg]);
+		engine->status_page.addr[reg] = value;
+		clflush(&engine->status_page.addr[reg]);
 		mb();
 	} else {
-		WRITE_ONCE(engine->status_page.page_addr[reg], value);
+		WRITE_ONCE(engine->status_page.addr[reg], value);
 	}
 }
 
@@ -881,16 +880,6 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
 
-static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
-{
-	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
-}
-
-static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
-{
-	return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
-}
-
 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 8b8d51af7d6a..968a7e139a67 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -201,7 +201,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.i915 = i915;
 	snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
 	engine->base.id = id;
-	engine->base.status_page.page_addr = (void *)(engine + 1);
+	engine->base.status_page.addr = (void *)(engine + 1);
 
 	engine->base.context_pin = mock_context_pin;
 	engine->base.request_alloc = mock_request_alloc;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread

* [PATCH 40/46] drm/i915: Move list of timelines under its own lock
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (37 preceding siblings ...)
  2019-01-07 11:55 ` [PATCH 39/46] drm/i915: Always allocate an object/vma for the HWSP Chris Wilson
@ 2019-01-07 11:55 ` Chris Wilson
  2019-01-07 11:55 ` [PATCH 41/46] drm/i915: Introduce concept of per-timeline (context) HWSP Chris Wilson
                   ` (11 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:55 UTC (permalink / raw)
  To: intel-gfx

Currently, the list of timelines is serialised by the struct_mutex, but
to alleviate difficulties with using that mutex in future, move the
list management under its own dedicated mutex.
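
The only subtlety is in i915_gem_wait_for_idle(): we cannot wait on a
request while holding the new lock, so the patch below uses the
familiar take-reference, drop-lock, wait, retake-lock and restart-from-
the-head pattern. In sketch form:

	mutex_lock(&i915->gt.timeline_lock);
	list_for_each_entry(tl, &i915->gt.timelines, link) {
		rq = i915_gem_active_get_unlocked(&tl->last_request);
		if (!rq)
			continue;

		/* Waiting under the lock would invite inversions */
		mutex_unlock(&i915->gt.timeline_lock);
		timeout = i915_request_wait(rq, flags, timeout);
		i915_request_put(rq);
		mutex_lock(&i915->gt.timeline_lock);

		/* The list may have changed; restart from the head */
		tl = list_entry(&i915->gt.timelines, typeof(*tl), link);
	}
	mutex_unlock(&i915->gt.timeline_lock);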

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h               |  1 +
 drivers/gpu/drm/i915/i915_gem.c               | 89 +++++++++----------
 drivers/gpu/drm/i915/i915_gem_object.h        |  2 +-
 drivers/gpu/drm/i915/i915_timeline.c          | 27 +++++-
 drivers/gpu/drm/i915/i915_timeline.h          |  3 +
 drivers/gpu/drm/i915/i915_vma.c               |  6 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  7 +-
 .../gpu/drm/i915/selftests/mock_timeline.c    |  3 +-
 8 files changed, 83 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5ef7499166b4..4a8c45949c4d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1958,6 +1958,7 @@ struct drm_i915_private {
 		void (*resume)(struct drm_i915_private *);
 		void (*cleanup_engine)(struct intel_engine_cs *engine);
 
+		struct mutex timeline_lock;
 		struct list_head timelines;
 
 		struct list_head active_rings;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 37270ee360c8..09c7ded8f498 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3209,33 +3209,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	return ret;
 }
 
-static long wait_for_timeline(struct i915_timeline *tl,
-			      unsigned int flags, long timeout)
-{
-	struct i915_request *rq;
-
-	rq = i915_gem_active_get_unlocked(&tl->last_request);
-	if (!rq)
-		return timeout;
-
-	/*
-	 * "Race-to-idle".
-	 *
-	 * Switching to the kernel context is often used a synchronous
-	 * step prior to idling, e.g. in suspend for flushing all
-	 * current operations to memory before sleeping. These we
-	 * want to complete as quickly as possible to avoid prolonged
-	 * stalls, so allow the gpu to boost to maximum clocks.
-	 */
-	if (flags & I915_WAIT_FOR_IDLE_BOOST)
-		gen6_rps_boost(rq, NULL);
-
-	timeout = i915_request_wait(rq, flags, timeout);
-	i915_request_put(rq);
-
-	return timeout;
-}
-
 static int wait_for_engines(struct drm_i915_private *i915)
 {
 	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
@@ -3252,6 +3225,8 @@ static int wait_for_engines(struct drm_i915_private *i915)
 int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 			   unsigned int flags, long timeout)
 {
+	struct i915_timeline *tl;
+
 	GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
 		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
 		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
@@ -3260,17 +3235,45 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 	if (!READ_ONCE(i915->gt.awake))
 		return 0;
 
+	mutex_lock(&i915->gt.timeline_lock);
+	list_for_each_entry(tl, &i915->gt.timelines, link) {
+		struct i915_request *rq;
+
+		rq = i915_gem_active_get_unlocked(&tl->last_request);
+		if (!rq)
+			continue;
+
+		mutex_unlock(&i915->gt.timeline_lock);
+
+		/*
+		 * "Race-to-idle".
+		 *
+		 * Switching to the kernel context is often used a synchronous
+		 * step prior to idling, e.g. in suspend for flushing all
+		 * current operations to memory before sleeping. These we
+		 * want to complete as quickly as possible to avoid prolonged
+		 * stalls, so allow the gpu to boost to maximum clocks.
+		 */
+		if (flags & I915_WAIT_FOR_IDLE_BOOST)
+			gen6_rps_boost(rq, NULL);
+
+		timeout = i915_request_wait(rq, flags, timeout);
+		i915_request_put(rq);
+		if (timeout < 0)
+			return timeout;
+
+		mutex_lock(&i915->gt.timeline_lock);
+
+		/* restart after dropping the lock */
+		tl = list_entry(&i915->gt.timelines, typeof(*tl), link);
+	}
+	mutex_unlock(&i915->gt.timeline_lock);
+
 	if (flags & I915_WAIT_LOCKED) {
-		struct i915_timeline *tl;
 		int err;
 
 		lockdep_assert_held(&i915->drm.struct_mutex);
 
-		list_for_each_entry(tl, &i915->gt.timelines, link) {
-			timeout = wait_for_timeline(tl, flags, timeout);
-			if (timeout < 0)
-				return timeout;
-		}
 		if (GEM_SHOW_DEBUG() && !timeout) {
 			/* Presume that timeout was non-zero to begin with! */
 			dev_warn(&i915->drm.pdev->dev,
@@ -3284,17 +3287,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 
 		i915_retire_requests(i915);
 		GEM_BUG_ON(i915->gt.active_requests);
-	} else {
-		struct intel_engine_cs *engine;
-		enum intel_engine_id id;
-
-		for_each_engine(engine, i915, id) {
-			struct i915_timeline *tl = &engine->timeline;
-
-			timeout = wait_for_timeline(tl, flags, timeout);
-			if (timeout < 0)
-				return timeout;
-		}
 	}
 
 	return 0;
@@ -4995,6 +4987,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
 	}
 
+	i915_timelines_init(dev_priv);
+
 	ret = i915_gem_init_userptr(dev_priv);
 	if (ret)
 		return ret;
@@ -5117,8 +5111,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 err_uc_misc:
 	intel_uc_fini_misc(dev_priv);
 
-	if (ret != -EIO)
+	if (ret != -EIO) {
 		i915_gem_cleanup_userptr(dev_priv);
+		i915_timelines_fini(dev_priv);
+	}
 
 	if (ret == -EIO) {
 		mutex_lock(&dev_priv->drm.struct_mutex);
@@ -5169,6 +5165,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
 
 	intel_uc_fini_misc(dev_priv);
 	i915_gem_cleanup_userptr(dev_priv);
+	i915_timelines_fini(dev_priv);
 
 	i915_gem_drain_freed_objects(dev_priv);
 
@@ -5271,7 +5268,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 	if (!dev_priv->priorities)
 		goto err_dependencies;
 
-	INIT_LIST_HEAD(&dev_priv->gt.timelines);
 	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
 	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
 
@@ -5315,7 +5311,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
 	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
 	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
 	WARN_ON(dev_priv->mm.object_count);
-	WARN_ON(!list_empty(&dev_priv->gt.timelines));
 
 	kmem_cache_destroy(dev_priv->priorities);
 	kmem_cache_destroy(dev_priv->dependencies);
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 35de9e739104..2e881b8d9936 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -87,7 +87,7 @@ struct drm_i915_gem_object {
 
 	struct {
 		/**
-		 * @vma.lock: protect the list/tre of vmas
+		 * @vma.lock: protect the list/tree of vmas
 		 */
 		struct spinlock lock;
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 4667cc08c416..0434b5e0d3e1 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -13,8 +13,6 @@ void i915_timeline_init(struct drm_i915_private *i915,
 			struct i915_timeline *timeline,
 			const char *name)
 {
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
 	/*
 	 * Ideally we want a set of engines on a single leaf as we expect
 	 * to mostly be tracking synchronisation between engines. It is not
@@ -23,9 +21,12 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	 */
 	BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES);
 
+	timeline->i915 = i915;
 	timeline->name = name;
 
+	mutex_lock(&i915->gt.timeline_lock);
 	list_add(&timeline->link, &i915->gt.timelines);
+	mutex_unlock(&i915->gt.timeline_lock);
 
 	/* Called during early_init before we know how many engines there are */
 
@@ -39,6 +40,15 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	i915_syncmap_init(&timeline->sync);
 }
 
+void i915_timelines_init(struct drm_i915_private *i915)
+{
+	mutex_init(&i915->gt.timeline_lock);
+	INIT_LIST_HEAD(&i915->gt.timelines);
+
+	/* via i915_gem_wait_for_idle() */
+	i915_gem_shrinker_taints_mutex(i915, &i915->gt.timeline_lock);
+}
+
 /**
  * i915_timelines_park - called when the driver idles
  * @i915: the drm_i915_private device
@@ -53,8 +63,7 @@ void i915_timelines_park(struct drm_i915_private *i915)
 {
 	struct i915_timeline *timeline;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
+	mutex_lock(&i915->gt.timeline_lock);
 	list_for_each_entry(timeline, &i915->gt.timelines, link) {
 		/*
 		 * All known fences are completed so we can scrap
@@ -64,6 +73,7 @@ void i915_timelines_park(struct drm_i915_private *i915)
 		 */
 		i915_syncmap_free(&timeline->sync);
 	}
+	mutex_unlock(&i915->gt.timeline_lock);
 }
 
 void i915_timeline_fini(struct i915_timeline *timeline)
@@ -72,7 +82,9 @@ void i915_timeline_fini(struct i915_timeline *timeline)
 
 	i915_syncmap_free(&timeline->sync);
 
+	mutex_lock(&timeline->i915->gt.timeline_lock);
 	list_del(&timeline->link);
+	mutex_unlock(&timeline->i915->gt.timeline_lock);
 }
 
 struct i915_timeline *
@@ -99,6 +111,13 @@ void __i915_timeline_free(struct kref *kref)
 	kfree(timeline);
 }
 
+void i915_timelines_fini(struct drm_i915_private *i915)
+{
+	GEM_BUG_ON(!list_empty(&i915->gt.timelines));
+
+	mutex_destroy(&i915->gt.timeline_lock);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_timeline.c"
 #include "selftests/i915_timeline.c"
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 38c1e15e927a..87ad2dd31c20 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -66,6 +66,7 @@ struct i915_timeline {
 
 	struct list_head link;
 	const char *name;
+	struct drm_i915_private *i915;
 
 	struct kref kref;
 };
@@ -134,6 +135,8 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 	return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno);
 }
 
+void i915_timelines_init(struct drm_i915_private *i915);
 void i915_timelines_park(struct drm_i915_private *i915);
+void i915_timelines_fini(struct drm_i915_private *i915);
 
 #endif
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 3a680fe2bb8b..d83b8ad5f859 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -201,6 +201,11 @@ vma_create(struct drm_i915_gem_object *obj,
 		rb = *p;
 		pos = rb_entry(rb, struct i915_vma, obj_node);
 
+		/*
+		 * If the view already exists in the tree, another thread
+		 * already created a matching vma, so return the older instance
+		 * and dispose of ours.
+		 */
 		cmp = i915_vma_compare(pos, vm, view);
 		if (cmp == 0) {
 			spin_unlock(&obj->vma.lock);
@@ -294,6 +299,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 	vma = vma_lookup(obj, vm, view);
 	spin_unlock(&obj->vma.lock);
 
+	/* vma_create() will resolve the race if another creates the vma */
 	if (unlikely(!vma))
 		vma = vma_create(obj, vm, view);
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index e756cbd0b1f4..66a2e47ad888 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -68,13 +68,14 @@ static void mock_device_release(struct drm_device *dev)
 	i915_gem_contexts_fini(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
+	i915_timelines_fini(i915);
+
 	drain_workqueue(i915->wq);
 	i915_gem_drain_freed_objects(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
 	mock_fini_ggtt(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
-	WARN_ON(!list_empty(&i915->gt.timelines));
 
 	destroy_workqueue(i915->wq);
 
@@ -227,7 +228,8 @@ struct drm_i915_private *mock_gem_device(void)
 	if (!i915->priorities)
 		goto err_dependencies;
 
-	INIT_LIST_HEAD(&i915->gt.timelines);
+	i915_timelines_init(i915);
+
 	INIT_LIST_HEAD(&i915->gt.active_rings);
 	INIT_LIST_HEAD(&i915->gt.closed_vma);
 
@@ -254,6 +256,7 @@ struct drm_i915_private *mock_gem_device(void)
 	i915_gem_contexts_fini(i915);
 err_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
+	i915_timelines_fini(i915);
 	kmem_cache_destroy(i915->priorities);
 err_dependencies:
 	kmem_cache_destroy(i915->dependencies);
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index dcf3b16f5a07..cf39ccd9fc05 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -10,6 +10,7 @@
 
 void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 {
+	timeline->i915 = NULL;
 	timeline->fence_context = context;
 
 	spin_lock_init(&timeline->lock);
@@ -24,5 +25,5 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 
 void mock_timeline_fini(struct i915_timeline *timeline)
 {
-	i915_timeline_fini(timeline);
+	i915_syncmap_free(&timeline->sync);
 }
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread

* [PATCH 41/46] drm/i915: Introduce concept of per-timeline (context) HWSP
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (38 preceding siblings ...)
  2019-01-07 11:55 ` [PATCH 40/46] drm/i915: Move list of timelines under its own lock Chris Wilson
@ 2019-01-07 11:55 ` Chris Wilson
  2019-01-15  0:55   ` John Harrison
  2019-01-07 11:55 ` [PATCH 42/46] drm/i915: Enlarge vma->pin_count Chris Wilson
                   ` (10 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:55 UTC (permalink / raw)
  To: intel-gfx

Supplement the per-engine HWSP with a per-timeline HWSP. That is, a
per-request pointer through which we can check a local seqno,
abstracting away the presumption of a global seqno. In this first step,
we point each request back into the engine's HWSP so everything
continues to work with the global timeline.
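
Completion checks then reduce to reading the dword behind
rq->hwsp_seqno and comparing it with a wraparound-safe signed
subtraction, as i915_seqno_passed() does. A self-contained model of
that comparison (hypothetical values; builds with any C compiler):

	#include <stdint.h>
	#include <stdio.h>

	/* True if seq1 is at or after seq2, tolerant of u32 wraparound */
	static int seqno_passed(uint32_t seq1, uint32_t seq2)
	{
		return (int32_t)(seq1 - seq2) >= 0;
	}

	int main(void)
	{
		uint32_t hwsp = 2;	/* pretend HWSP readback after a wrap */

		printf("%d\n", seqno_passed(hwsp, 0xfffffffeu));	/* 1 */
		printf("%d\n", seqno_passed(hwsp, 5));			/* 0 */
		return 0;
	}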

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c | 16 +++++++++++-----
 drivers/gpu/drm/i915/i915_request.h | 16 +++++++++-------
 drivers/gpu/drm/i915/intel_lrc.c    |  9 ++++++---
 3 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c467392f62d7..3b69c62d040f 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -182,10 +182,11 @@ static void free_capture_list(struct i915_request *request)
 static void __retire_engine_request(struct intel_engine_cs *engine,
 				    struct i915_request *rq)
 {
-	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n",
+	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n",
 		  __func__, engine->name,
 		  rq->fence.context, rq->fence.seqno,
 		  rq->global_seqno,
+		  i915_request_hwsp(rq),
 		  intel_engine_get_seqno(engine));
 
 	GEM_BUG_ON(!i915_request_completed(rq));
@@ -244,10 +245,11 @@ static void i915_request_retire(struct i915_request *request)
 {
 	struct i915_gem_active *active, *next;
 
-	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
+	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
 		  request->engine->name,
 		  request->fence.context, request->fence.seqno,
 		  request->global_seqno,
+		  i915_request_hwsp(request),
 		  intel_engine_get_seqno(request->engine));
 
 	lockdep_assert_held(&request->i915->drm.struct_mutex);
@@ -307,10 +309,11 @@ void i915_request_retire_upto(struct i915_request *rq)
 	struct intel_ring *ring = rq->ring;
 	struct i915_request *tmp;
 
-	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
+	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
 		  rq->engine->name,
 		  rq->fence.context, rq->fence.seqno,
 		  rq->global_seqno,
+		  i915_request_hwsp(rq),
 		  intel_engine_get_seqno(rq->engine));
 
 	lockdep_assert_held(&rq->i915->drm.struct_mutex);
@@ -348,10 +351,11 @@ void __i915_request_submit(struct i915_request *request)
 	struct intel_engine_cs *engine = request->engine;
 	u32 seqno;
 
-	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n",
+	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n",
 		  engine->name,
 		  request->fence.context, request->fence.seqno,
 		  engine->timeline.seqno + 1,
+		  i915_request_hwsp(request),
 		  intel_engine_get_seqno(engine));
 
 	GEM_BUG_ON(!irqs_disabled());
@@ -398,10 +402,11 @@ void __i915_request_unsubmit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
 
-	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n",
+	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n",
 		  engine->name,
 		  request->fence.context, request->fence.seqno,
 		  request->global_seqno,
+		  i915_request_hwsp(request),
 		  intel_engine_get_seqno(engine));
 
 	GEM_BUG_ON(!irqs_disabled());
@@ -585,6 +590,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->ring = ce->ring;
 	rq->timeline = ce->ring->timeline;
 	GEM_BUG_ON(rq->timeline == &engine->timeline);
+	rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX];
 
 	spin_lock_init(&rq->lock);
 	dma_fence_init(&rq->fence,
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index d014b0605445..e2b209a26a8e 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -130,6 +130,8 @@ struct i915_request {
 	struct i915_sched_node sched;
 	struct i915_dependency dep;
 
+	const u32 *hwsp_seqno;
+
 	/**
 	 * GEM sequence number associated with this request on the
 	 * global execution timeline. It is zero when the request is not
@@ -280,11 +282,6 @@ long i915_request_wait(struct i915_request *rq,
 #define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
 #define I915_WAIT_FOR_IDLE_BOOST BIT(4)
 
-static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
-					    u32 seqno);
-static inline bool intel_engine_has_completed(struct intel_engine_cs *engine,
-					      u32 seqno);
-
 /**
  * Returns true if seq1 is later than seq2.
  */
@@ -293,6 +290,11 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
 	return (s32)(seq1 - seq2) >= 0;
 }
 
+static inline u32 i915_request_hwsp(const struct i915_request *rq)
+{
+	return READ_ONCE(*rq->hwsp_seqno);
+}
+
 /**
  * i915_request_started - check if the request has begun being executed
  * @rq: the request
@@ -310,14 +312,14 @@ static inline bool i915_request_started(const struct i915_request *rq)
 	if (!seqno) /* not yet submitted to HW */
 		return false;
 
-	return intel_engine_has_started(rq->engine, seqno);
+	return i915_seqno_passed(i915_request_hwsp(rq), seqno - 1);
 }
 
 static inline bool
 __i915_request_completed(const struct i915_request *rq, u32 seqno)
 {
 	GEM_BUG_ON(!seqno);
-	return intel_engine_has_completed(rq->engine, seqno) &&
+	return i915_seqno_passed(i915_request_hwsp(rq), seqno) &&
 		seqno == i915_request_global_seqno(rq);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3b512a54aacb..1df2a1868622 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -445,11 +445,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 			desc = execlists_update_context(rq);
 			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
 
-			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
+			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
 				  engine->name, n,
 				  port[n].context_id, count,
 				  rq->global_seqno,
 				  rq->fence.context, rq->fence.seqno,
+				  i915_request_hwsp(rq),
 				  intel_engine_get_seqno(engine),
 				  rq_prio(rq));
 		} else {
@@ -738,11 +739,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 	while (num_ports-- && port_isset(port)) {
 		struct i915_request *rq = port_request(port);
 
-		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n",
+		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n",
 			  rq->engine->name,
 			  (unsigned int)(port - execlists->port),
 			  rq->global_seqno,
 			  rq->fence.context, rq->fence.seqno,
+			  i915_request_hwsp(rq),
 			  intel_engine_get_seqno(rq->engine));
 
 		GEM_BUG_ON(!execlists->active);
@@ -966,12 +968,13 @@ static void process_csb(struct intel_engine_cs *engine)
 						EXECLISTS_ACTIVE_USER));
 
 		rq = port_unpack(port, &count);
-		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
+		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
 			  engine->name,
 			  port->context_id, count,
 			  rq ? rq->global_seqno : 0,
 			  rq ? rq->fence.context : 0,
 			  rq ? rq->fence.seqno : 0,
+			  rq ? i915_request_hwsp(rq) : 0,
 			  intel_engine_get_seqno(engine),
 			  rq ? rq_prio(rq) : 0);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread

* [PATCH 42/46] drm/i915: Enlarge vma->pin_count
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (39 preceding siblings ...)
  2019-01-07 11:55 ` [PATCH 41/46] drm/i915: Introduce concept of per-timeline (context) HWSP Chris Wilson
@ 2019-01-07 11:55 ` Chris Wilson
  2019-01-15 19:57   ` John Harrison
  2019-01-07 11:55 ` [PATCH 43/46] drm/i915: Allocate a status page for each timeline Chris Wilson
                   ` (9 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:55 UTC (permalink / raw)
  To: intel-gfx

Previously we only accommodated a vma being pinned by a small number of
users, with the worst case coming from the display engine. As such, we
used a small bitfield only large enough to allow the vma to be pinned
twice (for back/front buffers) in each scanout plane. Keeping the
maximum permissible pin_count small allows us to quickly catch a
potential leak. However, as we want to split a 4096B page into 64
different cachelines and pin each cacheline for use by a different
timeline, we will exceed the current maximum permissible vma->pin_count
and so the time has come to enlarge it.
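
For the arithmetic: the old worst case was 1 + 1 + 1 + 2*2 = 7
simultaneous pins, so a 4-bit field (mask 0xf) had a little headroom;
pinning one 64B cacheline per timeline out of a 4096B page means up to
4096/64 = 64 pins on one vma, hence the move to an 8-bit field (mask
0xff). A standalone model of the overflow trap (constants mirror the
patch; assert() stands in for GEM_BUG_ON(), and vma->flags is reduced
to a bare integer):

	#include <assert.h>
	#include <stdint.h>

	#define PIN_MASK	0xffu		/* low 8 bits: pin_count */
	#define PIN_OVERFLOW	(1u << 8)	/* must-be-zero guard bit */

	static void vma_pin(uint32_t *flags)
	{
		(*flags)++;				/* bumps the pin_count field */
		assert(!(*flags & PIN_OVERFLOW));	/* trips on the 256th pin */
	}

	int main(void)
	{
		uint32_t flags = 0;
		int i;

		for (i = 0; i < 64; i++)	/* 64 timeline pins now fit */
			vma_pin(&flags);
		assert((flags & PIN_MASK) == 64);
		return 0;
	}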

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.h | 26 +++++++++++++-------------
 drivers/gpu/drm/i915/i915_vma.h     | 28 +++++++++-------------------
 2 files changed, 22 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index bd679c8c56dd..03ade71b8d9a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -642,19 +642,19 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 
 /* Flags used by pin/bind&friends. */
 #define PIN_NONBLOCK		BIT_ULL(0)
-#define PIN_MAPPABLE		BIT_ULL(1)
-#define PIN_ZONE_4G		BIT_ULL(2)
-#define PIN_NONFAULT		BIT_ULL(3)
-#define PIN_NOEVICT		BIT_ULL(4)
-
-#define PIN_MBZ			BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */
-#define PIN_GLOBAL		BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */
-#define PIN_USER		BIT_ULL(7) /* I915_VMA_LOCAL_BIND */
-#define PIN_UPDATE		BIT_ULL(8)
-
-#define PIN_HIGH		BIT_ULL(9)
-#define PIN_OFFSET_BIAS		BIT_ULL(10)
-#define PIN_OFFSET_FIXED	BIT_ULL(11)
+#define PIN_NONFAULT		BIT_ULL(1)
+#define PIN_NOEVICT		BIT_ULL(2)
+#define PIN_MAPPABLE		BIT_ULL(3)
+#define PIN_ZONE_4G		BIT_ULL(4)
+#define PIN_HIGH		BIT_ULL(5)
+#define PIN_OFFSET_BIAS		BIT_ULL(6)
+#define PIN_OFFSET_FIXED	BIT_ULL(7)
+
+#define PIN_MBZ			BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */
+#define PIN_GLOBAL		BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */
+#define PIN_USER		BIT_ULL(10) /* I915_VMA_LOCAL_BIND */
+#define PIN_UPDATE		BIT_ULL(11)
+
 #define PIN_OFFSET_MASK		(-I915_GTT_PAGE_SIZE)
 
 #endif
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 7252abc73d3e..266b226ebef2 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -70,30 +70,20 @@ struct i915_vma {
 	 */
 	unsigned int open_count;
 	unsigned long flags;
-	/**
-	 * How many users have pinned this object in GTT space. The following
-	 * users can each hold at most one reference: pwrite/pread, execbuffer
-	 * (objects are not allowed multiple times for the same batchbuffer),
-	 * and the framebuffer code. When switching/pageflipping, the
-	 * framebuffer code has at most two buffers pinned per crtc.
-	 *
-	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
-	 * bits with absolutely no headroom. So use 4 bits.
-	 */
-#define I915_VMA_PIN_MASK 0xf
-#define I915_VMA_PIN_OVERFLOW	BIT(5)
+#define I915_VMA_PIN_MASK 0xff
+#define I915_VMA_PIN_OVERFLOW	BIT(8)
 
 	/** Flags and address space this VMA is bound to */
-#define I915_VMA_GLOBAL_BIND	BIT(6)
-#define I915_VMA_LOCAL_BIND	BIT(7)
+#define I915_VMA_GLOBAL_BIND	BIT(9)
+#define I915_VMA_LOCAL_BIND	BIT(10)
 #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
 
-#define I915_VMA_GGTT		BIT(8)
-#define I915_VMA_CAN_FENCE	BIT(9)
-#define I915_VMA_CLOSED		BIT(10)
-#define I915_VMA_USERFAULT_BIT	11
+#define I915_VMA_GGTT		BIT(11)
+#define I915_VMA_CAN_FENCE	BIT(12)
+#define I915_VMA_CLOSED		BIT(13)
+#define I915_VMA_USERFAULT_BIT	14
 #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
-#define I915_VMA_GGTT_WRITE	BIT(12)
+#define I915_VMA_GGTT_WRITE	BIT(15)
 
 	unsigned int active_count;
 	struct rb_root active;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread

* [PATCH 43/46] drm/i915: Allocate a status page for each timeline
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (40 preceding siblings ...)
  2019-01-07 11:55 ` [PATCH 42/46] drm/i915: Enlarge vma->pin_count Chris Wilson
@ 2019-01-07 11:55 ` Chris Wilson
  2019-01-15  0:56   ` John Harrison
  2019-01-07 11:55 ` [PATCH 44/46] drm/i915: Track the context's seqno in its own timeline HWSP Chris Wilson
                   ` (8 subsequent siblings)
  50 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:55 UTC (permalink / raw)
  To: intel-gfx

Allocate a page for use as a status page by a group of timelines; as we
only need a dword of storage for each (rounded up to a cacheline for
safety), we can pack multiple timelines into the same page. Each
timeline will then be able to track its own HW seqno.
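
The packing scheme is a tiny suballocator: one 4096B page holds 64
cachelines of 64B each, tracked by a single u64 bitmap in which a set
bit marks a free cacheline (the patch keeps this in gt.timeline_free
and scans it with find_first_bit() under gt.timeline_lock). A
standalone model of the bookkeeping (simplified; the driver also drops
an exhausted page and allocates a fresh one):

	#include <assert.h>
	#include <stdint.h>

	#define CACHELINE_BYTES	64

	static uint64_t timeline_free = ~0ull;	/* all 64 cachelines free */

	static int hwsp_alloc(void)	/* byte offset into the page, or -1 */
	{
		int bit;

		if (!timeline_free)
			return -1;	/* exhausted: grab a new page */

		bit = __builtin_ctzll(timeline_free);	/* find_first_bit() */
		timeline_free &= ~(1ull << bit);
		return bit * CACHELINE_BYTES;
	}

	static void hwsp_free(int offset)
	{
		timeline_free |= 1ull << (offset / CACHELINE_BYTES);
	}

	int main(void)
	{
		int a = hwsp_alloc(), b = hwsp_alloc();

		assert(a == 0 && b == CACHELINE_BYTES);
		hwsp_free(a);
		assert(hwsp_alloc() == 0);	/* freed slot is reused */
		return 0;
	}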

v2: Reuse the common per-engine HWSP for the solitary ringbuffer
timeline, so that we do not have to emit (using per-gen specialised
vfuncs) the breadcrumb into the distinct timeline HWSP and can instead
keep on using the common MI_STORE_DWORD_INDEX. However, to maintain the
sleight-of-hand for the global/per-context seqno switchover, we will
store both temporarily (and so use a custom offset for the shared
timeline HWSP until the switchover).
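
On that custom offset: the HWS is addressed by dword index, so assuming
MI_STORE_DWORD_INDEX_SHIFT is 2 (dword index to byte offset), the
temporary shared seqno slot at index 0x40 lands at byte 0x100, and the
scratch area moves up to index 0x80 (byte 0x200) to make room, matching
the intel_ringbuffer.h hunk below:

	#define MI_STORE_DWORD_INDEX_SHIFT	2	/* dwords -> bytes */

	#define I915_GEM_HWS_SEQNO		0x40
	#define I915_GEM_HWS_SEQNO_ADDR \
		(I915_GEM_HWS_SEQNO << MI_STORE_DWORD_INDEX_SHIFT)	   /* 0x100 */

	#define I915_GEM_HWS_SCRATCH_INDEX	0x80
	#define I915_GEM_HWS_SCRATCH_ADDR \
		(I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) /* 0x200 */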

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h               |   4 +
 drivers/gpu/drm/i915/i915_timeline.c          | 159 +++++++-
 drivers/gpu/drm/i915/i915_timeline.h          |  21 +-
 drivers/gpu/drm/i915/intel_engine_cs.c        |  64 +--
 drivers/gpu/drm/i915/intel_lrc.c              |  22 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c       |  10 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h       |   6 +-
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 .../drm/i915/selftests/i915_mock_selftests.h  |   2 +-
 .../gpu/drm/i915/selftests/i915_timeline.c    | 373 +++++++++++++++++-
 drivers/gpu/drm/i915/selftests/mock_engine.c  |  17 +-
 11 files changed, 626 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4a8c45949c4d..a0009e7fe05a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1961,6 +1961,10 @@ struct drm_i915_private {
 		struct mutex timeline_lock;
 		struct list_head timelines;
 
+		/* Pack multiple timelines' seqnos into the same page */
+		struct i915_vma *timeline_hwsp;
+		u64 timeline_free;
+
 		struct list_head active_rings;
 		struct list_head closed_vma;
 		u32 active_requests;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 0434b5e0d3e1..73ad951c74d1 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -9,10 +9,75 @@
 #include "i915_timeline.h"
 #include "i915_syncmap.h"
 
-void i915_timeline_init(struct drm_i915_private *i915,
-			struct i915_timeline *timeline,
-			const char *name)
+#define NBITS BITS_PER_TYPE(typeof(i915->gt.timeline_free))
+
+static int find_first_cacheline(struct drm_i915_private *i915)
+{
+	return find_first_bit((unsigned long *)&i915->gt.timeline_free, NBITS);
+}
+
+static int alloc_hwsp(struct i915_timeline *timeline)
+{
+	struct drm_i915_private *i915 = timeline->i915;
+	struct i915_vma *vma;
+	int offset;
+
+	mutex_lock(&i915->gt.timeline_lock);
+
+restart:
+	offset = find_first_cacheline(i915);
+	if (offset == NBITS && i915->gt.timeline_hwsp) {
+		i915_vma_put(i915->gt.timeline_hwsp);
+		i915->gt.timeline_hwsp = NULL;
+	}
+
+	vma = i915->gt.timeline_hwsp;
+	if (!vma) {
+		struct drm_i915_gem_object *bo;
+
+		/* Drop the lock before allocations */
+		mutex_unlock(&i915->gt.timeline_lock);
+
+		BUILD_BUG_ON(NBITS * CACHELINE_BYTES > PAGE_SIZE);
+		bo = i915_gem_object_create_internal(i915, PAGE_SIZE);
+		if (IS_ERR(bo))
+			return PTR_ERR(bo);
+
+		i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
+
+		vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL);
+		if (IS_ERR(vma))
+			return PTR_ERR(vma);
+
+		mutex_lock(&i915->gt.timeline_lock);
+		if (i915->gt.timeline_hwsp) {
+			i915_gem_object_put(bo);
+			goto restart;
+		}
+
+		i915->gt.timeline_hwsp = vma;
+		i915->gt.timeline_free = ~0ull;
+		offset = 0;
+	}
+
+	i915->gt.timeline_free &= ~BIT_ULL(offset);
+
+	timeline->hwsp_ggtt = i915_vma_get(vma);
+	timeline->hwsp_offset = offset * CACHELINE_BYTES;
+
+	mutex_unlock(&i915->gt.timeline_lock);
+
+	return 0;
+}
+
+int i915_timeline_init(struct drm_i915_private *i915,
+		       struct i915_timeline *timeline,
+		       const char *name,
+		       struct i915_vma *global_hwsp)
 {
+	void *vaddr;
+	int err;
+
 	/*
 	 * Ideally we want a set of engines on a single leaf as we expect
 	 * to mostly be tracking synchronisation between engines. It is not
@@ -23,10 +88,27 @@ void i915_timeline_init(struct drm_i915_private *i915,
 
 	timeline->i915 = i915;
 	timeline->name = name;
+	timeline->pin_count = 0;
 
-	mutex_lock(&i915->gt.timeline_lock);
-	list_add(&timeline->link, &i915->gt.timelines);
-	mutex_unlock(&i915->gt.timeline_lock);
+	if (global_hwsp) {
+		timeline->hwsp_ggtt = i915_vma_get(global_hwsp);
+		timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
+	} else {
+		err = alloc_hwsp(timeline);
+		if (err)
+			return err;
+	}
+
+	vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) { /* leak the cacheline, but will clean up later */
+		i915_vma_put(timeline->hwsp_ggtt);
+		return PTR_ERR(vaddr);
+	}
+
+	timeline->hwsp_seqno =
+		memset(vaddr + timeline->hwsp_offset,
+		       0,
+		       sizeof(*timeline->hwsp_seqno));
 
 	/* Called during early_init before we know how many engines there are */
 
@@ -38,6 +120,12 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
+
+	mutex_lock(&i915->gt.timeline_lock);
+	list_add(&timeline->link, &i915->gt.timelines);
+	mutex_unlock(&i915->gt.timeline_lock);
+
+	return 0;
 }
 
 void i915_timelines_init(struct drm_i915_private *i915)
@@ -78,30 +166,75 @@ void i915_timelines_park(struct drm_i915_private *i915)
 
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
+	struct drm_i915_private *i915 = timeline->i915;
+
+	GEM_BUG_ON(timeline->pin_count);
 	GEM_BUG_ON(!list_empty(&timeline->requests));
 
 	i915_syncmap_free(&timeline->sync);
 
-	mutex_lock(&timeline->i915->gt.timeline_lock);
+	mutex_lock(&i915->gt.timeline_lock);
 	list_del(&timeline->link);
-	mutex_unlock(&timeline->i915->gt.timeline_lock);
+	if (timeline->hwsp_ggtt == i915->gt.timeline_hwsp)
+		i915->gt.timeline_free |=
+			BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
+	mutex_unlock(&i915->gt.timeline_lock);
+
+	i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
+	i915_vma_put(timeline->hwsp_ggtt);
 }
 
 struct i915_timeline *
-i915_timeline_create(struct drm_i915_private *i915, const char *name)
+i915_timeline_create(struct drm_i915_private *i915,
+		     const char *name,
+		     struct i915_vma *global_hwsp)
 {
 	struct i915_timeline *timeline;
+	int err;
 
 	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
 	if (!timeline)
 		return ERR_PTR(-ENOMEM);
 
-	i915_timeline_init(i915, timeline, name);
+	err = i915_timeline_init(i915, timeline, name, global_hwsp);
+	if (err) {
+		kfree(timeline);
+		return ERR_PTR(err);
+	}
+
 	kref_init(&timeline->kref);
 
 	return timeline;
 }
 
+int i915_timeline_pin(struct i915_timeline *tl)
+{
+	int err;
+
+	if (tl->pin_count++)
+		return 0;
+	GEM_BUG_ON(!tl->pin_count);
+
+	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	if (err)
+		goto unpin;
+
+	return 0;
+
+unpin:
+	tl->pin_count = 0;
+	return err;
+}
+
+void i915_timeline_unpin(struct i915_timeline *tl)
+{
+	GEM_BUG_ON(!tl->pin_count);
+	if (--tl->pin_count)
+		return;
+
+	__i915_vma_unpin(tl->hwsp_ggtt);
+}
+
 void __i915_timeline_free(struct kref *kref)
 {
 	struct i915_timeline *timeline =
@@ -113,8 +246,14 @@ void __i915_timeline_free(struct kref *kref)
 
 void i915_timelines_fini(struct drm_i915_private *i915)
 {
+	struct i915_vma *vma;
+
 	GEM_BUG_ON(!list_empty(&i915->gt.timelines));
 
+	vma = fetch_and_zero(&i915->gt.timeline_hwsp);
+	if (vma)
+		i915_vma_put(vma);
+
 	mutex_destroy(&i915->gt.timeline_lock);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 87ad2dd31c20..0c3739d53d79 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -32,6 +32,8 @@
 #include "i915_syncmap.h"
 #include "i915_utils.h"
 
+struct i915_vma;
+
 struct i915_timeline {
 	u64 fence_context;
 	u32 seqno;
@@ -40,6 +42,11 @@ struct i915_timeline {
 #define TIMELINE_CLIENT 0 /* default subclass */
 #define TIMELINE_ENGINE 1
 
+	unsigned int pin_count;
+	const u32 *hwsp_seqno;
+	struct i915_vma *hwsp_ggtt;
+	u32 hwsp_offset;
+
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
 	 * outstanding.
@@ -71,9 +78,10 @@ struct i915_timeline {
 	struct kref kref;
 };
 
-void i915_timeline_init(struct drm_i915_private *i915,
-			struct i915_timeline *tl,
-			const char *name);
+int i915_timeline_init(struct drm_i915_private *i915,
+		       struct i915_timeline *tl,
+		       const char *name,
+		       struct i915_vma *hwsp);
 void i915_timeline_fini(struct i915_timeline *tl);
 
 static inline void
@@ -96,7 +104,9 @@ i915_timeline_set_subclass(struct i915_timeline *timeline,
 }
 
 struct i915_timeline *
-i915_timeline_create(struct drm_i915_private *i915, const char *name);
+i915_timeline_create(struct drm_i915_private *i915,
+		     const char *name,
+		     struct i915_vma *global_hwsp);
 
 static inline struct i915_timeline *
 i915_timeline_get(struct i915_timeline *timeline)
@@ -135,6 +145,9 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 	return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno);
 }
 
+int i915_timeline_pin(struct i915_timeline *tl);
+void i915_timeline_unpin(struct i915_timeline *tl);
+
 void i915_timelines_init(struct drm_i915_private *i915);
 void i915_timelines_park(struct drm_i915_private *i915);
 void i915_timelines_fini(struct drm_i915_private *i915);
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index ffef7f43fda3..f168f2fee979 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -484,26 +484,6 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 	execlists->queue = RB_ROOT_CACHED;
 }
 
-/**
- * intel_engines_setup_common - setup engine state not requiring hw access
- * @engine: Engine to setup.
- *
- * Initializes @engine@ structure members shared between legacy and execlists
- * submission modes which do not require hardware access.
- *
- * Typically done early in the submission mode specific engine setup stage.
- */
-void intel_engine_setup_common(struct intel_engine_cs *engine)
-{
-	i915_timeline_init(engine->i915, &engine->timeline, engine->name);
-	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
-
-	intel_engine_init_execlist(engine);
-	intel_engine_init_hangcheck(engine);
-	intel_engine_init_batch_pool(engine);
-	intel_engine_init_cmd_parser(engine);
-}
-
 static void cleanup_status_page(struct intel_engine_cs *engine)
 {
 	struct i915_vma *vma;
@@ -601,6 +581,44 @@ static int init_status_page(struct intel_engine_cs *engine)
 	return ret;
 }
 
+/**
+ * intel_engines_setup_common - setup engine state not requiring hw access
+ * @engine: Engine to setup.
+ *
+ * Initializes @engine@ structure members shared between legacy and execlists
+ * submission modes which do not require hardware access.
+ *
+ * Typically done early in the submission mode specific engine setup stage.
+ */
+int intel_engine_setup_common(struct intel_engine_cs *engine)
+{
+	int err;
+
+	err = init_status_page(engine);
+	if (err)
+		return err;
+
+	err = i915_timeline_init(engine->i915,
+				 &engine->timeline,
+				 engine->name,
+				 engine->status_page.vma);
+	if (err)
+		goto err_hwsp;
+
+	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
+
+	intel_engine_init_execlist(engine);
+	intel_engine_init_hangcheck(engine);
+	intel_engine_init_batch_pool(engine);
+	intel_engine_init_cmd_parser(engine);
+
+	return 0;
+
+err_hwsp:
+	cleanup_status_page(engine);
+	return err;
+}
+
 static void __intel_context_unpin(struct i915_gem_context *ctx,
 				  struct intel_engine_cs *engine)
 {
@@ -653,14 +671,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	if (ret)
 		goto err_unpin_preempt;
 
-	ret = init_status_page(engine);
-	if (ret)
-		goto err_breadcrumbs;
-
 	return 0;
 
-err_breadcrumbs:
-	intel_engine_fini_breadcrumbs(engine);
 err_unpin_preempt:
 	if (i915->preempt_context)
 		__intel_context_unpin(i915->preempt_context, engine);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1df2a1868622..5927ef124bf9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2188,10 +2188,14 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
 }
 
-static void
+static int
 logical_ring_setup(struct intel_engine_cs *engine)
 {
-	intel_engine_setup_common(engine);
+	int err;
+
+	err = intel_engine_setup_common(engine);
+	if (err)
+		return err;
 
 	/* Intentionally left blank. */
 	engine->buffer = NULL;
@@ -2201,6 +2205,8 @@ logical_ring_setup(struct intel_engine_cs *engine)
 
 	logical_ring_default_vfuncs(engine);
 	logical_ring_default_irqs(engine);
+
+	return 0;
 }
 
 static int logical_ring_init(struct intel_engine_cs *engine)
@@ -2248,7 +2254,9 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 	struct drm_i915_private *dev_priv = engine->i915;
 	int ret;
 
-	logical_ring_setup(engine);
+	ret = logical_ring_setup(engine);
+	if (ret)
+		return ret;
 
 	if (HAS_L3_DPF(dev_priv))
 		engine->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
@@ -2282,7 +2290,11 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 
 int logical_xcs_ring_init(struct intel_engine_cs *engine)
 {
-	logical_ring_setup(engine);
+	int err;
+
+	err = logical_ring_setup(engine);
+	if (err)
+		return err;
 
 	return logical_ring_init(engine);
 }
@@ -2616,7 +2628,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 		goto error_deref_obj;
 	}
 
-	timeline = i915_timeline_create(ctx->i915, ctx->name);
+	timeline = i915_timeline_create(ctx->i915, ctx->name, NULL);
 	if (IS_ERR(timeline)) {
 		ret = PTR_ERR(timeline);
 		goto error_deref_obj;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 8700f102f669..24bd6f5b4f57 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1540,9 +1540,13 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 	struct intel_ring *ring;
 	int err;
 
-	intel_engine_setup_common(engine);
+	err = intel_engine_setup_common(engine);
+	if (err)
+		return err;
 
-	timeline = i915_timeline_create(engine->i915, engine->name);
+	timeline = i915_timeline_create(engine->i915,
+					engine->name,
+					engine->status_page.vma);
 	if (IS_ERR(timeline)) {
 		err = PTR_ERR(timeline);
 		goto err;
@@ -1566,6 +1570,8 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 	if (err)
 		goto err_unpin;
 
+	GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma);
+
 	return 0;
 
 err_unpin:
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c232549c188e..e6ec96e0ab56 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -716,7 +716,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 #define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 #define I915_GEM_HWS_PREEMPT_INDEX	0x32
 #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
-#define I915_GEM_HWS_SCRATCH_INDEX	0x40
+#define I915_GEM_HWS_SEQNO		0x40
+#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO << MI_STORE_DWORD_INDEX_SHIFT)
+#define I915_GEM_HWS_SCRATCH_INDEX	0x80
 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 
 #define I915_HWS_CSB_BUF0_INDEX		0x10
@@ -822,7 +824,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
 
 void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);
 
-void intel_engine_setup_common(struct intel_engine_cs *engine);
+int intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index a15713cae3b3..76b4f87fc853 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -13,6 +13,7 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */
 selftest(uncore, intel_uncore_live_selftests)
 selftest(workarounds, intel_workarounds_live_selftests)
 selftest(requests, i915_request_live_selftests)
+selftest(timelines, i915_timeline_live_selftests)
 selftest(objects, i915_gem_object_live_selftests)
 selftest(dmabuf, i915_gem_dmabuf_live_selftests)
 selftest(coherency, i915_gem_coherency_live_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 1b70208eeea7..4a83a1c6c406 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -16,7 +16,7 @@ selftest(syncmap, i915_syncmap_mock_selftests)
 selftest(uncore, intel_uncore_mock_selftests)
 selftest(engine, intel_engine_cs_mock_selftests)
 selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
-selftest(timelines, i915_gem_timeline_mock_selftests)
+selftest(timelines, i915_timeline_mock_selftests)
 selftest(requests, i915_request_mock_selftests)
 selftest(objects, i915_gem_object_mock_selftests)
 selftest(dmabuf, i915_gem_dmabuf_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c
index 19f1c6a5c8fb..d13779808200 100644
--- a/drivers/gpu/drm/i915/selftests/i915_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c
@@ -256,7 +256,7 @@ static int bench_sync(void *arg)
 	return 0;
 }
 
-int i915_gem_timeline_mock_selftests(void)
+int i915_timeline_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_sync),
@@ -265,3 +265,374 @@ int i915_gem_timeline_mock_selftests(void)
 
 	return i915_subtests(tests, NULL);
 }
+
+static int live_hwsp_engine(void *arg)
+{
+#define NUM_TIMELINES 4096
+	struct drm_i915_private *i915 = arg;
+	struct i915_timeline **timelines;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	unsigned long count, n;
+	int err = 0;
+
+	/*
+	 * Create a bunch of timelines and check we can write
+	 * independently to each of their breadcrumb slots.
+	 */
+
+	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
+				   sizeof(*timelines),
+				   GFP_KERNEL);
+	if (!timelines)
+		return -ENOMEM;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	count = 0;
+	for_each_engine(engine, i915, id) {
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		for (n = 0; n < NUM_TIMELINES; n++) {
+			struct i915_timeline *tl;
+			struct i915_request *rq;
+			u32 addr;
+			u32 *cs;
+
+			tl = i915_timeline_create(i915, "live", NULL);
+			if (IS_ERR(tl)) {
+				err = PTR_ERR(tl);
+				goto out;
+			}
+
+			if (*tl->hwsp_seqno) {
+				pr_err("Timeline %lu created with non-zero breadcrumb, found %x\n",
+				       count, *tl->hwsp_seqno);
+				err = -EINVAL;
+				i915_timeline_put(tl);
+				goto out;
+			}
+
+			err = i915_timeline_pin(tl);
+			if (err) {
+				i915_timeline_put(tl);
+				goto out;
+			}
+
+			rq = i915_request_alloc(engine, i915->kernel_context);
+			if (IS_ERR(rq)) {
+				i915_timeline_unpin(tl);
+				i915_timeline_put(tl);
+				err = PTR_ERR(rq);
+				goto out;
+			}
+
+			cs = intel_ring_begin(rq, 4);
+			if (IS_ERR(cs)) {
+				i915_request_add(rq);
+				i915_timeline_unpin(tl);
+				i915_timeline_put(tl);
+				err = PTR_ERR(cs);
+				goto out;
+			}
+
+			addr = i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset;
+
+			if (INTEL_GEN(i915) >= 8) {
+				*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+				*cs++ = addr;
+				*cs++ = 0;
+				*cs++ = count;
+			} else if (INTEL_GEN(i915) >= 4) {
+				*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+				*cs++ = 0;
+				*cs++ = addr;
+				*cs++ = count;
+			} else {
+				*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+				*cs++ = addr;
+				*cs++ = count;
+				*cs++ = MI_NOOP;
+			}
+			intel_ring_advance(rq, cs);
+
+			i915_request_add(rq);
+			i915_timeline_unpin(tl);
+
+			timelines[count++] = tl;
+		}
+	}
+
+	err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT);
+
+out:
+	for (n = 0; n < count; n++) {
+		struct i915_timeline *tl = timelines[n];
+
+		if (!err && *tl->hwsp_seqno != n) {
+			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
+			       n, *tl->hwsp_seqno);
+			err = -EINVAL;
+		}
+		i915_timeline_put(tl);
+	}
+
+	intel_runtime_pm_put(i915, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	kvfree(timelines);
+
+	return err;
+#undef NUM_TIMELINES
+}
+
+static int live_hwsp_alternate(void *arg)
+{
+#define NUM_TIMELINES 4096
+	struct drm_i915_private *i915 = arg;
+	struct i915_timeline **timelines;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	unsigned long count, n;
+	int err = 0;
+
+	/*
+	 * Create a bunch of timelines and check we can write
+	 * independently to each of their breadcrumb slots with adjacent
+	 * engines.
+	 */
+
+	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
+				   sizeof(*timelines),
+				   GFP_KERNEL);
+	if (!timelines)
+		return -ENOMEM;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	count = 0;
+	for (n = 0; n < NUM_TIMELINES; n++) {
+		for_each_engine(engine, i915, id) {
+			struct i915_timeline *tl;
+			struct i915_request *rq;
+			u32 addr;
+			u32 *cs;
+
+			if (!intel_engine_can_store_dword(engine))
+				continue;
+
+			tl = i915_timeline_create(i915, "live", NULL);
+			if (IS_ERR(tl)) {
+				err = PTR_ERR(tl);
+				goto out;
+			}
+
+			if (*tl->hwsp_seqno) {
+				pr_err("Timeline %lu created with non-zero breadcrumb, found %x\n",
+				       count, *tl->hwsp_seqno);
+				err = -EINVAL;
+				i915_timeline_put(tl);
+				goto out;
+			}
+
+			err = i915_timeline_pin(tl);
+			if (err) {
+				i915_timeline_put(tl);
+				goto out;
+			}
+
+			rq = i915_request_alloc(engine, i915->kernel_context);
+			if (IS_ERR(rq)) {
+				i915_timeline_unpin(tl);
+				i915_timeline_put(tl);
+				err = PTR_ERR(rq);
+				goto out;
+			}
+
+			cs = intel_ring_begin(rq, 4);
+			if (IS_ERR(cs)) {
+				i915_request_add(rq);
+				i915_timeline_unpin(tl);
+				i915_timeline_put(tl);
+				err = PTR_ERR(cs);
+				goto out;
+			}
+
+			addr = i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset;
+
+			if (INTEL_GEN(i915) >= 8) {
+				*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+				*cs++ = addr;
+				*cs++ = 0;
+				*cs++ = count;
+			} else if (INTEL_GEN(i915) >= 4) {
+				*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+				*cs++ = 0;
+				*cs++ = addr;
+				*cs++ = count;
+			} else {
+				*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+				*cs++ = addr;
+				*cs++ = count;
+				*cs++ = MI_NOOP;
+			}
+			intel_ring_advance(rq, cs);
+
+			i915_request_add(rq);
+			i915_timeline_unpin(tl);
+
+			timelines[count++] = tl;
+		}
+	}
+
+	err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT);
+
+out:
+	for (n = 0; n < count; n++) {
+		struct i915_timeline *tl = timelines[n];
+
+		if (!err && *tl->hwsp_seqno != n) {
+			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
+			       n, *tl->hwsp_seqno);
+			err = -EINVAL;
+		}
+		i915_timeline_put(tl);
+	}
+
+	intel_runtime_pm_put(i915, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	kvfree(timelines);
+
+	return err;
+#undef NUM_TIMELINES
+}
+
+static int live_hwsp_recycle(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	unsigned long count;
+	int err = 0;
+
+	/*
+	 * Check seqno writes into one timeline at a time. We expect to
+	 * recycle the breadcrumb slot between iterations and neither
+	 * want to confuse ourselves or the GPU.
+	 */
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	count = 0;
+	for_each_engine(engine, i915, id) {
+		IGT_TIMEOUT(end_time);
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		do {
+			struct i915_timeline *tl;
+			struct i915_request *rq;
+			u32 addr;
+			u32 *cs;
+
+			tl = i915_timeline_create(i915, "live", NULL);
+			if (IS_ERR(tl)) {
+				err = PTR_ERR(tl);
+				goto out;
+			}
+
+			if (*tl->hwsp_seqno) {
+				pr_err("Timeline %lu created with non-zero breadcrumb, found %x\n",
+				       count, *tl->hwsp_seqno);
+				err = -EINVAL;
+				i915_timeline_put(tl);
+				goto out;
+			}
+
+			err = i915_timeline_pin(tl);
+			if (err) {
+				i915_timeline_put(tl);
+				goto out;
+			}
+
+			rq = i915_request_alloc(engine, i915->kernel_context);
+			if (IS_ERR(rq)) {
+				i915_timeline_unpin(tl);
+				i915_timeline_put(tl);
+				err = PTR_ERR(rq);
+				goto out;
+			}
+
+			cs = intel_ring_begin(rq, 4);
+			if (IS_ERR(cs)) {
+				i915_request_add(rq);
+				i915_timeline_unpin(tl);
+				i915_timeline_put(tl);
+				err = PTR_ERR(cs);
+				goto out;
+			}
+
+			addr = i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset;
+
+			if (INTEL_GEN(i915) >= 8) {
+				*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+				*cs++ = addr;
+				*cs++ = 0;
+				*cs++ = count;
+			} else if (INTEL_GEN(i915) >= 4) {
+				*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+				*cs++ = 0;
+				*cs++ = addr;
+				*cs++ = count;
+			} else {
+				*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+				*cs++ = addr;
+				*cs++ = count;
+				*cs++ = MI_NOOP;
+			}
+			intel_ring_advance(rq, cs);
+
+			i915_request_add(rq);
+			i915_timeline_unpin(tl);
+
+			i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT);
+			if (*tl->hwsp_seqno != count) {
+				pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
+				       count, *tl->hwsp_seqno);
+				err = -EINVAL;
+			}
+
+			i915_timeline_put(tl);
+			count++;
+
+			if (err)
+				goto out;
+		} while (!__igt_timeout(end_time, NULL));
+	}
+
+out:
+	intel_runtime_pm_put(i915, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	return err;
+}
+
+int i915_timeline_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_hwsp_recycle),
+		SUBTEST(live_hwsp_engine),
+		SUBTEST(live_hwsp_alternate),
+	};
+
+	return i915_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 968a7e139a67..acd27c7e807b 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -34,12 +34,20 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 {
 	const unsigned long sz = PAGE_SIZE / 2;
 	struct mock_ring *ring;
+	int err;
 
 	ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
 	if (!ring)
 		return NULL;
 
-	i915_timeline_init(engine->i915, &ring->timeline, engine->name);
+	err = i915_timeline_init(engine->i915,
+				 &ring->timeline,
+				 engine->name,
+				 NULL);
+	if (err) {
+		kfree(ring);
+		return NULL;
+	}
 
 	ring->base.size = sz;
 	ring->base.effective_size = sz;
@@ -209,7 +217,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.emit_breadcrumb = mock_emit_breadcrumb;
 	engine->base.submit_request = mock_submit_request;
 
-	i915_timeline_init(i915, &engine->base.timeline, engine->base.name);
+	if (i915_timeline_init(i915,
+			       &engine->base.timeline,
+			       engine->base.name,
+			       NULL))
+		goto err_free;
 	i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE);
 
 	intel_engine_init_breadcrumbs(&engine->base);
@@ -227,6 +239,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 err_breadcrumbs:
 	intel_engine_fini_breadcrumbs(&engine->base);
 	i915_timeline_fini(&engine->base.timeline);
+err_free:
 	kfree(engine);
 	return NULL;
 }
-- 
2.20.1

* [PATCH 44/46] drm/i915: Track the context's seqno in its own timeline HWSP
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (41 preceding siblings ...)
  2019-01-07 11:55 ` [PATCH 43/46] drm/i915: Allocate a status page for each timeline Chris Wilson
@ 2019-01-07 11:55 ` Chris Wilson
  2019-01-07 11:55 ` [PATCH 45/46] drm/i915: Identify active requests Chris Wilson
                   ` (7 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:55 UTC (permalink / raw)
  To: intel-gfx

Now that we have allocated ourselves a cacheline to store a breadcrumb,
we can have the GPU write the per-context seqno into the timeline's HWSP
as each request completes. This drops the mirroring of the per-engine
HWSP and allows each context to operate independently. We do not need to
unwind the per-context timeline, and so requests are always consistent
with the timeline breadcrumb, greatly simplifying the completion checks
as we no longer need to be concerned about the global_seqno changing
mid-check.

At this point, we are emitting both per-context and global seqno and
still using the single per-engine execution timeline for resolving
interrupts.
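
For illustration, the completion check now reduces to a single wrap-safe
comparison against the context's own HWSP slot; a minimal sketch using
the existing i915_seqno_passed() helper and the per-request hwsp_seqno
pointer introduced earlier in this series:

	static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
	{
		/* Signed delta tolerates u32 seqno wraparound. */
		return (s32)(seq1 - seq2) >= 0;
	}

	static inline bool i915_request_completed(const struct i915_request *rq)
	{
		/* One ordered read of this context's HWSP slot suffices,
		 * as the per-context seqno cannot change mid-check.
		 */
		return i915_seqno_passed(READ_ONCE(*rq->hwsp_seqno),
					 rq->fence.seqno);
	}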

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c              |  2 +-
 drivers/gpu/drm/i915/i915_request.c          |  2 +-
 drivers/gpu/drm/i915/i915_request.h          | 29 ++-----
 drivers/gpu/drm/i915/i915_reset.c            |  1 +
 drivers/gpu/drm/i915/i915_vma.h              |  7 ++
 drivers/gpu/drm/i915/intel_lrc.c             | 32 ++++---
 drivers/gpu/drm/i915/intel_ringbuffer.c      | 91 ++++++++++++++------
 drivers/gpu/drm/i915/selftests/mock_engine.c |  8 +-
 8 files changed, 108 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 09c7ded8f498..16e56fed07be 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2877,7 +2877,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 	 */
 	spin_lock_irqsave(&engine->timeline.lock, flags);
 	list_for_each_entry(request, &engine->timeline.requests, link) {
-		if (__i915_request_completed(request, request->global_seqno))
+		if (i915_request_completed(request))
 			continue;
 
 		active = request;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 3b69c62d040f..ec4dbc67ef9e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -590,7 +590,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->ring = ce->ring;
 	rq->timeline = ce->ring->timeline;
 	GEM_BUG_ON(rq->timeline == &engine->timeline);
-	rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX];
+	rq->hwsp_seqno = rq->timeline->hwsp_seqno;
 
 	spin_lock_init(&rq->lock);
 	dma_fence_init(&rq->fence,
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index e2b209a26a8e..344e52b53ccd 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -295,6 +295,11 @@ static inline u32 i915_request_hwsp(const struct i915_request *rq)
 	return READ_ONCE(*rq->hwsp_seqno);
 }
 
+static inline void i915_request_fake_complete(const struct i915_request *rq)
+{
+	*(u32 *)rq->hwsp_seqno = rq->fence.seqno;
+}
+
 /**
  * i915_request_started - check if the request has begun being executed
  * @rq: the request
@@ -306,32 +311,12 @@ static inline u32 i915_request_hwsp(const struct i915_request *rq)
  */
 static inline bool i915_request_started(const struct i915_request *rq)
 {
-	u32 seqno;
-
-	seqno = i915_request_global_seqno(rq);
-	if (!seqno) /* not yet submitted to HW */
-		return false;
-
-	return i915_seqno_passed(i915_request_hwsp(rq), seqno - 1);
-}
-
-static inline bool
-__i915_request_completed(const struct i915_request *rq, u32 seqno)
-{
-	GEM_BUG_ON(!seqno);
-	return i915_seqno_passed(i915_request_hwsp(rq), seqno) &&
-		seqno == i915_request_global_seqno(rq);
+	return i915_seqno_passed(i915_request_hwsp(rq), rq->fence.seqno - 1);
 }
 
 static inline bool i915_request_completed(const struct i915_request *rq)
 {
-	u32 seqno;
-
-	seqno = i915_request_global_seqno(rq);
-	if (!seqno)
-		return false;
-
-	return __i915_request_completed(rq, seqno);
+	return i915_seqno_passed(i915_request_hwsp(rq), rq->fence.seqno);
 }
 
 void i915_retire_requests(struct drm_i915_private *i915);
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 3e0833221c3a..89d4540ad5e6 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -756,6 +756,7 @@ static void nop_submit_request(struct i915_request *request)
 
 	spin_lock_irqsave(&request->engine->timeline.lock, flags);
 	__i915_request_submit(request);
+	i915_request_fake_complete(request);
 	intel_engine_write_global_seqno(request->engine, request->global_seqno);
 	spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 266b226ebef2..ea9b85576e2d 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -198,6 +198,13 @@ static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 	return lower_32_bits(vma->node.start);
 }
 
+/* XXX inline spaghetti */
+static inline u32 i915_timeline_seqno_address(const struct i915_timeline *tl)
+{
+	GEM_BUG_ON(!tl->pin_count);
+	return i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset;
+}
+
 static inline u32 i915_ggtt_pin_bias(struct i915_vma *vma)
 {
 	return i915_vm_to_ggtt(vma->vm)->pin_bias;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5927ef124bf9..3757a4d3a205 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -828,10 +828,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	list_for_each_entry(rq, &engine->timeline.requests, link) {
 		GEM_BUG_ON(!rq->global_seqno);
 
-		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
-			continue;
+		if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
+			dma_fence_set_error(&rq->fence, -EIO);
 
-		dma_fence_set_error(&rq->fence, -EIO);
+		i915_request_fake_complete(rq);
 	}
 
 	/* Flush the queued requests to the timeline list (for retiring). */
@@ -844,6 +844,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
 			dma_fence_set_error(&rq->fence, -EIO);
 			__i915_request_submit(rq);
+			i915_request_fake_complete(rq);
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
@@ -2023,31 +2024,40 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
 	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
 	BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
 
-	cs = gen8_emit_ggtt_write(cs, request->global_seqno,
+	cs = gen8_emit_ggtt_write(cs,
+				  request->fence.seqno,
+				  i915_timeline_seqno_address(request->timeline));
+
+	cs = gen8_emit_ggtt_write(cs,
+				  request->global_seqno,
 				  intel_hws_seqno_address(request->engine));
+
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
 
 	gen8_emit_wa_tail(request, cs);
 }
-static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
+static const int gen8_emit_breadcrumb_sz = 10 + WA_TAIL_DWORDS;
 
 static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
-	/* We're using qword write, seqno should be aligned to 8 bytes. */
-	BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
-
 	cs = gen8_emit_ggtt_write_rcs(cs,
-				      request->global_seqno,
-				      intel_hws_seqno_address(request->engine),
+				      request->fence.seqno,
+				      i915_timeline_seqno_address(request->timeline),
 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
 				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
 				      PIPE_CONTROL_DC_FLUSH_ENABLE |
 				      PIPE_CONTROL_FLUSH_ENABLE |
 				      PIPE_CONTROL_CS_STALL);
 
+	cs = gen8_emit_ggtt_write_rcs(cs,
+				      request->global_seqno,
+				      intel_hws_seqno_address(request->engine),
+				      PIPE_CONTROL_CS_STALL);
+
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 
@@ -2056,7 +2066,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 
 	gen8_emit_wa_tail(request, cs);
 }
-static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS;
+static const int gen8_emit_breadcrumb_rcs_sz = 14 + WA_TAIL_DWORDS;
 
 static int gen8_init_rcs_context(struct i915_request *rq)
 {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 24bd6f5b4f57..99d821d6ad3b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -326,6 +326,12 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 		 PIPE_CONTROL_DC_FLUSH_ENABLE |
 		 PIPE_CONTROL_QW_WRITE |
 		 PIPE_CONTROL_CS_STALL);
+	*cs++ = i915_timeline_seqno_address(rq->timeline) |
+		PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = rq->fence.seqno;
+
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
 	*cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
 	*cs++ = rq->global_seqno;
 
@@ -335,7 +341,7 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
 }
-static const int gen6_rcs_emit_breadcrumb_sz = 14;
+static const int gen6_rcs_emit_breadcrumb_sz = 18;
 
 static int
 gen7_render_ring_cs_stall_wa(struct i915_request *rq)
@@ -426,6 +432,13 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 		 PIPE_CONTROL_QW_WRITE |
 		 PIPE_CONTROL_GLOBAL_GTT_IVB |
 		 PIPE_CONTROL_CS_STALL);
+	*cs++ = i915_timeline_seqno_address(rq->timeline);
+	*cs++ = rq->fence.seqno;
+
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = (PIPE_CONTROL_QW_WRITE |
+		 PIPE_CONTROL_GLOBAL_GTT_IVB |
+		 PIPE_CONTROL_CS_STALL);
 	*cs++ = intel_hws_seqno_address(rq->engine);
 	*cs++ = rq->global_seqno;
 
@@ -435,27 +448,37 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
 }
-static const int gen7_rcs_emit_breadcrumb_sz = 6;
+static const int gen7_rcs_emit_breadcrumb_sz = 10;
 
 static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
-	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
-	*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = rq->fence.seqno;
+
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+	*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = rq->global_seqno;
+
 	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
 }
-static const int gen6_xcs_emit_breadcrumb_sz = 4;
+static const int gen6_xcs_emit_breadcrumb_sz = 8;
 
 #define GEN7_XCS_WA 32
 static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
 	int i;
 
-	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
-	*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = rq->fence.seqno;
+
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+	*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = rq->global_seqno;
 
 	for (i = 0; i < GEN7_XCS_WA; i++) {
@@ -469,12 +492,11 @@ static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = 0;
 
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
 }
-static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;
+static const int gen7_xcs_emit_breadcrumb_sz = 10 + GEN7_XCS_WA * 3;
 #undef GEN7_XCS_WA
 
 static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
@@ -734,7 +756,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 	rq = NULL;
 	spin_lock_irqsave(&tl->lock, flags);
 	list_for_each_entry(pos, &tl->requests, link) {
-		if (!__i915_request_completed(pos, pos->global_seqno)) {
+		if (!i915_request_completed(pos)) {
 			rq = pos;
 			break;
 		}
@@ -876,11 +898,10 @@ static void cancel_requests(struct intel_engine_cs *engine)
 	list_for_each_entry(request, &engine->timeline.requests, link) {
 		GEM_BUG_ON(!request->global_seqno);
 
-		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-			     &request->fence.flags))
-			continue;
-
-		dma_fence_set_error(&request->fence, -EIO);
+		if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+			      &request->fence.flags))
+			dma_fence_set_error(&request->fence, -EIO);
+		i915_request_fake_complete(request);
 	}
 
 	intel_write_status_page(engine,
@@ -904,27 +925,38 @@ static void i9xx_submit_request(struct i915_request *request)
 
 static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
+	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
+
 	*cs++ = MI_FLUSH;
 
+	*cs++ = MI_STORE_DWORD_INDEX;
+	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
+	*cs++ = rq->fence.seqno;
+
 	*cs++ = MI_STORE_DWORD_INDEX;
 	*cs++ = I915_GEM_HWS_INDEX_ADDR;
 	*cs++ = rq->global_seqno;
 
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
 }
-static const int i9xx_emit_breadcrumb_sz = 6;
+static const int i9xx_emit_breadcrumb_sz = 8;
 
 #define GEN5_WA_STORES 8 /* must be at least 1! */
 static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
 	int i;
 
+	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
+
 	*cs++ = MI_FLUSH;
 
+	*cs++ = MI_STORE_DWORD_INDEX;
+	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
+	*cs++ = rq->fence.seqno;
+
 	BUILD_BUG_ON(GEN5_WA_STORES < 1);
 	for (i = 0; i < GEN5_WA_STORES; i++) {
 		*cs++ = MI_STORE_DWORD_INDEX;
@@ -933,11 +965,12 @@ static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	}
 
 	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
 }
-static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2;
+static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 6;
 #undef GEN5_WA_STORES
 
 static void
@@ -1164,6 +1197,10 @@ int intel_ring_pin(struct intel_ring *ring)
 
 	GEM_BUG_ON(ring->vaddr);
 
+	ret = i915_timeline_pin(ring->timeline);
+	if (ret)
+		return ret;
+
 	flags = PIN_GLOBAL;
 
 	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
@@ -1180,28 +1217,32 @@ int intel_ring_pin(struct intel_ring *ring)
 		else
 			ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);
 		if (unlikely(ret))
-			return ret;
+			goto unpin_timeline;
 	}
 
 	ret = i915_vma_pin(vma, 0, 0, flags);
 	if (unlikely(ret))
-		return ret;
+		goto unpin_timeline;
 
 	if (i915_vma_is_map_and_fenceable(vma))
 		addr = (void __force *)i915_vma_pin_iomap(vma);
 	else
 		addr = i915_gem_object_pin_map(vma->obj, map);
-	if (IS_ERR(addr))
-		goto err;
+	if (IS_ERR(addr)) {
+		ret = PTR_ERR(addr);
+		goto unpin_ring;
+	}
 
 	vma->obj->pin_global++;
 
 	ring->vaddr = addr;
 	return 0;
 
-err:
+unpin_ring:
 	i915_vma_unpin(vma);
-	return PTR_ERR(addr);
+unpin_timeline:
+	i915_timeline_unpin(ring->timeline);
+	return ret;
 }
 
 void intel_ring_reset(struct intel_ring *ring, u32 tail)
@@ -1230,6 +1271,8 @@ void intel_ring_unpin(struct intel_ring *ring)
 
 	ring->vma->obj->pin_global--;
 	i915_vma_unpin(ring->vma);
+
+	i915_timeline_unpin(ring->timeline);
 }
 
 static struct i915_vma *
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index acd27c7e807b..b4b61056b227 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -79,6 +79,7 @@ static void advance(struct mock_engine *engine,
 		    struct mock_request *request)
 {
 	list_del_init(&request->link);
+	i915_request_fake_complete(&request->base);
 	mock_seqno_advance(&engine->base, request->base.global_seqno);
 }
 
@@ -253,16 +254,13 @@ void mock_engine_flush(struct intel_engine_cs *engine)
 	del_timer_sync(&mock->hw_delay);
 
 	spin_lock_irq(&mock->hw_lock);
-	list_for_each_entry_safe(request, rn, &mock->hw_queue, link) {
-		list_del_init(&request->link);
-		mock_seqno_advance(&mock->base, request->base.global_seqno);
-	}
+	list_for_each_entry_safe(request, rn, &mock->hw_queue, link)
+		advance(mock, request);
 	spin_unlock_irq(&mock->hw_lock);
 }
 
 void mock_engine_reset(struct intel_engine_cs *engine)
 {
-	intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0);
 }
 
 void mock_engine_free(struct intel_engine_cs *engine)
-- 
2.20.1

* [PATCH 45/46] drm/i915: Identify active requests
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (42 preceding siblings ...)
  2019-01-07 11:55 ` [PATCH 44/46] drm/i915: Track the context's seqno in its own timeline HWSP Chris Wilson
@ 2019-01-07 11:55 ` Chris Wilson
  2019-01-07 11:55 ` [PATCH 46/46] drm/i915: Replace global breadcrumbs with per-context interrupt tracking Chris Wilson
                   ` (6 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:55 UTC (permalink / raw)
  To: intel-gfx

To allow requests to forgo a common execution timeline, one question we
need to be able to answer is "is this request running?". To track
whether a request has started on HW, we can emit a breadcrumb at the
beginning of the request and check its timeline's HWSP to see if the
breadcrumb has advanced past the start of this request. (This is in
contrast to the global timeline where we need only ask if we are on the
global timeline and if the timeline has advanced past the end of the
previous request.)

There is still scope for confusion from a preempted request, which has
already started but has since relinquished the HW to a higher-priority
request. For the
common case, this discrepancy should be negligible. However, for
identification of hung requests, knowing which one was running at the
time of the hang will be much more important.
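
Putting the two checks together, a request's state can be read straight
from its timeline's HWSP. A hedged sketch (rq_state() is illustrative
only and is not added by this patch; it merely combines the
i915_request_started/completed checks below):

	static const char *rq_state(const struct i915_request *rq)
	{
		u32 hwsp = READ_ONCE(*rq->hwsp_seqno);

		if (i915_seqno_passed(hwsp, rq->fence.seqno))
			return "completed";
		if (i915_seqno_passed(hwsp, rq->fence.seqno - 1))
			return "started"; /* but may since have been preempted */
		return "queued";
	}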

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c     |  1 +
 drivers/gpu/drm/i915/i915_request.h     |  1 +
 drivers/gpu/drm/i915/i915_timeline.c    |  1 +
 drivers/gpu/drm/i915/i915_timeline.h    |  2 ++
 drivers/gpu/drm/i915/intel_engine_cs.c  |  4 +++-
 drivers/gpu/drm/i915/intel_lrc.c        | 23 +++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c |  2 ++
 7 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index ec4dbc67ef9e..8e61a118ad2c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -332,6 +332,7 @@ void i915_request_retire_upto(struct i915_request *rq)
 
 static u32 timeline_get_seqno(struct i915_timeline *tl)
 {
+	tl->seqno += tl->has_initial_breadcrumb;
 	return ++tl->seqno;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 344e52b53ccd..084d174ba83b 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -311,6 +311,7 @@ static inline void i915_request_fake_complete(const struct i915_request *rq)
  */
 static inline bool i915_request_started(const struct i915_request *rq)
 {
+	/* Remember: started but may have since been preempted! */
 	return i915_seqno_passed(i915_request_hwsp(rq), rq->fence.seqno - 1);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 73ad951c74d1..601479eb7944 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -89,6 +89,7 @@ int i915_timeline_init(struct drm_i915_private *i915,
 	timeline->i915 = i915;
 	timeline->name = name;
 	timeline->pin_count = 0;
+	timeline->has_initial_breadcrumb = !global_hwsp;
 
 	if (global_hwsp) {
 		timeline->hwsp_ggtt = i915_vma_get(global_hwsp);
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 0c3739d53d79..421eb34568de 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -47,6 +47,8 @@ struct i915_timeline {
 	struct i915_vma *hwsp_ggtt;
 	u32 hwsp_offset;
 
+	bool has_initial_breadcrumb;
+
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
 	 * outstanding.
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index f168f2fee979..e49925af6da8 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1239,7 +1239,9 @@ static void print_request(struct drm_printer *m,
 	drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n",
 		   prefix,
 		   rq->global_seqno,
-		   i915_request_completed(rq) ? "!" : "",
+		   i915_request_completed(rq) ? "!" :
+		   i915_request_started(rq) ? "*" :
+		   "",
 		   rq->fence.context, rq->fence.seqno,
 		   buf,
 		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3757a4d3a205..0c0a030b2bb3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1264,6 +1264,25 @@ execlists_context_pin(struct intel_engine_cs *engine,
 	return __execlists_context_pin(engine, ctx, ce);
 }
 
+static int emit_initial_breadcrumb(struct i915_request *rq)
+{
+	u32 *cs;
+
+	GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb);
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	*cs++ = i915_timeline_seqno_address(rq->timeline);
+	*cs++ = 0;
+	*cs++ = rq->fence.seqno - 1;
+
+	intel_ring_advance(rq, cs);
+	return 0;
+}
+
 static int emit_pdps(struct i915_request *rq)
 {
 	const struct intel_engine_cs * const engine = rq->engine;
@@ -1338,6 +1357,10 @@ static int execlists_request_alloc(struct i915_request *request)
 	 * to cancel/unwind this request now.
 	 */
 
+	ret = emit_initial_breadcrumb(request);
+	if (ret)
+		return ret;
+
 	/* Unconditionally invalidate GPU caches and TLBs. */
 	if (i915_vm_is_48bit(&request->gem_context->ppgtt->vm))
 		ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 99d821d6ad3b..7148d1480f5d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1594,6 +1594,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 		err = PTR_ERR(timeline);
 		goto err;
 	}
+	GEM_BUG_ON(timeline->has_initial_breadcrumb);
 
 	ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
 	i915_timeline_put(timeline);
@@ -1947,6 +1948,7 @@ static int ring_request_alloc(struct i915_request *request)
 	int ret;
 
 	GEM_BUG_ON(!request->hw_context->pin_count);
+	GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
 
 	/*
 	 * Flush enough space to reduce the likelihood of waiting after
-- 
2.20.1

* [PATCH 46/46] drm/i915: Replace global breadcrumbs with per-context interrupt tracking
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (43 preceding siblings ...)
  2019-01-07 11:55 ` [PATCH 45/46] drm/i915: Identify active requests Chris Wilson
@ 2019-01-07 11:55 ` Chris Wilson
  2019-01-07 12:45 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Patchwork
                   ` (5 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 11:55 UTC (permalink / raw)
  To: intel-gfx

A few years ago, see commit 688e6c725816 ("drm/i915: Slaughter the
thundering i915_wait_request herd"), the issue of handling multiple
clients waiting in parallel was brought to our attention. The
requirement was that every client should be woken immediately upon its
request being signaled, without incurring any cpu overhead.

Handling certain fragility of our hw meant that we could not do a
simple check inside the irq handler (some generations required almost
unbounded delays before we could be sure of seqno coherency), and so
request completion checking required delegation.

Before commit 688e6c725816, the solution was simple. Every client
waiting on a request would be woken on every interrupt and each would do
a heavyweight check to see if their request was complete. Commit
688e6c725816 introduced an rbtree so that only the earliest waiter on
the global timeline would be woken, and it in turn would wake the next
and so on. (Along with various complications to handle requests being
reordered along the global timeline, and also a requirement for a
kthread to provide a delegate for fence signaling that had no process
context.)

The global rbtree depends on knowing the execution timeline (and global
seqno). Without knowing that order, we must instead check all contexts
queued to the HW to see which may have advanced. We trim that list by
only checking queued contexts that are being waited on, but still we
keep a list of all active contexts and their active signalers that we
inspect from inside the irq handler. By moving the waiters onto the fence
signal list, we can combine the client wakeup with the dma_fence
signaling (a dramatic reduction in complexity, but it does require the
HW to be coherent: the seqno must be visible from the cpu before the
interrupt is raised - we keep a timer backup just in case).

Having previously fixed all the issues with irq-seqno serialisation (by
inserting delays onto the GPU after each request instead of random delays
on the CPU after each interrupt), we can rely on the seqno state to
perform direct wakeups from the interrupt handler. This allows us to
preserve our single context switch behaviour of the current routine,
with the only downside that we lose the RT priority sorting of wakeups.
In general, direct wakeup latency of multiple clients is about the same
(about 10% better in most cases) with a reduction in total CPU time spent
in the waiter (about 20-50% depending on gen). Average herd behaviour is
improved, but at the cost of not delegating wakeups on task_prio.
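
The shape of the new irq path is roughly as below. This is a hedged
sketch only: the b->signalers list and the request's signal_link member
are assumed from the new intel_breadcrumbs layout (not shown in this
excerpt), and the real intel_engine_breadcrumbs_irq() is more careful
about ordering, locking and reference counting:

	static void breadcrumbs_irq_sketch(struct intel_engine_cs *engine)
	{
		struct intel_breadcrumbs *b = &engine->breadcrumbs;
		struct intel_context *ce;
		struct i915_request *rq, *rn;

		spin_lock(&b->irq_lock);

		/* Only contexts with listeners are tracked on b->signalers. */
		list_for_each_entry(ce, &b->signalers, signal_link) {
			list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
				if (!i915_request_completed(rq))
					break; /* signals are kept in seqno order */

				/* Fence signaling wakes the waiters directly. */
				dma_fence_signal(&rq->fence);
			}
		}

		spin_unlock(&b->irq_lock);
	}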

References: 688e6c725816 ("drm/i915: Slaughter the thundering i915_wait_request herd")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c           |  28 +-
 drivers/gpu/drm/i915/i915_gem_context.c       |   2 +
 drivers/gpu/drm/i915/i915_gem_context.h       |   2 +
 drivers/gpu/drm/i915/i915_gpu_error.c         |  73 --
 drivers/gpu/drm/i915/i915_gpu_error.h         |   8 -
 drivers/gpu/drm/i915/i915_irq.c               |  88 +-
 drivers/gpu/drm/i915/i915_request.c           | 128 +--
 drivers/gpu/drm/i915/i915_request.h           |  22 +-
 drivers/gpu/drm/i915/i915_reset.c             |  13 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c      | 806 +++++-------------
 drivers/gpu/drm/i915/intel_engine_cs.c        |  34 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c       |   6 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h       |  95 +--
 .../drm/i915/selftests/i915_mock_selftests.h  |   1 -
 drivers/gpu/drm/i915/selftests/i915_request.c | 398 +++++++++
 drivers/gpu/drm/i915/selftests/igt_spinner.c  |   5 -
 .../drm/i915/selftests/intel_breadcrumbs.c    | 470 ----------
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |   2 +-
 drivers/gpu/drm/i915/selftests/lib_sw_fence.c |  54 ++
 drivers/gpu/drm/i915/selftests/lib_sw_fence.h |   3 +
 drivers/gpu/drm/i915/selftests/mock_context.c |   2 +
 drivers/gpu/drm/i915/selftests/mock_engine.c  |  26 +-
 drivers/gpu/drm/i915/selftests/mock_engine.h  |   6 -
 23 files changed, 779 insertions(+), 1493 deletions(-)
 delete mode 100644 drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 42590a0a634f..c0414af0731b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1315,29 +1315,16 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake));
 
 	for_each_engine(engine, dev_priv, id) {
-		struct intel_breadcrumbs *b = &engine->breadcrumbs;
-		struct rb_node *rb;
-
 		seq_printf(m, "%s:\n", engine->name);
 		seq_printf(m, "\tseqno = %x [current %x, last %x]\n",
 			   engine->hangcheck.seqno, seqno[id],
 			   intel_engine_last_submit(engine));
-		seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s, wedged? %s\n",
-			   yesno(intel_engine_has_waiter(engine)),
+		seq_printf(m, "\tfake irq active? %s, stalled? %s, wedged? %s\n",
 			   yesno(test_bit(engine->id,
 					  &dev_priv->gpu_error.missed_irq_rings)),
 			   yesno(engine->hangcheck.stalled),
 			   yesno(engine->hangcheck.wedged));
 
-		spin_lock_irq(&b->rb_lock);
-		for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
-			struct intel_wait *w = rb_entry(rb, typeof(*w), node);
-
-			seq_printf(m, "\t%s [%d] waiting for %x\n",
-				   w->tsk->comm, w->tsk->pid, w->seqno);
-		}
-		spin_unlock_irq(&b->rb_lock);
-
 		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
 			   (long long)engine->hangcheck.acthd,
 			   (long long)acthd[id]);
@@ -2026,18 +2013,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
 	return 0;
 }
 
-static int count_irq_waiters(struct drm_i915_private *i915)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int count = 0;
-
-	for_each_engine(engine, i915, id)
-		count += intel_engine_has_waiter(engine);
-
-	return count;
-}
-
 static const char *rps_power_to_str(unsigned int power)
 {
 	static const char * const strings[] = {
@@ -2077,7 +2052,6 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
 	seq_printf(m, "GPU busy? %s [%d requests]\n",
 		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
-	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
 	seq_printf(m, "Boosts outstanding? %d\n",
 		   atomic_read(&rps->num_waiters));
 	seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 5905b6d8f291..4714568bce97 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -343,6 +343,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 		struct intel_context *ce = &ctx->__engine[n];
 
 		ce->gem_context = ctx;
+		INIT_LIST_HEAD(&ce->signal_link);
+		INIT_LIST_HEAD(&ce->signals);
 	}
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index f6d870b1f73e..e5eca29cd373 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -164,6 +164,8 @@ struct i915_gem_context {
 	struct intel_context {
 		struct i915_gem_context *gem_context;
 		struct intel_engine_cs *active;
+		struct list_head signal_link;
+		struct list_head signals;
 		struct i915_vma *state;
 		struct intel_ring *ring;
 		u32 *lrc_reg_state;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 6e975c43dae9..cb1dd93b3aaa 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -530,7 +530,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 	}
 	err_printf(m, "  seqno: 0x%08x\n", ee->seqno);
 	err_printf(m, "  last_seqno: 0x%08x\n", ee->last_seqno);
-	err_printf(m, "  waiting: %s\n", yesno(ee->waiting));
 	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
 	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
 	err_printf(m, "  hangcheck stall: %s\n", yesno(ee->hangcheck_stalled));
@@ -807,21 +806,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 						    error->epoch);
 		}
 
-		if (IS_ERR(ee->waiters)) {
-			err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n",
-				   m->i915->engine[i]->name);
-		} else if (ee->num_waiters) {
-			err_printf(m, "%s --- %d waiters\n",
-				   m->i915->engine[i]->name,
-				   ee->num_waiters);
-			for (j = 0; j < ee->num_waiters; j++) {
-				err_printf(m, " seqno 0x%08x for %s [%d]\n",
-					   ee->waiters[j].seqno,
-					   ee->waiters[j].comm,
-					   ee->waiters[j].pid);
-			}
-		}
-
 		print_error_obj(m, m->i915->engine[i],
 				"ringbuffer", ee->ringbuffer);
 
@@ -1003,8 +987,6 @@ void __i915_gpu_state_free(struct kref *error_ref)
 		i915_error_object_free(ee->wa_ctx);
 
 		kfree(ee->requests);
-		if (!IS_ERR_OR_NULL(ee->waiters))
-			kfree(ee->waiters);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
@@ -1211,59 +1193,6 @@ static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
 			I915_READ(RING_SYNC_2(engine->mmio_base));
 }
 
-static void error_record_engine_waiters(struct intel_engine_cs *engine,
-					struct drm_i915_error_engine *ee)
-{
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-	struct drm_i915_error_waiter *waiter;
-	struct rb_node *rb;
-	int count;
-
-	ee->num_waiters = 0;
-	ee->waiters = NULL;
-
-	if (RB_EMPTY_ROOT(&b->waiters))
-		return;
-
-	if (!spin_trylock_irq(&b->rb_lock)) {
-		ee->waiters = ERR_PTR(-EDEADLK);
-		return;
-	}
-
-	count = 0;
-	for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb))
-		count++;
-	spin_unlock_irq(&b->rb_lock);
-
-	waiter = NULL;
-	if (count)
-		waiter = kmalloc_array(count,
-				       sizeof(struct drm_i915_error_waiter),
-				       GFP_ATOMIC);
-	if (!waiter)
-		return;
-
-	if (!spin_trylock_irq(&b->rb_lock)) {
-		kfree(waiter);
-		ee->waiters = ERR_PTR(-EDEADLK);
-		return;
-	}
-
-	ee->waiters = waiter;
-	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
-		struct intel_wait *w = rb_entry(rb, typeof(*w), node);
-
-		strcpy(waiter->comm, w->tsk->comm);
-		waiter->pid = w->tsk->pid;
-		waiter->seqno = w->seqno;
-		waiter++;
-
-		if (++ee->num_waiters == count)
-			break;
-	}
-	spin_unlock_irq(&b->rb_lock);
-}
-
 static void error_record_engine_registers(struct i915_gpu_state *error,
 					  struct intel_engine_cs *engine,
 					  struct drm_i915_error_engine *ee)
@@ -1299,7 +1228,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 
 	intel_engine_get_instdone(engine, &ee->instdone);
 
-	ee->waiting = intel_engine_has_waiter(engine);
 	ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
 	ee->acthd = intel_engine_get_active_head(engine);
 	ee->seqno = intel_engine_get_seqno(engine);
@@ -1549,7 +1477,6 @@ static void gem_record_rings(struct i915_gpu_state *error)
 		ee->engine_id = i;
 
 		error_record_engine_registers(error, engine, ee);
-		error_record_engine_waiters(engine, ee);
 		error_record_engine_execlists(engine, ee);
 
 		request = i915_gem_find_active_request(engine);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 733723e1ea03..cb6a6add543e 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -82,8 +82,6 @@ struct i915_gpu_state {
 		int engine_id;
 		/* Software tracked state */
 		bool idle;
-		bool waiting;
-		int num_waiters;
 		unsigned long hangcheck_timestamp;
 		bool hangcheck_stalled;
 		enum intel_engine_hangcheck_action hangcheck_action;
@@ -161,12 +159,6 @@ struct i915_gpu_state {
 		} *requests, execlist[EXECLIST_MAX_PORTS];
 		unsigned int num_ports;
 
-		struct drm_i915_error_waiter {
-			char comm[TASK_COMM_LEN];
-			pid_t pid;
-			u32 seqno;
-		} *waiters;
-
 		struct {
 			u32 gfx_mode;
 			union {
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 46c742d71610..9e7bb5977829 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -28,9 +28,10 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <linux/sysrq.h>
-#include <linux/slab.h>
 #include <linux/circ_buf.h>
+#include <linux/slab.h>
+#include <linux/sysrq.h>
+
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
@@ -1152,67 +1153,6 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
 	return;
 }
 
-static void notify_ring(struct intel_engine_cs *engine)
-{
-	const u32 seqno = intel_engine_get_seqno(engine);
-	struct i915_request *rq = NULL;
-	struct task_struct *tsk = NULL;
-	struct intel_wait *wait;
-
-	if (unlikely(!engine->breadcrumbs.irq_armed))
-		return;
-
-	rcu_read_lock();
-
-	spin_lock(&engine->breadcrumbs.irq_lock);
-	wait = engine->breadcrumbs.irq_wait;
-	if (wait) {
-		/*
-		 * We use a callback from the dma-fence to submit
-		 * requests after waiting on our own requests. To
-		 * ensure minimum delay in queuing the next request to
-		 * hardware, signal the fence now rather than wait for
-		 * the signaler to be woken up. We still wake up the
-		 * waiter in order to handle the irq-seqno coherency
-		 * issues (we may receive the interrupt before the
-		 * seqno is written, see __i915_request_irq_complete())
-		 * and to handle coalescing of multiple seqno updates
-		 * and many waiters.
-		 */
-		if (i915_seqno_passed(seqno, wait->seqno)) {
-			struct i915_request *waiter = wait->request;
-
-			if (waiter &&
-			    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-				      &waiter->fence.flags) &&
-			    intel_wait_check_request(wait, waiter))
-				rq = i915_request_get(waiter);
-
-			tsk = wait->tsk;
-		}
-
-		engine->breadcrumbs.irq_count++;
-	} else {
-		if (engine->breadcrumbs.irq_armed)
-			__intel_engine_disarm_breadcrumbs(engine);
-	}
-	spin_unlock(&engine->breadcrumbs.irq_lock);
-
-	if (rq) {
-		spin_lock(&rq->lock);
-		dma_fence_signal_locked(&rq->fence);
-		GEM_BUG_ON(!i915_request_completed(rq));
-		spin_unlock(&rq->lock);
-
-		i915_request_put(rq);
-	}
-
-	if (tsk && tsk->state & TASK_NORMAL)
-		wake_up_process(tsk);
-
-	rcu_read_unlock();
-}
-
 static void vlv_c0_read(struct drm_i915_private *dev_priv,
 			struct intel_rps_ei *ei)
 {
@@ -1457,20 +1397,20 @@ static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv,
 			       u32 gt_iir)
 {
 	if (gt_iir & GT_RENDER_USER_INTERRUPT)
-		notify_ring(dev_priv->engine[RCS]);
+		intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
 	if (gt_iir & ILK_BSD_USER_INTERRUPT)
-		notify_ring(dev_priv->engine[VCS]);
+		intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);
 }
 
 static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 			       u32 gt_iir)
 {
 	if (gt_iir & GT_RENDER_USER_INTERRUPT)
-		notify_ring(dev_priv->engine[RCS]);
+		intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
 	if (gt_iir & GT_BSD_USER_INTERRUPT)
-		notify_ring(dev_priv->engine[VCS]);
+		intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);
 	if (gt_iir & GT_BLT_USER_INTERRUPT)
-		notify_ring(dev_priv->engine[BCS]);
+		intel_engine_breadcrumbs_irq(dev_priv->engine[BCS]);
 
 	if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
 		      GT_BSD_CS_ERROR_INTERRUPT |
@@ -1490,7 +1430,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 		tasklet = true;
 
 	if (iir & GT_RENDER_USER_INTERRUPT) {
-		notify_ring(engine);
+		intel_engine_breadcrumbs_irq(engine);
 		tasklet |= USES_GUC_SUBMISSION(engine->i915);
 	}
 
@@ -1836,7 +1776,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 
 	if (HAS_VEBOX(dev_priv)) {
 		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
-			notify_ring(dev_priv->engine[VECS]);
+			intel_engine_breadcrumbs_irq(dev_priv->engine[VECS]);
 
 		if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
 			DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
@@ -4262,7 +4202,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
 		I915_WRITE16(IIR, iir);
 
 		if (iir & I915_USER_INTERRUPT)
-			notify_ring(dev_priv->engine[RCS]);
+			intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
 
 		if (iir & I915_MASTER_ERROR_INTERRUPT)
 			i8xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4370,7 +4310,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
 		I915_WRITE(IIR, iir);
 
 		if (iir & I915_USER_INTERRUPT)
-			notify_ring(dev_priv->engine[RCS]);
+			intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
 
 		if (iir & I915_MASTER_ERROR_INTERRUPT)
 			i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4515,10 +4455,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
 		I915_WRITE(IIR, iir);
 
 		if (iir & I915_USER_INTERRUPT)
-			notify_ring(dev_priv->engine[RCS]);
+			intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
 
 		if (iir & I915_BSD_USER_INTERRUPT)
-			notify_ring(dev_priv->engine[VCS]);
+			intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);
 
 		if (iir & I915_MASTER_ERROR_INTERRUPT)
 			i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8e61a118ad2c..90f84f609077 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -60,7 +60,7 @@ static bool i915_fence_signaled(struct dma_fence *fence)
 
 static bool i915_fence_enable_signaling(struct dma_fence *fence)
 {
-	return intel_engine_enable_signaling(to_request(fence), true);
+	return intel_engine_enable_signaling(to_request(fence));
 }
 
 static signed long i915_fence_wait(struct dma_fence *fence,
@@ -370,9 +370,11 @@ void __i915_request_submit(struct i915_request *request)
 
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
 	request->global_seqno = seqno;
-	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
-		intel_engine_enable_signaling(request, false);
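+	/*
+	 * If signaling cannot be armed because the request has already
+	 * completed, kick the irq_work to dispatch the signal ourselves.
+	 */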
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
+	    !intel_engine_enable_signaling(request))
+		intel_engine_queue_breadcrumbs(engine);
 	spin_unlock(&request->lock);
 
 	engine->emit_breadcrumb(request,
@@ -382,8 +384,6 @@ void __i915_request_submit(struct i915_request *request)
 	move_to_timeline(request, &engine->timeline);
 
 	trace_i915_request_execute(request);
-
-	wake_up_all(&request->execute);
 }
 
 void i915_request_submit(struct i915_request *request)
@@ -427,6 +427,7 @@ void __i915_request_unsubmit(struct i915_request *request)
 	request->global_seqno = 0;
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
 		intel_engine_cancel_signaling(request);
+	clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
 	spin_unlock(&request->lock);
 
 	/* Transfer back from the global per-engine timeline to per-context */
@@ -602,13 +603,11 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
-	init_waitqueue_head(&rq->execute);
 
 	i915_sched_node_init(&rq->sched);
 
 	/* No zalloc, must clear what we need by hand */
 	rq->global_seqno = 0;
-	rq->signaling.wait.seqno = 0;
 	rq->file_priv = NULL;
 	rq->batch = NULL;
 	rq->capture_list = NULL;
@@ -999,13 +998,10 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
 	return this_cpu != cpu;
 }
 
-static bool __i915_spin_request(const struct i915_request *rq,
-				u32 seqno, int state, unsigned long timeout_us)
+static bool __i915_spin_request(const struct i915_request * const rq,
+				int state, unsigned long timeout_us)
 {
-	struct intel_engine_cs *engine = rq->engine;
-	unsigned int irq, cpu;
-
-	GEM_BUG_ON(!seqno);
+	unsigned int cpu;
 
 	/*
 	 * Only wait for the request if we know it is likely to complete.
@@ -1018,7 +1014,7 @@ static bool __i915_spin_request(const struct i915_request *rq,
 	 * it is a fair assumption that it will not complete within our
 	 * relatively short timeout.
 	 */
-	if (!intel_engine_has_started(engine, seqno))
+	if (!i915_request_started(rq))
 		return false;
 
 	/*
@@ -1032,20 +1028,10 @@ static bool __i915_spin_request(const struct i915_request *rq,
 	 * takes to sleep on a request, on the order of a microsecond.
 	 */
 
-	irq = READ_ONCE(engine->breadcrumbs.irq_count);
 	timeout_us += local_clock_us(&cpu);
 	do {
-		if (intel_engine_has_completed(engine, seqno))
-			return seqno == i915_request_global_seqno(rq);
-
-		/*
-		 * Seqno are meant to be ordered *before* the interrupt. If
-		 * we see an interrupt without a corresponding seqno advance,
-		 * assume we won't see one in the near future but require
-		 * the engine->seqno_barrier() to fixup coherency.
-		 */
-		if (READ_ONCE(engine->breadcrumbs.irq_count) != irq)
-			break;
+		if (i915_request_completed(rq))
+			return true;
 
 		if (signal_pending_state(state, current))
 			break;
@@ -1059,6 +1045,18 @@ static bool __i915_spin_request(const struct i915_request *rq,
 	return false;
 }
 
+struct request_wait {
+	struct dma_fence_cb cb;
+	struct task_struct *tsk;
+};
+
+static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct request_wait *wait = container_of(cb, typeof(*wait), cb);
+
+	wake_up_process(wait->tsk);
+}
+
 /**
  * i915_request_wait - wait until execution of request has finished
  * @rq: the request to wait upon
@@ -1084,8 +1082,7 @@ long i915_request_wait(struct i915_request *rq,
 {
 	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
 		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
-	DEFINE_WAIT_FUNC(exec, default_wake_function);
-	struct intel_wait wait;
+	struct request_wait wait;
 
 	might_sleep();
 	GEM_BUG_ON(timeout < 0);
@@ -1097,47 +1094,24 @@ long i915_request_wait(struct i915_request *rq,
 		return -ETIME;
 
 	trace_i915_request_wait_begin(rq, flags);
-	add_wait_queue(&rq->execute, &exec);
-	intel_wait_init(&wait);
-	if (flags & I915_WAIT_PRIORITY)
-		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
-
-restart:
-	do {
-		set_current_state(state);
-		if (intel_wait_update_request(&wait, rq))
-			break;
 
-		if (signal_pending_state(state, current)) {
-			timeout = -ERESTARTSYS;
-			goto complete;
-		}
-
-		if (!timeout) {
-			timeout = -ETIME;
-			goto complete;
-		}
+	/* Optimistic short spin before touching IRQs */
+	if (__i915_spin_request(rq, state, 5))
+		goto out;
 
-		timeout = io_schedule_timeout(timeout);
-	} while (1);
+	if (flags & I915_WAIT_PRIORITY)
+		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
 
-	GEM_BUG_ON(!intel_wait_has_seqno(&wait));
-	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+	wait.tsk = current;
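+	/* A nonzero result (-ENOENT) means the fence has already signaled */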
+	if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
+		goto out;
 
-	/* Optimistic short spin before touching IRQs */
-	if (__i915_spin_request(rq, wait.seqno, state, 5))
-		goto complete;
+	for (;;) {
+		set_current_state(state);
 
-	set_current_state(state);
-	if (intel_engine_add_wait(rq->engine, &wait))
-		/*
-		 * In order to check that we haven't missed the interrupt
-		 * as we enabled it, we need to kick ourselves to do a
-		 * coherent check on the seqno before we sleep.
-		 */
-		goto wakeup;
+		if (i915_request_completed(rq))
+			break;
 
-	for (;;) {
 		if (signal_pending_state(state, current)) {
 			timeout = -ERESTARTSYS;
 			break;
@@ -1149,33 +1123,13 @@ long i915_request_wait(struct i915_request *rq,
 		}
 
 		timeout = io_schedule_timeout(timeout);
-
-		if (intel_wait_complete(&wait) &&
-		    intel_wait_check_request(&wait, rq))
-			break;
-
-		set_current_state(state);
-
-wakeup:
-		if (i915_request_completed(rq))
-			break;
-
-		/* Only spin if we know the GPU is processing this request */
-		if (__i915_spin_request(rq, wait.seqno, state, 2))
-			break;
-
-		if (!intel_wait_check_request(&wait, rq)) {
-			intel_engine_remove_wait(rq->engine, &wait);
-			goto restart;
-		}
 	}
-
-	intel_engine_remove_wait(rq->engine, &wait);
-complete:
 	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&rq->execute, &exec);
-	trace_i915_request_wait_end(rq);
 
+	dma_fence_remove_callback(&rq->fence, &wait.cb);
+
+out:
+	trace_i915_request_wait_end(rq);
 	return timeout;
 }
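
The rewritten i915_request_wait() above is the generic dma-fence callback
idiom. For reference, a minimal standalone sketch of that idiom (hypothetical
helper names; the optimistic spin, priority bump and tracing above are
omitted — this is not part of the patch):

	#include <linux/dma-fence.h>
	#include <linux/sched.h>

	struct waiter {
		struct dma_fence_cb cb;
		struct task_struct *tsk;
	};

	static void waiter_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
	{
		wake_up_process(container_of(cb, struct waiter, cb)->tsk);
	}

	/* Returns the remaining jiffies, or 0 on timeout */
	static long waiter_sleep(struct dma_fence *fence, long timeout)
	{
		struct waiter w = { .tsk = current };

		/* -ENOENT here means the fence has already signaled */
		if (dma_fence_add_callback(fence, &w.cb, waiter_wake))
			return timeout;

		for (;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (dma_fence_is_signaled(fence))
				break;
			if (!timeout)
				break;
			timeout = io_schedule_timeout(timeout);
		}
		__set_current_state(TASK_RUNNING);

		dma_fence_remove_callback(fence, &w.cb);
		return timeout;
	}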
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 084d174ba83b..f7741a35f20b 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -38,23 +38,16 @@ struct drm_i915_gem_object;
 struct i915_request;
 struct i915_timeline;
 
-struct intel_wait {
-	struct rb_node node;
-	struct task_struct *tsk;
-	struct i915_request *request;
-	u32 seqno;
-};
-
-struct intel_signal_node {
-	struct intel_wait wait;
-	struct list_head link;
-};
-
 struct i915_capture_list {
 	struct i915_capture_list *next;
 	struct i915_vma *vma;
 };
 
+enum {
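+	/* set while the request is in flight and may complete at any time */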
+	I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS,
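+	/* set while the request sits on its context's breadcrumb signal list */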
+	I915_FENCE_FLAG_SIGNAL,
+};
+
 /**
  * Request queue structure.
  *
@@ -97,7 +90,7 @@ struct i915_request {
 	struct intel_context *hw_context;
 	struct intel_ring *ring;
 	struct i915_timeline *timeline;
-	struct intel_signal_node signaling;
+	struct list_head signal_link;
 
 	/*
 	 * The rcu epoch of when this request was allocated. Used to judiciously
@@ -116,7 +109,6 @@ struct i915_request {
 	 */
 	struct i915_sw_fence submit;
 	wait_queue_entry_t submitq;
-	wait_queue_head_t execute;
 
 	/*
 	 * A list of everyone we wait upon, and everyone who waits upon us.
@@ -250,7 +242,7 @@ i915_request_put(struct i915_request *rq)
  * that it has passed the global seqno and the global seqno is unchanged
  * after the read, it is indeed complete).
  */
-static u32
+static inline u32
 i915_request_global_seqno(const struct i915_request *request)
 {
 	return READ_ONCE(request->global_seqno);
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 89d4540ad5e6..4881af83d315 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -747,18 +747,19 @@ static void reset_restart(struct drm_i915_private *i915)
 
 static void nop_submit_request(struct i915_request *request)
 {
+	struct intel_engine_cs *engine = request->engine;
 	unsigned long flags;
 
 	GEM_TRACE("%s fence %llx:%lld -> -EIO\n",
-		  request->engine->name,
-		  request->fence.context, request->fence.seqno);
+		  engine->name, request->fence.context, request->fence.seqno);
 	dma_fence_set_error(&request->fence, -EIO);
 
-	spin_lock_irqsave(&request->engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->timeline.lock, flags);
 	__i915_request_submit(request);
 	i915_request_fake_complete(request);
-	intel_engine_write_global_seqno(request->engine, request->global_seqno);
-	spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
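+	/* May be called in atomic context, so defer signaling to irq_work */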
+	intel_engine_queue_breadcrumbs(engine);
 }
 
 void i915_gem_set_wedged(struct drm_i915_private *i915)
@@ -813,7 +814,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 
 	for_each_engine(engine, i915, id) {
 		reset_finish_engine(engine);
-		intel_engine_wakeup(engine);
+		intel_engine_signal_breadcrumbs(engine);
 	}
 
 	smp_mb__before_atomic();
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 7b517bf83507..be928bd204d3 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -29,48 +29,135 @@
 
 #define task_asleep(tsk) ((tsk)->state & TASK_NORMAL && !(tsk)->on_rq)
 
-static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b)
+static void irq_enable(struct intel_engine_cs *engine)
+{
+	if (!engine->irq_enable)
+		return;
+
+	/*
+	 * FIXME: Ideally we want this on the API boundary, but for the
+	 * sake of testing with mock breadcrumbs (no HW so unable to
+	 * enable irqs) we place it deep within the bowels, at the point
+	 * of no return.
+	 */
+	GEM_BUG_ON(!intel_irqs_enabled(engine->i915));
+
+	/* Caller disables interrupts */
+	spin_lock(&engine->i915->irq_lock);
+	engine->irq_enable(engine);
+	spin_unlock(&engine->i915->irq_lock);
+}
+
+static void irq_disable(struct intel_engine_cs *engine)
 {
-	struct intel_wait *wait;
-	unsigned int result = 0;
+	if (!engine->irq_disable)
+		return;
+
+	/* Caller disables interrupts */
+	spin_lock(&engine->i915->irq_lock);
+	engine->irq_disable(engine);
+	spin_unlock(&engine->i915->irq_lock);
+}
 
+static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+{
 	lockdep_assert_held(&b->irq_lock);
 
-	wait = b->irq_wait;
-	if (wait) {
-		/*
-		 * N.B. Since task_asleep() and ttwu are not atomic, the
-		 * waiter may actually go to sleep after the check, causing
-		 * us to suppress a valid wakeup. We prefer to reduce the
-		 * number of false positive missed_breadcrumb() warnings
-		 * at the expense of a few false negatives, as it it easy
-		 * to trigger a false positive under heavy load. Enough
-		 * signal should remain from genuine missed_breadcrumb()
-		 * for us to detect in CI.
-		 */
-		bool was_asleep = task_asleep(wait->tsk);
-
-		result = ENGINE_WAKEUP_WAITER;
-		if (wake_up_process(wait->tsk) && was_asleep)
-			result |= ENGINE_WAKEUP_ASLEEP;
-	}
+	GEM_BUG_ON(!b->irq_enabled);
+	if (!--b->irq_enabled)
+		irq_disable(container_of(b,
+					 struct intel_engine_cs,
+					 breadcrumbs));
 
-	return result;
+	b->irq_armed = false;
 }
 
-unsigned int intel_engine_wakeup(struct intel_engine_cs *engine)
+void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-	unsigned long flags;
-	unsigned int result;
 
-	spin_lock_irqsave(&b->irq_lock, flags);
-	result = __intel_breadcrumbs_wakeup(b);
-	spin_unlock_irqrestore(&b->irq_lock, flags);
+	if (!b->irq_armed)
+		return;
+
+	spin_lock_irq(&b->irq_lock);
+	if (b->irq_armed)
+		__intel_breadcrumbs_disarm_irq(b);
+	spin_unlock_irq(&b->irq_lock);
+}
+
+bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	struct intel_context *ce, *cn;
+	struct i915_request *rq, *rn;
+	LIST_HEAD(signal);
+
+	spin_lock(&b->irq_lock);
+
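+	/* Sampled by the hangcheck timer to tell a dead irq from a quiet one */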
+	b->irq_fired = true;
+	if (b->irq_armed && list_empty(&b->signalers))
+		__intel_breadcrumbs_disarm_irq(b);
+
+	list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
+		GEM_BUG_ON(list_empty(&ce->signals));
+
+		list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
+			if (!i915_request_completed(rq))
+				break;
+
+			GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
+					     &rq->fence.flags));
+			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+
+			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				     &rq->fence.flags))
+				continue;
+
+			/*
+			 * Queue for execution after dropping the signaling
+			 * spinlock as the callback chain may end up adding
+			 * more signalers to the same context or engine.
+			 */
+			i915_request_get(rq);
+			list_add_tail(&rq->signal_link, &signal);
+		}
+
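+		/* Advance ce->signals past everything moved to the local list */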
+		if (!list_is_first(&rq->signal_link, &ce->signals)) {
+			__list_del_many(&ce->signals, &rq->signal_link);
+			if (&ce->signals == &rq->signal_link)
+				list_del_init(&ce->signal_link);
+		}
+	}
+
+	spin_unlock(&b->irq_lock);
+
+	list_for_each_entry_safe(rq, rn, &signal, signal_link) {
+		dma_fence_signal(&rq->fence);
+		i915_request_put(rq);
+	}
+
+	return !list_empty(&signal);
+}
+
+bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
+{
+	bool result;
+
+	local_irq_disable();
+	result = intel_engine_breadcrumbs_irq(engine);
+	local_irq_enable();
 
 	return result;
 }
 
+static void signal_irq_work(struct irq_work *work)
+{
+	struct intel_engine_cs *engine =
+		container_of(work, typeof(*engine), breadcrumbs.irq_work);
+
+	intel_engine_breadcrumbs_irq(engine);
+}
+
 static unsigned long wait_timeout(void)
 {
 	return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
@@ -94,19 +181,15 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t)
 	struct intel_engine_cs *engine =
 		from_timer(engine, t, breadcrumbs.hangcheck);
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-	unsigned int irq_count;
 
 	if (!b->irq_armed)
 		return;
 
-	irq_count = READ_ONCE(b->irq_count);
-	if (b->hangcheck_interrupts != irq_count) {
-		b->hangcheck_interrupts = irq_count;
-		mod_timer(&b->hangcheck, wait_timeout());
-		return;
-	}
+	if (b->irq_fired)
+		goto rearm;
 
-	/* We keep the hangcheck timer alive until we disarm the irq, even
+	/*
+	 * We keep the hangcheck timer alive until we disarm the irq, even
 	 * if there are no waiters at present.
 	 *
 	 * If the waiter was currently running, assume it hasn't had a chance
@@ -118,10 +201,13 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t)
 	 * but we still have a waiter. Assuming all batches complete within
 	 * DRM_I915_HANGCHECK_JIFFIES [1.5s]!
 	 */
-	if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) {
+	synchronize_hardirq(engine->i915->drm.irq);
+	if (intel_engine_signal_breadcrumbs(engine)) {
 		missed_breadcrumb(engine);
 		mod_timer(&b->fake_irq, jiffies + 1);
 	} else {
+rearm:
+		b->irq_fired = false;
 		mod_timer(&b->hangcheck, wait_timeout());
 	}
 }
@@ -140,11 +226,7 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t)
 	 * oldest waiter to do the coherent seqno check.
 	 */
 
-	spin_lock_irq(&b->irq_lock);
-	if (b->irq_armed && !__intel_breadcrumbs_wakeup(b))
-		__intel_engine_disarm_breadcrumbs(engine);
-	spin_unlock_irq(&b->irq_lock);
-	if (!b->irq_armed)
+	if (!intel_engine_signal_breadcrumbs(engine) && !b->irq_armed)
 		return;
 
 	/* If the user has disabled the fake-irq, restore the hangchecking */
@@ -156,51 +238,6 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t)
 	mod_timer(&b->fake_irq, jiffies + 1);
 }
 
-static void irq_enable(struct intel_engine_cs *engine)
-{
-	if (!engine->irq_enable)
-		return;
-
-	/*
-	 * FIXME: Ideally we want this on the API boundary, but for the
-	 * sake of testing with mock breadcrumbs (no HW so unable to
-	 * enable irqs) we place it deep within the bowels, at the point
-	 * of no return.
-	 */
-	GEM_BUG_ON(!intel_irqs_enabled(engine->i915));
-
-	/* Caller disables interrupts */
-	spin_lock(&engine->i915->irq_lock);
-	engine->irq_enable(engine);
-	spin_unlock(&engine->i915->irq_lock);
-}
-
-static void irq_disable(struct intel_engine_cs *engine)
-{
-	if (!engine->irq_disable)
-		return;
-
-	/* Caller disables interrupts */
-	spin_lock(&engine->i915->irq_lock);
-	engine->irq_disable(engine);
-	spin_unlock(&engine->i915->irq_lock);
-}
-
-void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
-{
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-	lockdep_assert_held(&b->irq_lock);
-	GEM_BUG_ON(b->irq_wait);
-	GEM_BUG_ON(!b->irq_armed);
-
-	GEM_BUG_ON(!b->irq_enabled);
-	if (!--b->irq_enabled)
-		irq_disable(engine);
-
-	b->irq_armed = false;
-}
-
 void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
@@ -223,40 +260,6 @@ void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine)
 	spin_unlock_irq(&b->irq_lock);
 }
 
-void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
-{
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-	struct intel_wait *wait, *n;
-
-	if (!b->irq_armed)
-		return;
-
-	/*
-	 * We only disarm the irq when we are idle (all requests completed),
-	 * so if the bottom-half remains asleep, it missed the request
-	 * completion.
-	 */
-	if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP)
-		missed_breadcrumb(engine);
-
-	spin_lock_irq(&b->rb_lock);
-
-	spin_lock(&b->irq_lock);
-	b->irq_wait = NULL;
-	if (b->irq_armed)
-		__intel_engine_disarm_breadcrumbs(engine);
-	spin_unlock(&b->irq_lock);
-
-	rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) {
-		GEM_BUG_ON(!intel_engine_signaled(engine, wait->seqno));
-		RB_CLEAR_NODE(&wait->node);
-		wake_up_process(wait->tsk);
-	}
-	b->waiters = RB_ROOT;
-
-	spin_unlock_irq(&b->rb_lock);
-}
-
 static bool use_fake_irq(const struct intel_breadcrumbs *b)
 {
 	const struct intel_engine_cs *engine =
@@ -272,7 +275,7 @@ static bool use_fake_irq(const struct intel_breadcrumbs *b)
 	 * engine->seqno_barrier(), a timing error that should be transient
 	 * and unlikely to reoccur.
 	 */
-	return READ_ONCE(b->irq_count) == b->hangcheck_interrupts;
+	return !b->irq_fired;
 }
 
 static void enable_fake_irq(struct intel_breadcrumbs *b)
@@ -284,7 +287,7 @@ static void enable_fake_irq(struct intel_breadcrumbs *b)
 		mod_timer(&b->hangcheck, wait_timeout());
 }
 
-static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
+static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 {
 	struct intel_engine_cs *engine =
 		container_of(b, struct intel_engine_cs, breadcrumbs);
@@ -323,537 +326,132 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
 	return enabled;
 }
 
-static inline struct intel_wait *to_wait(struct rb_node *node)
+void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
 {
-	return rb_entry(node, struct intel_wait, node);
-}
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
-static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
-					      struct intel_wait *wait)
-{
-	lockdep_assert_held(&b->rb_lock);
-	GEM_BUG_ON(b->irq_wait == wait);
+	spin_lock_init(&b->irq_lock);
+	INIT_LIST_HEAD(&b->signalers);
 
-	/*
-	 * This request is completed, so remove it from the tree, mark it as
-	 * complete, and *then* wake up the associated task. N.B. when the
-	 * task wakes up, it will find the empty rb_node, discern that it
-	 * has already been removed from the tree and skip the serialisation
-	 * of the b->rb_lock and b->irq_lock. This means that the destruction
-	 * of the intel_wait is not serialised with the interrupt handler
-	 * by the waiter - it must instead be serialised by the caller.
-	 */
-	rb_erase(&wait->node, &b->waiters);
-	RB_CLEAR_NODE(&wait->node);
+	init_irq_work(&b->irq_work, signal_irq_work);
 
-	if (wait->tsk->state != TASK_RUNNING)
-		wake_up_process(wait->tsk); /* implicit smp_wmb() */
+	timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
+	timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
 }
 
-static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine,
-					    struct rb_node *next)
+static void cancel_fake_irq(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
-	spin_lock(&b->irq_lock);
-	GEM_BUG_ON(!b->irq_armed);
-	GEM_BUG_ON(!b->irq_wait);
-	b->irq_wait = to_wait(next);
-	spin_unlock(&b->irq_lock);
-
-	/* We always wake up the next waiter that takes over as the bottom-half
-	 * as we may delegate not only the irq-seqno barrier to the next waiter
-	 * but also the task of waking up concurrent waiters.
-	 */
-	if (next)
-		wake_up_process(to_wait(next)->tsk);
+	del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */
+	del_timer_sync(&b->hangcheck);
+	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
 }
 
-static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
-				    struct intel_wait *wait)
+void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-	struct rb_node **p, *parent, *completed;
-	bool first, armed;
-	u32 seqno;
+	unsigned long flags;
 
-	GEM_BUG_ON(!wait->seqno);
+	spin_lock_irqsave(&b->irq_lock, flags);
 
-	/* Insert the request into the retirement ordered list
-	 * of waiters by walking the rbtree. If we are the oldest
-	 * seqno in the tree (the first to be retired), then
-	 * set ourselves as the bottom-half.
-	 *
-	 * As we descend the tree, prune completed branches since we hold the
-	 * spinlock we know that the first_waiter must be delayed and can
-	 * reduce some of the sequential wake up latency if we take action
-	 * ourselves and wake up the completed tasks in parallel. Also, by
-	 * removing stale elements in the tree, we may be able to reduce the
-	 * ping-pong between the old bottom-half and ourselves as first-waiter.
+	/*
+	 * Leave the fake_irq timer enabled (if it is running), but clear the
+	 * bit so that it turns itself off on its next wake up and goes back
+	 * to the long hangcheck interval if still required.
 	 */
-	armed = false;
-	first = true;
-	parent = NULL;
-	completed = NULL;
-	seqno = intel_engine_get_seqno(engine);
-
-	 /* If the request completed before we managed to grab the spinlock,
-	  * return now before adding ourselves to the rbtree. We let the
-	  * current bottom-half handle any pending wakeups and instead
-	  * try and get out of the way quickly.
-	  */
-	if (i915_seqno_passed(seqno, wait->seqno)) {
-		RB_CLEAR_NODE(&wait->node);
-		return first;
-	}
-
-	p = &b->waiters.rb_node;
-	while (*p) {
-		parent = *p;
-		if (wait->seqno == to_wait(parent)->seqno) {
-			/* We have multiple waiters on the same seqno, select
-			 * the highest priority task (that with the smallest
-			 * task->prio) to serve as the bottom-half for this
-			 * group.
-			 */
-			if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
-				p = &parent->rb_right;
-				first = false;
-			} else {
-				p = &parent->rb_left;
-			}
-		} else if (i915_seqno_passed(wait->seqno,
-					     to_wait(parent)->seqno)) {
-			p = &parent->rb_right;
-			if (i915_seqno_passed(seqno, to_wait(parent)->seqno))
-				completed = parent;
-			else
-				first = false;
-		} else {
-			p = &parent->rb_left;
-		}
-	}
-	rb_link_node(&wait->node, parent, p);
-	rb_insert_color(&wait->node, &b->waiters);
-
-	if (first) {
-		spin_lock(&b->irq_lock);
-		b->irq_wait = wait;
-		/* After assigning ourselves as the new bottom-half, we must
-		 * perform a cursory check to prevent a missed interrupt.
-		 * Either we miss the interrupt whilst programming the hardware,
-		 * or if there was a previous waiter (for a later seqno) they
-		 * may be woken instead of us (due to the inherent race
-		 * in the unlocked read of b->irq_seqno_bh in the irq handler)
-		 * and so we miss the wake up.
-		 */
-		armed = __intel_breadcrumbs_enable_irq(b);
-		spin_unlock(&b->irq_lock);
-	}
-
-	if (completed) {
-		/* Advance the bottom-half (b->irq_wait) before we wake up
-		 * the waiters who may scribble over their intel_wait
-		 * just as the interrupt handler is dereferencing it via
-		 * b->irq_wait.
-		 */
-		if (!first) {
-			struct rb_node *next = rb_next(completed);
-			GEM_BUG_ON(next == &wait->node);
-			__intel_breadcrumbs_next(engine, next);
-		}
-
-		do {
-			struct intel_wait *crumb = to_wait(completed);
-			completed = rb_prev(completed);
-			__intel_breadcrumbs_finish(b, crumb);
-		} while (completed);
-	}
-
-	GEM_BUG_ON(!b->irq_wait);
-	GEM_BUG_ON(!b->irq_armed);
-	GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node);
-
-	return armed;
-}
-
-bool intel_engine_add_wait(struct intel_engine_cs *engine,
-			   struct intel_wait *wait)
-{
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-	bool armed;
-
-	spin_lock_irq(&b->rb_lock);
-	armed = __intel_engine_add_wait(engine, wait);
-	spin_unlock_irq(&b->rb_lock);
-	if (armed)
-		return armed;
-
-	/* Make the caller recheck if its request has already started. */
-	return intel_engine_has_started(engine, wait->seqno);
-}
-
-static inline bool chain_wakeup(struct rb_node *rb, int priority)
-{
-	return rb && to_wait(rb)->tsk->prio <= priority;
-}
+	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
 
-static inline int wakeup_priority(struct intel_breadcrumbs *b,
-				  struct task_struct *tsk)
-{
-	if (tsk == b->signaler)
-		return INT_MIN;
+	if (b->irq_enabled)
+		irq_enable(engine);
 	else
-		return tsk->prio;
-}
-
-static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
-				       struct intel_wait *wait)
-{
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-	lockdep_assert_held(&b->rb_lock);
-
-	if (RB_EMPTY_NODE(&wait->node))
-		goto out;
-
-	if (b->irq_wait == wait) {
-		const int priority = wakeup_priority(b, wait->tsk);
-		struct rb_node *next;
-
-		/* We are the current bottom-half. Find the next candidate,
-		 * the first waiter in the queue on the remaining oldest
-		 * request. As multiple seqnos may complete in the time it
-		 * takes us to wake up and find the next waiter, we have to
-		 * wake up that waiter for it to perform its own coherent
-		 * completion check.
-		 */
-		next = rb_next(&wait->node);
-		if (chain_wakeup(next, priority)) {
-			/* If the next waiter is already complete,
-			 * wake it up and continue onto the next waiter. So
-			 * if have a small herd, they will wake up in parallel
-			 * rather than sequentially, which should reduce
-			 * the overall latency in waking all the completed
-			 * clients.
-			 *
-			 * However, waking up a chain adds extra latency to
-			 * the first_waiter. This is undesirable if that
-			 * waiter is a high priority task.
-			 */
-			u32 seqno = intel_engine_get_seqno(engine);
-
-			while (i915_seqno_passed(seqno, to_wait(next)->seqno)) {
-				struct rb_node *n = rb_next(next);
-
-				__intel_breadcrumbs_finish(b, to_wait(next));
-				next = n;
-				if (!chain_wakeup(next, priority))
-					break;
-			}
-		}
-
-		__intel_breadcrumbs_next(engine, next);
-	} else {
-		GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
-	}
-
-	GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
-	rb_erase(&wait->node, &b->waiters);
-	RB_CLEAR_NODE(&wait->node);
-
-out:
-	GEM_BUG_ON(b->irq_wait == wait);
-	GEM_BUG_ON(rb_first(&b->waiters) !=
-		   (b->irq_wait ? &b->irq_wait->node : NULL));
-}
-
-void intel_engine_remove_wait(struct intel_engine_cs *engine,
-			      struct intel_wait *wait)
-{
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-	/* Quick check to see if this waiter was already decoupled from
-	 * the tree by the bottom-half to avoid contention on the spinlock
-	 * by the herd.
-	 */
-	if (RB_EMPTY_NODE(&wait->node)) {
-		GEM_BUG_ON(READ_ONCE(b->irq_wait) == wait);
-		return;
-	}
+		irq_disable(engine);
 
-	spin_lock_irq(&b->rb_lock);
-	__intel_engine_remove_wait(engine, wait);
-	spin_unlock_irq(&b->rb_lock);
+	spin_unlock_irqrestore(&b->irq_lock, flags);
 }
 
-static void signaler_set_rtpriority(void)
+void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 {
-	 struct sched_param param = { .sched_priority = 1 };
-
-	 sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
+	cancel_fake_irq(engine);
 }
 
-static int intel_breadcrumbs_signaler(void *arg)
+bool intel_engine_enable_signaling(struct i915_request *rq)
 {
-	struct intel_engine_cs *engine = arg;
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-	struct i915_request *rq, *n;
-
-	/* Install ourselves with high priority to reduce signalling latency */
-	signaler_set_rtpriority();
-
-	do {
-		bool do_schedule = true;
-		LIST_HEAD(list);
-		u32 seqno;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (list_empty(&b->signals))
-			goto sleep;
-
-		/*
-		 * We are either woken up by the interrupt bottom-half,
-		 * or by a client adding a new signaller. In both cases,
-		 * the GPU seqno may have advanced beyond our oldest signal.
-		 * If it has, propagate the signal, remove the waiter and
-		 * check again with the next oldest signal. Otherwise we
-		 * need to wait for a new interrupt from the GPU or for
-		 * a new client.
-		 */
-		seqno = intel_engine_get_seqno(engine);
-
-		spin_lock_irq(&b->rb_lock);
-		list_for_each_entry_safe(rq, n, &b->signals, signaling.link) {
-			u32 this = rq->signaling.wait.seqno;
-
-			GEM_BUG_ON(!rq->signaling.wait.seqno);
-
-			if (!i915_seqno_passed(seqno, this))
-				break;
+	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
 
-			if (likely(this == i915_request_global_seqno(rq))) {
-				__intel_engine_remove_wait(engine,
-							   &rq->signaling.wait);
+	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
 
-				rq->signaling.wait.seqno = 0;
-				__list_del_entry(&rq->signaling.link);
-
-				if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-					      &rq->fence.flags)) {
-					list_add_tail(&rq->signaling.link,
-						      &list);
-					i915_request_get(rq);
-				}
-			}
-		}
-		spin_unlock_irq(&b->rb_lock);
-
-		if (!list_empty(&list)) {
-			local_bh_disable();
-			list_for_each_entry_safe(rq, n, &list, signaling.link) {
-				dma_fence_signal(&rq->fence);
-				GEM_BUG_ON(!i915_request_completed(rq));
-				i915_request_put(rq);
-			}
-			local_bh_enable(); /* kick start the tasklets */
+	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
+		return true;
 
-			/*
-			 * If the engine is saturated we may be continually
-			 * processing completed requests. This angers the
-			 * NMI watchdog if we never let anything else
-			 * have access to the CPU. Let's pretend to be nice
-			 * and relinquish the CPU if we burn through the
-			 * entire RT timeslice!
-			 */
-			do_schedule = need_resched();
-		}
+	spin_lock(&b->irq_lock);
+	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
+		struct intel_context *ce = rq->hw_context;
+		struct i915_request *pos;
 
-		if (unlikely(do_schedule)) {
-sleep:
-			if (kthread_should_park())
-				kthread_parkme();
+		__intel_breadcrumbs_arm_irq(b);
 
-			if (unlikely(kthread_should_stop()))
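+		/*
+		 * Insert in seqno order, searching backwards since new
+		 * requests usually carry the highest seqno; the irq handler
+		 * can then stop scanning at the first incomplete request.
+		 */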
+		list_for_each_entry_reverse(pos, &ce->signals, signal_link) {
+			if (i915_seqno_passed(rq->fence.seqno,
+					      pos->fence.seqno))
 				break;
-
-			schedule();
 		}
-	} while (1);
-	__set_current_state(TASK_RUNNING);
+		list_add(&rq->signal_link, &pos->signal_link);
 
-	return 0;
-}
+		list_move_tail(&ce->signal_link, &b->signalers);
 
-static void insert_signal(struct intel_breadcrumbs *b,
-			  struct i915_request *request,
-			  const u32 seqno)
-{
-	struct i915_request *iter;
-
-	lockdep_assert_held(&b->rb_lock);
-
-	/*
-	 * A reasonable assumption is that we are called to add signals
-	 * in sequence, as the requests are submitted for execution and
-	 * assigned a global_seqno. This will be the case for the majority
-	 * of internally generated signals (inter-engine signaling).
-	 *
-	 * Out of order waiters triggering random signaling enabling will
-	 * be more problematic, but hopefully rare enough and the list
-	 * small enough that the O(N) insertion sort is not an issue.
-	 */
-
-	list_for_each_entry_reverse(iter, &b->signals, signaling.link)
-		if (i915_seqno_passed(seqno, iter->signaling.wait.seqno))
-			break;
-
-	list_add(&request->signaling.link, &iter->signaling.link);
-}
-
-bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
-{
-	struct intel_engine_cs *engine = request->engine;
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-	struct intel_wait *wait = &request->signaling.wait;
-	u32 seqno;
-
-	/*
-	 * Note that we may be called from an interrupt handler on another
-	 * device (e.g. nouveau signaling a fence completion causing us
-	 * to submit a request, and so enable signaling). As such,
-	 * we need to make sure that all other users of b->rb_lock protect
-	 * against interrupts, i.e. use spin_lock_irqsave.
-	 */
-
-	/* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */
-	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&request->lock);
-
-	seqno = i915_request_global_seqno(request);
-	if (!seqno) /* will be enabled later upon execution */
-		return true;
-
-	GEM_BUG_ON(wait->seqno);
-	wait->tsk = b->signaler;
-	wait->request = request;
-	wait->seqno = seqno;
-
-	/*
-	 * Add ourselves into the list of waiters, but registering our
-	 * bottom-half as the signaller thread. As per usual, only the oldest
-	 * waiter (not just signaller) is tasked as the bottom-half waking
-	 * up all completed waiters after the user interrupt.
-	 *
-	 * If we are the oldest waiter, enable the irq (after which we
-	 * must double check that the seqno did not complete).
-	 */
-	spin_lock(&b->rb_lock);
-	insert_signal(b, request, seqno);
-	wakeup &= __intel_engine_add_wait(engine, wait);
-	spin_unlock(&b->rb_lock);
-
-	if (wakeup) {
-		wake_up_process(b->signaler);
-		return !intel_wait_complete(wait);
+		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
 	}
+	spin_unlock(&b->irq_lock);
 
-	return true;
+	return !i915_request_completed(rq);
 }
 
-void intel_engine_cancel_signaling(struct i915_request *request)
+void intel_engine_cancel_signaling(struct i915_request *rq)
 {
-	struct intel_engine_cs *engine = request->engine;
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&request->lock);
+	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
 
-	if (!READ_ONCE(request->signaling.wait.seqno))
+	if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
 		return;
 
-	spin_lock(&b->rb_lock);
-	__intel_engine_remove_wait(engine, &request->signaling.wait);
-	if (fetch_and_zero(&request->signaling.wait.seqno))
-		__list_del_entry(&request->signaling.link);
-	spin_unlock(&b->rb_lock);
-}
-
-int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
-{
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-	struct task_struct *tsk;
-
-	spin_lock_init(&b->rb_lock);
-	spin_lock_init(&b->irq_lock);
-
-	timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
-	timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
-
-	INIT_LIST_HEAD(&b->signals);
-
-	/* Spawn a thread to provide a common bottom-half for all signals.
-	 * As this is an asynchronous interface we cannot steal the current
-	 * task for handling the bottom-half to the user interrupt, therefore
-	 * we create a thread to do the coherent seqno dance after the
-	 * interrupt and then signal the waitqueue (via the dma-buf/fence).
-	 */
-	tsk = kthread_run(intel_breadcrumbs_signaler, engine,
-			  "i915/signal:%d", engine->id);
-	if (IS_ERR(tsk))
-		return PTR_ERR(tsk);
-
-	b->signaler = tsk;
-
-	return 0;
-}
+	spin_lock(&b->irq_lock);
+	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
+		struct intel_context *ce = rq->hw_context;
 
-static void cancel_fake_irq(struct intel_engine_cs *engine)
-{
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+		list_del(&rq->signal_link);
+		if (list_empty(&ce->signals))
+			list_del_init(&ce->signal_link);
 
-	del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */
-	del_timer_sync(&b->hangcheck);
-	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
+		clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+	}
+	spin_unlock(&b->irq_lock);
 }
 
-void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
+void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
+				    struct drm_printer *p)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-	unsigned long flags;
+	struct intel_context *ce;
+	struct i915_request *rq;
 
-	spin_lock_irqsave(&b->irq_lock, flags);
-
-	/*
-	 * Leave the fake_irq timer enabled (if it is running), but clear the
-	 * bit so that it turns itself off on its next wake up and goes back
-	 * to the long hangcheck interval if still required.
-	 */
-	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
-
-	if (b->irq_enabled)
-		irq_enable(engine);
-	else
-		irq_disable(engine);
-
-	spin_unlock_irqrestore(&b->irq_lock, flags);
-}
-
-void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
-{
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	if (list_empty(&b->signalers))
+		return;
 
-	/* The engines should be idle and all requests accounted for! */
-	WARN_ON(READ_ONCE(b->irq_wait));
-	WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
-	WARN_ON(!list_empty(&b->signals));
+	drm_printf(p, "Signals:\n");
 
-	if (!IS_ERR_OR_NULL(b->signaler))
-		kthread_stop(b->signaler);
+	spin_lock_irq(&b->irq_lock);
+	list_for_each_entry(ce, &b->signalers, signal_link) {
+		list_for_each_entry(rq, &ce->signals, signal_link) {
+			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
+				   rq->fence.context, rq->fence.seqno,
+				   i915_request_completed(rq) ? "!" :
+				   i915_request_started(rq) ? "*" :
+				   "",
+				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
+		}
+	}
+	spin_unlock_irq(&b->irq_lock);
 
-	cancel_fake_irq(engine);
+	if (test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings))
+		drm_printf(p, "Fake irq active\n");
 }
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/intel_breadcrumbs.c"
-#endif
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index e49925af6da8..31a614ea0d16 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -458,12 +458,6 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
 void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 {
 	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
-
-	/* After manually advancing the seqno, fake the interrupt in case
-	 * there are any waiters for that seqno.
-	 */
-	intel_engine_wakeup(engine);
-
 	GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);
 }
 
@@ -667,16 +661,10 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 		}
 	}
 
-	ret = intel_engine_init_breadcrumbs(engine);
-	if (ret)
-		goto err_unpin_preempt;
+	intel_engine_init_breadcrumbs(engine);
 
 	return 0;
 
-err_unpin_preempt:
-	if (i915->preempt_context)
-		__intel_context_unpin(i915->preempt_context, engine);
-
 err_unpin_kernel:
 	__intel_context_unpin(i915->kernel_context, engine);
 	return ret;
@@ -1236,12 +1224,14 @@ static void print_request(struct drm_printer *m,
 
 	x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));
 
-	drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n",
+	drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n",
 		   prefix,
 		   rq->global_seqno,
 		   i915_request_completed(rq) ? "!" :
 		   i915_request_started(rq) ? "*" :
 		   "",
+		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+			    &rq->fence.flags) ?  "+" : "",
 		   rq->fence.context, rq->fence.seqno,
 		   buf,
 		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
@@ -1433,12 +1423,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		       struct drm_printer *m,
 		       const char *header, ...)
 {
-	struct intel_breadcrumbs * const b = &engine->breadcrumbs;
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
 	struct i915_request *rq;
 	intel_wakeref_t wakeref;
-	unsigned long flags;
-	struct rb_node *rb;
 
 	if (header) {
 		va_list ap;
@@ -1504,21 +1491,12 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 
 	intel_execlists_show_requests(engine, m, print_request, 8);
 
-	spin_lock_irqsave(&b->rb_lock, flags);
-	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
-		struct intel_wait *w = rb_entry(rb, typeof(*w), node);
-
-		drm_printf(m, "\t%s [%d:%c] waiting for %x\n",
-			   w->tsk->comm, w->tsk->pid,
-			   task_state_to_char(w->tsk),
-			   w->seqno);
-	}
-	spin_unlock_irqrestore(&b->rb_lock, flags);
-
 	drm_printf(m, "HWSP:\n");
 	hexdump(m, engine->status_page.addr, PAGE_SIZE);
 
 	drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
+
+	intel_engine_print_breadcrumbs(engine, m);
 }
 
 static u8 user_class_map[] = {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7148d1480f5d..ec60b66773e9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -483,8 +483,8 @@ static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 
 	for (i = 0; i < GEN7_XCS_WA; i++) {
 		*cs++ = MI_STORE_DWORD_INDEX;
-		*cs++ = I915_GEM_HWS_INDEX_ADDR;
-		*cs++ = rq->global_seqno;
+		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
+		*cs++ = rq->fence.seqno;
 	}
 
 	*cs++ = MI_FLUSH_DW;
@@ -734,7 +734,7 @@ static int init_ring_common(struct intel_engine_cs *engine)
 	}
 
 	/* Papering over lost _interrupts_ immediately following the restart */
-	intel_engine_wakeup(engine);
+	intel_engine_signal_breadcrumbs(engine);
 out:
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index e6ec96e0ab56..8b4b7bd529e7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -5,6 +5,7 @@
 #include <drm/drm_util.h>
 
 #include <linux/hashtable.h>
+#include <linux/irq_work.h>
 #include <linux/seqlock.h>
 
 #include "i915_gem_batch_pool.h"
@@ -380,22 +381,19 @@ struct intel_engine_cs {
 	 * the overhead of waking that client is much preferred.
 	 */
 	struct intel_breadcrumbs {
-		spinlock_t irq_lock; /* protects irq_*; irqsafe */
-		struct intel_wait *irq_wait; /* oldest waiter by retirement */
+		spinlock_t irq_lock;
+		struct list_head signalers;
 
-		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
-		struct rb_root waiters; /* sorted by retirement, priority */
-		struct list_head signals; /* sorted by retirement */
-		struct task_struct *signaler; /* used for fence signalling */
+		struct irq_work irq_work;
 
 		struct timer_list fake_irq; /* used after a missed interrupt */
 		struct timer_list hangcheck; /* detect missed interrupts */
 
 		unsigned int hangcheck_interrupts;
 		unsigned int irq_enabled;
-		unsigned int irq_count;
 
-		bool irq_armed : 1;
+		bool irq_armed;
+		bool irq_fired;
 	} breadcrumbs;
 
 	struct {
@@ -882,83 +880,32 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
 
-/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
-int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
-
-static inline void intel_wait_init(struct intel_wait *wait)
-{
-	wait->tsk = current;
-	wait->request = NULL;
-}
-
-static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
-{
-	wait->tsk = current;
-	wait->seqno = seqno;
-}
-
-static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
-{
-	return wait->seqno;
-}
-
-static inline bool
-intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
-{
-	wait->seqno = seqno;
-	return intel_wait_has_seqno(wait);
-}
-
-static inline bool
-intel_wait_update_request(struct intel_wait *wait,
-			  const struct i915_request *rq)
-{
-	return intel_wait_update_seqno(wait, i915_request_global_seqno(rq));
-}
-
-static inline bool
-intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
-{
-	return wait->seqno == seqno;
-}
-
-static inline bool
-intel_wait_check_request(const struct intel_wait *wait,
-			 const struct i915_request *rq)
-{
-	return intel_wait_check_seqno(wait, i915_request_global_seqno(rq));
-}
-
-static inline bool intel_wait_complete(const struct intel_wait *wait)
-{
-	return RB_EMPTY_NODE(&wait->node);
-}
+void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
+void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 
-bool intel_engine_add_wait(struct intel_engine_cs *engine,
-			   struct intel_wait *wait);
-void intel_engine_remove_wait(struct intel_engine_cs *engine,
-			      struct intel_wait *wait);
-bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
+bool intel_engine_enable_signaling(struct i915_request *request);
 void intel_engine_cancel_signaling(struct i915_request *request);
 
-static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
-{
-	return READ_ONCE(engine->breadcrumbs.irq_wait);
-}
-
-unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
-#define ENGINE_WAKEUP_WAITER BIT(0)
-#define ENGINE_WAKEUP_ASLEEP BIT(1)
-
 void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
 void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);
 
-void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
+bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
 
+static inline void
+intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)
+{
+	irq_work_queue(&engine->breadcrumbs.irq_work);
+}
+
+bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);
+
 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 
+void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
+				    struct drm_printer *p);
+
 static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 {
 	memset(batch, 0, 6 * sizeof(u32));
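
The intel_engine_queue_breadcrumbs() helper above leans on irq_work so that
signaling can be kicked from hard-irq context or under spinlocks and run
shortly afterwards, outside the caller's locks. A hypothetical standalone
sketch of that pattern (not i915 code):

	#include <linux/interrupt.h>
	#include <linux/irq_work.h>

	static struct irq_work signal_work;

	static void do_signal(struct irq_work *work)
	{
		/* runs from the irq_work interrupt, outside the caller's locks */
	}

	static irqreturn_t hw_irq(int irq, void *arg)
	{
		irq_work_queue(&signal_work); /* safe from hard-irq context */
		return IRQ_HANDLED;
	}

	static void setup(void)
	{
		init_irq_work(&signal_work, do_signal);
	}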
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 4a83a1c6c406..88e5ab586337 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -15,7 +15,6 @@ selftest(scatterlist, scatterlist_mock_selftests)
 selftest(syncmap, i915_syncmap_mock_selftests)
 selftest(uncore, intel_uncore_mock_selftests)
 selftest(engine, intel_engine_cs_mock_selftests)
-selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
 selftest(timelines, i915_timeline_mock_selftests)
 selftest(requests, i915_request_mock_selftests)
 selftest(objects, i915_gem_object_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 8b73a8c21377..4dbb062d4c5c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -25,8 +25,11 @@
 #include <linux/prime_numbers.h>
 
 #include "../i915_selftest.h"
+#include "i915_random.h"
+#include "lib_sw_fence.h"
 
 #include "mock_context.h"
+#include "mock_drm.h"
 #include "mock_gem_device.h"
 
 static int igt_add_request(void *arg)
@@ -246,6 +249,239 @@ static int igt_request_rewind(void *arg)
 	return err;
 }
 
+struct smoketest {
+	struct intel_engine_cs *engine;
+	struct i915_gem_context **contexts;
+	unsigned int ncontexts, max_batch;
+	atomic_long_t num_waits, num_fences;
+	struct i915_request *(*request_alloc)(struct i915_gem_context *,
+					      struct intel_engine_cs *);
+};
+
+static struct i915_request *
+__mock_request_alloc(struct i915_gem_context *ctx,
+		     struct intel_engine_cs *engine)
+{
+	return mock_request(engine, ctx, 0);
+}
+
+static struct i915_request *
+__live_request_alloc(struct i915_gem_context *ctx,
+		     struct intel_engine_cs *engine)
+{
+	return i915_request_alloc(engine, ctx);
+}
+
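+/*
+ * Hold a batch of requests back with a shared i915_sw_fence, release them
+ * all at once and wait for completion, stressing concurrent signaling
+ * against many waiters.
+ */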
+static int __igt_breadcrumbs_smoketest(void *arg)
+{
+	struct smoketest *t = arg;
+	struct mutex *BKL = &t->engine->i915->drm.struct_mutex;
+	struct i915_request **requests;
+	I915_RND_STATE(prng);
+	const unsigned int total = 4 * t->ncontexts + 1;
+	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
+	unsigned int num_waits = 0, num_fences = 0;
+	unsigned int *order;
+	int err = 0;
+
+	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
+	if (!requests)
+		return -ENOMEM;
+
+	order = i915_random_order(total, &prng);
+	if (!order) {
+		err = -ENOMEM;
+		goto out_requests;
+	}
+
+	while (!kthread_should_stop()) {
+		struct i915_sw_fence *submit, *wait;
+		unsigned int n, count;
+
+		submit = heap_fence_create(GFP_KERNEL);
+		if (!submit) {
+			err = -ENOMEM;
+			break;
+		}
+
+		wait = heap_fence_create(GFP_KERNEL);
+		if (!wait) {
+			i915_sw_fence_commit(submit);
+			heap_fence_put(submit);
+			err = -ENOMEM;
+			break;
+		}
+
+		i915_random_reorder(order, total, &prng);
+		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);
+
+		for (n = 0; n < count; n++) {
+			struct i915_gem_context *ctx =
+				t->contexts[order[n] % t->ncontexts];
+			struct i915_request *rq;
+
+			mutex_lock(BKL);
+
+			rq = t->request_alloc(ctx, t->engine);
+			if (IS_ERR(rq)) {
+				mutex_unlock(BKL);
+				err = PTR_ERR(rq);
+				count = n;
+				break;
+			}
+
+			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+							       submit,
+							       GFP_KERNEL);
+
+			requests[n] = i915_request_get(rq);
+			i915_request_add(rq);
+
+			mutex_unlock(BKL);
+
+			if (err >= 0)
+				err = i915_sw_fence_await_dma_fence(wait,
+								    &rq->fence,
+								    0,
+								    GFP_KERNEL);
+			if (err < 0) {
+				i915_request_put(rq);
+				count = n;
+				break;
+			}
+		}
+
+		i915_sw_fence_commit(submit);
+		i915_sw_fence_commit(wait);
+
+		if (!wait_event_timeout(wait->wait,
+					i915_sw_fence_done(wait),
+					HZ / 2)) {
+			struct i915_request *rq = requests[count - 1];
+
+			pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
+			       count,
+			       rq->fence.context, rq->fence.seqno,
+			       t->engine->name);
+			i915_gem_set_wedged(t->engine->i915);
+			GEM_BUG_ON(!i915_request_completed(rq));
+			i915_sw_fence_wait(wait);
+			err = -EIO;
+		}
+
+		for (n = 0; n < count; n++) {
+			struct i915_request *rq = requests[n];
+
+			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				      &rq->fence.flags)) {
+				pr_err("%llu:%llu was not signaled!\n",
+				       rq->fence.context, rq->fence.seqno);
+				err = -EINVAL;
+			}
+
+			i915_request_put(rq);
+		}
+
+		heap_fence_put(wait);
+		heap_fence_put(submit);
+
+		if (err < 0)
+			break;
+
+		num_fences += count;
+		num_waits++;
+
+		cond_resched();
+	}
+
+	atomic_long_add(num_fences, &t->num_fences);
+	atomic_long_add(num_waits, &t->num_waits);
+
+	kfree(order);
+out_requests:
+	kfree(requests);
+	return err;
+}
+
+static int mock_breadcrumbs_smoketest(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct smoketest t = {
+		.engine = i915->engine[RCS],
+		.ncontexts = 1024,
+		.max_batch = 1024,
+		.request_alloc = __mock_request_alloc
+	};
+	unsigned int ncpus = num_online_cpus();
+	struct task_struct **threads;
+	unsigned int n;
+	int ret = 0;
+
+	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
+	if (!threads)
+		return -ENOMEM;
+
+	t.contexts =
+		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
+	if (!t.contexts) {
+		ret = -ENOMEM;
+		goto out_threads;
+	}
+
+	mutex_lock(&t.engine->i915->drm.struct_mutex);
+	for (n = 0; n < t.ncontexts; n++) {
+		t.contexts[n] = mock_context(t.engine->i915, "mock");
+		if (!t.contexts[n]) {
+			ret = -ENOMEM;
+			goto out_contexts;
+		}
+	}
+
+	for (n = 0; n < ncpus; n++) {
+		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
+					 &t, "igt/%d", n);
+		if (IS_ERR(threads[n])) {
+			ret = PTR_ERR(threads[n]);
+			ncpus = n;
+			break;
+		}
+
+		get_task_struct(threads[n]);
+	}
+	mutex_unlock(&t.engine->i915->drm.struct_mutex);
+
+	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
+
+	for (n = 0; n < ncpus; n++) {
+		int err;
+
+		err = kthread_stop(threads[n]);
+		if (err < 0 && !ret)
+			ret = err;
+
+		put_task_struct(threads[n]);
+	}
+	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
+		atomic_long_read(&t.num_waits),
+		atomic_long_read(&t.num_fences),
+		ncpus);
+
+	mutex_lock(&t.engine->i915->drm.struct_mutex);
+out_contexts:
+	for (n = 0; n < t.ncontexts; n++) {
+		if (!t.contexts[n])
+			break;
+		mock_context_close(t.contexts[n]);
+	}
+	mutex_unlock(&t.engine->i915->drm.struct_mutex);
+	kfree(t.contexts);
+out_threads:
+	kfree(threads);
+
+	return ret;
+}
+
 int i915_request_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
@@ -253,6 +489,7 @@ int i915_request_mock_selftests(void)
 		SUBTEST(igt_wait_request),
 		SUBTEST(igt_fence_wait),
 		SUBTEST(igt_request_rewind),
+		SUBTEST(mock_breadcrumbs_smoketest),
 	};
 	struct drm_i915_private *i915;
 	int err;
@@ -869,6 +1106,166 @@ static int live_sequential_engines(void *arg)
 	return err;
 }
 
+static int
+max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+	int ret;
+
+	/*
+	 * Before execlists, all contexts share the same ringbuffer. With
+	 * execlists, each context/engine has a separate ringbuffer and
+	 * for the purposes of this test, inexhaustible.
+	 *
+	 * For the global ringbuffer though, we have to be very careful
+	 * that we do not wrap while preventing the execution of requests
+	 * with an unsignaled fence.
+	 */
+	if (HAS_EXECLISTS(ctx->i915))
+		return INT_MAX;
+
+	rq = i915_request_alloc(engine, ctx);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+	} else {
+		int sz;
+
+		ret = rq->ring->size - rq->reserved_space;
+		i915_request_add(rq);
+
+		sz = rq->ring->emit - rq->head;
+		if (sz < 0)
+			sz += rq->ring->size;
+		ret /= sz;
+		ret /= 2; /* leave half spare, in case of emergency! */
+
+		/* One ring interleaved between requests from all cpus */
+		ret /= num_online_cpus() + 1;
+	}
+
+	return ret;
+}
+
+static int live_breadcrumbs_smoketest(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct smoketest t[I915_NUM_ENGINES];
+	unsigned int ncpus = num_online_cpus();
+	unsigned long num_waits, num_fences;
+	struct intel_engine_cs *engine;
+	struct task_struct **threads;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	struct drm_file *file;
+	struct live_test live;
+	unsigned int n;
+	int ret = 0;
+
+	wakeref = intel_runtime_pm_get(i915);
+
+	file = mock_file(i915);
+	if (IS_ERR(file)) {
+		ret = PTR_ERR(file);
+		goto out_rpm;
+	}
+
+	threads = kcalloc(ncpus * I915_NUM_ENGINES,
+			  sizeof(*threads),
+			  GFP_KERNEL);
+	if (!threads) {
+		ret = -ENOMEM;
+		goto out_threads;
+	}
+
+	memset(&t[0], 0, sizeof(t[0]));
+	t[0].request_alloc = __live_request_alloc;
+	t[0].ncontexts = 64;
+	t[0].contexts = kmalloc_array(t[0].ncontexts,
+				      sizeof(*t[0].contexts),
+				      GFP_KERNEL);
+	if (!t[0].contexts) {
+		ret = -ENOMEM;
+		goto out_threads;
+	}
+
+	mutex_lock(&i915->drm.struct_mutex);
+	for (n = 0; n < t[0].ncontexts; n++) {
+		t[0].contexts[n] = live_context(i915, file);
+		if (!t[0].contexts[n]) {
+			ret = -ENOMEM;
+			goto out_contexts;
+		}
+	}
+
+	ret = begin_live_test(&live, i915, __func__, "");
+	if (ret)
+		goto out_contexts;
+
+	for_each_engine(engine, i915, id) {
+		t[id] = t[0];
+		t[id].engine = engine;
+		t[id].max_batch = max_batches(t[0].contexts[0], engine);
+		if (t[id].max_batch < 0) {
+			ret = t[id].max_batch;
+			mutex_unlock(&i915->drm.struct_mutex);
+			goto out_flush;
+		}
+		pr_debug("Limiting batches to %d requests on %s\n",
+			 t[id].max_batch, engine->name);
+
+		for (n = 0; n < ncpus; n++) {
+			struct task_struct *tsk;
+
+			tsk = kthread_run(__igt_breadcrumbs_smoketest,
+					  &t[id], "igt/%d.%d", id, n);
+			if (IS_ERR(tsk)) {
+				ret = PTR_ERR(tsk);
+				goto out_flush;
+			}
+
+			get_task_struct(tsk);
+			threads[id * ncpus + n] = tsk;
+		}
+	}
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
+
+out_flush:
+	num_waits = 0;
+	num_fences = 0;
+	for_each_engine(engine, i915, id) {
+		for (n = 0; n < ncpus; n++) {
+			struct task_struct *tsk = threads[id * ncpus + n];
+			int err;
+
+			if (!tsk)
+				continue;
+
+			err = kthread_stop(tsk);
+			if (err < 0 && !ret)
+				ret = err;
+
+			put_task_struct(tsk);
+		}
+
+		num_waits += atomic_long_read(&t[id].num_waits);
+		num_fences += atomic_long_read(&t[id].num_fences);
+	}
+	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
+		num_waits, num_fences, RUNTIME_INFO(i915)->num_rings, ncpus);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	ret = end_live_test(&live) ?: ret;
+out_contexts:
+	mutex_unlock(&i915->drm.struct_mutex);
+	kfree(t[0].contexts);
+out_threads:
+	kfree(threads);
+	mock_file_free(i915, file);
+out_rpm:
+	intel_runtime_pm_put(i915, wakeref);
+
+	return ret;
+}
+
 int i915_request_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -876,6 +1273,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_all_engines),
 		SUBTEST(live_sequential_engines),
 		SUBTEST(live_empty_request),
+		SUBTEST(live_breadcrumbs_smoketest),
 	};
 
 	if (i915_terminally_wedged(&i915->gpu_error))
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index 0e70df0230b8..9ebd9225684e 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -185,11 +185,6 @@ void igt_spinner_fini(struct igt_spinner *spin)
 
 bool igt_wait_for_spinner(struct igt_spinner *spin, struct i915_request *rq)
 {
-	if (!wait_event_timeout(rq->execute,
-				READ_ONCE(rq->global_seqno),
-				msecs_to_jiffies(10)))
-		return false;
-
 	return !(wait_for_us(i915_seqno_passed(hws_seqno(spin, rq),
 					       rq->fence.seqno),
 			     10) &&
diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
deleted file mode 100644
index f03b407fdbe2..000000000000
--- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
+++ /dev/null
@@ -1,470 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "../i915_selftest.h"
-#include "i915_random.h"
-
-#include "mock_gem_device.h"
-#include "mock_engine.h"
-
-static int check_rbtree(struct intel_engine_cs *engine,
-			const unsigned long *bitmap,
-			const struct intel_wait *waiters,
-			const int count)
-{
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-	struct rb_node *rb;
-	int n;
-
-	if (&b->irq_wait->node != rb_first(&b->waiters)) {
-		pr_err("First waiter does not match first element of wait-tree\n");
-		return -EINVAL;
-	}
-
-	n = find_first_bit(bitmap, count);
-	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
-		struct intel_wait *w = container_of(rb, typeof(*w), node);
-		int idx = w - waiters;
-
-		if (!test_bit(idx, bitmap)) {
-			pr_err("waiter[%d, seqno=%d] removed but still in wait-tree\n",
-			       idx, w->seqno);
-			return -EINVAL;
-		}
-
-		if (n != idx) {
-			pr_err("waiter[%d, seqno=%d] does not match expected next element in tree [%d]\n",
-			       idx, w->seqno, n);
-			return -EINVAL;
-		}
-
-		n = find_next_bit(bitmap, count, n + 1);
-	}
-
-	return 0;
-}
-
-static int check_completion(struct intel_engine_cs *engine,
-			    const unsigned long *bitmap,
-			    const struct intel_wait *waiters,
-			    const int count)
-{
-	int n;
-
-	for (n = 0; n < count; n++) {
-		if (intel_wait_complete(&waiters[n]) != !!test_bit(n, bitmap))
-			continue;
-
-		pr_err("waiter[%d, seqno=%d] is %s, but expected %s\n",
-		       n, waiters[n].seqno,
-		       intel_wait_complete(&waiters[n]) ? "complete" : "active",
-		       test_bit(n, bitmap) ? "active" : "complete");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int check_rbtree_empty(struct intel_engine_cs *engine)
-{
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-	if (b->irq_wait) {
-		pr_err("Empty breadcrumbs still has a waiter\n");
-		return -EINVAL;
-	}
-
-	if (!RB_EMPTY_ROOT(&b->waiters)) {
-		pr_err("Empty breadcrumbs, but wait-tree not empty\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int igt_random_insert_remove(void *arg)
-{
-	const u32 seqno_bias = 0x1000;
-	I915_RND_STATE(prng);
-	struct intel_engine_cs *engine = arg;
-	struct intel_wait *waiters;
-	const int count = 4096;
-	unsigned int *order;
-	unsigned long *bitmap;
-	int err = -ENOMEM;
-	int n;
-
-	mock_engine_reset(engine);
-
-	waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL);
-	if (!waiters)
-		goto out_engines;
-
-	bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap),
-			 GFP_KERNEL);
-	if (!bitmap)
-		goto out_waiters;
-
-	order = i915_random_order(count, &prng);
-	if (!order)
-		goto out_bitmap;
-
-	for (n = 0; n < count; n++)
-		intel_wait_init_for_seqno(&waiters[n], seqno_bias + n);
-
-	err = check_rbtree(engine, bitmap, waiters, count);
-	if (err)
-		goto out_order;
-
-	/* Add and remove waiters into the rbtree in random order. At each
-	 * step, we verify that the rbtree is correctly ordered.
-	 */
-	for (n = 0; n < count; n++) {
-		int i = order[n];
-
-		intel_engine_add_wait(engine, &waiters[i]);
-		__set_bit(i, bitmap);
-
-		err = check_rbtree(engine, bitmap, waiters, count);
-		if (err)
-			goto out_order;
-	}
-
-	i915_random_reorder(order, count, &prng);
-	for (n = 0; n < count; n++) {
-		int i = order[n];
-
-		intel_engine_remove_wait(engine, &waiters[i]);
-		__clear_bit(i, bitmap);
-
-		err = check_rbtree(engine, bitmap, waiters, count);
-		if (err)
-			goto out_order;
-	}
-
-	err = check_rbtree_empty(engine);
-out_order:
-	kfree(order);
-out_bitmap:
-	kfree(bitmap);
-out_waiters:
-	kvfree(waiters);
-out_engines:
-	mock_engine_flush(engine);
-	return err;
-}
-
-static int igt_insert_complete(void *arg)
-{
-	const u32 seqno_bias = 0x1000;
-	struct intel_engine_cs *engine = arg;
-	struct intel_wait *waiters;
-	const int count = 4096;
-	unsigned long *bitmap;
-	int err = -ENOMEM;
-	int n, m;
-
-	mock_engine_reset(engine);
-
-	waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL);
-	if (!waiters)
-		goto out_engines;
-
-	bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap),
-			 GFP_KERNEL);
-	if (!bitmap)
-		goto out_waiters;
-
-	for (n = 0; n < count; n++) {
-		intel_wait_init_for_seqno(&waiters[n], n + seqno_bias);
-		intel_engine_add_wait(engine, &waiters[n]);
-		__set_bit(n, bitmap);
-	}
-	err = check_rbtree(engine, bitmap, waiters, count);
-	if (err)
-		goto out_bitmap;
-
-	/* On each step, we advance the seqno so that several waiters are then
-	 * complete (we increase the seqno by increasingly larger values to
-	 * retire more and more waiters at once). All retired waiters should
-	 * be woken and removed from the rbtree, and so that we check.
-	 */
-	for (n = 0; n < count; n = m) {
-		int seqno = 2 * n;
-
-		GEM_BUG_ON(find_first_bit(bitmap, count) != n);
-
-		if (intel_wait_complete(&waiters[n])) {
-			pr_err("waiter[%d, seqno=%d] completed too early\n",
-			       n, waiters[n].seqno);
-			err = -EINVAL;
-			goto out_bitmap;
-		}
-
-		/* complete the following waiters */
-		mock_seqno_advance(engine, seqno + seqno_bias);
-		for (m = n; m <= seqno; m++) {
-			if (m == count)
-				break;
-
-			GEM_BUG_ON(!test_bit(m, bitmap));
-			__clear_bit(m, bitmap);
-		}
-
-		intel_engine_remove_wait(engine, &waiters[n]);
-		RB_CLEAR_NODE(&waiters[n].node);
-
-		err = check_rbtree(engine, bitmap, waiters, count);
-		if (err) {
-			pr_err("rbtree corrupt after seqno advance to %d\n",
-			       seqno + seqno_bias);
-			goto out_bitmap;
-		}
-
-		err = check_completion(engine, bitmap, waiters, count);
-		if (err) {
-			pr_err("completions after seqno advance to %d failed\n",
-			       seqno + seqno_bias);
-			goto out_bitmap;
-		}
-	}
-
-	err = check_rbtree_empty(engine);
-out_bitmap:
-	kfree(bitmap);
-out_waiters:
-	kvfree(waiters);
-out_engines:
-	mock_engine_flush(engine);
-	return err;
-}
-
-struct igt_wakeup {
-	struct task_struct *tsk;
-	atomic_t *ready, *set, *done;
-	struct intel_engine_cs *engine;
-	unsigned long flags;
-#define STOP 0
-#define IDLE 1
-	wait_queue_head_t *wq;
-	u32 seqno;
-};
-
-static bool wait_for_ready(struct igt_wakeup *w)
-{
-	DEFINE_WAIT(ready);
-
-	set_bit(IDLE, &w->flags);
-	if (atomic_dec_and_test(w->done))
-		wake_up_var(w->done);
-
-	if (test_bit(STOP, &w->flags))
-		goto out;
-
-	for (;;) {
-		prepare_to_wait(w->wq, &ready, TASK_INTERRUPTIBLE);
-		if (atomic_read(w->ready) == 0)
-			break;
-
-		schedule();
-	}
-	finish_wait(w->wq, &ready);
-
-out:
-	clear_bit(IDLE, &w->flags);
-	if (atomic_dec_and_test(w->set))
-		wake_up_var(w->set);
-
-	return !test_bit(STOP, &w->flags);
-}
-
-static int igt_wakeup_thread(void *arg)
-{
-	struct igt_wakeup *w = arg;
-	struct intel_wait wait;
-
-	while (wait_for_ready(w)) {
-		GEM_BUG_ON(kthread_should_stop());
-
-		intel_wait_init_for_seqno(&wait, w->seqno);
-		intel_engine_add_wait(w->engine, &wait);
-		for (;;) {
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			if (i915_seqno_passed(intel_engine_get_seqno(w->engine),
-					      w->seqno))
-				break;
-
-			if (test_bit(STOP, &w->flags)) /* emergency escape */
-				break;
-
-			schedule();
-		}
-		intel_engine_remove_wait(w->engine, &wait);
-		__set_current_state(TASK_RUNNING);
-	}
-
-	return 0;
-}
-
-static void igt_wake_all_sync(atomic_t *ready,
-			      atomic_t *set,
-			      atomic_t *done,
-			      wait_queue_head_t *wq,
-			      int count)
-{
-	atomic_set(set, count);
-	atomic_set(ready, 0);
-	wake_up_all(wq);
-
-	wait_var_event(set, !atomic_read(set));
-	atomic_set(ready, count);
-	atomic_set(done, count);
-}
-
-static int igt_wakeup(void *arg)
-{
-	I915_RND_STATE(prng);
-	struct intel_engine_cs *engine = arg;
-	struct igt_wakeup *waiters;
-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
-	const int count = 4096;
-	const u32 max_seqno = count / 4;
-	atomic_t ready, set, done;
-	int err = -ENOMEM;
-	int n, step;
-
-	mock_engine_reset(engine);
-
-	waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL);
-	if (!waiters)
-		goto out_engines;
-
-	/* Create a large number of threads, each waiting on a random seqno.
-	 * Multiple waiters will be waiting for the same seqno.
-	 */
-	atomic_set(&ready, count);
-	for (n = 0; n < count; n++) {
-		waiters[n].wq = &wq;
-		waiters[n].ready = &ready;
-		waiters[n].set = &set;
-		waiters[n].done = &done;
-		waiters[n].engine = engine;
-		waiters[n].flags = BIT(IDLE);
-
-		waiters[n].tsk = kthread_run(igt_wakeup_thread, &waiters[n],
-					     "i915/igt:%d", n);
-		if (IS_ERR(waiters[n].tsk))
-			goto out_waiters;
-
-		get_task_struct(waiters[n].tsk);
-	}
-
-	for (step = 1; step <= max_seqno; step <<= 1) {
-		u32 seqno;
-
-		/* The waiter threads start paused as we assign them a random
-		 * seqno and reset the engine. Once the engine is reset,
-		 * we signal that the threads may begin their wait upon their
-		 * seqno.
-		 */
-		for (n = 0; n < count; n++) {
-			GEM_BUG_ON(!test_bit(IDLE, &waiters[n].flags));
-			waiters[n].seqno =
-				1 + prandom_u32_state(&prng) % max_seqno;
-		}
-		mock_seqno_advance(engine, 0);
-		igt_wake_all_sync(&ready, &set, &done, &wq, count);
-
-		/* Simulate the GPU doing chunks of work, with one or more
-		 * seqno appearing to finish at the same time. A random number
-		 * of threads will be waiting upon the update and hopefully be
-		 * woken.
-		 */
-		for (seqno = 1; seqno <= max_seqno + step; seqno += step) {
-			usleep_range(50, 500);
-			mock_seqno_advance(engine, seqno);
-		}
-		GEM_BUG_ON(intel_engine_get_seqno(engine) < 1 + max_seqno);
-
-		/* With the seqno now beyond any of the waiting threads, they
-		 * should all be woken, see that they are complete and signal
-		 * that they are ready for the next test. We wait until all
-		 * threads are complete and waiting for us (i.e. not a seqno).
-		 */
-		if (!wait_var_event_timeout(&done,
-					    !atomic_read(&done), 10 * HZ)) {
-			pr_err("Timed out waiting for %d remaining waiters\n",
-			       atomic_read(&done));
-			err = -ETIMEDOUT;
-			break;
-		}
-
-		err = check_rbtree_empty(engine);
-		if (err)
-			break;
-	}
-
-out_waiters:
-	for (n = 0; n < count; n++) {
-		if (IS_ERR(waiters[n].tsk))
-			break;
-
-		set_bit(STOP, &waiters[n].flags);
-	}
-	mock_seqno_advance(engine, INT_MAX); /* wakeup any broken waiters */
-	igt_wake_all_sync(&ready, &set, &done, &wq, n);
-
-	for (n = 0; n < count; n++) {
-		if (IS_ERR(waiters[n].tsk))
-			break;
-
-		kthread_stop(waiters[n].tsk);
-		put_task_struct(waiters[n].tsk);
-	}
-
-	kvfree(waiters);
-out_engines:
-	mock_engine_flush(engine);
-	return err;
-}
-
-int intel_breadcrumbs_mock_selftests(void)
-{
-	static const struct i915_subtest tests[] = {
-		SUBTEST(igt_random_insert_remove),
-		SUBTEST(igt_insert_complete),
-		SUBTEST(igt_wakeup),
-	};
-	struct drm_i915_private *i915;
-	int err;
-
-	i915 = mock_gem_device();
-	if (!i915)
-		return -ENOMEM;
-
-	err = i915_subtests(tests, i915->engine[RCS]);
-	drm_dev_put(&i915->drm);
-
-	return err;
-}
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 4809874ab28c..4e8c3008dc84 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -1127,7 +1127,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
 
 	wait_for_completion(&arg.completion);
 
-	if (wait_for(waitqueue_active(&rq->execute), 10)) {
+	if (wait_for(!list_empty(&rq->fence.cb_list), 10)) {
 		struct drm_printer p = drm_info_printer(i915->drm.dev);
 
 		pr_err("igt/evict_vma kthread did not wait\n");
diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
index b26f07b55d86..2bfa72c1654b 100644
--- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
+++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
@@ -76,3 +76,57 @@ void timed_fence_fini(struct timed_fence *tf)
 	destroy_timer_on_stack(&tf->timer);
 	i915_sw_fence_fini(&tf->fence);
 }
+
+struct heap_fence {
+	struct i915_sw_fence fence;
+	union {
+		struct kref ref;
+		struct rcu_head rcu;
+	};
+};
+
+static int __i915_sw_fence_call
+heap_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	struct heap_fence *h = container_of(fence, typeof(*h), fence);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		break;
+
+	case FENCE_FREE:
+		heap_fence_put(&h->fence);
+	}
+
+	return NOTIFY_DONE;
+}
+
+struct i915_sw_fence *heap_fence_create(gfp_t gfp)
+{
+	struct heap_fence *h;
+
+	h = kmalloc(sizeof(*h), gfp);
+	if (!h)
+		return NULL;
+
+	i915_sw_fence_init(&h->fence, heap_fence_notify);
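+	/* Two references: one for the caller, one dropped via FENCE_FREE */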
+	refcount_set(&h->ref.refcount, 2);
+
+	return &h->fence;
+}
+
+static void heap_fence_release(struct kref *ref)
+{
+	struct heap_fence *h = container_of(ref, typeof(*h), ref);
+
+	i915_sw_fence_fini(&h->fence);
+
+	kfree_rcu(h, rcu);
+}
+
+void heap_fence_put(struct i915_sw_fence *fence)
+{
+	struct heap_fence *h = container_of(fence, typeof(*h), fence);
+
+	kref_put(&h->ref, heap_fence_release);
+}
diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h
index 474aafb92ae1..1f9927e10f3a 100644
--- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h
+++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h
@@ -39,4 +39,7 @@ struct timed_fence {
 void timed_fence_init(struct timed_fence *tf, unsigned long expires);
 void timed_fence_fini(struct timed_fence *tf);
 
+struct i915_sw_fence *heap_fence_create(gfp_t gfp);
+void heap_fence_put(struct i915_sw_fence *fence);
+
 #endif /* _LIB_SW_FENCE_H_ */
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index d937bdff26f9..e4db9a31b510 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -49,6 +49,8 @@ mock_context(struct drm_i915_private *i915,
 		struct intel_context *ce = &ctx->__engine[n];
 
 		ce->gem_context = ctx;
+		INIT_LIST_HEAD(&ce->signal_link);
+		INIT_LIST_HEAD(&ce->signals);
 	}
 
 	ret = i915_gem_context_pin_hw_id(ctx);
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index b4b61056b227..c1cd8b27b32a 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -75,25 +75,27 @@ static struct mock_request *first_request(struct mock_engine *engine)
 					link);
 }
 
-static void advance(struct mock_engine *engine,
-		    struct mock_request *request)
+static void advance(struct mock_request *request)
 {
 	list_del_init(&request->link);
 	i915_request_fake_complete(&request->base);
-	mock_seqno_advance(&engine->base, request->base.global_seqno);
+	intel_engine_write_global_seqno(request->base.engine,
+					request->base.global_seqno);
+	intel_engine_queue_breadcrumbs(request->base.engine);
 }
 
 static void hw_delay_complete(struct timer_list *t)
 {
 	struct mock_engine *engine = from_timer(engine, t, hw_delay);
 	struct mock_request *request;
+	unsigned long flags;
 
-	spin_lock(&engine->hw_lock);
+	spin_lock_irqsave(&engine->hw_lock, flags);
 
 	/* Timer fired, first request is complete */
 	request = first_request(engine);
 	if (request)
-		advance(engine, request);
+		advance(request);
 
 	/*
 	 * Also immediately signal any subsequent 0-delay requests, but
@@ -105,10 +107,10 @@ static void hw_delay_complete(struct timer_list *t)
 			break;
 		}
 
-		advance(engine, request);
+		advance(request);
 	}
 
-	spin_unlock(&engine->hw_lock);
+	spin_unlock_irqrestore(&engine->hw_lock, flags);
 }
 
 static void mock_context_unpin(struct intel_context *ce)
@@ -179,19 +181,20 @@ static void mock_submit_request(struct i915_request *request)
 	struct mock_request *mock = container_of(request, typeof(*mock), base);
 	struct mock_engine *engine =
 		container_of(request->engine, typeof(*engine), base);
+	unsigned long flags;
 
 	i915_request_submit(request);
 	GEM_BUG_ON(!request->global_seqno);
 
-	spin_lock_irq(&engine->hw_lock);
+	spin_lock_irqsave(&engine->hw_lock, flags);
 	list_add_tail(&mock->link, &engine->hw_queue);
 	if (mock->link.prev == &engine->hw_queue) {
 		if (mock->delay)
 			mod_timer(&engine->hw_delay, jiffies + mock->delay);
 		else
-			advance(engine, mock);
+			advance(mock);
 	}
-	spin_unlock_irq(&engine->hw_lock);
+	spin_unlock_irqrestore(&engine->hw_lock, flags);
 }
 
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
@@ -255,12 +258,13 @@ void mock_engine_flush(struct intel_engine_cs *engine)
 
 	spin_lock_irq(&mock->hw_lock);
 	list_for_each_entry_safe(request, rn, &mock->hw_queue, link)
-		advance(mock, request);
+		advance(request);
 	spin_unlock_irq(&mock->hw_lock);
 }
 
 void mock_engine_reset(struct intel_engine_cs *engine)
 {
+	intel_engine_write_global_seqno(engine, 0);
 }
 
 void mock_engine_free(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h
index 133d0c21790d..b9cc3a245f16 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.h
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.h
@@ -46,10 +46,4 @@ void mock_engine_flush(struct intel_engine_cs *engine);
 void mock_engine_reset(struct intel_engine_cs *engine);
 void mock_engine_free(struct intel_engine_cs *engine);
 
-static inline void mock_seqno_advance(struct intel_engine_cs *engine, u32 seqno)
-{
-	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
-	intel_engine_wakeup(engine);
-}
-
 #endif /* !__MOCK_ENGINE_H__ */
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 111+ messages in thread
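
A note on the worker management in the smoketests above: each thread is
pinned with get_task_struct() right after kthread_run(), so that the
task_struct stays valid across the later kthread_stop()/put_task_struct()
pair even if the worker exits early. A minimal sketch of the idiom
(illustrative names, not code from the patch):

#include <linux/delay.h>
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/sched/task.h>

static int worker_fn(void *arg)
{
	/* A real worker, like __igt_breadcrumbs_smoketest, would issue
	 * and wait upon batches of requests here.
	 */
	while (!kthread_should_stop())
		msleep(1);

	return 0;
}

static int run_worker_briefly(void)
{
	struct task_struct *tsk;
	int err;

	tsk = kthread_run(worker_fn, NULL, "sketch/worker");
	if (IS_ERR(tsk))
		return PTR_ERR(tsk);

	/* Pin the task so the struct cannot be freed before we are
	 * done with it, even if worker_fn returns early.
	 */
	get_task_struct(tsk);

	err = kthread_stop(tsk); /* waits for worker_fn to return */
	put_task_struct(tsk);

	return err;
}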

* Re: [PATCH 02/46] drm/i915: Report the number of closed vma held by each context in debugfs
  2019-01-07 11:54 ` [PATCH 02/46] drm/i915: Report the number of closed vma held by each context in debugfs Chris Wilson
@ 2019-01-07 12:35   ` Mika Kuoppala
  0 siblings, 0 replies; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-07 12:35 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Include the total size of closed vma when reporting the per_ctx_stats of
> debugfs/i915_gem_objects.
>
> Whilst adjusting the context tracking, note that we can simply use our
> list of contexts in i915->contexts rather than circumlocute via
> dev->filelist and the per-file context idr, with the result that we can
> show objects allocated to different vm (i.e. contexts within a file).
>
> We change the output to show every context of each client, with its own
> unique set of objects (for full-ppgtt machines, i.e. gen7+, for older
> hardware all objects are in the global gtt and so can not be associated
> with a single context). That should result in no loss of information,
> and for gen7+, no duplication of active objects.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c | 124 +++++++++++-----------------
>  1 file changed, 47 insertions(+), 77 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 193823048f96..c77326a7d058 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -297,11 +297,12 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
>  }
>  
>  struct file_stats {
> -	struct drm_i915_file_private *file_priv;
> +	struct i915_address_space *vm;
>  	unsigned long count;
>  	u64 total, unbound;
>  	u64 global, shared;
>  	u64 active, inactive;
> +	u64 closed;
>  };
>  
>  static int per_file_stats(int id, void *ptr, void *data)
> @@ -326,9 +327,7 @@ static int per_file_stats(int id, void *ptr, void *data)
>  		if (i915_vma_is_ggtt(vma)) {
>  			stats->global += vma->node.size;
>  		} else {
> -			struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vma->vm);
> -
> -			if (ppgtt->vm.file != stats->file_priv)
> +			if (vma->vm != stats->vm)
>  				continue;
>  		}
>  
> @@ -336,6 +335,9 @@ static int per_file_stats(int id, void *ptr, void *data)
>  			stats->active += vma->node.size;
>  		else
>  			stats->inactive += vma->node.size;
> +
> +		if (i915_vma_is_closed(vma))
> +			stats->closed += vma->node.size;
>  	}
>  
>  	return 0;
> @@ -343,7 +345,7 @@ static int per_file_stats(int id, void *ptr, void *data)
>  
>  #define print_file_stats(m, name, stats) do { \
>  	if (stats.count) \
> -		seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound)\n", \
> +		seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound, %llu closed)\n", \
>  			   name, \
>  			   stats.count, \
>  			   stats.total, \
> @@ -351,20 +353,19 @@ static int per_file_stats(int id, void *ptr, void *data)
>  			   stats.inactive, \
>  			   stats.global, \
>  			   stats.shared, \
> -			   stats.unbound); \
> +			   stats.unbound, \
> +			   stats.closed); \
>  } while (0)
>  
>  static void print_batch_pool_stats(struct seq_file *m,
>  				   struct drm_i915_private *dev_priv)
>  {
>  	struct drm_i915_gem_object *obj;
> -	struct file_stats stats;
>  	struct intel_engine_cs *engine;
> +	struct file_stats stats = {};
>  	enum intel_engine_id id;
>  	int j;
>  
> -	memset(&stats, 0, sizeof(stats));
> -
>  	for_each_engine(engine, dev_priv, id) {
>  		for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) {
>  			list_for_each_entry(obj,
> @@ -377,44 +378,47 @@ static void print_batch_pool_stats(struct seq_file *m,
>  	print_file_stats(m, "[k]batch pool", stats);
>  }
>  
> -static int per_file_ctx_stats(int idx, void *ptr, void *data)
> +static void print_context_stats(struct seq_file *m,
> +				struct drm_i915_private *i915)
>  {
> -	struct i915_gem_context *ctx = ptr;
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> +	struct file_stats kstats = {};
> +	struct i915_gem_context *ctx;
>  
> -	for_each_engine(engine, ctx->i915, id) {
> -		struct intel_context *ce = to_intel_context(ctx, engine);
> +	list_for_each_entry(ctx, &i915->contexts.list, link) {
> +		struct intel_engine_cs *engine;
> +		enum intel_engine_id id;
>  
> -		if (ce->state)
> -			per_file_stats(0, ce->state->obj, data);
> -		if (ce->ring)
> -			per_file_stats(0, ce->ring->vma->obj, data);
> -	}
> +		for_each_engine(engine, i915, id) {
> +			struct intel_context *ce = to_intel_context(ctx, engine);
>  
> -	return 0;
> -}
> +			if (ce->state)
> +				per_file_stats(0, ce->state->obj, &kstats);
> +			if (ce->ring)
> +				per_file_stats(0, ce->ring->vma->obj, &kstats);
> +		}
>  
> -static void print_context_stats(struct seq_file *m,
> -				struct drm_i915_private *dev_priv)
> -{
> -	struct drm_device *dev = &dev_priv->drm;
> -	struct file_stats stats;
> -	struct drm_file *file;
> +		if (!IS_ERR_OR_NULL(ctx->file_priv)) {
> +			struct file_stats stats = { .vm = &ctx->ppgtt->vm, };
> +			struct drm_file *file = ctx->file_priv->file;
> +			struct task_struct *task;
> +			char name[80];
>  
> -	memset(&stats, 0, sizeof(stats));
> +			spin_lock(&file->table_lock);
> +			idr_for_each(&file->object_idr, per_file_stats, &stats);
> +			spin_unlock(&file->table_lock);
>  
> -	mutex_lock(&dev->struct_mutex);
> -	if (dev_priv->kernel_context)
> -		per_file_ctx_stats(0, dev_priv->kernel_context, &stats);
> +			rcu_read_lock();
> +			task = pid_task(ctx->pid ?: file->pid, PIDTYPE_PID);
> +			snprintf(name, sizeof(name), "%s/%d",
> +				 task ? task->comm : "<unknown>",
> +				 ctx->user_handle);
> +			rcu_read_unlock();

I stamped this from the previous submission, so cut-pasting here:

You removed the comment but... the pattern to protect
task->comm is rather well established so I don't mind.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
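
For reference, the pattern being alluded to looks roughly like this
(a minimal sketch with illustrative names, not the exact hunk above):

#include <linux/pid.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/string.h>

/* pid_task() returns a task kept alive only by RCU, so any access to
 * task->comm must happen (or be copied out) inside the read-side
 * critical section.
 */
static void name_from_pid(struct pid *pid, char *buf, size_t len)
{
	struct task_struct *task;

	rcu_read_lock();
	task = pid_task(pid, PIDTYPE_PID);
	strscpy(buf, task ? task->comm : "<unknown>", len);
	rcu_read_unlock();
}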

>  
> -	list_for_each_entry(file, &dev->filelist, lhead) {
> -		struct drm_i915_file_private *fpriv = file->driver_priv;
> -		idr_for_each(&fpriv->context_idr, per_file_ctx_stats, &stats);
> +			print_file_stats(m, name, stats);
> +		}
>  	}
> -	mutex_unlock(&dev->struct_mutex);
>  
> -	print_file_stats(m, "[k]contexts", stats);
> +	print_file_stats(m, "[k]contexts", kstats);
>  }
>  
>  static int i915_gem_object_info(struct seq_file *m, void *data)
> @@ -426,14 +430,9 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
>  	u64 size, mapped_size, purgeable_size, dpy_size, huge_size;
>  	struct drm_i915_gem_object *obj;
>  	unsigned int page_sizes = 0;
> -	struct drm_file *file;
>  	char buf[80];
>  	int ret;
>  
> -	ret = mutex_lock_interruptible(&dev->struct_mutex);
> -	if (ret)
> -		return ret;
> -
>  	seq_printf(m, "%u objects, %llu bytes\n",
>  		   dev_priv->mm.object_count,
>  		   dev_priv->mm.object_memory);
> @@ -514,43 +513,14 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
>  					buf, sizeof(buf)));
>  
>  	seq_putc(m, '\n');
> -	print_batch_pool_stats(m, dev_priv);
> -	mutex_unlock(&dev->struct_mutex);
> -
> -	mutex_lock(&dev->filelist_mutex);
> -	print_context_stats(m, dev_priv);
> -	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
> -		struct file_stats stats;
> -		struct drm_i915_file_private *file_priv = file->driver_priv;
> -		struct i915_request *request;
> -		struct task_struct *task;
> -
> -		mutex_lock(&dev->struct_mutex);
>  
> -		memset(&stats, 0, sizeof(stats));
> -		stats.file_priv = file->driver_priv;
> -		spin_lock(&file->table_lock);
> -		idr_for_each(&file->object_idr, per_file_stats, &stats);
> -		spin_unlock(&file->table_lock);
> -		/*
> -		 * Although we have a valid reference on file->pid, that does
> -		 * not guarantee that the task_struct who called get_pid() is
> -		 * still alive (e.g. get_pid(current) => fork() => exit()).
> -		 * Therefore, we need to protect this ->comm access using RCU.
> -		 */
> -		request = list_first_entry_or_null(&file_priv->mm.request_list,
> -						   struct i915_request,
> -						   client_link);
> -		rcu_read_lock();
> -		task = pid_task(request && request->gem_context->pid ?
> -				request->gem_context->pid : file->pid,
> -				PIDTYPE_PID);
> -		print_file_stats(m, task ? task->comm : "<unknown>", stats);
> -		rcu_read_unlock();
> +	ret = mutex_lock_interruptible(&dev->struct_mutex);
> +	if (ret)
> +		return ret;
>  
> -		mutex_unlock(&dev->struct_mutex);
> -	}
> -	mutex_unlock(&dev->filelist_mutex);
> +	print_batch_pool_stats(m, dev_priv);
> +	print_context_stats(m, dev_priv);
> +	mutex_unlock(&dev->struct_mutex);
>  
>  	return 0;
>  }
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (44 preceding siblings ...)
  2019-01-07 11:55 ` [PATCH 46/46] drm/i915: Replace global breadcrumbs with per-context interrupt tracking Chris Wilson
@ 2019-01-07 12:45 ` Patchwork
  2019-01-07 13:02 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (4 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Patchwork @ 2019-01-07 12:45 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim
URL   : https://patchwork.freedesktop.org/series/54803/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
7b61c9e7b9d7 drm/i915: Return immediately if trylock fails for direct-reclaim
-:54: CHECK:UNCOMMENTED_DEFINITION: struct mutex definition without comment
#54: FILE: drivers/gpu/drm/i915/i915_drv.h:3191:
+				    struct mutex *mutex);

total: 0 errors, 0 warnings, 1 checks, 172 lines checked
15f754232c8b drm/i915: Report the number of closed vma held by each context in debugfs
-:68: WARNING:LONG_LINE: line over 100 characters
#68: FILE: drivers/gpu/drm/i915/i915_debugfs.c:348:
+		seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound, %llu closed)\n", \

total: 0 errors, 1 warnings, 0 checks, 201 lines checked
44eb22b9d0e9 drm/i915: Track all held rpm wakerefs
-:106: CHECK:UNCOMMENTED_DEFINITION: spinlock_t definition without comment
#106: FILE: drivers/gpu/drm/i915/i915_drv.h:1162:
+	spinlock_t debug_lock;

total: 0 errors, 0 warnings, 1 checks, 571 lines checked
4474d8b3116c drm/i915: Markup paired operations on wakerefs
-:380: WARNING:NEW_TYPEDEFS: do not add new typedefs
#380: FILE: drivers/gpu/drm/i915/i915_drv.h:134:
+typedef depot_stack_handle_t intel_wakeref_t;

total: 0 errors, 1 warnings, 0 checks, 1168 lines checked
73bb87c62275 drm/i915: Track GT wakeref
ecc94ab152f0 drm/i915: Track the rpm wakerefs for error handling
51f4dc876674 drm/i915: Mark up sysfs with rpm wakeref tracking
bbadf9c61168 drm/i915: Mark up debugfs with rpm wakeref tracking
6f69f6cb282d drm/i915/perf: Track the rpm wakeref
4a6ad9c0bf6b drm/i915/pmu: Track rpm wakeref
27ac441fad2c drm/i915/guc: Track the rpm wakeref
31d63b175d79 drm/i915/gem: Track the rpm wakerefs
a92f6a6c3356 drm/i915/fb: Track rpm wakerefs
8cc18f52ef6e drm/i915/hotplug: Track temporary rpm wakeref
bb14613eca20 drm/i915/panel: Track temporary rpm wakeref
597c006df1dc drm/i915/selftests: Mark up rpm wakerefs
6f2468caa207 drm/i915: Syntatic sugar for using intel_runtime_pm
-:512: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'i915' - possible side-effects?
#512: FILE: drivers/gpu/drm/i915/intel_drv.h:2190:
+#define with_intel_runtime_pm(i915, wf) \
+	for (wf = intel_runtime_pm_get(i915); wf; \
+	     intel_runtime_pm_put(i915, wf), wf = 0)

-:512: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'wf' - possible side-effects?
#512: FILE: drivers/gpu/drm/i915/intel_drv.h:2190:
+#define with_intel_runtime_pm(i915, wf) \
+	for (wf = intel_runtime_pm_get(i915); wf; \
+	     intel_runtime_pm_put(i915, wf), wf = 0)

-:516: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'i915' - possible side-effects?
#516: FILE: drivers/gpu/drm/i915/intel_drv.h:2194:
+#define with_intel_runtime_pm_if_in_use(i915, wf) \
+	for (wf = intel_runtime_pm_get_if_in_use(i915); wf; \
+	     intel_runtime_pm_put(i915, wf), wf = 0)

-:516: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'wf' - possible side-effects?
#516: FILE: drivers/gpu/drm/i915/intel_drv.h:2194:
+#define with_intel_runtime_pm_if_in_use(i915, wf) \
+	for (wf = intel_runtime_pm_get_if_in_use(i915); wf; \
+	     intel_runtime_pm_put(i915, wf), wf = 0)

total: 0 errors, 0 warnings, 4 checks, 734 lines checked
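
The side-effect hazard checkpatch is pointing at is the usual one for
macros that expand an argument more than once; a contrived sketch, not
i915 code:

#define DOUBLE(x) ((x) + (x))

static int counter;
static int next(void) { return ++counter; }

/* DOUBLE(next()) expands to next() + next(): the function runs twice,
 * yielding 3 rather than 2 and bumping the counter twice. Passing a
 * plain variable, as the i915 callers do for 'wf', avoids the issue.
 */
static int example(void) { return DOUBLE(next()); }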
b80423487b07 drm/i915: Markup paired operations on display power domains
e1b960f8dd38 drm/i915: Track the wakeref used to initialise display power domains
-:214: WARNING:LINE_SPACING: Missing a blank line after declarations
#214: FILE: drivers/gpu/drm/i915/intel_runtime_pm.c:4107:
+	struct i915_power_domains *power_domains = &i915->power_domains;
+	intel_wakeref_t wakeref __maybe_unused =

total: 0 errors, 1 warnings, 0 checks, 324 lines checked
3ca33907833e drm/i915: Combined gt.awake/gt.power wakerefs
2846424a5065 drm/i915/dp: Markup pps lock power well
-:58: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'dp' - possible side-effects?
#58: FILE: drivers/gpu/drm/i915/intel_dp.c:635:
+#define with_pps_lock(dp, wf) \
+	for (wf = pps_lock(dp); wf; wf = pps_unlock(dp, wf))

-:58: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'wf' - possible side-effects?
#58: FILE: drivers/gpu/drm/i915/intel_dp.c:635:
+#define with_pps_lock(dp, wf) \
+	for (wf = pps_lock(dp); wf; wf = pps_unlock(dp, wf))

total: 0 errors, 0 warnings, 2 checks, 430 lines checked
2cf475a0e278 drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice
439540e3f554 drm/i915: Mark up Ironlake ips with rpm wakerefs
6888a1eda106 drm/i915: Serialise concurrent calls to i915_gem_set_wedged()
-:48: WARNING:MEMORY_BARRIER: memory barrier without comment
#48: FILE: drivers/gpu/drm/i915/i915_gem.c:3230:
+	smp_mb__before_atomic();

total: 0 errors, 1 warnings, 0 checks, 110 lines checked
c9872aff5d50 drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex
015a2f8dbceb drm/i915: Pull all the reset functionality together into i915_reset.c
-:1089: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#1089: 
new file mode 100644

-:1094: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#1094: FILE: drivers/gpu/drm/i915/i915_reset.c:1:
+/*

-:1238: WARNING:TYPO_SPELLING: 'acknowledgement' may be misspelled - perhaps 'acknowledgment'?
#1238: FILE: drivers/gpu/drm/i915/i915_reset.c:145:
+	/* Assert reset for at least 20 usec, and wait for acknowledgement. */

-:1963: WARNING:MEMORY_BARRIER: memory barrier without comment
#1963: FILE: drivers/gpu/drm/i915/i915_reset.c:870:
+	smp_mb__before_atomic();

-:2262: WARNING:STATIC_CONST_CHAR_ARRAY: char * array declaration might be better as static const
#2262: FILE: drivers/gpu/drm/i915/i915_reset.c:1169:
+	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };

-:2263: WARNING:STATIC_CONST_CHAR_ARRAY: char * array declaration might be better as static const
#2263: FILE: drivers/gpu/drm/i915/i915_reset.c:1170:
+	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };

-:2264: WARNING:STATIC_CONST_CHAR_ARRAY: char * array declaration might be better as static const
#2264: FILE: drivers/gpu/drm/i915/i915_reset.c:1171:
+	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };

-:2489: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#2489: FILE: drivers/gpu/drm/i915/i915_reset.h:1:
+/*

-:2539: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'W' - possible side-effects?
#2539: FILE: drivers/gpu/drm/i915/i915_reset.h:51:
+#define i915_wedge_on_timeout(W, DEV, TIMEOUT)				\
+	for (__i915_init_wedge((W), (DEV), (TIMEOUT), __func__);	\
+	     (W)->i915;							\
+	     __i915_fini_wedge((W)))

total: 0 errors, 8 warnings, 1 checks, 3113 lines checked
0129900c3794 drm/i915: Make all GPU resets atomic
-:23: CHECK:USLEEP_RANGE: usleep_range is preferred over udelay; see Documentation/timers/timers-howto.txt
#23: FILE: drivers/gpu/drm/i915/i915_reset.c:147:
+	udelay(50);

-:29: CHECK:USLEEP_RANGE: usleep_range is preferred over udelay; see Documentation/timers/timers-howto.txt
#29: FILE: drivers/gpu/drm/i915/i915_reset.c:152:
+	udelay(50);

total: 0 errors, 0 warnings, 2 checks, 121 lines checked
d9c35bdc837f drm/i915/guc: Disable global reset
5872f352bbf4 drm/i915: Remove GPU reset dependence on struct_mutex
-:616: WARNING:MEMORY_BARRIER: memory barrier without comment
#616: FILE: drivers/gpu/drm/i915/i915_reset.c:692:
+	smp_store_mb(i915->gpu_error.restart, NULL);

-:769: WARNING:IF_0: Consider removing the code enclosed by this #if 0 and its #endif
#769: FILE: drivers/gpu/drm/i915/i915_reset.c:920:
+#if 0

total: 0 errors, 2 warnings, 0 checks, 1395 lines checked
5c6544073058 drm/i915: Issue engine resets onto idle engines
850cfae97d02 drm/i915: Stop tracking MRU activity on VMA
05f0e0d4f8c8 drm/i915: Pull VM lists under the VM mutex.
aaf60bafd95e drm/i915: Move vma lookup to its own lock
-:157: WARNING:USE_SPINLOCK_T: struct spinlock should be spinlock_t
#157: FILE: drivers/gpu/drm/i915/i915_gem_object.h:92:
+		struct spinlock lock;

total: 0 errors, 1 warnings, 0 checks, 284 lines checked
d7ed47a26a28 drm/i915: Move intel_execlists_show_requests() aside
710f39b1dd59 drm/i915: Use b->irq_enable() as predicate for mock engine
-:6: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit d4ccceb05591 ("drm/i915/icl: Ringbuffer interrupt handling")'
#6: 
Since commit  d4ccceb05591 ("drm/i915/icl: Ringbuffer interrupt handling")

-:10: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit d4ccceb05591 ("drm/i915/icl: Ringbuffer interrupt handling")'
#10: 
References: d4ccceb05591 ("drm/i915/icl: Ringbuffer interrupt handling")

total: 2 errors, 0 warnings, 0 checks, 111 lines checked
f100df244053 drm/i915/selftests: Allocate mock ring/timeline per context
898cc7add579 drm/i915/selftests: Make evict tolerant of foreign objects
bcbacd09560a drm/i915: Remove the intel_engine_notify tracepoint
7addb9fa85ee drm/i915: Always allocate an object/vma for the HWSP
7eaf41844056 drm/i915: Move list of timelines under its own lock
-:20: CHECK:UNCOMMENTED_DEFINITION: struct mutex definition without comment
#20: FILE: drivers/gpu/drm/i915/i915_drv.h:1961:
+		struct mutex timeline_lock;

total: 0 errors, 0 warnings, 1 checks, 314 lines checked
82cb670aee32 drm/i915: Introduce concept of per-timeline (context) HWSP
b74a0ab29880 drm/i915: Enlarge vma->pin_count
3a18270b30aa drm/i915: Allocate a status page for each timeline
caa054848443 drm/i915: Track the context's seqno in its own timeline HWSP
-:176: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#176: FILE: drivers/gpu/drm/i915/intel_lrc.c:2043:
 }
+static const int gen8_emit_breadcrumb_sz = 10 + WA_TAIL_DWORDS;

-:207: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#207: FILE: drivers/gpu/drm/i915/intel_lrc.c:2069:
 }
+static const int gen8_emit_breadcrumb_rcs_sz = 14 + WA_TAIL_DWORDS;

-:233: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#233: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:344:
 }
+static const int gen6_rcs_emit_breadcrumb_sz = 18;

-:256: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#256: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:451:
 }
+static const int gen7_rcs_emit_breadcrumb_sz = 10;

-:277: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#277: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:469:
 }
+static const int gen6_xcs_emit_breadcrumb_sz = 8;

-:305: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#305: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:499:
 }
+static const int gen7_xcs_emit_breadcrumb_sz = 10 + GEN7_XCS_WA * 3;

-:357: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#357: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:945:
 }
+static const int i9xx_emit_breadcrumb_sz = 8;

-:385: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#385: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:973:
 }
+static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 6;

total: 0 errors, 0 warnings, 8 checks, 402 lines checked
494f87700216 drm/i915: Identify active requests
-:67: CHECK:BOOL_MEMBER: Avoid using bool structure members because of possible alignment issues - see: https://lkml.org/lkml/2017/11/21/384
#67: FILE: drivers/gpu/drm/i915/i915_timeline.h:50:
+	bool has_initial_breadcrumb;

total: 0 errors, 0 warnings, 1 checks, 88 lines checked
0bf3389593c0 drm/i915: Replace global breadcrumbs with per-context interrupt tracking
-:18: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 688e6c725816 ("drm/i915: Slaughter the thundering i915_wait_request herd")'
#18: 
Before commit 688e6c725816, the solution was simple. Every client waking

-:21: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 688e6c725816 ("drm/i915: Slaughter the thundering i915_wait_request herd")'
#21: 
688e6c725816 introduced an rbtree so that only the earliest waiter on

-:49: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#49: 
References: 688e6c725816 ("drm/i915: Slaughter the thundering i915_wait_request herd")

-:49: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 688e6c725816 ("drm/i915: Slaughter the thundering i915_wait_request herd")'
#49: 
References: 688e6c725816 ("drm/i915: Slaughter the thundering i915_wait_request herd")

-:1868: CHECK:BOOL_MEMBER: Avoid using bool structure members because of possible alignment issues - see: https://lkml.org/lkml/2017/11/21/384
#1868: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:395:
+		bool irq_armed;

-:1869: CHECK:BOOL_MEMBER: Avoid using bool structure members because of possible alignment issues - see: https://lkml.org/lkml/2017/11/21/384
#1869: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:396:
+		bool irq_fired;

-:2009: WARNING:FUNCTION_ARGUMENTS: function definition argument 'struct i915_gem_context *' should also have an identifier name
#2009: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:257:
+	struct i915_request *(*request_alloc)(struct i915_gem_context *,

-:2009: WARNING:FUNCTION_ARGUMENTS: function definition argument 'struct intel_engine_cs *' should also have an identifier name
#2009: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:257:
+	struct i915_request *(*request_alloc)(struct i915_gem_context *,

-:2033: WARNING:LINE_SPACING: Missing a blank line after declarations
#2033: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:281:
+	struct i915_request **requests;
+	I915_RND_STATE(prng);

-:2440: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#2440: 
deleted file mode 100644

total: 3 errors, 5 warnings, 2 checks, 2425 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (45 preceding siblings ...)
  2019-01-07 12:45 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Patchwork
@ 2019-01-07 13:02 ` Patchwork
  2019-01-07 13:05 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (3 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Patchwork @ 2019-01-07 13:02 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim
URL   : https://patchwork.freedesktop.org/series/54803/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Return immediately if trylock fails for direct-reclaim
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3545:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3546:16: warning: expression using sizeof(void)

Commit: drm/i915: Report the number of closed vma held by each context in debugfs
Okay!

Commit: drm/i915: Track all held rpm wakerefs
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3546:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3553:16: warning: expression using sizeof(void)

Commit: drm/i915: Markup paired operations on wakerefs
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3553:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3555:16: warning: expression using sizeof(void)

Commit: drm/i915: Track GT wakeref
Okay!

Commit: drm/i915: Track the rpm wakerefs for error handling
Okay!

Commit: drm/i915: Mark up sysfs with rpm wakeref tracking
Okay!

Commit: drm/i915: Mark up debugfs with rpm wakeref tracking
Okay!

Commit: drm/i915/perf: Track the rpm wakeref
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3555:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3557:16: warning: expression using sizeof(void)

Commit: drm/i915/pmu: Track rpm wakeref
Okay!

Commit: drm/i915/guc: Track the rpm wakeref
Okay!

Commit: drm/i915/gem: Track the rpm wakerefs
Okay!

Commit: drm/i915/fb: Track rpm wakerefs
Okay!

Commit: drm/i915/hotplug: Track temporary rpm wakeref
Okay!

Commit: drm/i915/panel: Track temporary rpm wakeref
Okay!

Commit: drm/i915/selftests: Mark up rpm wakerefs
Okay!

Commit: drm/i915: Syntatic sugar for using intel_runtime_pm
Okay!

Commit: drm/i915: Markup paired operations on display power domains
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3557:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3559:16: warning: expression using sizeof(void)

Commit: drm/i915: Track the wakeref used to initialise display power domains
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3559:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3561:16: warning: expression using sizeof(void)

Commit: drm/i915: Combined gt.awake/gt.power wakerefs
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3561:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3560:16: warning: expression using sizeof(void)

Commit: drm/i915/dp: Markup pps lock power well
Okay!

Commit: drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice
Okay!

Commit: drm/i915: Mark up Ironlake ips with rpm wakerefs
Okay!

Commit: drm/i915: Serialise concurrent calls to i915_gem_set_wedged()
Okay!

Commit: drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex
Okay!

Commit: drm/i915: Pull all the reset functionality together into i915_reset.c
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3560:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3529:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Make all GPU resets atomic
Okay!

Commit: drm/i915/guc: Disable global reset
Okay!

Commit: drm/i915: Remove GPU reset dependence on struct_mutex
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3529:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3524:16: warning: expression using sizeof(void)

Commit: drm/i915: Issue engine resets onto idle engines
Okay!

Commit: drm/i915: Stop tracking MRU activity on VMA
Okay!

Commit: drm/i915: Pull VM lists under the VM mutex.
Okay!

Commit: drm/i915: Move vma lookup to its own lock
Okay!

Commit: drm/i915: Move intel_execlists_show_requests() aside
Okay!

Commit: drm/i915: Use b->irq_enable() as predicate for mock engine
Okay!

Commit: drm/i915/selftests: Allocate mock ring/timeline per context
Okay!

Commit: drm/i915/selftests: Make evict tolerant of foreign objects
Okay!

Commit: drm/i915: Remove the intel_engine_notify tracepoint
Okay!

Commit: drm/i915: Always allocate an object/vma for the HWSP
Okay!

Commit: drm/i915: Move list of timelines under its own lock
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3524:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3525:16: warning: expression using sizeof(void)

Commit: drm/i915: Introduce concept of per-timeline (context) HWSP
Okay!

Commit: drm/i915: Enlarge vma->pin_count
Okay!

Commit: drm/i915: Allocate a status page for each timeline
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3525:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3529:16: warning: expression using sizeof(void)
+./include/linux/mm.h:619:13: error: not a function <noident>
+./include/linux/mm.h:619:13: error: undefined identifier '__builtin_mul_overflow'
+./include/linux/mm.h:619:13: warning: call with no type!

Commit: drm/i915: Track the context's seqno in its own timeline HWSP
Okay!

Commit: drm/i915: Identify active requests
Okay!

Commit: drm/i915: Replace global breadcrumbs with per-context interrupt tracking
+drivers/gpu/drm/i915/selftests/i915_request.c:283:40: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_request.c:283:40: warning: expression using sizeof(void)
-./include/linux/mm.h:619:13: error: not a function <noident>
-./include/linux/mm.h:619:13: error: not a function <noident>
-./include/linux/mm.h:619:13: error: undefined identifier '__builtin_mul_overflow'
-./include/linux/mm.h:619:13: warning: call with no type!
+./include/linux/slab.h:664:13: error: not a function <noident>
+./include/linux/slab.h:664:13: error: not a function <noident>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (46 preceding siblings ...)
  2019-01-07 13:02 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-01-07 13:05 ` Patchwork
  2019-01-07 14:00 ` [PATCH 01/46] " Tvrtko Ursulin
                   ` (2 subsequent siblings)
  50 siblings, 0 replies; 111+ messages in thread
From: Patchwork @ 2019-01-07 13:05 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim
URL   : https://patchwork.freedesktop.org/series/54803/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5368 -> Patchwork_11200
====================================================

Summary
-------

  **WARNING**

  Minor unknown changes coming with Patchwork_11200 need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_11200, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/54803/revisions/1/mbox/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_11200:

### IGT changes ###

#### Warnings ####

  * igt@kms_pipe_crc_basic@hang-read-crc-pipe-b:
    - fi-skl-guc:         PASS -> SKIP +2
    - fi-apl-guc:         PASS -> SKIP +2
    - fi-cfl-guc:         PASS -> SKIP +2

  * igt@kms_pipe_crc_basic@nonblocking-crc-pipe-c:
    - fi-kbl-7567u:       SKIP -> PASS +33

  * igt@pm_rpm@basic-pci-d3-state:
    - fi-bsw-kefka:       PASS -> SKIP

  
Known issues
------------

  Here are the changes found in Patchwork_11200 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live_contexts:
    - fi-icl-u3:          NOTRUN -> DMESG-FAIL [fdo#108569]

  * igt@i915_selftest@live_execlists:
    - fi-apl-guc:         PASS -> INCOMPLETE [fdo#103927]

  * igt@kms_busy@basic-flip-b:
    - fi-gdg-551:         PASS -> FAIL [fdo#103182]

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7500u:       PASS -> FAIL [fdo#108767]

  * igt@kms_pipe_crc_basic@nonblocking-crc-pipe-a:
    - fi-byt-clapper:     PASS -> FAIL [fdo#107362]

  * igt@kms_pipe_crc_basic@nonblocking-crc-pipe-a-frame-sequence:
    - fi-byt-clapper:     PASS -> FAIL [fdo#103191] / [fdo#107362]

  * igt@pm_rpm@basic-rte:
    - fi-bsw-kefka:       PASS -> FAIL [fdo#108800]

  
#### Possible fixes ####

  * igt@pm_rpm@module-reload:
    - fi-icl-u2:          DMESG-WARN [fdo#108654] -> PASS

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103182]: https://bugs.freedesktop.org/show_bug.cgi?id=103182
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#107362]: https://bugs.freedesktop.org/show_bug.cgi?id=107362
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#108622]: https://bugs.freedesktop.org/show_bug.cgi?id=108622
  [fdo#108654]: https://bugs.freedesktop.org/show_bug.cgi?id=108654
  [fdo#108767]: https://bugs.freedesktop.org/show_bug.cgi?id=108767
  [fdo#108800]: https://bugs.freedesktop.org/show_bug.cgi?id=108800
  [fdo#108915]: https://bugs.freedesktop.org/show_bug.cgi?id=108915


Participating hosts (47 -> 44)
------------------------------

  Additional (3): fi-icl-y fi-byt-j1900 fi-icl-u3 
  Missing    (6): fi-kbl-soraka fi-hsw-4770r fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 


Build changes
-------------

    * Linux: CI_DRM_5368 -> Patchwork_11200

  CI_DRM_5368: 64bd30ea3ce0edd057a5b393569947a955472757 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4756: 75081c6bfb9998bd7cbf35a7ac0578c683fe55a8 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_11200: 0bf3389593c055c874799a4cd89caa7cfaa86927 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

0bf3389593c0 drm/i915: Replace global breadcrumbs with per-context interrupt tracking
494f87700216 drm/i915: Identify active requests
caa054848443 drm/i915: Track the context's seqno in its own timeline HWSP
3a18270b30aa drm/i915: Allocate a status page for each timeline
b74a0ab29880 drm/i915: Enlarge vma->pin_count
82cb670aee32 drm/i915: Introduce concept of per-timeline (context) HWSP
7eaf41844056 drm/i915: Move list of timelines under its own lock
7addb9fa85ee drm/i915: Always allocate an object/vma for the HWSP
bcbacd09560a drm/i915: Remove the intel_engine_notify tracepoint
898cc7add579 drm/i915/selftests: Make evict tolerant of foreign objects
f100df244053 drm/i915/selftests: Allocate mock ring/timeline per context
710f39b1dd59 drm/i915: Use b->irq_enable() as predicate for mock engine
d7ed47a26a28 drm/i915: Move intel_execlists_show_requests() aside
aaf60bafd95e drm/i915: Move vma lookup to its own lock
05f0e0d4f8c8 drm/i915: Pull VM lists under the VM mutex.
850cfae97d02 drm/i915: Stop tracking MRU activity on VMA
5c6544073058 drm/i915: Issue engine resets onto idle engines
5872f352bbf4 drm/i915: Remove GPU reset dependence on struct_mutex
d9c35bdc837f drm/i915/guc: Disable global reset
0129900c3794 drm/i915: Make all GPU resets atomic
015a2f8dbceb drm/i915: Pull all the reset functionality together into i915_reset.c
c9872aff5d50 drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex
6888a1eda106 drm/i915: Serialise concurrent calls to i915_gem_set_wedged()
439540e3f554 drm/i915: Mark up Ironlake ips with rpm wakerefs
2cf475a0e278 drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice
2846424a5065 drm/i915/dp: Markup pps lock power well
3ca33907833e drm/i915: Combined gt.awake/gt.power wakerefs
e1b960f8dd38 drm/i915: Track the wakeref used to initialise display power domains
b80423487b07 drm/i915: Markup paired operations on display power domains
6f2468caa207 drm/i915: Syntatic sugar for using intel_runtime_pm
597c006df1dc drm/i915/selftests: Mark up rpm wakerefs
bb14613eca20 drm/i915/panel: Track temporary rpm wakeref
8cc18f52ef6e drm/i915/hotplug: Track temporary rpm wakeref
a92f6a6c3356 drm/i915/fb: Track rpm wakerefs
31d63b175d79 drm/i915/gem: Track the rpm wakerefs
27ac441fad2c drm/i915/guc: Track the rpm wakeref
4a6ad9c0bf6b drm/i915/pmu: Track rpm wakeref
6f69f6cb282d drm/i915/perf: Track the rpm wakeref
bbadf9c61168 drm/i915: Mark up debugfs with rpm wakeref tracking
51f4dc876674 drm/i915: Mark up sysfs with rpm wakeref tracking
ecc94ab152f0 drm/i915: Track the rpm wakerefs for error handling
73bb87c62275 drm/i915: Track GT wakeref
4474d8b3116c drm/i915: Markup paired operations on wakerefs
44eb22b9d0e9 drm/i915: Track all held rpm wakerefs
15f754232c8b drm/i915: Report the number of closed vma held by each context in debugfs
7b61c9e7b9d7 drm/i915: Return immediately if trylock fails for direct-reclaim

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_11200/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 03/46] drm/i915: Track all held rpm wakerefs
  2019-01-07 11:54 ` [PATCH 03/46] drm/i915: Track all held rpm wakerefs Chris Wilson
@ 2019-01-07 13:14   ` Mika Kuoppala
  2019-01-07 13:22     ` Chris Wilson
  2019-01-08 11:45   ` [PATCH v2] " Chris Wilson
  2019-01-08 12:22   ` [PATCH v3] " Chris Wilson
  2 siblings, 1 reply; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-07 13:14 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Every time we take a wakeref, record the stack trace of where it was
> taken; clearing the set if we ever drop back to no owners. For debugging
> an rpm leak, we can look at all the current wakerefs and check if they
> have a matching rpm_put.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>
> ---
>  drivers/gpu/drm/i915/Kconfig.debug            |   2 +-
>  drivers/gpu/drm/i915/i915_debugfs.c           |   6 +
>  drivers/gpu/drm/i915/i915_drv.c               |   8 +-
>  drivers/gpu/drm/i915/i915_drv.h               |   7 +
>  drivers/gpu/drm/i915/intel_drv.h              |  44 ++-
>  drivers/gpu/drm/i915/intel_runtime_pm.c       | 267 ++++++++++++++++--
>  .../gpu/drm/i915/selftests/mock_gem_device.c  |   8 +-
>  7 files changed, 292 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
> index 9e36ffb5eb7c..a97929c47466 100644
> --- a/drivers/gpu/drm/i915/Kconfig.debug
> +++ b/drivers/gpu/drm/i915/Kconfig.debug
> @@ -21,11 +21,11 @@ config DRM_I915_DEBUG
>          select DEBUG_FS
>          select PREEMPT_COUNT
>          select I2C_CHARDEV
> +        select STACKDEPOT
>          select DRM_DP_AUX_CHARDEV
>          select X86_MSR # used by igt/pm_rpm
>          select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
>          select DRM_DEBUG_MM if DRM=y
> -        select STACKDEPOT if DRM=y # for DRM_DEBUG_MM
>  	select DRM_DEBUG_SELFTEST
>  	select SW_SYNC # signaling validation framework (igt/syncobj*)
>  	select DRM_I915_SW_FENCE_DEBUG_OBJECTS
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index c77326a7d058..3a369245d7e6 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2702,6 +2702,12 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
>  		   pci_power_name(pdev->current_state),
>  		   pdev->current_state);
>  
> +	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)) {
> +		struct drm_printer p = drm_seq_file_printer(m);
> +
> +		print_intel_runtime_pm_wakeref(dev_priv, &p);
> +	}
> +
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 17fca3ba343e..e2f4753ca21f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -906,6 +906,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv)
>  	mutex_init(&dev_priv->pps_mutex);
>  
>  	i915_memcpy_init_early(dev_priv);
> +	intel_runtime_pm_init_early(dev_priv);
>  
>  	ret = i915_workqueues_init(dev_priv);
>  	if (ret < 0)
> @@ -1808,8 +1809,7 @@ void i915_driver_unload(struct drm_device *dev)
>  	i915_driver_cleanup_mmio(dev_priv);
>  
>  	enable_rpm_wakeref_asserts(dev_priv);
> -
> -	WARN_ON(atomic_read(&dev_priv->runtime_pm.wakeref_count));
> +	intel_runtime_pm_cleanup(dev_priv);
>  }
>  
>  static void i915_driver_release(struct drm_device *dev)
> @@ -2011,6 +2011,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
>  
>  out:
>  	enable_rpm_wakeref_asserts(dev_priv);
> +	if (!dev_priv->uncore.user_forcewake.count)
> +		intel_runtime_pm_cleanup(dev_priv);
>

Why would we have forcewake active in here?

Are you planning on extending intel_runtime_pm_cleanup?
At least at this call site, 'intel_runtime_assert_no_wakerefs' would
make more sense.
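
A declaration-only sketch of what I mean (the name is just my
suggestion, nothing that exists yet):

        void intel_runtime_assert_no_wakerefs(struct drm_i915_private *i915);

i.e. keep intel_runtime_pm_cleanup() for the unload path and call the
assert from the suspend paths.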

>  	return ret;
>  }
> @@ -2966,7 +2968,7 @@ static int intel_runtime_suspend(struct device *kdev)
>  	}
>  
>  	enable_rpm_wakeref_asserts(dev_priv);
> -	WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count));
> +	intel_runtime_pm_cleanup(dev_priv);
>  
>  	if (intel_uncore_arm_unclaimed_mmio_detection(dev_priv))
>  		DRM_ERROR("Unclaimed access detected prior to suspending\n");
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 17a017645c5d..60b98103aba3 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -45,6 +45,7 @@
>  #include <linux/pm_qos.h>
>  #include <linux/reservation.h>
>  #include <linux/shmem_fs.h>
> +#include <linux/stackdepot.h>
>  
>  #include <drm/drmP.h>
>  #include <drm/intel-gtt.h>
> @@ -1156,6 +1157,12 @@ struct i915_runtime_pm {
>  	atomic_t wakeref_count;
>  	bool suspended;
>  	bool irqs_enabled;
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
> +	spinlock_t debug_lock;
> +	depot_stack_handle_t *debug_owners;
> +	unsigned long debug_count;
> +#endif
>  };
>  
>  enum intel_pipe_crc_source {
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 1a11c2beb7f3..ac513fd70315 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -41,6 +41,8 @@
>  #include <drm/drm_atomic.h>
>  #include <media/cec-notifier.h>
>  
> +struct drm_printer;
> +
>  /**
>   * __wait_for - magic wait macro
>   *
> @@ -2084,6 +2086,7 @@ bool intel_psr_enabled(struct intel_dp *intel_dp);
>  void intel_init_quirks(struct drm_i915_private *dev_priv);
>  
>  /* intel_runtime_pm.c */
> +void intel_runtime_pm_init_early(struct drm_i915_private *dev_priv);
>  int intel_power_domains_init(struct drm_i915_private *);
>  void intel_power_domains_cleanup(struct drm_i915_private *dev_priv);
>  void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume);
> @@ -2106,6 +2109,7 @@ void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume);
>  void bxt_display_core_uninit(struct drm_i915_private *dev_priv);
>  void intel_runtime_pm_enable(struct drm_i915_private *dev_priv);
>  void intel_runtime_pm_disable(struct drm_i915_private *dev_priv);
> +void intel_runtime_pm_cleanup(struct drm_i915_private *dev_priv);
>  const char *
>  intel_display_power_domain_str(enum intel_display_power_domain domain);
>  
> @@ -2123,23 +2127,23 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
>  			    u8 req_slices);
>  
>  static inline void
> -assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv)
> +assert_rpm_device_not_suspended(struct drm_i915_private *i915)
>  {
> -	WARN_ONCE(dev_priv->runtime_pm.suspended,
> +	WARN_ONCE(i915->runtime_pm.suspended,
>  		  "Device suspended during HW access\n");
>  }
>  
>  static inline void
> -assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
> +assert_rpm_wakelock_held(struct drm_i915_private *i915)
>  {
> -	assert_rpm_device_not_suspended(dev_priv);
> -	WARN_ONCE(!atomic_read(&dev_priv->runtime_pm.wakeref_count),
> +	assert_rpm_device_not_suspended(i915);
> +	WARN_ONCE(!atomic_read(&i915->runtime_pm.wakeref_count),
>  		  "RPM wakelock ref not held during HW access");
>  }
>  
>  /**
>   * disable_rpm_wakeref_asserts - disable the RPM assert checks
> - * @dev_priv: i915 device instance
> + * @i915: i915 device instance
>   *
>   * This function disable asserts that check if we hold an RPM wakelock
>   * reference, while keeping the device-not-suspended checks still enabled.
> @@ -2156,14 +2160,14 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
>   * enable_rpm_wakeref_asserts().
>   */
>  static inline void
> -disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
> +disable_rpm_wakeref_asserts(struct drm_i915_private *i915)
>  {
> -	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
> +	atomic_inc(&i915->runtime_pm.wakeref_count);
>  }
>  
>  /**
>   * enable_rpm_wakeref_asserts - re-enable the RPM assert checks
> - * @dev_priv: i915 device instance
> + * @i915: i915 device instance
>   *
>   * This function re-enables the RPM assert checks after disabling them with
>   * disable_rpm_wakeref_asserts. It's meant to be used only in special
> @@ -2173,15 +2177,25 @@ disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
>   * disable_rpm_wakeref_asserts().
>   */
>  static inline void
> -enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
> +enable_rpm_wakeref_asserts(struct drm_i915_private *i915)
>  {
> -	atomic_dec(&dev_priv->runtime_pm.wakeref_count);
> +	atomic_dec(&i915->runtime_pm.wakeref_count);
>  }
>  
> -void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
> -bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv);
> -void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
> -void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
> +void intel_runtime_pm_get(struct drm_i915_private *i915);
> +bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
> +void intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
> +void intel_runtime_pm_put(struct drm_i915_private *i915);
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
> +void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
> +				    struct drm_printer *p);
> +#else
> +static inline void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
> +						  struct drm_printer *p)
> +{
> +}
> +#endif

Plural 'wakerefs' would read better for me.

>  
>  void chv_phy_powergate_lanes(struct intel_encoder *encoder,
>  			     bool override, unsigned int mask);
> diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
> index 9e9501f82f06..67d71cc604f1 100644
> --- a/drivers/gpu/drm/i915/intel_runtime_pm.c
> +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
> @@ -29,6 +29,8 @@
>  #include <linux/pm_runtime.h>
>  #include <linux/vgaarb.h>
>  
> +#include <drm/drm_print.h>
> +
>  #include "i915_drv.h"
>  #include "intel_drv.h"
>  
> @@ -49,6 +51,189 @@
>   * present for a given platform.
>   */
>  
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
> +
> +#include <linux/sort.h>
> +
> +#define STACKDEPTH 8
> +
> +static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +{
> +	spin_lock_init(&i915->runtime_pm.debug_lock);
> +}
> +
> +static noinline void
> +track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +{
> +	struct i915_runtime_pm *rpm = &i915->runtime_pm;
> +	unsigned long entries[STACKDEPTH];
> +	struct stack_trace trace = {
> +		.entries = entries,
> +		.max_entries = ARRAY_SIZE(entries),
> +		.skip = 1
> +	};
> +	unsigned long flags;
> +	depot_stack_handle_t stack, *stacks;
> +
> +	if (!HAS_RUNTIME_PM(i915))
> +		return;
> +
> +	save_stack_trace(&trace);
> +	if (trace.nr_entries &&
> +	    trace.entries[trace.nr_entries - 1] == ULONG_MAX)
> +		trace.nr_entries--;
> +

What is this special handling for?

> +	stack = depot_save_stack(&trace, GFP_NOWAIT | __GFP_NOWARN);
> +	if (!stack)
> +		return;
> +
> +	spin_lock_irqsave(&rpm->debug_lock, flags);
> +	stacks = krealloc(rpm->debug_owners,
> +			  (rpm->debug_count + 1) * sizeof(*stacks),
> +			  GFP_NOWAIT | __GFP_NOWARN);
> +	if (stacks) {
> +		stacks[rpm->debug_count++] = stack;
> +		rpm->debug_owners = stacks;
> +	}
> +	spin_unlock_irqrestore(&rpm->debug_lock, flags);
> +}
> +
> +static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +{
> +	struct i915_runtime_pm *rpm = &i915->runtime_pm;
> +	depot_stack_handle_t *stacks;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&rpm->debug_lock, flags);
> +	stacks = fetch_and_zero(&rpm->debug_owners);
> +	rpm->debug_count = 0;
> +	spin_unlock_irqrestore(&rpm->debug_lock, flags);
> +
> +	kfree(stacks);
> +}
> +
> +static int cmphandle(const void *_a, const void *_b)
> +{
> +	const depot_stack_handle_t * const a = _a, * const b = _b;
> +
> +	if (*a < *b)
> +		return -1;
> +	else if (*a > *b)
> +		return 1;
> +	else
> +		return 0;
> +}
> +
> +static void __print_intel_runtime_pm_wakeref(struct drm_printer *p,
> +					     depot_stack_handle_t *stacks,
> +					     unsigned long count)
> +{
> +	unsigned long entries[STACKDEPTH];
> +	unsigned long i;
> +	char *buf;
> +
> +	drm_printf(p, "Wakeref count: %lu\n", count);
> +
> +	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
> +	if (!buf)
> +		return;
> +
> +	sort(stacks, count, sizeof(*stacks), cmphandle, NULL);

Is the sorting only for nicer output?

> +
> +	for (i = 0; i < count; i++) {
> +		struct stack_trace trace = {
> +			.entries = entries,
> +			.max_entries = ARRAY_SIZE(entries),
> +		};
> +		depot_stack_handle_t stack = stacks[i];
> +		unsigned long rep;
> +
> +		rep = 1;
> +		while (i + 1 < count && stacks[i + 1] == stack)
> +			rep++, i++;
> +		depot_fetch_stack(stack, &trace);
> +		snprint_stack_trace(buf, PAGE_SIZE, &trace, 2);
> +		drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
> +	}
> +
> +	kfree(buf);
> +}
> +
> +static void show_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +{

s/wakeref/wakerefs?

-Mika

> +	struct i915_runtime_pm *rpm = &i915->runtime_pm;
> +	depot_stack_handle_t *stacks;
> +	unsigned long flags, count;
> +	struct drm_printer p;
> +
> +	spin_lock_irqsave(&rpm->debug_lock, flags);
> +	stacks = fetch_and_zero(&rpm->debug_owners);
> +	count = fetch_and_zero(&rpm->debug_count);
> +	spin_unlock_irqrestore(&rpm->debug_lock, flags);
> +	if (!count)
> +		return;
> +
> +	p = drm_debug_printer("i915");
> +	__print_intel_runtime_pm_wakeref(&p, stacks, count);
> +
> +	kfree(stacks);
> +}
> +
> +void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
> +				    struct drm_printer *p)
> +{
> +	depot_stack_handle_t *stacks = NULL;
> +	unsigned long count = 0;
> +
> +	do {
> +		struct i915_runtime_pm *rpm = &i915->runtime_pm;
> +		unsigned long alloc = count;
> +		depot_stack_handle_t *s;
> +
> +		spin_lock_irq(&rpm->debug_lock);
> +		count = rpm->debug_count;
> +		if (count == alloc) {
> +			memcpy(stacks,
> +			       rpm->debug_owners,
> +			       count * sizeof(*stacks));
> +		}
> +		spin_unlock_irq(&rpm->debug_lock);
> +		if (count == alloc)
> +			break;
> +
> +		s = krealloc(stacks, count * sizeof(*stacks), GFP_KERNEL);
> +		if (!s)
> +			goto out;
> +
> +		stacks = s;
> +	} while (1);
> +
> +	__print_intel_runtime_pm_wakeref(p, stacks, count);
> +
> +out:
> +	kfree(stacks);
> +}
> +
> +#else
> +
> +static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +{
> +}
> +
> +static void track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +{
> +}
> +
> +static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +{
> +}
> +
> +static void show_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +{
> +}
> +
> +#endif
> +
>  bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
>  					 enum i915_power_well_id power_well_id);
>  
> @@ -3986,7 +4171,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
>  
>  /**
>   * intel_runtime_pm_get - grab a runtime pm reference
> - * @dev_priv: i915 device instance
> + * @i915: i915 device instance
>   *
>   * This function grabs a device-level runtime pm reference (mostly used for GEM
>   * code to ensure the GTT or GT is on) and ensures that it is powered up.
> @@ -3994,22 +4179,24 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
>   * Any runtime pm reference obtained by this function must have a symmetric
>   * call to intel_runtime_pm_put() to release the reference again.
>   */
> -void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
> +void intel_runtime_pm_get(struct drm_i915_private *i915)
>  {
> -	struct pci_dev *pdev = dev_priv->drm.pdev;
> +	struct pci_dev *pdev = i915->drm.pdev;
>  	struct device *kdev = &pdev->dev;
>  	int ret;
>  
>  	ret = pm_runtime_get_sync(kdev);
>  	WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret);
>  
> -	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
> -	assert_rpm_wakelock_held(dev_priv);
> +	atomic_inc(&i915->runtime_pm.wakeref_count);
> +	assert_rpm_wakelock_held(i915);
> +
> +	track_intel_runtime_pm_wakeref(i915);
>  }
>  
>  /**
>   * intel_runtime_pm_get_if_in_use - grab a runtime pm reference if device in use
> - * @dev_priv: i915 device instance
> + * @i915: i915 device instance
>   *
>   * This function grabs a device-level runtime pm reference if the device is
>   * already in use and ensures that it is powered up. It is illegal to try
> @@ -4020,10 +4207,10 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
>   *
>   * Returns: True if the wakeref was acquired, or False otherwise.
>   */
> -bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
> +bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
>  {
>  	if (IS_ENABLED(CONFIG_PM)) {
> -		struct pci_dev *pdev = dev_priv->drm.pdev;
> +		struct pci_dev *pdev = i915->drm.pdev;
>  		struct device *kdev = &pdev->dev;
>  
>  		/*
> @@ -4036,15 +4223,17 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
>  			return false;
>  	}
>  
> -	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
> -	assert_rpm_wakelock_held(dev_priv);
> +	atomic_inc(&i915->runtime_pm.wakeref_count);
> +	assert_rpm_wakelock_held(i915);
> +
> +	track_intel_runtime_pm_wakeref(i915);
>  
>  	return true;
>  }
>  
>  /**
>   * intel_runtime_pm_get_noresume - grab a runtime pm reference
> - * @dev_priv: i915 device instance
> + * @i915: i915 device instance
>   *
>   * This function grabs a device-level runtime pm reference (mostly used for GEM
>   * code to ensure the GTT or GT is on).
> @@ -4059,32 +4248,35 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
>   * Any runtime pm reference obtained by this function must have a symmetric
>   * call to intel_runtime_pm_put() to release the reference again.
>   */
> -void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv)
> +void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
>  {
> -	struct pci_dev *pdev = dev_priv->drm.pdev;
> +	struct pci_dev *pdev = i915->drm.pdev;
>  	struct device *kdev = &pdev->dev;
>  
> -	assert_rpm_wakelock_held(dev_priv);
> +	assert_rpm_wakelock_held(i915);
>  	pm_runtime_get_noresume(kdev);
>  
> -	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
> +	atomic_inc(&i915->runtime_pm.wakeref_count);
> +
> +	track_intel_runtime_pm_wakeref(i915);
>  }
>  
>  /**
>   * intel_runtime_pm_put - release a runtime pm reference
> - * @dev_priv: i915 device instance
> + * @i915: i915 device instance
>   *
>   * This function drops the device-level runtime pm reference obtained by
>   * intel_runtime_pm_get() and might power down the corresponding
>   * hardware block right away if this is the last reference.
>   */
> -void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
> +void intel_runtime_pm_put(struct drm_i915_private *i915)
>  {
> -	struct pci_dev *pdev = dev_priv->drm.pdev;
> +	struct pci_dev *pdev = i915->drm.pdev;
>  	struct device *kdev = &pdev->dev;
>  
> -	assert_rpm_wakelock_held(dev_priv);
> -	atomic_dec(&dev_priv->runtime_pm.wakeref_count);
> +	assert_rpm_wakelock_held(i915);
> +	if (atomic_dec_and_test(&i915->runtime_pm.wakeref_count))
> +		untrack_intel_runtime_pm_wakeref(i915);
>  
>  	pm_runtime_mark_last_busy(kdev);
>  	pm_runtime_put_autosuspend(kdev);
> @@ -4092,7 +4284,7 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
>  
>  /**
>   * intel_runtime_pm_enable - enable runtime pm
> - * @dev_priv: i915 device instance
> + * @i915: i915 device instance
>   *
>   * This function enables runtime pm at the end of the driver load sequence.
>   *
> @@ -4100,9 +4292,9 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
>   * subordinate display power domains. That is done by
>   * intel_power_domains_enable().
>   */
> -void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
> +void intel_runtime_pm_enable(struct drm_i915_private *i915)
>  {
> -	struct pci_dev *pdev = dev_priv->drm.pdev;
> +	struct pci_dev *pdev = i915->drm.pdev;
>  	struct device *kdev = &pdev->dev;
>  
>  	/*
> @@ -4124,7 +4316,7 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
>  	 * so the driver's own RPM reference tracking asserts also work on
>  	 * platforms without RPM support.
>  	 */
> -	if (!HAS_RUNTIME_PM(dev_priv)) {
> +	if (!HAS_RUNTIME_PM(i915)) {
>  		int ret;
>  
>  		pm_runtime_dont_use_autosuspend(kdev);
> @@ -4142,17 +4334,36 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
>  	pm_runtime_put_autosuspend(kdev);
>  }
>  
> -void intel_runtime_pm_disable(struct drm_i915_private *dev_priv)
> +void intel_runtime_pm_disable(struct drm_i915_private *i915)
>  {
> -	struct pci_dev *pdev = dev_priv->drm.pdev;
> +	struct pci_dev *pdev = i915->drm.pdev;
>  	struct device *kdev = &pdev->dev;
>  
>  	/* Transfer rpm ownership back to core */
> -	WARN(pm_runtime_get_sync(&dev_priv->drm.pdev->dev) < 0,
> +	WARN(pm_runtime_get_sync(kdev) < 0,
>  	     "Failed to pass rpm ownership back to core\n");
>  
>  	pm_runtime_dont_use_autosuspend(kdev);
>  
> -	if (!HAS_RUNTIME_PM(dev_priv))
> +	if (!HAS_RUNTIME_PM(i915))
>  		pm_runtime_put(kdev);
>  }
> +
> +void intel_runtime_pm_cleanup(struct drm_i915_private *i915)
> +{
> +	struct i915_runtime_pm *rpm = &i915->runtime_pm;
> +
> +	if (WARN(atomic_read(&rpm->wakeref_count),
> +		 "i915->runtime_pm.wakeref_count=%d on cleanup\n",
> +		 atomic_read(&rpm->wakeref_count))) {
> +		show_intel_runtime_pm_wakeref(i915);
> +		atomic_set(&rpm->wakeref_count, 0);
> +	}
> +
> +	untrack_intel_runtime_pm_wakeref(i915);
> +}
> +
> +void intel_runtime_pm_init_early(struct drm_i915_private *i915)
> +{
> +	init_intel_runtime_pm_wakeref(i915);
> +}
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 43ed8b28aeaa..0eb283e7fc96 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -154,15 +154,17 @@ struct drm_i915_private *mock_gem_device(void)
>  	pdev->dev.archdata.iommu = (void *)-1;
>  #endif
>  
> +	i915 = (struct drm_i915_private *)(pdev + 1);
> +	pci_set_drvdata(pdev, i915);
> +
> +	intel_runtime_pm_init_early(i915);
> +
>  	dev_pm_domain_set(&pdev->dev, &pm_domain);
>  	pm_runtime_enable(&pdev->dev);
>  	pm_runtime_dont_use_autosuspend(&pdev->dev);
>  	if (pm_runtime_enabled(&pdev->dev))
>  		WARN_ON(pm_runtime_get_sync(&pdev->dev));
>  
> -	i915 = (struct drm_i915_private *)(pdev + 1);
> -	pci_set_drvdata(pdev, i915);
> -
>  	err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev);
>  	if (err) {
>  		pr_err("Failed to initialise mock GEM device: err=%d\n", err);
> -- 
> 2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 03/46] drm/i915: Track all held rpm wakerefs
  2019-01-07 13:14   ` Mika Kuoppala
@ 2019-01-07 13:22     ` Chris Wilson
  0 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 13:22 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx; +Cc: Jani Nikula

Quoting Mika Kuoppala (2019-01-07 13:14:00)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> > @@ -2011,6 +2011,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
> >  
> >  out:
> >       enable_rpm_wakeref_asserts(dev_priv);
> > +     if (!dev_priv->uncore.user_forcewake.count)
> > +             intel_runtime_pm_cleanup(dev_priv);
> >
> 
> Why would we have forcewake active in here?

Why would the user suspend while holding
open("/debug/dri/0/i915_user_forcewake")?

Because they can.
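
Illustratively (a userspace sketch; the debugfs file is the one I
abbreviate above, typically /sys/kernel/debug/dri/0/i915_user_forcewake):

        int fd = open("/sys/kernel/debug/dri/0/i915_user_forcewake",
                      O_RDONLY);
        /* the driver holds a user forcewake reference for as long as
         * fd stays open, so suspending now exercises this path */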
 
> Are you planning on extending intel_runtime_pm_cleanup?
> At least at this call site, 'intel_runtime_assert_no_wakerefs' would
> make more sense.

Oh yes, yes, yes. The challenge is that we take the rpm wakeref with
such frequency and variety of lifetimes that we end up with so much
tracked state that finding the leak becomes very hard (and we cannot
report an underflow elsewhere at the moment for similar reasons). So
this first wave is to catch the easy stuff and report a leak on module
unload; then, once everyone is tracking their own wakeref, we can do the
must_check annotation and WARN on underflow.
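
To make that concrete, a rough sketch of where this is heading (the
intel_wakeref_t cookie and the two-argument put are shorthand for the
plan, not code in this series):

        typedef depot_stack_handle_t intel_wakeref_t;

        intel_wakeref_t __must_check
        intel_runtime_pm_get(struct drm_i915_private *i915);
        void intel_runtime_pm_put(struct drm_i915_private *i915,
                                  intel_wakeref_t wref);

Each caller then owns a cookie: a cookie without a matching put is a
reportable leak, and a put for an unknown cookie is the underflow WARN.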
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (47 preceding siblings ...)
  2019-01-07 13:05 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-01-07 14:00 ` Tvrtko Ursulin
  2019-01-07 14:07   ` Chris Wilson
  2019-01-07 17:10 ` ✗ Fi.CI.IGT: failure for series starting with [01/46] " Patchwork
  2019-01-08 13:50 ` ✗ Fi.CI.BAT: failure for series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim (rev3) Patchwork
  50 siblings, 1 reply; 111+ messages in thread
From: Tvrtko Ursulin @ 2019-01-07 14:00 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/01/2019 11:54, Chris Wilson wrote:
> Ignore trying to shrink from i915 if we fail to acquire the struct_mutex
> in the shrinker while performing direct-reclaim. The trade-off being
> (much) lower latency for non-i915 clients at an increased risk of being
> unable to obtain a page from direct-reclaim without hitting the
> oom-notifier. The proviso being that we still keep trying to hard
> obtain the lock for kswapd so that we can reap under heavy memory
> pressure.
> 
> v2: Taint all mutexes taken within the shrinker with the struct_mutex
> subclass as an early warning system, and drop I915_SHRINK_ACTIVE from
> vmap to reduce the number of dangerous paths. We also have to drop
> I915_SHRINK_ACTIVE from oom-notifier to be able to make the same claim
> that ACTIVE is only used from outside context, which fits in with a
> longer strategy of avoiding stalls due to scanning active during
> shrinking.

But the oom notifier is not always an outside context, is it?

> 
> The danger in using the subclass struct_mutex is that we declare
> ourselves more knowledgable than lockdep and deprive ourselves of
> automatic coverage. Instead, we require ourselves to mark up any mutex
> taken inside the shrinker in order to detect lock-inversion, and if we
> miss any we are doomed to a deadlock at the worst possible moment.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h          |  7 +--
>   drivers/gpu/drm/i915/i915_gem_gtt.c      |  8 +--
>   drivers/gpu/drm/i915/i915_gem_shrinker.c | 68 ++++++++++++++++--------
>   3 files changed, 54 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7fa2a405c5fe..17a017645c5d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2899,9 +2899,9 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>   	__i915_gem_object_unpin_pages(obj);
>   }
>   
> -enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock */
> +enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
>   	I915_MM_NORMAL = 0,
> -	I915_MM_SHRINKER
> +	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
>   };
>   
>   void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> @@ -3187,7 +3187,8 @@ unsigned long i915_gem_shrink(struct drm_i915_private *i915,
>   unsigned long i915_gem_shrink_all(struct drm_i915_private *i915);
>   void i915_gem_shrinker_register(struct drm_i915_private *i915);
>   void i915_gem_shrinker_unregister(struct drm_i915_private *i915);
> -void i915_gem_shrinker_taints_mutex(struct mutex *mutex);
> +void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
> +				    struct mutex *mutex);
>   
>   /* i915_gem_tiling.c */
>   static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index d4c5973ea33d..5cc8968eb3bf 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -483,7 +483,7 @@ static void i915_address_space_init(struct i915_address_space *vm,
>   	 * attempt holding the lock is immediately reported by lockdep.
>   	 */
>   	mutex_init(&vm->mutex);
> -	i915_gem_shrinker_taints_mutex(&vm->mutex);
> +	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
>   
>   	GEM_BUG_ON(!vm->total);
>   	drm_mm_init(&vm->mm, 0, vm->total);
> @@ -2245,7 +2245,8 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
>   				     DMA_ATTR_NO_WARN))
>   			return 0;
>   
> -		/* If the DMA remap fails, one cause can be that we have
> +		/*
> +		 * If the DMA remap fails, one cause can be that we have
>   		 * too many objects pinned in a small remapping table,
>   		 * such as swiotlb. Incrementally purge all other objects and
>   		 * try again - if there are no more pages to remove from
> @@ -2255,8 +2256,7 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
>   	} while (i915_gem_shrink(to_i915(obj->base.dev),
>   				 obj->base.size >> PAGE_SHIFT, NULL,
>   				 I915_SHRINK_BOUND |
> -				 I915_SHRINK_UNBOUND |
> -				 I915_SHRINK_ACTIVE));
> +				 I915_SHRINK_UNBOUND));
>   

Why this change?

Regards,

Tvrtko

>   	return -ENOSPC;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index ea90d3a0d511..72d6ea0cac7e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -36,7 +36,9 @@
>   #include "i915_drv.h"
>   #include "i915_trace.h"
>   
> -static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
> +static bool shrinker_lock(struct drm_i915_private *i915,
> +			  unsigned int flags,
> +			  bool *unlock)
>   {
>   	switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) {
>   	case MUTEX_TRYLOCK_RECURSIVE:
> @@ -45,15 +47,11 @@ static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
>   
>   	case MUTEX_TRYLOCK_FAILED:
>   		*unlock = false;
> -		preempt_disable();
> -		do {
> -			cpu_relax();
> -			if (mutex_trylock(&i915->drm.struct_mutex)) {
> -				*unlock = true;
> -				break;
> -			}
> -		} while (!need_resched());
> -		preempt_enable();
> +		if (flags & I915_SHRINK_ACTIVE) {
> +			mutex_lock_nested(&i915->drm.struct_mutex,
> +					  I915_MM_SHRINKER);
> +			*unlock = true;
> +		}
>   		return *unlock;
>   
>   	case MUTEX_TRYLOCK_SUCCESS:
> @@ -160,7 +158,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
>   	unsigned long scanned = 0;
>   	bool unlock;
>   
> -	if (!shrinker_lock(i915, &unlock))
> +	if (!shrinker_lock(i915, flags, &unlock))
>   		return 0;
>   
>   	/*
> @@ -357,7 +355,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
>   
>   	sc->nr_scanned = 0;
>   
> -	if (!shrinker_lock(i915, &unlock))
> +	if (!shrinker_lock(i915, 0, &unlock))
>   		return SHRINK_STOP;
>   
>   	freed = i915_gem_shrink(i915,
> @@ -397,7 +395,7 @@ shrinker_lock_uninterruptible(struct drm_i915_private *i915, bool *unlock,
>   	do {
>   		if (i915_gem_wait_for_idle(i915,
>   					   0, MAX_SCHEDULE_TIMEOUT) == 0 &&
> -		    shrinker_lock(i915, unlock))
> +		    shrinker_lock(i915, 0, unlock))
>   			break;
>   
>   		schedule_timeout_killable(1);
> @@ -421,7 +419,11 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
>   	struct drm_i915_gem_object *obj;
>   	unsigned long unevictable, bound, unbound, freed_pages;
>   
> -	freed_pages = i915_gem_shrink_all(i915);
> +	intel_runtime_pm_get(i915);
> +	freed_pages = i915_gem_shrink(i915, -1UL, NULL,
> +				      I915_SHRINK_BOUND |
> +				      I915_SHRINK_UNBOUND);
> +	intel_runtime_pm_put(i915);
>   
>   	/* Because we may be allocating inside our own driver, we cannot
>   	 * assert that there are no objects with pinned pages that are not
> @@ -447,10 +449,6 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
>   		pr_info("Purging GPU memory, %lu pages freed, "
>   			"%lu pages still pinned.\n",
>   			freed_pages, unevictable);
> -	if (unbound || bound)
> -		pr_err("%lu and %lu pages still available in the "
> -		       "bound and unbound GPU page lists.\n",
> -		       bound, unbound);
>   
>   	*(unsigned long *)ptr += freed_pages;
>   	return NOTIFY_DONE;
> @@ -480,7 +478,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
>   	freed_pages += i915_gem_shrink(i915, -1UL, NULL,
>   				       I915_SHRINK_BOUND |
>   				       I915_SHRINK_UNBOUND |
> -				       I915_SHRINK_ACTIVE |
>   				       I915_SHRINK_VMAPS);
>   	intel_runtime_pm_put(i915);
>   
> @@ -533,13 +530,40 @@ void i915_gem_shrinker_unregister(struct drm_i915_private *i915)
>   	unregister_shrinker(&i915->mm.shrinker);
>   }
>   
> -void i915_gem_shrinker_taints_mutex(struct mutex *mutex)
> +void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
> +				    struct mutex *mutex)
>   {
> +	bool unlock = false;
> +
>   	if (!IS_ENABLED(CONFIG_LOCKDEP))
>   		return;
>   
> +	if (!lockdep_is_held_type(&i915->drm.struct_mutex, -1)) {
> +		mutex_acquire(&i915->drm.struct_mutex.dep_map,
> +			      I915_MM_NORMAL, 0, _RET_IP_);
> +		unlock = true;
> +	}
> +
>   	fs_reclaim_acquire(GFP_KERNEL);
> -	mutex_lock(mutex);
> -	mutex_unlock(mutex);
> +
> +	/*
> +	 * As we invariably rely on the struct_mutex within the shrinker,
> +	 * but have a complicated recursion dance, taint all the mutexes used
> +	 * within the shrinker with the struct_mutex. For completeness, we
> +	 * taint with all subclass of struct_mutex, even though we should
> +	 * only need tainting by I915_MM_NORMAL to catch possible ABBA
> +	 * deadlocks from using struct_mutex inside @mutex.
> +	 */
> +	mutex_acquire(&i915->drm.struct_mutex.dep_map,
> +		      I915_MM_SHRINKER, 0, _RET_IP_);
> +
> +	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
> +	mutex_release(&mutex->dep_map, 0, _RET_IP_);
> +
> +	mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
> +
>   	fs_reclaim_release(GFP_KERNEL);
> +
> +	if (unlock)
> +		mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
>   }
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim
  2019-01-07 14:00 ` [PATCH 01/46] " Tvrtko Ursulin
@ 2019-01-07 14:07   ` Chris Wilson
  2019-01-08  8:58     ` Tvrtko Ursulin
  0 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 14:07 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-01-07 14:00:25)
> 
> On 07/01/2019 11:54, Chris Wilson wrote:
> > Ignore trying to shrink from i915 if we fail to acquire the struct_mutex
> > in the shrinker while performing direct-reclaim. The trade-off being
> > (much) lower latency for non-i915 clients at an increased risk of being
> > unable to obtain a page from direct-reclaim without hitting the
> > oom-notifier. The proviso being that we still keep trying to hard
> > obtain the lock for kswapd so that we can reap under heavy memory
> > pressure.
> > 
> > v2: Taint all mutexes taken within the shrinker with the struct_mutex
> > subclass as an early warning system, and drop I915_SHRINK_ACTIVE from
> > vmap to reduce the number of dangerous paths. We also have to drop
> > I915_SHRINK_ACTIVE from oom-notifier to be able to make the same claim
> > that ACTIVE is only used from outside context, which fits in with a
> > longer strategy of avoiding stalls due to scanning active during
> > shrinking.
> 
> But the oom notifier is not always an outside context, is it?

The oom-notifier is invoked in the final death throes of direct reclaim.
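
For reference, the hook is registered roughly like this (a sketch based
on i915_gem_shrinker_register(), not part of this patch):

        #include <linux/oom.h>

        i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
        WARN_ON(register_oom_notifier(&i915->mm.oom_notifier));

so by the time i915_gem_shrinker_oom() is called, direct reclaim has
already failed to make progress.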

> > @@ -2255,8 +2256,7 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
> >       } while (i915_gem_shrink(to_i915(obj->base.dev),
> >                                obj->base.size >> PAGE_SHIFT, NULL,
> >                                I915_SHRINK_BOUND |
> > -                              I915_SHRINK_UNBOUND |
> > -                              I915_SHRINK_ACTIVE));
> > +                              I915_SHRINK_UNBOUND));
> >   
> 
> Why this change?

i915_gem_gtt_prepare_pages() is called at the end of obj->ops->get_pages(),
which is not (yet) guaranteed to run outside of struct_mutex. So I dropped
ACTIVE here as part of the claim that ACTIVE is only used from kswapd (or
at known idle times such as i915_gem_freeze, where I can handwave that
ACTIVE is meaningless).
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* ✗ Fi.CI.IGT: failure for series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (48 preceding siblings ...)
  2019-01-07 14:00 ` [PATCH 01/46] " Tvrtko Ursulin
@ 2019-01-07 17:10 ` Patchwork
  2019-01-07 17:19   ` Chris Wilson
  2019-01-08 13:50 ` ✗ Fi.CI.BAT: failure for series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim (rev3) Patchwork
  50 siblings, 1 reply; 111+ messages in thread
From: Patchwork @ 2019-01-07 17:10 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim
URL   : https://patchwork.freedesktop.org/series/54803/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_5368_full -> Patchwork_11200_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_11200_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_11200_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_11200_full:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_missed_irq:
    - shard-snb:          PASS -> FAIL
    - shard-kbl:          PASS -> FAIL
    - shard-hsw:          PASS -> FAIL
    - shard-skl:          PASS -> FAIL
    - shard-glk:          PASS -> FAIL
    - shard-apl:          PASS -> FAIL

  
#### Warnings ####

  * igt@pm_rc6_residency@rc6-accuracy:
    - shard-kbl:          SKIP -> PASS

  
Known issues
------------

  Here are the changes found in Patchwork_11200_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_schedule@pi-ringfull-blt:
    - shard-skl:          NOTRUN -> FAIL [fdo#103158] +2

  * igt@gem_ppgtt@blt-vs-render-ctx0:
    - shard-skl:          NOTRUN -> TIMEOUT [fdo#108039]

  * igt@i915_suspend@shrink:
    - shard-skl:          NOTRUN -> INCOMPLETE [fdo#106886]
    - shard-apl:          NOTRUN -> DMESG-WARN [fdo#107886] / [fdo#109244]

  * igt@kms_atomic_transition@1x-modeset-transitions:
    - shard-skl:          NOTRUN -> FAIL [fdo#107815] / [fdo#108470] +1

  * igt@kms_busy@extended-modeset-hang-newfb-render-b:
    - shard-kbl:          NOTRUN -> DMESG-WARN [fdo#107956] +1

  * igt@kms_busy@extended-modeset-hang-newfb-render-c:
    - shard-skl:          NOTRUN -> DMESG-WARN [fdo#107956] +4

  * igt@kms_chv_cursor_fail@pipe-b-128x128-top-edge:
    - shard-skl:          NOTRUN -> FAIL [fdo#104671]

  * igt@kms_color@pipe-b-ctm-0-25:
    - shard-skl:          NOTRUN -> FAIL [fdo#108682]

  * igt@kms_color@pipe-b-ctm-max:
    - shard-apl:          PASS -> FAIL [fdo#108147]

  * igt@kms_cursor_crc@cursor-256x85-onscreen:
    - shard-apl:          PASS -> FAIL [fdo#103232] +1

  * igt@kms_draw_crc@draw-method-rgb565-render-xtiled:
    - shard-skl:          PASS -> FAIL [fdo#103184]

  * igt@kms_fbcon_fbt@psr-suspend:
    - shard-skl:          NOTRUN -> FAIL [fdo#107882]

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-shrfb-draw-mmap-gtt:
    - shard-skl:          NOTRUN -> FAIL [fdo#105682]

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-pwrite:
    - shard-apl:          PASS -> FAIL [fdo#103167] +1

  * igt@kms_frontbuffer_tracking@fbcpsr-suspend:
    - shard-iclb:         PASS -> INCOMPLETE [fdo#106978] / [fdo#107713]

  * igt@kms_frontbuffer_tracking@psr-1p-primscrn-spr-indfb-draw-mmap-gtt:
    - shard-skl:          NOTRUN -> FAIL [fdo#103167]

  * igt@kms_panel_fitting@legacy:
    - shard-skl:          NOTRUN -> FAIL [fdo#105456]

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
    - shard-skl:          NOTRUN -> FAIL [fdo#103191] / [fdo#107362] +1

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
    - shard-skl:          PASS -> INCOMPLETE [fdo#104108] / [fdo#107773]

  * igt@kms_plane@pixel-format-pipe-a-planes-source-clamping:
    - shard-skl:          NOTRUN -> DMESG-WARN [fdo#106885] +2

  * igt@kms_plane@pixel-format-pipe-c-planes-source-clamping:
    - shard-glk:          PASS -> FAIL [fdo#108948]
    - shard-apl:          PASS -> FAIL [fdo#108948]

  * igt@kms_plane_alpha_blend@pipe-a-alpha-7efc:
    - shard-skl:          NOTRUN -> FAIL [fdo#107815] / [fdo#108145] +1

  * igt@kms_plane_alpha_blend@pipe-a-constant-alpha-max:
    - shard-skl:          NOTRUN -> FAIL [fdo#108145] +2

  * igt@kms_plane_multiple@atomic-pipe-a-tiling-x:
    - shard-apl:          PASS -> FAIL [fdo#103166]

  * igt@kms_plane_multiple@atomic-pipe-a-tiling-y:
    - shard-glk:          PASS -> FAIL [fdo#103166] +3

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-yf:
    - shard-skl:          NOTRUN -> FAIL [fdo#103166] / [fdo#107815]

  * igt@kms_psr@suspend:
    - shard-skl:          NOTRUN -> INCOMPLETE [fdo#107773]

  * igt@kms_rmfb@close-fd:
    - shard-iclb:         PASS -> DMESG-WARN [fdo#107724] +1

  * igt@kms_rotation_crc@multiplane-rotation-cropping-top:
    - shard-kbl:          PASS -> DMESG-FAIL [fdo#108950]
    - shard-iclb:         PASS -> DMESG-FAIL [fdo#107724]

  * igt@kms_setmode@basic:
    - shard-skl:          NOTRUN -> FAIL [fdo#99912]

  * igt@kms_vblank@pipe-a-ts-continuation-dpms-suspend:
    - shard-kbl:          PASS -> INCOMPLETE [fdo#103665]

  * igt@pm_rpm@universal-planes:
    - shard-iclb:         PASS -> DMESG-WARN [fdo#108654] / [fdo#108756]

  * igt@pm_rps@min-max-config-loaded:
    - shard-apl:          PASS -> FAIL [fdo#102250]

  
#### Possible fixes ####

  * igt@kms_color@pipe-c-ctm-max:
    - shard-apl:          FAIL [fdo#108147] -> PASS

  * igt@kms_cursor_crc@cursor-128x128-suspend:
    - shard-apl:          FAIL [fdo#103191] / [fdo#103232] -> PASS

  * igt@kms_cursor_crc@cursor-256x85-sliding:
    - shard-apl:          FAIL [fdo#103232] -> PASS +2

  * igt@kms_flip@flip-vs-expired-vblank-interruptible:
    - shard-glk:          FAIL [fdo#102887] / [fdo#105363] -> PASS

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-mmap-gtt:
    - shard-apl:          FAIL [fdo#103167] -> PASS +1
    - shard-glk:          FAIL [fdo#103167] -> PASS +2

  * igt@kms_frontbuffer_tracking@fbcpsr-stridechange:
    - shard-iclb:         FAIL [fdo#105683] / [fdo#108040] -> PASS

  * igt@kms_frontbuffer_tracking@psr-1p-primscrn-spr-indfb-draw-pwrite:
    - shard-iclb:         FAIL [fdo#103167] -> PASS +2

  * igt@kms_plane@plane-position-covered-pipe-b-planes:
    - shard-glk:          FAIL [fdo#103166] -> PASS +2

  * igt@kms_plane_multiple@atomic-pipe-a-tiling-y:
    - shard-iclb:         FAIL [fdo#103166] -> PASS

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-x:
    - shard-apl:          FAIL [fdo#103166] -> PASS +1

  * igt@kms_plane_scaling@pipe-a-scaler-with-rotation:
    - shard-iclb:         DMESG-WARN [fdo#107724] -> PASS +1

  * igt@kms_vblank@pipe-b-query-busy-hang:
    - shard-apl:          INCOMPLETE [fdo#103927] -> PASS

  * igt@pm_rpm@dpms-non-lpsp:
    - shard-kbl:          DMESG-WARN [fdo#103313] / [fdo#105345] -> PASS +1

  * igt@pm_rpm@modeset-lpsp-stress:
    - shard-skl:          INCOMPLETE [fdo#107807] -> PASS

  * igt@pm_rpm@system-suspend:
    - shard-skl:          INCOMPLETE [fdo#104108] / [fdo#107773] / [fdo#107807] -> PASS

  
#### Warnings ####

  * igt@kms_rotation_crc@multiplane-rotation-cropping-top:
    - shard-glk:          DMESG-WARN [fdo#105763] / [fdo#106538] -> DMESG-FAIL [fdo#105763] / [fdo#106538]

  * igt@pm_backlight@fade_with_suspend:
    - shard-iclb:         FAIL [fdo#107847] -> DMESG-FAIL [fdo#107724]

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#102250]: https://bugs.freedesktop.org/show_bug.cgi?id=102250
  [fdo#102887]: https://bugs.freedesktop.org/show_bug.cgi?id=102887
  [fdo#103158]: https://bugs.freedesktop.org/show_bug.cgi?id=103158
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103184]: https://bugs.freedesktop.org/show_bug.cgi?id=103184
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103313]: https://bugs.freedesktop.org/show_bug.cgi?id=103313
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#104671]: https://bugs.freedesktop.org/show_bug.cgi?id=104671
  [fdo#105345]: https://bugs.freedesktop.org/show_bug.cgi?id=105345
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#105456]: https://bugs.freedesktop.org/show_bug.cgi?id=105456
  [fdo#105682]: https://bugs.freedesktop.org/show_bug.cgi?id=105682
  [fdo#105683]: https://bugs.freedesktop.org/show_bug.cgi?id=105683
  [fdo#105763]: https://bugs.freedesktop.org/show_bug.cgi?id=105763
  [fdo#106538]: https://bugs.freedesktop.org/show_bug.cgi?id=106538
  [fdo#106885]: https://bugs.freedesktop.org/show_bug.cgi?id=106885
  [fdo#106886]: https://bugs.freedesktop.org/show_bug.cgi?id=106886
  [fdo#106978]: https://bugs.freedesktop.org/show_bug.cgi?id=106978
  [fdo#107362]: https://bugs.freedesktop.org/show_bug.cgi?id=107362
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#107773]: https://bugs.freedesktop.org/show_bug.cgi?id=107773
  [fdo#107807]: https://bugs.freedesktop.org/show_bug.cgi?id=107807
  [fdo#107815]: https://bugs.freedesktop.org/show_bug.cgi?id=107815
  [fdo#107847]: https://bugs.freedesktop.org/show_bug.cgi?id=107847
  [fdo#107882]: https://bugs.freedesktop.org/show_bug.cgi?id=107882
  [fdo#107886]: https://bugs.freedesktop.org/show_bug.cgi?id=107886
  [fdo#107956]: https://bugs.freedesktop.org/show_bug.cgi?id=107956
  [fdo#108039]: https://bugs.freedesktop.org/show_bug.cgi?id=108039
  [fdo#108040]: https://bugs.freedesktop.org/show_bug.cgi?id=108040
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108147]: https://bugs.freedesktop.org/show_bug.cgi?id=108147
  [fdo#108470]: https://bugs.freedesktop.org/show_bug.cgi?id=108470
  [fdo#108654]: https://bugs.freedesktop.org/show_bug.cgi?id=108654
  [fdo#108682]: https://bugs.freedesktop.org/show_bug.cgi?id=108682
  [fdo#108756]: https://bugs.freedesktop.org/show_bug.cgi?id=108756
  [fdo#108770]: https://bugs.freedesktop.org/show_bug.cgi?id=108770
  [fdo#108948]: https://bugs.freedesktop.org/show_bug.cgi?id=108948
  [fdo#108950]: https://bugs.freedesktop.org/show_bug.cgi?id=108950
  [fdo#109241]: https://bugs.freedesktop.org/show_bug.cgi?id=109241
  [fdo#109244]: https://bugs.freedesktop.org/show_bug.cgi?id=109244
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912


Participating hosts (7 -> 7)
------------------------------

  No changes in participating hosts


Build changes
-------------

    * Linux: CI_DRM_5368 -> Patchwork_11200

  CI_DRM_5368: 64bd30ea3ce0edd057a5b393569947a955472757 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4756: 75081c6bfb9998bd7cbf35a7ac0578c683fe55a8 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_11200: 0bf3389593c055c874799a4cd89caa7cfaa86927 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_11200/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: ✗ Fi.CI.IGT: failure for series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim
  2019-01-07 17:10 ` ✗ Fi.CI.IGT: failure for series starting with [01/46] " Patchwork
@ 2019-01-07 17:19   ` Chris Wilson
  0 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-07 17:19 UTC (permalink / raw)
  To: Patchwork; +Cc: intel-gfx

Quoting Patchwork (2019-01-07 17:10:25)
> #### Possible regressions ####
> 
>   * igt@i915_missed_irq:
>     - shard-snb:          PASS -> FAIL
>     - shard-kbl:          PASS -> FAIL
>     - shard-hsw:          PASS -> FAIL
>     - shard-skl:          PASS -> FAIL
>     - shard-glk:          PASS -> FAIL
>     - shard-apl:          PASS -> FAIL

It's not really a bug, just a failure in how we test. (Not that I can
think of a better test.) The issue is that since we have coupled the
waiters to the dma_fence, every time we retire/signal the fences we
remove the waiter. So in this test, the retire worker runs in the
background signaling all the fences before intel_breadcrumbs_hangcheck
runs -- we never see the missed interrupt because we poll
asynchronously.

My conclusion is a patch to just remove the fake_irq as we do not need
it anymore.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim
  2019-01-07 14:07   ` Chris Wilson
@ 2019-01-08  8:58     ` Tvrtko Ursulin
  0 siblings, 0 replies; 111+ messages in thread
From: Tvrtko Ursulin @ 2019-01-08  8:58 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/01/2019 14:07, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-01-07 14:00:25)
>>
>> On 07/01/2019 11:54, Chris Wilson wrote:
>>> Ignore trying to shrink from i915 if we fail to acquire the struct_mutex
>>> in the shrinker while performing direct-reclaim. The trade-off being
>>> (much) lower latency for non-i915 clients at an increased risk of being
>>> unable to obtain a page from direct-reclaim without hitting the
>>> oom-notifier. The proviso being that we still keep trying to hard
>>> obtain the lock for kswapd so that we can reap under heavy memory
>>> pressure.
>>>
>>> v2: Taint all mutexes taken within the shrinker with the struct_mutex
>>> subclass as an early warning system, and drop I915_SHRINK_ACTIVE from
>>> vmap to reduce the number of dangerous paths. We also have to drop
>>> I915_SHRINK_ACTIVE from oom-notifier to be able to make the same claim
>>> that ACTIVE is only used from outside context, which fits in with a
>>> longer strategy of avoiding stalls due to scanning active during
>>> shrinking.
>>
>> But the oom notifier is not always an outside context, is it?
> 
> The oom-notifier is invoked in the final death throes of direct reclaim.
> 
>>> @@ -2255,8 +2256,7 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
>>>        } while (i915_gem_shrink(to_i915(obj->base.dev),
>>>                                 obj->base.size >> PAGE_SHIFT, NULL,
>>>                                 I915_SHRINK_BOUND |
>>> -                              I915_SHRINK_UNBOUND |
>>> -                              I915_SHRINK_ACTIVE));
>>> +                              I915_SHRINK_UNBOUND));
>>>    
>>
>> Why this change?
> 
> i915_gem_gtt_prepare_pages() is called at the end of obj->ops->get_pages(),
> which is not (yet) guaranteed to run outside of struct_mutex. So I dropped
> ACTIVE here as part of the claim that ACTIVE is only used from kswapd (or
> at known idle times such as i915_gem_freeze, where I can handwave that
> ACTIVE is meaningless).

Okay,

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
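
[Editor's note: the behaviour discussed in this exchange -- bail out of
direct-reclaim when the lock is contended, but always block for kswapd
-- has roughly the following shape. This is an illustrative kernel-style
sketch only; example_dev, big_lock and example_reap_pages are
hypothetical names, not the actual i915 code:]

static unsigned long
example_shrink_scan(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct example_dev *dev =
		container_of(shrinker, struct example_dev, shrinker);
	unsigned long freed;

	if (current_is_kswapd()) {
		/* kswapd must still be able to reap under pressure. */
		mutex_lock(&dev->big_lock);
	} else if (!mutex_trylock(&dev->big_lock)) {
		/* Contended direct-reclaim: return immediately. */
		return SHRINK_STOP;
	}

	freed = example_reap_pages(dev, sc->nr_to_scan);
	mutex_unlock(&dev->big_lock);

	return freed;
}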


* Re: [PATCH 25/46] drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex
  2019-01-07 11:54 ` [PATCH 25/46] drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex Chris Wilson
@ 2019-01-08  9:00   ` Tvrtko Ursulin
  0 siblings, 0 replies; 111+ messages in thread
From: Tvrtko Ursulin @ 2019-01-08  9:00 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/01/2019 11:54, Chris Wilson wrote:
> We have two classes of VM, global GTT and per-process GTT. In order to
> allow ourselves the freedom to mix both along call chains, distinguish
> the two classes with regard to their mutex and lockdep maps.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_gtt.c       | 10 +++++-----
>   drivers/gpu/drm/i915/i915_gem_gtt.h       |  2 ++
>   drivers/gpu/drm/i915/selftests/mock_gtt.c |  6 +++---
>   3 files changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 9e9ce31142b1..2f35f13d177d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -474,8 +474,7 @@ static void vm_free_page(struct i915_address_space *vm, struct page *page)
>   	spin_unlock(&vm->free_pages.lock);
>   }
>   
> -static void i915_address_space_init(struct i915_address_space *vm,
> -				    struct drm_i915_private *dev_priv)
> +static void i915_address_space_init(struct i915_address_space *vm, int subclass)
>   {
>   	/*
>   	 * The vm->mutex must be reclaim safe (for use in the shrinker).
> @@ -483,6 +482,7 @@ static void i915_address_space_init(struct i915_address_space *vm,
>   	 * attempt holding the lock is immediately reported by lockdep.
>   	 */
>   	mutex_init(&vm->mutex);
> +	lockdep_set_subclass(&vm->mutex, subclass);
>   	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
>   
>   	GEM_BUG_ON(!vm->total);
> @@ -1548,7 +1548,7 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>   	/* From bdw, there is support for read-only pages in the PPGTT. */
>   	ppgtt->vm.has_read_only = true;
>   
> -	i915_address_space_init(&ppgtt->vm, i915);
> +	i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
>   
>   	/* There are only few exceptions for gen >=6. chv and bxt.
>   	 * And we are not sure about the latter so play safe for now.
> @@ -1997,7 +1997,7 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
>   
>   	ppgtt->base.vm.total = I915_PDES * GEN6_PTES * I915_GTT_PAGE_SIZE;
>   
> -	i915_address_space_init(&ppgtt->base.vm, i915);
> +	i915_address_space_init(&ppgtt->base.vm, VM_CLASS_PPGTT);
>   
>   	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
>   	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
> @@ -3434,7 +3434,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
>   	 * and beyond the end of the GTT if we do not provide a guard.
>   	 */
>   	mutex_lock(&dev_priv->drm.struct_mutex);
> -	i915_address_space_init(&ggtt->vm, dev_priv);
> +	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
>   
>   	ggtt->vm.is_ggtt = true;
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index e2360f16427a..9229b03d629b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -288,6 +288,8 @@ struct i915_address_space {
>   	bool closed;
>   
>   	struct mutex mutex; /* protects vma and our lists */
> +#define VM_CLASS_GGTT 0
> +#define VM_CLASS_PPGTT 1
>   
>   	u64 scratch_pte;
>   	struct i915_page_dma scratch_page;
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
> index 6ae418c76015..976c862b3842 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
> @@ -70,7 +70,7 @@ mock_ppgtt(struct drm_i915_private *i915,
>   	ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE);
>   	ppgtt->vm.file = ERR_PTR(-ENODEV);
>   
> -	i915_address_space_init(&ppgtt->vm, i915);
> +	i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
>   
>   	ppgtt->vm.clear_range = nop_clear_range;
>   	ppgtt->vm.insert_page = mock_insert_page;
> @@ -102,6 +102,7 @@ void mock_init_ggtt(struct drm_i915_private *i915)
>   	struct i915_ggtt *ggtt = &i915->ggtt;
>   
>   	ggtt->vm.i915 = i915;
> +	ggtt->vm.is_ggtt = true;
>   
>   	ggtt->gmadr = (struct resource) DEFINE_RES_MEM(0, 2048 * PAGE_SIZE);
>   	ggtt->mappable_end = resource_size(&ggtt->gmadr);
> @@ -117,9 +118,8 @@ void mock_init_ggtt(struct drm_i915_private *i915)
>   	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
>   	ggtt->vm.vma_ops.clear_pages = clear_pages;
>   
> -	i915_address_space_init(&ggtt->vm, i915);
>   
> -	ggtt->vm.is_ggtt = true;
> +	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
>   }
>   
>   void mock_fini_ggtt(struct drm_i915_private *i915)
> 

Again:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko


* [PATCH v2] drm/i915: Track all held rpm wakerefs
  2019-01-07 11:54 ` [PATCH 03/46] drm/i915: Track all held rpm wakerefs Chris Wilson
  2019-01-07 13:14   ` Mika Kuoppala
@ 2019-01-08 11:45   ` Chris Wilson
  2019-01-08 12:22   ` [PATCH v3] " Chris Wilson
  2 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-08 11:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Every time we take a wakeref, record the stack trace of where it was
taken, clearing the set if we ever drop back to no owners. For debugging
an rpm leak, we can look at all the current wakerefs and check if they
have a matching rpm_put.

v2: Use skip=0 for unwinding the stack, as our noinline function
apparently doesn't appear on the stack (nor does save_stack_trace itself!).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig.debug            |   2 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |   6 +
 drivers/gpu/drm/i915/i915_drv.c               |   8 +-
 drivers/gpu/drm/i915/i915_drv.h               |   7 +
 drivers/gpu/drm/i915/intel_drv.h              |  44 ++-
 drivers/gpu/drm/i915/intel_runtime_pm.c       | 267 ++++++++++++++++--
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   8 +-
 7 files changed, 292 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 9e36ffb5eb7c..a97929c47466 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -21,11 +21,11 @@ config DRM_I915_DEBUG
         select DEBUG_FS
         select PREEMPT_COUNT
         select I2C_CHARDEV
+        select STACKDEPOT
         select DRM_DP_AUX_CHARDEV
         select X86_MSR # used by igt/pm_rpm
         select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
         select DRM_DEBUG_MM if DRM=y
-        select STACKDEPOT if DRM=y # for DRM_DEBUG_MM
 	select DRM_DEBUG_SELFTEST
 	select SW_SYNC # signaling validation framework (igt/syncobj*)
 	select DRM_I915_SW_FENCE_DEBUG_OBJECTS
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 95813e21ae02..050cf8abd426 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2702,6 +2702,12 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
 		   pci_power_name(pdev->current_state),
 		   pdev->current_state);
 
+	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)) {
+		struct drm_printer p = drm_seq_file_printer(m);
+
+		print_intel_runtime_pm_wakeref(dev_priv, &p);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 17fca3ba343e..e2f4753ca21f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -906,6 +906,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv)
 	mutex_init(&dev_priv->pps_mutex);
 
 	i915_memcpy_init_early(dev_priv);
+	intel_runtime_pm_init_early(dev_priv);
 
 	ret = i915_workqueues_init(dev_priv);
 	if (ret < 0)
@@ -1808,8 +1809,7 @@ void i915_driver_unload(struct drm_device *dev)
 	i915_driver_cleanup_mmio(dev_priv);
 
 	enable_rpm_wakeref_asserts(dev_priv);
-
-	WARN_ON(atomic_read(&dev_priv->runtime_pm.wakeref_count));
+	intel_runtime_pm_cleanup(dev_priv);
 }
 
 static void i915_driver_release(struct drm_device *dev)
@@ -2011,6 +2011,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
 
 out:
 	enable_rpm_wakeref_asserts(dev_priv);
+	if (!dev_priv->uncore.user_forcewake.count)
+		intel_runtime_pm_cleanup(dev_priv);
 
 	return ret;
 }
@@ -2966,7 +2968,7 @@ static int intel_runtime_suspend(struct device *kdev)
 	}
 
 	enable_rpm_wakeref_asserts(dev_priv);
-	WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count));
+	intel_runtime_pm_cleanup(dev_priv);
 
 	if (intel_uncore_arm_unclaimed_mmio_detection(dev_priv))
 		DRM_ERROR("Unclaimed access detected prior to suspending\n");
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7c152cd51498..e1f244107e42 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -45,6 +45,7 @@
 #include <linux/pm_qos.h>
 #include <linux/reservation.h>
 #include <linux/shmem_fs.h>
+#include <linux/stackdepot.h>
 
 #include <drm/drmP.h>
 #include <drm/intel-gtt.h>
@@ -1156,6 +1157,12 @@ struct i915_runtime_pm {
 	atomic_t wakeref_count;
 	bool suspended;
 	bool irqs_enabled;
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+	spinlock_t debug_lock;
+	depot_stack_handle_t *debug_owners;
+	unsigned long debug_count;
+#endif
 };
 
 enum intel_pipe_crc_source {
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 1a11c2beb7f3..ac513fd70315 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -41,6 +41,8 @@
 #include <drm/drm_atomic.h>
 #include <media/cec-notifier.h>
 
+struct drm_printer;
+
 /**
  * __wait_for - magic wait macro
  *
@@ -2084,6 +2086,7 @@ bool intel_psr_enabled(struct intel_dp *intel_dp);
 void intel_init_quirks(struct drm_i915_private *dev_priv);
 
 /* intel_runtime_pm.c */
+void intel_runtime_pm_init_early(struct drm_i915_private *dev_priv);
 int intel_power_domains_init(struct drm_i915_private *);
 void intel_power_domains_cleanup(struct drm_i915_private *dev_priv);
 void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume);
@@ -2106,6 +2109,7 @@ void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume);
 void bxt_display_core_uninit(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_enable(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_disable(struct drm_i915_private *dev_priv);
+void intel_runtime_pm_cleanup(struct drm_i915_private *dev_priv);
 const char *
 intel_display_power_domain_str(enum intel_display_power_domain domain);
 
@@ -2123,23 +2127,23 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
 			    u8 req_slices);
 
 static inline void
-assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv)
+assert_rpm_device_not_suspended(struct drm_i915_private *i915)
 {
-	WARN_ONCE(dev_priv->runtime_pm.suspended,
+	WARN_ONCE(i915->runtime_pm.suspended,
 		  "Device suspended during HW access\n");
 }
 
 static inline void
-assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
+assert_rpm_wakelock_held(struct drm_i915_private *i915)
 {
-	assert_rpm_device_not_suspended(dev_priv);
-	WARN_ONCE(!atomic_read(&dev_priv->runtime_pm.wakeref_count),
+	assert_rpm_device_not_suspended(i915);
+	WARN_ONCE(!atomic_read(&i915->runtime_pm.wakeref_count),
 		  "RPM wakelock ref not held during HW access");
 }
 
 /**
  * disable_rpm_wakeref_asserts - disable the RPM assert checks
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function disable asserts that check if we hold an RPM wakelock
  * reference, while keeping the device-not-suspended checks still enabled.
@@ -2156,14 +2160,14 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
  * enable_rpm_wakeref_asserts().
  */
 static inline void
-disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
+disable_rpm_wakeref_asserts(struct drm_i915_private *i915)
 {
-	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
+	atomic_inc(&i915->runtime_pm.wakeref_count);
 }
 
 /**
  * enable_rpm_wakeref_asserts - re-enable the RPM assert checks
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function re-enables the RPM assert checks after disabling them with
  * disable_rpm_wakeref_asserts. It's meant to be used only in special
@@ -2173,15 +2177,25 @@ disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
  * disable_rpm_wakeref_asserts().
  */
 static inline void
-enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
+enable_rpm_wakeref_asserts(struct drm_i915_private *i915)
 {
-	atomic_dec(&dev_priv->runtime_pm.wakeref_count);
+	atomic_dec(&i915->runtime_pm.wakeref_count);
 }
 
-void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
-bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv);
-void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
-void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
+void intel_runtime_pm_get(struct drm_i915_private *i915);
+bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
+void intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
+void intel_runtime_pm_put(struct drm_i915_private *i915);
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
+				    struct drm_printer *p);
+#else
+static inline void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
+						  struct drm_printer *p)
+{
+}
+#endif
 
 void chv_phy_powergate_lanes(struct intel_encoder *encoder,
 			     bool override, unsigned int mask);
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 9e9501f82f06..48a19104d208 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -29,6 +29,8 @@
 #include <linux/pm_runtime.h>
 #include <linux/vgaarb.h>
 
+#include <drm/drm_print.h>
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 
@@ -49,6 +51,189 @@
  * present for a given platform.
  */
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+
+#include <linux/sort.h>
+
+#define STACKDEPTH 8
+
+static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+	spin_lock_init(&i915->runtime_pm.debug_lock);
+}
+
+static noinline void
+track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+	unsigned long entries[STACKDEPTH];
+	struct stack_trace trace = {
+		.entries = entries,
+		.max_entries = ARRAY_SIZE(entries),
+		.skip = 0 /* gcc is ignoring noinline for tail calls? */
+	};
+	unsigned long flags;
+	depot_stack_handle_t stack, *stacks;
+
+	if (!HAS_RUNTIME_PM(i915))
+		return;
+
+	save_stack_trace(&trace);
+	if (trace.nr_entries &&
+	    trace.entries[trace.nr_entries - 1] == ULONG_MAX)
+		trace.nr_entries--;
+
+	stack = depot_save_stack(&trace, GFP_NOWAIT | __GFP_NOWARN);
+	if (!stack)
+		return;
+
+	spin_lock_irqsave(&rpm->debug_lock, flags);
+	stacks = krealloc(rpm->debug_owners,
+			  (rpm->debug_count + 1) * sizeof(*stacks),
+			  GFP_NOWAIT | __GFP_NOWARN);
+	if (stacks) {
+		stacks[rpm->debug_count++] = stack;
+		rpm->debug_owners = stacks;
+	}
+	spin_unlock_irqrestore(&rpm->debug_lock, flags);
+}
+
+static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+	depot_stack_handle_t *stacks;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rpm->debug_lock, flags);
+	stacks = fetch_and_zero(&rpm->debug_owners);
+	rpm->debug_count = 0;
+	spin_unlock_irqrestore(&rpm->debug_lock, flags);
+
+	kfree(stacks);
+}
+
+static int cmphandle(const void *_a, const void *_b)
+{
+	const depot_stack_handle_t * const a = _a, * const b = _b;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void __print_intel_runtime_pm_wakeref(struct drm_printer *p,
+					     depot_stack_handle_t *stacks,
+					     unsigned long count)
+{
+	unsigned long entries[STACKDEPTH];
+	unsigned long i;
+	char *buf;
+
+	drm_printf(p, "Wakeref count: %lu\n", count);
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		return;
+
+	sort(stacks, count, sizeof(*stacks), cmphandle, NULL);
+
+	for (i = 0; i < count; i++) {
+		struct stack_trace trace = {
+			.entries = entries,
+			.max_entries = ARRAY_SIZE(entries),
+		};
+		depot_stack_handle_t stack = stacks[i];
+		unsigned long rep;
+
+		rep = 1;
+		while (i + 1 < count && stacks[i + 1] == stack)
+			rep++, i++;
+		depot_fetch_stack(stack, &trace);
+		snprint_stack_trace(buf, PAGE_SIZE, &trace, 2);
+		drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
+	}
+
+	kfree(buf);
+}
+
+static void show_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+	depot_stack_handle_t *stacks;
+	unsigned long flags, count;
+	struct drm_printer p;
+
+	spin_lock_irqsave(&rpm->debug_lock, flags);
+	stacks = fetch_and_zero(&rpm->debug_owners);
+	count = fetch_and_zero(&rpm->debug_count);
+	spin_unlock_irqrestore(&rpm->debug_lock, flags);
+	if (!count)
+		return;
+
+	p = drm_debug_printer("i915");
+	__print_intel_runtime_pm_wakeref(&p, stacks, count);
+
+	kfree(stacks);
+}
+
+void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
+				    struct drm_printer *p)
+{
+	depot_stack_handle_t *stacks = NULL;
+	unsigned long count = 0;
+
+	do {
+		struct i915_runtime_pm *rpm = &i915->runtime_pm;
+		unsigned long alloc = count;
+		depot_stack_handle_t *s;
+
+		spin_lock_irq(&rpm->debug_lock);
+		count = rpm->debug_count;
+		if (count == alloc) {
+			memcpy(stacks,
+			       rpm->debug_owners,
+			       count * sizeof(*stacks));
+		}
+		spin_unlock_irq(&rpm->debug_lock);
+		if (count == alloc)
+			break;
+
+		s = krealloc(stacks, count * sizeof(*stacks), GFP_KERNEL);
+		if (!s)
+			goto out;
+
+		stacks = s;
+	} while (1);
+
+	__print_intel_runtime_pm_wakeref(p, stacks, count);
+
+out:
+	kfree(stacks);
+}
+
+#else
+
+static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+}
+
+static void track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+}
+
+static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+}
+
+static void show_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+}
+
+#endif
+
 bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
 					 enum i915_power_well_id power_well_id);
 
@@ -3986,7 +4171,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
 
 /**
  * intel_runtime_pm_get - grab a runtime pm reference
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function grabs a device-level runtime pm reference (mostly used for GEM
  * code to ensure the GTT or GT is on) and ensures that it is powered up.
@@ -3994,22 +4179,24 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
  * Any runtime pm reference obtained by this function must have a symmetric
  * call to intel_runtime_pm_put() to release the reference again.
  */
-void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_get(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 	int ret;
 
 	ret = pm_runtime_get_sync(kdev);
 	WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret);
 
-	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
-	assert_rpm_wakelock_held(dev_priv);
+	atomic_inc(&i915->runtime_pm.wakeref_count);
+	assert_rpm_wakelock_held(i915);
+
+	track_intel_runtime_pm_wakeref(i915);
 }
 
 /**
  * intel_runtime_pm_get_if_in_use - grab a runtime pm reference if device in use
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function grabs a device-level runtime pm reference if the device is
  * already in use and ensures that it is powered up. It is illegal to try
@@ -4020,10 +4207,10 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
  *
  * Returns: True if the wakeref was acquired, or False otherwise.
  */
-bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
+bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
 {
 	if (IS_ENABLED(CONFIG_PM)) {
-		struct pci_dev *pdev = dev_priv->drm.pdev;
+		struct pci_dev *pdev = i915->drm.pdev;
 		struct device *kdev = &pdev->dev;
 
 		/*
@@ -4036,15 +4223,17 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
 			return false;
 	}
 
-	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
-	assert_rpm_wakelock_held(dev_priv);
+	atomic_inc(&i915->runtime_pm.wakeref_count);
+	assert_rpm_wakelock_held(i915);
+
+	track_intel_runtime_pm_wakeref(i915);
 
 	return true;
 }
 
 /**
  * intel_runtime_pm_get_noresume - grab a runtime pm reference
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function grabs a device-level runtime pm reference (mostly used for GEM
  * code to ensure the GTT or GT is on).
@@ -4059,32 +4248,35 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
  * Any runtime pm reference obtained by this function must have a symmetric
  * call to intel_runtime_pm_put() to release the reference again.
  */
-void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
-	assert_rpm_wakelock_held(dev_priv);
+	assert_rpm_wakelock_held(i915);
 	pm_runtime_get_noresume(kdev);
 
-	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
+	atomic_inc(&i915->runtime_pm.wakeref_count);
+
+	track_intel_runtime_pm_wakeref(i915);
 }
 
 /**
  * intel_runtime_pm_put - release a runtime pm reference
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function drops the device-level runtime pm reference obtained by
  * intel_runtime_pm_get() and might power down the corresponding
  * hardware block right away if this is the last reference.
  */
-void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_put(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
-	assert_rpm_wakelock_held(dev_priv);
-	atomic_dec(&dev_priv->runtime_pm.wakeref_count);
+	assert_rpm_wakelock_held(i915);
+	if (atomic_dec_and_test(&i915->runtime_pm.wakeref_count))
+		untrack_intel_runtime_pm_wakeref(i915);
 
 	pm_runtime_mark_last_busy(kdev);
 	pm_runtime_put_autosuspend(kdev);
@@ -4092,7 +4284,7 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
 
 /**
  * intel_runtime_pm_enable - enable runtime pm
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function enables runtime pm at the end of the driver load sequence.
  *
@@ -4100,9 +4292,9 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
  * subordinate display power domains. That is done by
  * intel_power_domains_enable().
  */
-void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_enable(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
 	/*
@@ -4124,7 +4316,7 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
 	 * so the driver's own RPM reference tracking asserts also work on
 	 * platforms without RPM support.
 	 */
-	if (!HAS_RUNTIME_PM(dev_priv)) {
+	if (!HAS_RUNTIME_PM(i915)) {
 		int ret;
 
 		pm_runtime_dont_use_autosuspend(kdev);
@@ -4142,17 +4334,36 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
 	pm_runtime_put_autosuspend(kdev);
 }
 
-void intel_runtime_pm_disable(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_disable(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
 	/* Transfer rpm ownership back to core */
-	WARN(pm_runtime_get_sync(&dev_priv->drm.pdev->dev) < 0,
+	WARN(pm_runtime_get_sync(kdev) < 0,
 	     "Failed to pass rpm ownership back to core\n");
 
 	pm_runtime_dont_use_autosuspend(kdev);
 
-	if (!HAS_RUNTIME_PM(dev_priv))
+	if (!HAS_RUNTIME_PM(i915))
 		pm_runtime_put(kdev);
 }
+
+void intel_runtime_pm_cleanup(struct drm_i915_private *i915)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+
+	if (WARN(atomic_read(&rpm->wakeref_count),
+		 "i915->runtime_pm.wakeref_count=%d on cleanup\n",
+		 atomic_read(&rpm->wakeref_count))) {
+		show_intel_runtime_pm_wakeref(i915);
+		atomic_set(&rpm->wakeref_count, 0);
+	}
+
+	untrack_intel_runtime_pm_wakeref(i915);
+}
+
+void intel_runtime_pm_init_early(struct drm_i915_private *i915)
+{
+	init_intel_runtime_pm_wakeref(i915);
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index baa3c38919de..082809569681 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -154,15 +154,17 @@ struct drm_i915_private *mock_gem_device(void)
 	pdev->dev.archdata.iommu = (void *)-1;
 #endif
 
+	i915 = (struct drm_i915_private *)(pdev + 1);
+	pci_set_drvdata(pdev, i915);
+
+	intel_runtime_pm_init_early(i915);
+
 	dev_pm_domain_set(&pdev->dev, &pm_domain);
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_dont_use_autosuspend(&pdev->dev);
 	if (pm_runtime_enabled(&pdev->dev))
 		WARN_ON(pm_runtime_get_sync(&pdev->dev));
 
-	i915 = (struct drm_i915_private *)(pdev + 1);
-	pci_set_drvdata(pdev, i915);
-
 	err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev);
 	if (err) {
 		pr_err("Failed to initialise mock GEM device: err=%d\n", err);
-- 
2.20.1



* [PATCH v3] drm/i915: Track all held rpm wakerefs
  2019-01-07 11:54 ` [PATCH 03/46] drm/i915: Track all held rpm wakerefs Chris Wilson
  2019-01-07 13:14   ` Mika Kuoppala
  2019-01-08 11:45   ` [PATCH v2] " Chris Wilson
@ 2019-01-08 12:22   ` Chris Wilson
  2019-01-08 12:49     ` Mika Kuoppala
  2019-01-08 20:05     ` kbuild test robot
  2 siblings, 2 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-08 12:22 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Every time we take a wakeref, record the stack trace of where it was
taken, clearing the set if we ever drop back to no owners. For debugging
an rpm leak, we can look at all the current wakerefs and check if they
have a matching rpm_put.

v2: Use skip=0 for unwinding the stack, as our noinline function
apparently doesn't appear on the stack (nor does save_stack_trace itself!).
v3: Allow rpm->debug_count to disappear between inspections and so
avoid calling krealloc(0), as that may return ZERO_SIZE_PTR, not NULL! (Mika)
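
[Editor's aside: a short sketch of the hazard the v3 note guards
against. krealloc() with a size of zero may return ZERO_SIZE_PTR, which
is non-NULL but must never be dereferenced, so a zero count cannot be
treated like an ordinary allocation request. The variable names here
are illustrative:]

	/* Risky: if count can be 0, krealloc(ptr, 0, ...) may hand back
	 * ZERO_SIZE_PTR, which passes a NULL check yet is not a buffer. */
	stacks = krealloc(stacks, count * sizeof(*stacks), GFP_KERNEL);

	/* Safer: only reallocate when there is something to copy. */
	if (count) {
		s = krealloc(stacks, count * sizeof(*s), GFP_KERNEL);
		if (!s)
			return;
		stacks = s;
	}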

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig.debug            |   2 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |   6 +
 drivers/gpu/drm/i915/i915_drv.c               |   8 +-
 drivers/gpu/drm/i915/i915_drv.h               |   7 +
 drivers/gpu/drm/i915/intel_drv.h              |  44 ++-
 drivers/gpu/drm/i915/intel_runtime_pm.c       | 264 ++++++++++++++++--
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   8 +-
 7 files changed, 289 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 9e36ffb5eb7c..a97929c47466 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -21,11 +21,11 @@ config DRM_I915_DEBUG
         select DEBUG_FS
         select PREEMPT_COUNT
         select I2C_CHARDEV
+        select STACKDEPOT
         select DRM_DP_AUX_CHARDEV
         select X86_MSR # used by igt/pm_rpm
         select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
         select DRM_DEBUG_MM if DRM=y
-        select STACKDEPOT if DRM=y # for DRM_DEBUG_MM
 	select DRM_DEBUG_SELFTEST
 	select SW_SYNC # signaling validation framework (igt/syncobj*)
 	select DRM_I915_SW_FENCE_DEBUG_OBJECTS
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 95813e21ae02..050cf8abd426 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2702,6 +2702,12 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
 		   pci_power_name(pdev->current_state),
 		   pdev->current_state);
 
+	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)) {
+		struct drm_printer p = drm_seq_file_printer(m);
+
+		print_intel_runtime_pm_wakeref(dev_priv, &p);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 17fca3ba343e..e2f4753ca21f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -906,6 +906,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv)
 	mutex_init(&dev_priv->pps_mutex);
 
 	i915_memcpy_init_early(dev_priv);
+	intel_runtime_pm_init_early(dev_priv);
 
 	ret = i915_workqueues_init(dev_priv);
 	if (ret < 0)
@@ -1808,8 +1809,7 @@ void i915_driver_unload(struct drm_device *dev)
 	i915_driver_cleanup_mmio(dev_priv);
 
 	enable_rpm_wakeref_asserts(dev_priv);
-
-	WARN_ON(atomic_read(&dev_priv->runtime_pm.wakeref_count));
+	intel_runtime_pm_cleanup(dev_priv);
 }
 
 static void i915_driver_release(struct drm_device *dev)
@@ -2011,6 +2011,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
 
 out:
 	enable_rpm_wakeref_asserts(dev_priv);
+	if (!dev_priv->uncore.user_forcewake.count)
+		intel_runtime_pm_cleanup(dev_priv);
 
 	return ret;
 }
@@ -2966,7 +2968,7 @@ static int intel_runtime_suspend(struct device *kdev)
 	}
 
 	enable_rpm_wakeref_asserts(dev_priv);
-	WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count));
+	intel_runtime_pm_cleanup(dev_priv);
 
 	if (intel_uncore_arm_unclaimed_mmio_detection(dev_priv))
 		DRM_ERROR("Unclaimed access detected prior to suspending\n");
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7c152cd51498..e1f244107e42 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -45,6 +45,7 @@
 #include <linux/pm_qos.h>
 #include <linux/reservation.h>
 #include <linux/shmem_fs.h>
+#include <linux/stackdepot.h>
 
 #include <drm/drmP.h>
 #include <drm/intel-gtt.h>
@@ -1156,6 +1157,12 @@ struct i915_runtime_pm {
 	atomic_t wakeref_count;
 	bool suspended;
 	bool irqs_enabled;
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+	spinlock_t debug_lock;
+	depot_stack_handle_t *debug_owners;
+	unsigned long debug_count;
+#endif
 };
 
 enum intel_pipe_crc_source {
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 1a11c2beb7f3..ac513fd70315 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -41,6 +41,8 @@
 #include <drm/drm_atomic.h>
 #include <media/cec-notifier.h>
 
+struct drm_printer;
+
 /**
  * __wait_for - magic wait macro
  *
@@ -2084,6 +2086,7 @@ bool intel_psr_enabled(struct intel_dp *intel_dp);
 void intel_init_quirks(struct drm_i915_private *dev_priv);
 
 /* intel_runtime_pm.c */
+void intel_runtime_pm_init_early(struct drm_i915_private *dev_priv);
 int intel_power_domains_init(struct drm_i915_private *);
 void intel_power_domains_cleanup(struct drm_i915_private *dev_priv);
 void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume);
@@ -2106,6 +2109,7 @@ void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume);
 void bxt_display_core_uninit(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_enable(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_disable(struct drm_i915_private *dev_priv);
+void intel_runtime_pm_cleanup(struct drm_i915_private *dev_priv);
 const char *
 intel_display_power_domain_str(enum intel_display_power_domain domain);
 
@@ -2123,23 +2127,23 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
 			    u8 req_slices);
 
 static inline void
-assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv)
+assert_rpm_device_not_suspended(struct drm_i915_private *i915)
 {
-	WARN_ONCE(dev_priv->runtime_pm.suspended,
+	WARN_ONCE(i915->runtime_pm.suspended,
 		  "Device suspended during HW access\n");
 }
 
 static inline void
-assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
+assert_rpm_wakelock_held(struct drm_i915_private *i915)
 {
-	assert_rpm_device_not_suspended(dev_priv);
-	WARN_ONCE(!atomic_read(&dev_priv->runtime_pm.wakeref_count),
+	assert_rpm_device_not_suspended(i915);
+	WARN_ONCE(!atomic_read(&i915->runtime_pm.wakeref_count),
 		  "RPM wakelock ref not held during HW access");
 }
 
 /**
  * disable_rpm_wakeref_asserts - disable the RPM assert checks
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function disable asserts that check if we hold an RPM wakelock
  * reference, while keeping the device-not-suspended checks still enabled.
@@ -2156,14 +2160,14 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
  * enable_rpm_wakeref_asserts().
  */
 static inline void
-disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
+disable_rpm_wakeref_asserts(struct drm_i915_private *i915)
 {
-	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
+	atomic_inc(&i915->runtime_pm.wakeref_count);
 }
 
 /**
  * enable_rpm_wakeref_asserts - re-enable the RPM assert checks
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function re-enables the RPM assert checks after disabling them with
  * disable_rpm_wakeref_asserts. It's meant to be used only in special
@@ -2173,15 +2177,25 @@ disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
  * disable_rpm_wakeref_asserts().
  */
 static inline void
-enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
+enable_rpm_wakeref_asserts(struct drm_i915_private *i915)
 {
-	atomic_dec(&dev_priv->runtime_pm.wakeref_count);
+	atomic_dec(&i915->runtime_pm.wakeref_count);
 }
 
-void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
-bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv);
-void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
-void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
+void intel_runtime_pm_get(struct drm_i915_private *i915);
+bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
+void intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
+void intel_runtime_pm_put(struct drm_i915_private *i915);
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
+				    struct drm_printer *p);
+#else
+static inline void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
+						  struct drm_printer *p)
+{
+}
+#endif
 
 void chv_phy_powergate_lanes(struct intel_encoder *encoder,
 			     bool override, unsigned int mask);
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 9e9501f82f06..c813a2b972c2 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -29,6 +29,8 @@
 #include <linux/pm_runtime.h>
 #include <linux/vgaarb.h>
 
+#include <drm/drm_print.h>
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 
@@ -49,6 +51,186 @@
  * present for a given platform.
  */
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+
+#include <linux/sort.h>
+
+#define STACKDEPTH 8
+
+static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+	spin_lock_init(&i915->runtime_pm.debug_lock);
+}
+
+static noinline void
+track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+	unsigned long entries[STACKDEPTH];
+	struct stack_trace trace = {
+		.entries = entries,
+		.max_entries = ARRAY_SIZE(entries),
+		.skip = 0 /* gcc is ignoring noinline for tail calls? */
+	};
+	unsigned long flags;
+	depot_stack_handle_t stack, *stacks;
+
+	if (!HAS_RUNTIME_PM(i915))
+		return;
+
+	save_stack_trace(&trace);
+	if (trace.nr_entries &&
+	    trace.entries[trace.nr_entries - 1] == ULONG_MAX)
+		trace.nr_entries--;
+
+	stack = depot_save_stack(&trace, GFP_NOWAIT | __GFP_NOWARN);
+	if (!stack)
+		return;
+
+	spin_lock_irqsave(&rpm->debug_lock, flags);
+	stacks = krealloc(rpm->debug_owners,
+			  (rpm->debug_count + 1) * sizeof(*stacks),
+			  GFP_NOWAIT | __GFP_NOWARN);
+	if (stacks) {
+		stacks[rpm->debug_count++] = stack;
+		rpm->debug_owners = stacks;
+	}
+	spin_unlock_irqrestore(&rpm->debug_lock, flags);
+}
+
+static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+	depot_stack_handle_t *stacks;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rpm->debug_lock, flags);
+	stacks = fetch_and_zero(&rpm->debug_owners);
+	rpm->debug_count = 0;
+	spin_unlock_irqrestore(&rpm->debug_lock, flags);
+
+	kfree(stacks);
+}
+
+static int cmphandle(const void *_a, const void *_b)
+{
+	const depot_stack_handle_t * const a = _a, * const b = _b;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void __print_intel_runtime_pm_wakeref(struct drm_printer *p,
+					     depot_stack_handle_t *stacks,
+					     unsigned long count)
+{
+	unsigned long entries[STACKDEPTH];
+	unsigned long i;
+	char *buf;
+
+	drm_printf(p, "Wakeref count: %lu\n", count);
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		return;
+
+	sort(stacks, count, sizeof(*stacks), cmphandle, NULL);
+
+	for (i = 0; i < count; i++) {
+		struct stack_trace trace = {
+			.entries = entries,
+			.max_entries = ARRAY_SIZE(entries),
+		};
+		depot_stack_handle_t stack = stacks[i];
+		unsigned long rep;
+
+		rep = 1;
+		while (i + 1 < count && stacks[i + 1] == stack)
+			rep++, i++;
+		depot_fetch_stack(stack, &trace);
+		snprint_stack_trace(buf, PAGE_SIZE, &trace, 2);
+		drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
+	}
+
+	kfree(buf);
+}
+
+static void show_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+	depot_stack_handle_t *stacks;
+	unsigned long flags, count;
+	struct drm_printer p;
+
+	spin_lock_irqsave(&rpm->debug_lock, flags);
+	stacks = fetch_and_zero(&rpm->debug_owners);
+	count = fetch_and_zero(&rpm->debug_count);
+	spin_unlock_irqrestore(&rpm->debug_lock, flags);
+	if (!count)
+		return;
+
+	p = drm_debug_printer("i915");
+	__print_intel_runtime_pm_wakeref(&p, stacks, count);
+
+	kfree(stacks);
+}
+
+void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
+				    struct drm_printer *p)
+{
+	depot_stack_handle_t *stacks = NULL;
+	unsigned long count = 0;
+
+	do {
+		struct i915_runtime_pm *rpm = &i915->runtime_pm;
+		unsigned long alloc = count;
+		depot_stack_handle_t *s;
+
+		spin_lock_irq(&rpm->debug_lock);
+		count = rpm->debug_count;
+		if (count <= alloc)
+			memcpy(stacks, rpm->debug_owners, count * sizeof(*s));
+		spin_unlock_irq(&rpm->debug_lock);
+		if (count <= alloc)
+			break;
+
+		s = krealloc(stacks, count * sizeof(*s), GFP_KERNEL);
+		if (!s)
+			goto out;
+
+		stacks = s;
+	} while (1);
+
+	__print_intel_runtime_pm_wakeref(p, stacks, count);
+
+out:
+	kfree(stacks);
+}
+
+#else
+
+static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+}
+
+static void track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+}
+
+static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+}
+
+static void show_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+}
+
+#endif
+
 bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
 					 enum i915_power_well_id power_well_id);
 
@@ -3986,7 +4168,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
 
 /**
  * intel_runtime_pm_get - grab a runtime pm reference
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function grabs a device-level runtime pm reference (mostly used for GEM
  * code to ensure the GTT or GT is on) and ensures that it is powered up.
@@ -3994,22 +4176,24 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
  * Any runtime pm reference obtained by this function must have a symmetric
  * call to intel_runtime_pm_put() to release the reference again.
  */
-void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_get(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 	int ret;
 
 	ret = pm_runtime_get_sync(kdev);
 	WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret);
 
-	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
-	assert_rpm_wakelock_held(dev_priv);
+	atomic_inc(&i915->runtime_pm.wakeref_count);
+	assert_rpm_wakelock_held(i915);
+
+	track_intel_runtime_pm_wakeref(i915);
 }
 
 /**
  * intel_runtime_pm_get_if_in_use - grab a runtime pm reference if device in use
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function grabs a device-level runtime pm reference if the device is
  * already in use and ensures that it is powered up. It is illegal to try
@@ -4020,10 +4204,10 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
  *
  * Returns: True if the wakeref was acquired, or False otherwise.
  */
-bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
+bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
 {
 	if (IS_ENABLED(CONFIG_PM)) {
-		struct pci_dev *pdev = dev_priv->drm.pdev;
+		struct pci_dev *pdev = i915->drm.pdev;
 		struct device *kdev = &pdev->dev;
 
 		/*
@@ -4036,15 +4220,17 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
 			return false;
 	}
 
-	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
-	assert_rpm_wakelock_held(dev_priv);
+	atomic_inc(&i915->runtime_pm.wakeref_count);
+	assert_rpm_wakelock_held(i915);
+
+	track_intel_runtime_pm_wakeref(i915);
 
 	return true;
 }
 
 /**
  * intel_runtime_pm_get_noresume - grab a runtime pm reference
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function grabs a device-level runtime pm reference (mostly used for GEM
  * code to ensure the GTT or GT is on).
@@ -4059,32 +4245,35 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
  * Any runtime pm reference obtained by this function must have a symmetric
  * call to intel_runtime_pm_put() to release the reference again.
  */
-void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
-	assert_rpm_wakelock_held(dev_priv);
+	assert_rpm_wakelock_held(i915);
 	pm_runtime_get_noresume(kdev);
 
-	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
+	atomic_inc(&i915->runtime_pm.wakeref_count);
+
+	track_intel_runtime_pm_wakeref(i915);
 }
 
 /**
  * intel_runtime_pm_put - release a runtime pm reference
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function drops the device-level runtime pm reference obtained by
  * intel_runtime_pm_get() and might power down the corresponding
  * hardware block right away if this is the last reference.
  */
-void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_put(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
-	assert_rpm_wakelock_held(dev_priv);
-	atomic_dec(&dev_priv->runtime_pm.wakeref_count);
+	assert_rpm_wakelock_held(i915);
+	if (atomic_dec_and_test(&i915->runtime_pm.wakeref_count))
+		untrack_intel_runtime_pm_wakeref(i915);
 
 	pm_runtime_mark_last_busy(kdev);
 	pm_runtime_put_autosuspend(kdev);
@@ -4092,7 +4281,7 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
 
 /**
  * intel_runtime_pm_enable - enable runtime pm
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function enables runtime pm at the end of the driver load sequence.
  *
@@ -4100,9 +4289,9 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
  * subordinate display power domains. That is done by
  * intel_power_domains_enable().
  */
-void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_enable(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
 	/*
@@ -4124,7 +4313,7 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
 	 * so the driver's own RPM reference tracking asserts also work on
 	 * platforms without RPM support.
 	 */
-	if (!HAS_RUNTIME_PM(dev_priv)) {
+	if (!HAS_RUNTIME_PM(i915)) {
 		int ret;
 
 		pm_runtime_dont_use_autosuspend(kdev);
@@ -4142,17 +4331,36 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
 	pm_runtime_put_autosuspend(kdev);
 }
 
-void intel_runtime_pm_disable(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_disable(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
 	/* Transfer rpm ownership back to core */
-	WARN(pm_runtime_get_sync(&dev_priv->drm.pdev->dev) < 0,
+	WARN(pm_runtime_get_sync(kdev) < 0,
 	     "Failed to pass rpm ownership back to core\n");
 
 	pm_runtime_dont_use_autosuspend(kdev);
 
-	if (!HAS_RUNTIME_PM(dev_priv))
+	if (!HAS_RUNTIME_PM(i915))
 		pm_runtime_put(kdev);
 }
+
+void intel_runtime_pm_cleanup(struct drm_i915_private *i915)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+
+	if (WARN(atomic_read(&rpm->wakeref_count),
+		 "i915->runtime_pm.wakeref_count=%d on cleanup\n",
+		 atomic_read(&rpm->wakeref_count))) {
+		show_intel_runtime_pm_wakeref(i915);
+		atomic_set(&rpm->wakeref_count, 0);
+	}
+
+	untrack_intel_runtime_pm_wakeref(i915);
+}
+
+void intel_runtime_pm_init_early(struct drm_i915_private *i915)
+{
+	init_intel_runtime_pm_wakeref(i915);
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index baa3c38919de..082809569681 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -154,15 +154,17 @@ struct drm_i915_private *mock_gem_device(void)
 	pdev->dev.archdata.iommu = (void *)-1;
 #endif
 
+	i915 = (struct drm_i915_private *)(pdev + 1);
+	pci_set_drvdata(pdev, i915);
+
+	intel_runtime_pm_init_early(i915);
+
 	dev_pm_domain_set(&pdev->dev, &pm_domain);
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_dont_use_autosuspend(&pdev->dev);
 	if (pm_runtime_enabled(&pdev->dev))
 		WARN_ON(pm_runtime_get_sync(&pdev->dev));
 
-	i915 = (struct drm_i915_private *)(pdev + 1);
-	pci_set_drvdata(pdev, i915);
-
 	err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev);
 	if (err) {
 		pr_err("Failed to initialise mock GEM device: err=%d\n", err);
-- 
2.20.1



* Re: [PATCH v3] drm/i915: Track all held rpm wakerefs
  2019-01-08 12:22   ` [PATCH v3] " Chris Wilson
@ 2019-01-08 12:49     ` Mika Kuoppala
  2019-01-08 20:05     ` kbuild test robot
  1 sibling, 0 replies; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-08 12:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Every time we take a wakeref, record the stack trace of where it was
> taken, clearing the set if we ever drop back to no owners. For debugging
> an rpm leak, we can look at all the current wakerefs and check if they
> have a matching rpm_put.
>
> v2: Use skip=0 for unwinding the stack, as our noinline function
> apparently doesn't appear on the stack (nor does save_stack_trace
> itself!).

With this I am able to get the previous frame that called
intel_runtime_pm_*, which is good enough to distinguish
callsites uniquely.
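
[Editor's note: a userspace analogue of the skip semantics discussed
here, using glibc's execinfo (build with -rdynamic to see symbol
names). Dropping the first 'skip' entries of the captured buffer is the
moral equivalent of stack_trace.skip, and with skip = 0 the capturing
function's caller is still visible, which is what makes callsites
distinguishable:]

#include <execinfo.h>
#include <unistd.h>

#define STACKDEPTH 8

static void capture(int skip)
{
	void *entries[STACKDEPTH];
	int n = backtrace(entries, STACKDEPTH);

	if (skip < n)	/* drop the first 'skip' frames, like .skip */
		backtrace_symbols_fd(entries + skip, n - skip,
				     STDOUT_FILENO);
}

int main(void)
{
	capture(0);	/* skip=0: capture() and main() both appear */
	return 0;
}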

> v3: Allow rpm->debug_count to disappear between inspections and so
> avoid calling krealloc(0), as that may return ZERO_SIZE_PTR, not NULL! (Mika)
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>

Some uses of the singular 'wakeref' would read better
if changed to the plural. But I am not insisting.

Can't find anything else with this, so you can add:

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

And I did play with it by introducing intentional leaks.
The signal-to-noise ratio is still high, but it is a start.
If it serves any purpose, you can add also:

Tested-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

-Mika

> ---
>  drivers/gpu/drm/i915/Kconfig.debug            |   2 +-
>  drivers/gpu/drm/i915/i915_debugfs.c           |   6 +
>  drivers/gpu/drm/i915/i915_drv.c               |   8 +-
>  drivers/gpu/drm/i915/i915_drv.h               |   7 +
>  drivers/gpu/drm/i915/intel_drv.h              |  44 ++-
>  drivers/gpu/drm/i915/intel_runtime_pm.c       | 264 ++++++++++++++++--
>  .../gpu/drm/i915/selftests/mock_gem_device.c  |   8 +-
>  7 files changed, 289 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
> index 9e36ffb5eb7c..a97929c47466 100644
> --- a/drivers/gpu/drm/i915/Kconfig.debug
> +++ b/drivers/gpu/drm/i915/Kconfig.debug
> @@ -21,11 +21,11 @@ config DRM_I915_DEBUG
>          select DEBUG_FS
>          select PREEMPT_COUNT
>          select I2C_CHARDEV
> +        select STACKDEPOT
>          select DRM_DP_AUX_CHARDEV
>          select X86_MSR # used by igt/pm_rpm
>          select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
>          select DRM_DEBUG_MM if DRM=y
> -        select STACKDEPOT if DRM=y # for DRM_DEBUG_MM
>  	select DRM_DEBUG_SELFTEST
>  	select SW_SYNC # signaling validation framework (igt/syncobj*)
>  	select DRM_I915_SW_FENCE_DEBUG_OBJECTS
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 95813e21ae02..050cf8abd426 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2702,6 +2702,12 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
>  		   pci_power_name(pdev->current_state),
>  		   pdev->current_state);
>  
> +	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)) {
> +		struct drm_printer p = drm_seq_file_printer(m);
> +
> +		print_intel_runtime_pm_wakeref(dev_priv, &p);
> +	}
> +
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 17fca3ba343e..e2f4753ca21f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -906,6 +906,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv)
>  	mutex_init(&dev_priv->pps_mutex);
>  
>  	i915_memcpy_init_early(dev_priv);
> +	intel_runtime_pm_init_early(dev_priv);
>  
>  	ret = i915_workqueues_init(dev_priv);
>  	if (ret < 0)
> @@ -1808,8 +1809,7 @@ void i915_driver_unload(struct drm_device *dev)
>  	i915_driver_cleanup_mmio(dev_priv);
>  
>  	enable_rpm_wakeref_asserts(dev_priv);
> -
> -	WARN_ON(atomic_read(&dev_priv->runtime_pm.wakeref_count));
> +	intel_runtime_pm_cleanup(dev_priv);
>  }
>  
>  static void i915_driver_release(struct drm_device *dev)
> @@ -2011,6 +2011,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
>  
>  out:
>  	enable_rpm_wakeref_asserts(dev_priv);
> +	if (!dev_priv->uncore.user_forcewake.count)
> +		intel_runtime_pm_cleanup(dev_priv);
>  
>  	return ret;
>  }
> @@ -2966,7 +2968,7 @@ static int intel_runtime_suspend(struct device *kdev)
>  	}
>  
>  	enable_rpm_wakeref_asserts(dev_priv);
> -	WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count));
> +	intel_runtime_pm_cleanup(dev_priv);
>  
>  	if (intel_uncore_arm_unclaimed_mmio_detection(dev_priv))
>  		DRM_ERROR("Unclaimed access detected prior to suspending\n");
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7c152cd51498..e1f244107e42 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -45,6 +45,7 @@
>  #include <linux/pm_qos.h>
>  #include <linux/reservation.h>
>  #include <linux/shmem_fs.h>
> +#include <linux/stackdepot.h>
>  
>  #include <drm/drmP.h>
>  #include <drm/intel-gtt.h>
> @@ -1156,6 +1157,12 @@ struct i915_runtime_pm {
>  	atomic_t wakeref_count;
>  	bool suspended;
>  	bool irqs_enabled;
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
> +	spinlock_t debug_lock;
> +	depot_stack_handle_t *debug_owners;
> +	unsigned long debug_count;
> +#endif
>  };
>  
>  enum intel_pipe_crc_source {
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 1a11c2beb7f3..ac513fd70315 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -41,6 +41,8 @@
>  #include <drm/drm_atomic.h>
>  #include <media/cec-notifier.h>
>  
> +struct drm_printer;
> +
>  /**
>   * __wait_for - magic wait macro
>   *
> @@ -2084,6 +2086,7 @@ bool intel_psr_enabled(struct intel_dp *intel_dp);
>  void intel_init_quirks(struct drm_i915_private *dev_priv);
>  
>  /* intel_runtime_pm.c */
> +void intel_runtime_pm_init_early(struct drm_i915_private *dev_priv);
>  int intel_power_domains_init(struct drm_i915_private *);
>  void intel_power_domains_cleanup(struct drm_i915_private *dev_priv);
>  void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume);
> @@ -2106,6 +2109,7 @@ void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume);
>  void bxt_display_core_uninit(struct drm_i915_private *dev_priv);
>  void intel_runtime_pm_enable(struct drm_i915_private *dev_priv);
>  void intel_runtime_pm_disable(struct drm_i915_private *dev_priv);
> +void intel_runtime_pm_cleanup(struct drm_i915_private *dev_priv);
>  const char *
>  intel_display_power_domain_str(enum intel_display_power_domain domain);
>  
> @@ -2123,23 +2127,23 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
>  			    u8 req_slices);
>  
>  static inline void
> -assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv)
> +assert_rpm_device_not_suspended(struct drm_i915_private *i915)
>  {
> -	WARN_ONCE(dev_priv->runtime_pm.suspended,
> +	WARN_ONCE(i915->runtime_pm.suspended,
>  		  "Device suspended during HW access\n");
>  }
>  
>  static inline void
> -assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
> +assert_rpm_wakelock_held(struct drm_i915_private *i915)
>  {
> -	assert_rpm_device_not_suspended(dev_priv);
> -	WARN_ONCE(!atomic_read(&dev_priv->runtime_pm.wakeref_count),
> +	assert_rpm_device_not_suspended(i915);
> +	WARN_ONCE(!atomic_read(&i915->runtime_pm.wakeref_count),
>  		  "RPM wakelock ref not held during HW access");
>  }
>  
>  /**
>   * disable_rpm_wakeref_asserts - disable the RPM assert checks
> - * @dev_priv: i915 device instance
> + * @i915: i915 device instance
>   *
>   * This function disable asserts that check if we hold an RPM wakelock
>   * reference, while keeping the device-not-suspended checks still enabled.
> @@ -2156,14 +2160,14 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
>   * enable_rpm_wakeref_asserts().
>   */
>  static inline void
> -disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
> +disable_rpm_wakeref_asserts(struct drm_i915_private *i915)
>  {
> -	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
> +	atomic_inc(&i915->runtime_pm.wakeref_count);
>  }
>  
>  /**
>   * enable_rpm_wakeref_asserts - re-enable the RPM assert checks
> - * @dev_priv: i915 device instance
> + * @i915: i915 device instance
>   *
>   * This function re-enables the RPM assert checks after disabling them with
>   * disable_rpm_wakeref_asserts. It's meant to be used only in special
> @@ -2173,15 +2177,25 @@ disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
>   * disable_rpm_wakeref_asserts().
>   */
>  static inline void
> -enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
> +enable_rpm_wakeref_asserts(struct drm_i915_private *i915)
>  {
> -	atomic_dec(&dev_priv->runtime_pm.wakeref_count);
> +	atomic_dec(&i915->runtime_pm.wakeref_count);
>  }
>  
> -void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
> -bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv);
> -void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
> -void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
> +void intel_runtime_pm_get(struct drm_i915_private *i915);
> +bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
> +void intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
> +void intel_runtime_pm_put(struct drm_i915_private *i915);
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
> +void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
> +				    struct drm_printer *p);
> +#else
> +static inline void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
> +						  struct drm_printer *p)
> +{
> +}
> +#endif
>  
>  void chv_phy_powergate_lanes(struct intel_encoder *encoder,
>  			     bool override, unsigned int mask);
> diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
> index 9e9501f82f06..c813a2b972c2 100644
> --- a/drivers/gpu/drm/i915/intel_runtime_pm.c
> +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
> @@ -29,6 +29,8 @@
>  #include <linux/pm_runtime.h>
>  #include <linux/vgaarb.h>
>  
> +#include <drm/drm_print.h>
> +
>  #include "i915_drv.h"
>  #include "intel_drv.h"
>  
> @@ -49,6 +51,186 @@
>   * present for a given platform.
>   */
>  
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
> +
> +#include <linux/sort.h>
> +
> +#define STACKDEPTH 8
> +
> +static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +{
> +	spin_lock_init(&i915->runtime_pm.debug_lock);
> +}
> +
> +static noinline void
> +track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +{
> +	struct i915_runtime_pm *rpm = &i915->runtime_pm;
> +	unsigned long entries[STACKDEPTH];
> +	struct stack_trace trace = {
> +		.entries = entries,
> +		.max_entries = ARRAY_SIZE(entries),
> +		.skip = 0 /* gcc is ignoring noinline for tail calls? */
> +	};
> +	unsigned long flags;
> +	depot_stack_handle_t stack, *stacks;
> +
> +	if (!HAS_RUNTIME_PM(i915))
> +		return;
> +
> +	save_stack_trace(&trace);
> +	if (trace.nr_entries &&
> +	    trace.entries[trace.nr_entries - 1] == ULONG_MAX)
> +		trace.nr_entries--;
> +
> +	stack = depot_save_stack(&trace, GFP_NOWAIT | __GFP_NOWARN);
> +	if (!stack)
> +		return;
> +
> +	spin_lock_irqsave(&rpm->debug_lock, flags);
> +	stacks = krealloc(rpm->debug_owners,
> +			  (rpm->debug_count + 1) * sizeof(*stacks),
> +			  GFP_NOWAIT | __GFP_NOWARN);
> +	if (stacks) {
> +		stacks[rpm->debug_count++] = stack;
> +		rpm->debug_owners = stacks;
> +	}
> +	spin_unlock_irqrestore(&rpm->debug_lock, flags);
> +}
> +
> +static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +{
> +	struct i915_runtime_pm *rpm = &i915->runtime_pm;
> +	depot_stack_handle_t *stacks;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&rpm->debug_lock, flags);
> +	stacks = fetch_and_zero(&rpm->debug_owners);
> +	rpm->debug_count = 0;
> +	spin_unlock_irqrestore(&rpm->debug_lock, flags);
> +
> +	kfree(stacks);
> +}
> +
> +static int cmphandle(const void *_a, const void *_b)
> +{
> +	const depot_stack_handle_t * const a = _a, * const b = _b;
> +
> +	if (*a < *b)
> +		return -1;
> +	else if (*a > *b)
> +		return 1;
> +	else
> +		return 0;
> +}
> +
> +static void __print_intel_runtime_pm_wakeref(struct drm_printer *p,
> +					     depot_stack_handle_t *stacks,
> +					     unsigned long count)
> +{
> +	unsigned long entries[STACKDEPTH];
> +	unsigned long i;
> +	char *buf;
> +
> +	drm_printf(p, "Wakeref count: %lu\n", count);
> +
> +	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
> +	if (!buf)
> +		return;
> +
> +	sort(stacks, count, sizeof(*stacks), cmphandle, NULL);
> +
> +	for (i = 0; i < count; i++) {
> +		struct stack_trace trace = {
> +			.entries = entries,
> +			.max_entries = ARRAY_SIZE(entries),
> +		};
> +		depot_stack_handle_t stack = stacks[i];
> +		unsigned long rep;
> +
> +		rep = 1;
> +		while (i + 1 < count && stacks[i + 1] == stack)
> +			rep++, i++;
> +		depot_fetch_stack(stack, &trace);
> +		snprint_stack_trace(buf, PAGE_SIZE, &trace, 2);
> +		drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
> +	}
> +
> +	kfree(buf);
> +}
> +
> +static void show_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +{
> +	struct i915_runtime_pm *rpm = &i915->runtime_pm;
> +	depot_stack_handle_t *stacks;
> +	unsigned long flags, count;
> +	struct drm_printer p;
> +
> +	spin_lock_irqsave(&rpm->debug_lock, flags);
> +	stacks = fetch_and_zero(&rpm->debug_owners);
> +	count = fetch_and_zero(&rpm->debug_count);
> +	spin_unlock_irqrestore(&rpm->debug_lock, flags);
> +	if (!count)
> +		return;
> +
> +	p = drm_debug_printer("i915");
> +	__print_intel_runtime_pm_wakeref(&p, stacks, count);
> +
> +	kfree(stacks);
> +}
> +
> +void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
> +				    struct drm_printer *p)
> +{
> +	depot_stack_handle_t *stacks = NULL;
> +	unsigned long count = 0;
> +
> +	do {
> +		struct i915_runtime_pm *rpm = &i915->runtime_pm;
> +		unsigned long alloc = count;
> +		depot_stack_handle_t *s;
> +
> +		spin_lock_irq(&rpm->debug_lock);
> +		count = rpm->debug_count;
> +		if (count <= alloc)
> +			memcpy(stacks, rpm->debug_owners, count * sizeof(*s));
> +		spin_unlock_irq(&rpm->debug_lock);
> +		if (count <= alloc)
> +			break;
> +
> +		s = krealloc(stacks, count * sizeof(*s), GFP_KERNEL);
> +		if (!s)
> +			goto out;
> +
> +		stacks = s;
> +	} while (1);
> +
> +	__print_intel_runtime_pm_wakeref(p, stacks, count);
> +
> +out:
> +	kfree(stacks);
> +}
> +
> +#else
> +
> +static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +{
> +}
> +
> +static void track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +{
> +}
> +
> +static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +{
> +}
> +
> +static void show_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +{
> +}
> +
> +#endif
> +
>  bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
>  					 enum i915_power_well_id power_well_id);
>  
> @@ -3986,7 +4168,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
>  
>  /**
>   * intel_runtime_pm_get - grab a runtime pm reference
> - * @dev_priv: i915 device instance
> + * @i915: i915 device instance
>   *
>   * This function grabs a device-level runtime pm reference (mostly used for GEM
>   * code to ensure the GTT or GT is on) and ensures that it is powered up.
> @@ -3994,22 +4176,24 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
>   * Any runtime pm reference obtained by this function must have a symmetric
>   * call to intel_runtime_pm_put() to release the reference again.
>   */
> -void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
> +void intel_runtime_pm_get(struct drm_i915_private *i915)
>  {
> -	struct pci_dev *pdev = dev_priv->drm.pdev;
> +	struct pci_dev *pdev = i915->drm.pdev;
>  	struct device *kdev = &pdev->dev;
>  	int ret;
>  
>  	ret = pm_runtime_get_sync(kdev);
>  	WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret);
>  
> -	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
> -	assert_rpm_wakelock_held(dev_priv);
> +	atomic_inc(&i915->runtime_pm.wakeref_count);
> +	assert_rpm_wakelock_held(i915);
> +
> +	track_intel_runtime_pm_wakeref(i915);
>  }
>  
>  /**
>   * intel_runtime_pm_get_if_in_use - grab a runtime pm reference if device in use
> - * @dev_priv: i915 device instance
> + * @i915: i915 device instance
>   *
>   * This function grabs a device-level runtime pm reference if the device is
>   * already in use and ensures that it is powered up. It is illegal to try
> @@ -4020,10 +4204,10 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
>   *
>   * Returns: True if the wakeref was acquired, or False otherwise.
>   */
> -bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
> +bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
>  {
>  	if (IS_ENABLED(CONFIG_PM)) {
> -		struct pci_dev *pdev = dev_priv->drm.pdev;
> +		struct pci_dev *pdev = i915->drm.pdev;
>  		struct device *kdev = &pdev->dev;
>  
>  		/*
> @@ -4036,15 +4220,17 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
>  			return false;
>  	}
>  
> -	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
> -	assert_rpm_wakelock_held(dev_priv);
> +	atomic_inc(&i915->runtime_pm.wakeref_count);
> +	assert_rpm_wakelock_held(i915);
> +
> +	track_intel_runtime_pm_wakeref(i915);
>  
>  	return true;
>  }
>  
>  /**
>   * intel_runtime_pm_get_noresume - grab a runtime pm reference
> - * @dev_priv: i915 device instance
> + * @i915: i915 device instance
>   *
>   * This function grabs a device-level runtime pm reference (mostly used for GEM
>   * code to ensure the GTT or GT is on).
> @@ -4059,32 +4245,35 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
>   * Any runtime pm reference obtained by this function must have a symmetric
>   * call to intel_runtime_pm_put() to release the reference again.
>   */
> -void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv)
> +void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
>  {
> -	struct pci_dev *pdev = dev_priv->drm.pdev;
> +	struct pci_dev *pdev = i915->drm.pdev;
>  	struct device *kdev = &pdev->dev;
>  
> -	assert_rpm_wakelock_held(dev_priv);
> +	assert_rpm_wakelock_held(i915);
>  	pm_runtime_get_noresume(kdev);
>  
> -	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
> +	atomic_inc(&i915->runtime_pm.wakeref_count);
> +
> +	track_intel_runtime_pm_wakeref(i915);
>  }
>  
>  /**
>   * intel_runtime_pm_put - release a runtime pm reference
> - * @dev_priv: i915 device instance
> + * @i915: i915 device instance
>   *
>   * This function drops the device-level runtime pm reference obtained by
>   * intel_runtime_pm_get() and might power down the corresponding
>   * hardware block right away if this is the last reference.
>   */
> -void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
> +void intel_runtime_pm_put(struct drm_i915_private *i915)
>  {
> -	struct pci_dev *pdev = dev_priv->drm.pdev;
> +	struct pci_dev *pdev = i915->drm.pdev;
>  	struct device *kdev = &pdev->dev;
>  
> -	assert_rpm_wakelock_held(dev_priv);
> -	atomic_dec(&dev_priv->runtime_pm.wakeref_count);
> +	assert_rpm_wakelock_held(i915);
> +	if (atomic_dec_and_test(&i915->runtime_pm.wakeref_count))
> +		untrack_intel_runtime_pm_wakeref(i915);
>  
>  	pm_runtime_mark_last_busy(kdev);
>  	pm_runtime_put_autosuspend(kdev);
> @@ -4092,7 +4281,7 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
>  
>  /**
>   * intel_runtime_pm_enable - enable runtime pm
> - * @dev_priv: i915 device instance
> + * @i915: i915 device instance
>   *
>   * This function enables runtime pm at the end of the driver load sequence.
>   *
> @@ -4100,9 +4289,9 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
>   * subordinate display power domains. That is done by
>   * intel_power_domains_enable().
>   */
> -void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
> +void intel_runtime_pm_enable(struct drm_i915_private *i915)
>  {
> -	struct pci_dev *pdev = dev_priv->drm.pdev;
> +	struct pci_dev *pdev = i915->drm.pdev;
>  	struct device *kdev = &pdev->dev;
>  
>  	/*
> @@ -4124,7 +4313,7 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
>  	 * so the driver's own RPM reference tracking asserts also work on
>  	 * platforms without RPM support.
>  	 */
> -	if (!HAS_RUNTIME_PM(dev_priv)) {
> +	if (!HAS_RUNTIME_PM(i915)) {
>  		int ret;
>  
>  		pm_runtime_dont_use_autosuspend(kdev);
> @@ -4142,17 +4331,36 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
>  	pm_runtime_put_autosuspend(kdev);
>  }
>  
> -void intel_runtime_pm_disable(struct drm_i915_private *dev_priv)
> +void intel_runtime_pm_disable(struct drm_i915_private *i915)
>  {
> -	struct pci_dev *pdev = dev_priv->drm.pdev;
> +	struct pci_dev *pdev = i915->drm.pdev;
>  	struct device *kdev = &pdev->dev;
>  
>  	/* Transfer rpm ownership back to core */
> -	WARN(pm_runtime_get_sync(&dev_priv->drm.pdev->dev) < 0,
> +	WARN(pm_runtime_get_sync(kdev) < 0,
>  	     "Failed to pass rpm ownership back to core\n");
>  
>  	pm_runtime_dont_use_autosuspend(kdev);
>  
> -	if (!HAS_RUNTIME_PM(dev_priv))
> +	if (!HAS_RUNTIME_PM(i915))
>  		pm_runtime_put(kdev);
>  }
> +
> +void intel_runtime_pm_cleanup(struct drm_i915_private *i915)
> +{
> +	struct i915_runtime_pm *rpm = &i915->runtime_pm;
> +
> +	if (WARN(atomic_read(&rpm->wakeref_count),
> +		 "i915->runtime_pm.wakeref_count=%d on cleanup\n",
> +		 atomic_read(&rpm->wakeref_count))) {
> +		show_intel_runtime_pm_wakeref(i915);
> +		atomic_set(&rpm->wakeref_count, 0);
> +	}
> +
> +	untrack_intel_runtime_pm_wakeref(i915);
> +}
> +
> +void intel_runtime_pm_init_early(struct drm_i915_private *i915)
> +{
> +	init_intel_runtime_pm_wakeref(i915);
> +}
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index baa3c38919de..082809569681 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -154,15 +154,17 @@ struct drm_i915_private *mock_gem_device(void)
>  	pdev->dev.archdata.iommu = (void *)-1;
>  #endif
>  
> +	i915 = (struct drm_i915_private *)(pdev + 1);
> +	pci_set_drvdata(pdev, i915);
> +
> +	intel_runtime_pm_init_early(i915);
> +
>  	dev_pm_domain_set(&pdev->dev, &pm_domain);
>  	pm_runtime_enable(&pdev->dev);
>  	pm_runtime_dont_use_autosuspend(&pdev->dev);
>  	if (pm_runtime_enabled(&pdev->dev))
>  		WARN_ON(pm_runtime_get_sync(&pdev->dev));
>  
> -	i915 = (struct drm_i915_private *)(pdev + 1);
> -	pci_set_drvdata(pdev, i915);
> -
>  	err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev);
>  	if (err) {
>  		pr_err("Failed to initialise mock GEM device: err=%d\n", err);
> -- 
> 2.20.1
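
The snapshot loop in print_intel_runtime_pm_wakeref() above is a common
pattern: the buffer cannot be allocated under the spinlock (GFP_KERNEL
may sleep), so the size is sampled under the lock, the buffer is grown
outside it, and the copy is retried until it fits. A minimal userspace
analogue, assuming a pthread spinlock stands in for rpm->debug_lock
(all names here are illustrative, not from the patch):

	#include <pthread.h>
	#include <stdlib.h>
	#include <string.h>

	struct tracker {
		pthread_spinlock_t lock;   /* rpm->debug_lock */
		unsigned long *owners;     /* rpm->debug_owners */
		size_t count;              /* rpm->debug_count */
	};

	/* Copy a consistent snapshot of t->owners without allocating
	 * while the lock is held: sample the size, grow unlocked, retry. */
	static unsigned long *snapshot(struct tracker *t, size_t *out_count)
	{
		unsigned long *buf = NULL;
		size_t alloc = 0;

		for (;;) {
			unsigned long *grown;
			size_t count;

			pthread_spin_lock(&t->lock);
			count = t->count;
			if (count && count <= alloc)
				memcpy(buf, t->owners, count * sizeof(*buf));
			pthread_spin_unlock(&t->lock);

			if (count <= alloc) {
				*out_count = count;
				return buf; /* NULL iff count == 0 */
			}

			grown = realloc(buf, count * sizeof(*buf));
			if (!grown) { /* old buf is still valid, free it */
				free(buf);
				*out_count = 0;
				return NULL;
			}
			buf = grown;
			alloc = count;
		}
	}

(Initialisation of the tracker is omitted; the point is the
sample/grow/retry shape shared with the kernel code above.)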

* ✗ Fi.CI.BAT: failure for series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim (rev3)
  2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
                   ` (49 preceding siblings ...)
  2019-01-07 17:10 ` ✗ Fi.CI.IGT: failure for series starting with [01/46] " Patchwork
@ 2019-01-08 13:50 ` Patchwork
  50 siblings, 0 replies; 111+ messages in thread
From: Patchwork @ 2019-01-08 13:50 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim (rev3)
URL   : https://patchwork.freedesktop.org/series/54803/
State : failure

== Summary ==

Applying: drm/i915: Return immediately if trylock fails for direct-reclaim
Using index info to reconstruct a base tree...
M	drivers/gpu/drm/i915/i915_drv.h
M	drivers/gpu/drm/i915/i915_gem_gtt.c
M	drivers/gpu/drm/i915/i915_gem_shrinker.c
Falling back to patching base and 3-way merge...
No changes -- Patch already applied.
Applying: drm/i915: Report the number of closed vma held by each context in debugfs
Using index info to reconstruct a base tree...
M	drivers/gpu/drm/i915/i915_debugfs.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/i915_debugfs.c
No changes -- Patch already applied.
Applying: drm/i915: Track all held rpm wakerefs
Applying: drm/i915: Markup paired operations on wakerefs
Applying: drm/i915: Track GT wakeref
Applying: drm/i915: Track the rpm wakerefs for error handling
Applying: drm/i915: Mark up sysfs with rpm wakeref tracking
Applying: drm/i915: Mark up debugfs with rpm wakeref tracking
Applying: drm/i915/perf: Track the rpm wakeref
Applying: drm/i915/pmu: Track rpm wakeref
Applying: drm/i915/guc: Track the rpm wakeref
Applying: drm/i915/gem: Track the rpm wakerefs
Applying: drm/i915/fb: Track rpm wakerefs
Applying: drm/i915/hotplug: Track temporary rpm wakeref
Applying: drm/i915/panel: Track temporary rpm wakeref
Applying: drm/i915/selftests: Mark up rpm wakerefs
Applying: drm/i915: Syntatic sugar for using intel_runtime_pm
Applying: drm/i915: Markup paired operations on display power domains
Applying: drm/i915: Track the wakeref used to initialise display power domains
Applying: drm/i915: Combined gt.awake/gt.power wakerefs
Applying: drm/i915/dp: Markup pps lock power well
Applying: drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice
Applying: drm/i915: Mark up Ironlake ips with rpm wakerefs
Applying: drm/i915: Serialise concurrent calls to i915_gem_set_wedged()
Applying: drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex
Applying: drm/i915: Pull all the reset functionality together into i915_reset.c
error: patch failed: drivers/gpu/drm/i915/selftests/intel_lrc.c:4
error: drivers/gpu/drm/i915/selftests/intel_lrc.c: patch does not apply
error: Did you hand edit your patch?
It does not apply to blobs recorded in its index.
hint: Use 'git am --show-current-patch' to see the failed patch
Using index info to reconstruct a base tree...
M	drivers/gpu/drm/i915/i915_debugfs.c
Patch failed at 0026 drm/i915: Pull all the reset functionality together into i915_reset.c
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".

* Re: [PATCH 04/46] drm/i915: Markup paired operations on wakerefs
  2019-01-07 11:54 ` [PATCH 04/46] drm/i915: Markup paired operations on wakerefs Chris Wilson
@ 2019-01-08 16:23   ` Mika Kuoppala
  2019-01-08 16:41     ` Chris Wilson
  0 siblings, 1 reply; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-08 16:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> The majority of runtime-pm operations are bounded and scoped within a
> function; for these it is easy to verify that the wakerefs are handled
> correctly. We can employ the compiler to help us, and reduce the number
> of wakerefs tracked when debugging, by passing around cookies provided
> by the various rpm_get functions to their rpm_put counterpart. This
> makes the pairing explicit, and given the required wakeref cookie the
> compiler can verify that we pass an initialised value to the rpm_put
> (quite handy for double checking error paths).
>
> For regular builds, the compiler should be able to eliminate the unused
> local variables and the program growth should be minimal. Fwiw, it came
> out as a net improvement as gcc was able to refactor rpm_get and
> rpm_get_if_in_use together.
>
> v2: Just s/rpm_put/rpm_put_unchecked/ everywhere, leaving the manual
> mark up for smaller, more targeted patches.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>
> ---
>  drivers/gpu/drm/i915/Kconfig.debug            |  1 +
>  drivers/gpu/drm/i915/gvt/aperture_gm.c        |  8 +-
>  drivers/gpu/drm/i915/gvt/gvt.h                |  2 +-
>  drivers/gpu/drm/i915/gvt/sched_policy.c       |  2 +-
>  drivers/gpu/drm/i915/gvt/scheduler.c          |  4 +-
>  drivers/gpu/drm/i915/i915_debugfs.c           | 54 +++++------
>  drivers/gpu/drm/i915/i915_drv.h               |  2 +
>  drivers/gpu/drm/i915/i915_gem.c               | 20 ++---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  2 +-
>  drivers/gpu/drm/i915/i915_gem_fence_reg.c     |  2 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.c           |  8 +-
>  drivers/gpu/drm/i915/i915_gem_shrinker.c      | 10 +--
>  drivers/gpu/drm/i915/i915_irq.c               |  2 +-
>  drivers/gpu/drm/i915/i915_perf.c              |  4 +-
>  drivers/gpu/drm/i915/i915_pmu.c               |  6 +-
>  drivers/gpu/drm/i915/i915_sysfs.c             | 12 +--
>  drivers/gpu/drm/i915/intel_display.c          |  2 +-
>  drivers/gpu/drm/i915/intel_drv.h              | 15 +++-
>  drivers/gpu/drm/i915/intel_engine_cs.c        |  4 +-
>  drivers/gpu/drm/i915/intel_fbdev.c            |  4 +-
>  drivers/gpu/drm/i915/intel_guc_log.c          |  6 +-
>  drivers/gpu/drm/i915/intel_hotplug.c          |  2 +-
>  drivers/gpu/drm/i915/intel_huc.c              |  2 +-
>  drivers/gpu/drm/i915/intel_panel.c            |  2 +-
>  drivers/gpu/drm/i915/intel_runtime_pm.c       | 89 +++++++++++++++----
>  drivers/gpu/drm/i915/intel_uncore.c           |  2 +-
>  drivers/gpu/drm/i915/selftests/huge_pages.c   |  2 +-
>  drivers/gpu/drm/i915/selftests/i915_gem.c     | 10 +--
>  .../drm/i915/selftests/i915_gem_coherency.c   |  2 +-
>  .../gpu/drm/i915/selftests/i915_gem_context.c | 10 +--
>  .../gpu/drm/i915/selftests/i915_gem_evict.c   |  2 +-
>  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  4 +-
>  .../gpu/drm/i915/selftests/i915_gem_object.c  |  6 +-
>  drivers/gpu/drm/i915/selftests/i915_request.c |  8 +-
>  drivers/gpu/drm/i915/selftests/intel_guc.c    |  4 +-
>  .../gpu/drm/i915/selftests/intel_hangcheck.c  |  6 +-
>  drivers/gpu/drm/i915/selftests/intel_lrc.c    | 10 +--
>  .../drm/i915/selftests/intel_workarounds.c    | 10 +--
>  38 files changed, 203 insertions(+), 138 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
> index a97929c47466..ad4d71161dda 100644
> --- a/drivers/gpu/drm/i915/Kconfig.debug
> +++ b/drivers/gpu/drm/i915/Kconfig.debug
> @@ -173,6 +173,7 @@ config DRM_I915_DEBUG_RUNTIME_PM
>  	bool "Enable extra state checking for runtime PM"
>  	depends on DRM_I915
>  	default n
> +	select STACKDEPOT
>  	help
>  	  Choose this option to turn on extra state checking for the
>  	  runtime PM functionality. This may introduce overhead during
> diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
> index 359d37d5c958..1fa2f65c3cd1 100644
> --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
> +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
> @@ -180,7 +180,7 @@ static void free_vgpu_fence(struct intel_vgpu *vgpu)
>  	}
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  }
>  
>  static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
> @@ -206,7 +206,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
>  	_clear_vgpu_fence(vgpu);
>  
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  	return 0;
>  out_free_fence:
>  	gvt_vgpu_err("Failed to alloc fences\n");
> @@ -219,7 +219,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
>  		vgpu->fence.regs[i] = NULL;
>  	}
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  	return -ENOSPC;
>  }
>  
> @@ -317,7 +317,7 @@ void intel_vgpu_reset_resource(struct intel_vgpu *vgpu)
>  
>  	intel_runtime_pm_get(dev_priv);
>  	_clear_vgpu_fence(vgpu);
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
> index b4ab1dad0143..435c746c3f73 100644
> --- a/drivers/gpu/drm/i915/gvt/gvt.h
> +++ b/drivers/gpu/drm/i915/gvt/gvt.h
> @@ -597,7 +597,7 @@ static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv)
>  
>  static inline void mmio_hw_access_post(struct drm_i915_private *dev_priv)
>  {
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
> index c32e7d5e8629..f04b3b965bfc 100644
> --- a/drivers/gpu/drm/i915/gvt/sched_policy.c
> +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
> @@ -474,6 +474,6 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
>  		}
>  	}
>  	spin_unlock_bh(&scheduler->mmio_context_lock);
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  	mutex_unlock(&vgpu->gvt->sched_lock);
>  }
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index 1ad8c5e1455d..3816dcae2185 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -997,7 +997,7 @@ static int workload_thread(void *priv)
>  			intel_uncore_forcewake_put(gvt->dev_priv,
>  					FORCEWAKE_ALL);
>  
> -		intel_runtime_pm_put(gvt->dev_priv);
> +		intel_runtime_pm_put_unchecked(gvt->dev_priv);
>  		if (ret && (vgpu_is_vm_unhealthy(ret)))
>  			enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
>  	}
> @@ -1451,7 +1451,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
>  		mutex_lock(&dev_priv->drm.struct_mutex);
>  		ret = intel_gvt_scan_and_shadow_workload(workload);
>  		mutex_unlock(&dev_priv->drm.struct_mutex);
> -		intel_runtime_pm_put(dev_priv);
> +		intel_runtime_pm_put_unchecked(dev_priv);
>  	}
>  
>  	if (ret && (vgpu_is_vm_unhealthy(ret))) {
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 3a369245d7e6..6b8da14f213b 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -877,7 +877,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
>  		}
>  	}
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return 0;
>  }
> @@ -953,7 +953,7 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
>  
>  	intel_runtime_pm_get(i915);
>  	gpu = i915_capture_gpu_state(i915);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	if (IS_ERR(gpu))
>  		return PTR_ERR(gpu);
>  
> @@ -1226,7 +1226,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
>  	seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq);
>  	seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  	return ret;
>  }
>  
> @@ -1292,7 +1292,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  
>  	intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer))
>  		seq_printf(m, "Hangcheck active, timer fires in %dms\n",
> @@ -1579,7 +1579,7 @@ static int i915_drpc_info(struct seq_file *m, void *unused)
>  	else
>  		err = ironlake_drpc_info(m);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return err;
>  }
> @@ -1632,7 +1632,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused)
>  	}
>  
>  	mutex_unlock(&fbc->lock);
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return 0;
>  }
> @@ -1695,7 +1695,7 @@ static int i915_ips_status(struct seq_file *m, void *unused)
>  			seq_puts(m, "Currently: disabled\n");
>  	}
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return 0;
>  }
> @@ -1723,7 +1723,7 @@ static int i915_sr_status(struct seq_file *m, void *unused)
>  		sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
>  
>  	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	seq_printf(m, "self-refresh: %s\n", enableddisabled(sr_enabled));
>  
> @@ -1756,7 +1756,7 @@ static int i915_emon_status(struct seq_file *m, void *unused)
>  	seq_printf(m, "GFX power: %ld\n", gfx);
>  	seq_printf(m, "Total power: %ld\n", chipset + gfx);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return 0;
>  }
> @@ -1805,7 +1805,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
>  	mutex_unlock(&dev_priv->pcu_lock);
>  
>  out:
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  	return ret;
>  }
>  
> @@ -2017,7 +2017,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
>  	if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
>  		seq_puts(m, "L-shaped memory detected\n");
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return 0;
>  }
> @@ -2067,7 +2067,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>  			act_freq = intel_get_cagf(dev_priv,
>  						  I915_READ(GEN6_RPSTAT1));
>  		}
> -		intel_runtime_pm_put(dev_priv);
> +		intel_runtime_pm_put_unchecked(dev_priv);
>  	}
>  
>  	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
> @@ -2160,7 +2160,7 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data)
>  
>  	intel_runtime_pm_get(dev_priv);
>  	seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return 0;
>  }
> @@ -2192,7 +2192,7 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
>  	for (i = 0; i < 16; i++)
>  		seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i)));
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return 0;
>  }
> @@ -2601,7 +2601,7 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
>  			   dev_priv->psr.last_exit);
>  	}
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  	return 0;
>  }
>  
> @@ -2632,7 +2632,7 @@ i915_edp_psr_debug_set(void *data, u64 val)
>  	drm_modeset_drop_locks(&ctx);
>  	drm_modeset_acquire_fini(&ctx);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return ret;
>  }
> @@ -2665,7 +2665,7 @@ static int i915_energy_uJ(struct seq_file *m, void *data)
>  	intel_runtime_pm_get(dev_priv);
>  
>  	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) {
> -		intel_runtime_pm_put(dev_priv);
> +		intel_runtime_pm_put_unchecked(dev_priv);
>  		return -ENODEV;
>  	}
>  
> @@ -2673,7 +2673,7 @@ static int i915_energy_uJ(struct seq_file *m, void *data)
>  	power = I915_READ(MCH_SECP_NRG_STTS);
>  	power = (1000000 * power) >> units; /* convert to uJ */
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	seq_printf(m, "%llu", power);
>  
> @@ -2775,7 +2775,7 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
>  	seq_printf(m, "ssp base: 0x%08x\n", I915_READ(CSR_SSP_BASE));
>  	seq_printf(m, "htp: 0x%08x\n", I915_READ(CSR_HTP_SKL));
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return 0;
>  }
> @@ -3114,7 +3114,7 @@ static int i915_display_info(struct seq_file *m, void *unused)
>  	drm_connector_list_iter_end(&conn_iter);
>  	mutex_unlock(&dev->mode_config.mutex);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return 0;
>  }
> @@ -3139,7 +3139,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>  	for_each_engine(engine, dev_priv, id)
>  		intel_engine_dump(engine, &p, "%s\n", engine->name);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return 0;
>  }
> @@ -3265,7 +3265,7 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf,
>  	dev_priv->wm.distrust_bios_wm = true;
>  	dev_priv->ipc_enabled = enable;
>  	intel_enable_ipc(dev_priv);
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return len;
>  }
> @@ -4090,7 +4090,7 @@ i915_drop_caches_set(void *data, u64 val)
>  		i915_gem_drain_freed_objects(i915);
>  
>  out:
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  
>  	return ret;
>  }
> @@ -4112,7 +4112,7 @@ i915_cache_sharing_get(void *data, u64 *val)
>  
>  	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
>  
> @@ -4140,7 +4140,7 @@ i915_cache_sharing_set(void *data, u64 val)
>  	snpcr |= (val << GEN6_MBC_SNPCR_SHIFT);
>  	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  	return 0;
>  }
>  
> @@ -4388,7 +4388,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
>  		gen10_sseu_device_status(dev_priv, &sseu);
>  	}
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	i915_print_sseu_info(m, false, &sseu);
>  
> @@ -4416,7 +4416,7 @@ static int i915_forcewake_release(struct inode *inode, struct file *file)
>  		return 0;
>  
>  	intel_uncore_forcewake_user_put(i915);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 60b98103aba3..464ff89d1464 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -131,6 +131,8 @@ bool i915_error_injected(void);
>  	__i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \
>  		      fmt, ##__VA_ARGS__)
>  
> +typedef depot_stack_handle_t intel_wakeref_t;
> +
>  enum hpd_pin {
>  	HPD_NONE = 0,
>  	HPD_TV = HPD_NONE,     /* TV is known to be unreliable */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 987acbb8280f..5ac32ea4c8fa 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -176,7 +176,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
>  
>  	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ);
>  
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  
>  	return i915->gt.epoch;
>  }
> @@ -815,7 +815,7 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
>  	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
>  
>  	spin_unlock_irq(&dev_priv->uncore.lock);
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  }
>  
>  static void
> @@ -1150,7 +1150,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
>  		i915_vma_unpin(vma);
>  	}
>  out_unlock:
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  
>  	return ret;
> @@ -1357,7 +1357,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
>  		i915_vma_unpin(vma);
>  	}
>  out_rpm:
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  out_unlock:
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return ret;
> @@ -1969,7 +1969,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>  err_unlock:
>  	mutex_unlock(&dev->struct_mutex);
>  err_rpm:
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  	i915_gem_object_unpin_pages(obj);
>  err:
>  	switch (ret) {
> @@ -2069,7 +2069,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
>  	wmb();
>  
>  out:
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  }
>  
>  void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
> @@ -4766,7 +4766,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
>  		if (on)
>  			cond_resched();
>  	}
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  }
>  
>  static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
> @@ -4902,7 +4902,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
>  	intel_engines_sanitize(i915, false);
>  
>  	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  
>  	i915_gem_contexts_lost(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
> @@ -4966,12 +4966,12 @@ int i915_gem_suspend(struct drm_i915_private *i915)
>  	if (WARN_ON(!intel_engines_are_idle(i915)))
>  		i915_gem_set_wedged(i915); /* no hope, discard everything */
>  
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	return 0;
>  
>  err_unlock:
>  	mutex_unlock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	return ret;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 5b7cd7add63e..a52fa42ed8b1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2425,7 +2425,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>  		eb_release_vmas(&eb);
>  	mutex_unlock(&dev->struct_mutex);
>  err_rpm:
> -	intel_runtime_pm_put(eb.i915);
> +	intel_runtime_pm_put_unchecked(eb.i915);
>  	i915_gem_context_put(eb.ctx);
>  err_destroy:
>  	eb_destroy(&eb);
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> index 24df2e2a8fc1..1f72f5047945 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> @@ -259,7 +259,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
>  	 */
>  	if (intel_runtime_pm_get_if_in_use(fence->i915)) {
>  		fence_write(fence, vma);
> -		intel_runtime_pm_put(fence->i915);
> +		intel_runtime_pm_put_unchecked(fence->i915);
>  	}
>  
>  	if (vma) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 5cc8968eb3bf..6dac9614f7ba 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2537,7 +2537,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
>  
>  	intel_runtime_pm_get(i915);
>  	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  
>  	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
>  
> @@ -2557,7 +2557,7 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
>  
>  	intel_runtime_pm_get(i915);
>  	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  }
>  
>  static int aliasing_gtt_bind_vma(struct i915_vma *vma,
> @@ -2591,7 +2591,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
>  	if (flags & I915_VMA_GLOBAL_BIND) {
>  		intel_runtime_pm_get(i915);
>  		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
> -		intel_runtime_pm_put(i915);
> +		intel_runtime_pm_put_unchecked(i915);
>  	}
>  
>  	return 0;
> @@ -2604,7 +2604,7 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
>  	if (vma->flags & I915_VMA_GLOBAL_BIND) {
>  		intel_runtime_pm_get(i915);
>  		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
> -		intel_runtime_pm_put(i915);
> +		intel_runtime_pm_put_unchecked(i915);
>  	}
>  
>  	if (vma->flags & I915_VMA_LOCAL_BIND) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 72d6ea0cac7e..16693dd4d019 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -266,7 +266,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
>  	}
>  
>  	if (flags & I915_SHRINK_BOUND)
> -		intel_runtime_pm_put(i915);
> +		intel_runtime_pm_put_unchecked(i915);
>  
>  	i915_retire_requests(i915);
>  
> @@ -300,7 +300,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
>  				I915_SHRINK_BOUND |
>  				I915_SHRINK_UNBOUND |
>  				I915_SHRINK_ACTIVE);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  
>  	return freed;
>  }
> @@ -378,7 +378,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
>  					 I915_SHRINK_ACTIVE |
>  					 I915_SHRINK_BOUND |
>  					 I915_SHRINK_UNBOUND);
> -		intel_runtime_pm_put(i915);
> +		intel_runtime_pm_put_unchecked(i915);
>  	}
>  
>  	shrinker_unlock(i915, unlock);
> @@ -423,7 +423,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
>  	freed_pages = i915_gem_shrink(i915, -1UL, NULL,
>  				      I915_SHRINK_BOUND |
>  				      I915_SHRINK_UNBOUND);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  
>  	/* Because we may be allocating inside our own driver, we cannot
>  	 * assert that there are no objects with pinned pages that are not
> @@ -479,7 +479,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
>  				       I915_SHRINK_BOUND |
>  				       I915_SHRINK_UNBOUND |
>  				       I915_SHRINK_VMAPS);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  
>  	/* We also want to clear any cached iomaps as they wrap vmap */
>  	list_for_each_entry_safe(vma, next,
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index fbb094ecf6c9..72b799c5e8f6 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -3375,7 +3375,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
>  	wake_up_all(&dev_priv->gpu_error.reset_queue);
>  
>  out:
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  }
>  
>  /* Called from drm generic code, passed 'crtc' which
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 5b1ae5ed97b3..e4dfd1477c78 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1365,7 +1365,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>  	free_oa_buffer(dev_priv);
>  
>  	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	if (stream->ctx)
>  		oa_put_render_ctx_id(stream);
> @@ -2123,7 +2123,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>  	put_oa_config(dev_priv, stream->oa_config);
>  
>  	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  err_config:
>  	if (stream->ctx)
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index d6c8f8fdfda5..c99fcfce79d5 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -210,7 +210,7 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
>  	if (fw)
>  		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  }
>  
>  static void
> @@ -231,7 +231,7 @@ frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
>  		    intel_runtime_pm_get_if_in_use(dev_priv)) {
>  			val = intel_get_cagf(dev_priv,
>  					     I915_READ_NOTRACE(GEN6_RPSTAT1));
> -			intel_runtime_pm_put(dev_priv);
> +			intel_runtime_pm_put_unchecked(dev_priv);
>  		}
>  
>  		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
> @@ -448,7 +448,7 @@ static u64 get_rc6(struct drm_i915_private *i915)
>  
>  	if (intel_runtime_pm_get_if_in_use(i915)) {
>  		val = __get_rc6(i915);
> -		intel_runtime_pm_put(i915);
> +		intel_runtime_pm_put_unchecked(i915);
>  
>  		/*
>  		 * If we are coming back from being runtime suspended we must
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index c0cfe7ae2ba5..53c20e103d56 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -46,7 +46,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv,
>  
>  	intel_runtime_pm_get(dev_priv);
>  	res = intel_rc6_residency_us(dev_priv, reg);
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return DIV_ROUND_CLOSEST_ULL(res, 1000);
>  }
> @@ -274,7 +274,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
>  	}
>  	mutex_unlock(&dev_priv->pcu_lock);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return snprintf(buf, PAGE_SIZE, "%d\n", ret);
>  }
> @@ -371,7 +371,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
>  	    val > rps->max_freq ||
>  	    val < rps->min_freq_softlimit) {
>  		mutex_unlock(&dev_priv->pcu_lock);
> -		intel_runtime_pm_put(dev_priv);
> +		intel_runtime_pm_put_unchecked(dev_priv);
>  		return -EINVAL;
>  	}
>  
> @@ -392,7 +392,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
>  
>  	mutex_unlock(&dev_priv->pcu_lock);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return ret ?: count;
>  }
> @@ -429,7 +429,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
>  	    val > rps->max_freq ||
>  	    val > rps->max_freq_softlimit) {
>  		mutex_unlock(&dev_priv->pcu_lock);
> -		intel_runtime_pm_put(dev_priv);
> +		intel_runtime_pm_put_unchecked(dev_priv);
>  		return -EINVAL;
>  	}
>  
> @@ -446,7 +446,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
>  
>  	mutex_unlock(&dev_priv->pcu_lock);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return ret ?: count;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 696e6f5680df..c6000aa47a8d 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -2102,7 +2102,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
>  err:
>  	atomic_dec(&dev_priv->gpu_error.pending_fb_pin);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  	return vma;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index ac513fd70315..a1e4e1033289 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -29,6 +29,7 @@
>  #include <linux/i2c.h>
>  #include <linux/hdmi.h>
>  #include <linux/sched/clock.h>
> +#include <linux/stackdepot.h>
>  #include <drm/i915_drm.h>
>  #include "i915_drv.h"
>  #include <drm/drm_crtc.h>
> @@ -2182,10 +2183,16 @@ enable_rpm_wakeref_asserts(struct drm_i915_private *i915)
>  	atomic_dec(&i915->runtime_pm.wakeref_count);
>  }
>  
> -void intel_runtime_pm_get(struct drm_i915_private *i915);
> -bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
> -void intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
> -void intel_runtime_pm_put(struct drm_i915_private *i915);
> +intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915);
> +intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
> +intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
> +
> +void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915);
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
> +void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref);
> +#else
> +#define intel_runtime_pm_put(i915, wref) intel_runtime_pm_put_unchecked(i915)
> +#endif
>  
>  #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
>  void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 236cd040f271..85131166589c 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -928,7 +928,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
>  	if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
>  		idle = false;
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return idle;
>  }
> @@ -1485,7 +1485,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>  
>  	if (intel_runtime_pm_get_if_in_use(engine->i915)) {
>  		intel_engine_print_registers(engine, m);
> -		intel_runtime_pm_put(engine->i915);
> +		intel_runtime_pm_put_unchecked(engine->i915);
>  	} else {
>  		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
>  	}
> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> index fb5bb5b32a60..11d877b908e2 100644
> --- a/drivers/gpu/drm/i915/intel_fbdev.c
> +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> @@ -277,7 +277,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	ifbdev->vma = vma;
>  	ifbdev->vma_flags = flags;
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  	mutex_unlock(&dev->struct_mutex);
>  	vga_switcheroo_client_fb_set(pdev, info);
>  	return 0;
> @@ -285,7 +285,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  out_unpin:
>  	intel_unpin_fb_vma(vma, flags);
>  out_unlock:
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  	mutex_unlock(&dev->struct_mutex);
>  	return ret;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
> index d3ebdbc0182e..1b1581a42aa1 100644
> --- a/drivers/gpu/drm/i915/intel_guc_log.c
> +++ b/drivers/gpu/drm/i915/intel_guc_log.c
> @@ -445,7 +445,7 @@ static void guc_log_capture_logs(struct intel_guc_log *log)
>  	 */
>  	intel_runtime_pm_get(dev_priv);
>  	guc_action_flush_log_complete(guc);
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  }
>  
>  int intel_guc_log_create(struct intel_guc_log *log)
> @@ -528,7 +528,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
>  	ret = guc_action_control_log(guc, GUC_LOG_LEVEL_IS_VERBOSE(level),
>  				     GUC_LOG_LEVEL_IS_ENABLED(level),
>  				     GUC_LOG_LEVEL_TO_VERBOSITY(level));
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  	if (ret) {
>  		DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret);
>  		goto out_unlock;
> @@ -610,7 +610,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
>  
>  	intel_runtime_pm_get(i915);
>  	guc_action_flush_log(guc);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  
>  	/* GuC would have updated log buffer by now, so capture it */
>  	guc_log_capture_logs(log);
> diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c
> index e24174d08fed..067277ca7cff 100644
> --- a/drivers/gpu/drm/i915/intel_hotplug.c
> +++ b/drivers/gpu/drm/i915/intel_hotplug.c
> @@ -262,7 +262,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
>  		dev_priv->display.hpd_irq_setup(dev_priv);
>  	spin_unlock_irq(&dev_priv->irq_lock);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  }
>  
>  bool intel_encoder_hotplug(struct intel_encoder *encoder,
> diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c
> index bc27b691d824..c2b076e9bada 100644
> --- a/drivers/gpu/drm/i915/intel_huc.c
> +++ b/drivers/gpu/drm/i915/intel_huc.c
> @@ -122,7 +122,7 @@ int intel_huc_check_status(struct intel_huc *huc)
>  
>  	intel_runtime_pm_get(dev_priv);
>  	status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return status;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
> index ee3e0842d542..c2b7455a023e 100644
> --- a/drivers/gpu/drm/i915/intel_panel.c
> +++ b/drivers/gpu/drm/i915/intel_panel.c
> @@ -1213,7 +1213,7 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd)
>  	ret = scale_hw_to_user(connector, hw_level, bd->props.max_brightness);
>  
>  	drm_modeset_unlock(&dev->mode_config.connection_mutex);
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return ret;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
> index 67d71cc604f1..38c22fb7152e 100644
> --- a/drivers/gpu/drm/i915/intel_runtime_pm.c
> +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
> @@ -62,7 +62,7 @@ static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
>  	spin_lock_init(&i915->runtime_pm.debug_lock);
>  }
>  
> -static noinline void
> +static noinline depot_stack_handle_t
>  track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
>  {
>  	struct i915_runtime_pm *rpm = &i915->runtime_pm;
> @@ -76,7 +76,7 @@ track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
>  	depot_stack_handle_t stack, *stacks;
>  
>  	if (!HAS_RUNTIME_PM(i915))
> -		return;
> +		return -1;
>  
>  	save_stack_trace(&trace);
>  	if (trace.nr_entries &&
> @@ -85,7 +85,7 @@ track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
>  
>  	stack = depot_save_stack(&trace, GFP_NOWAIT | __GFP_NOWARN);
>  	if (!stack)
> -		return;
> +		return -1;
>  
>  	spin_lock_irqsave(&rpm->debug_lock, flags);
>  	stacks = krealloc(rpm->debug_owners,
> @@ -94,8 +94,55 @@ track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
>  	if (stacks) {
>  		stacks[rpm->debug_count++] = stack;
>  		rpm->debug_owners = stacks;
> +	} else {
> +		stack = -1;
>  	}
>  	spin_unlock_irqrestore(&rpm->debug_lock, flags);
> +
> +	return stack;
> +}
> +
> +static void cancel_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
> +					    depot_stack_handle_t stack)
> +{
> +	struct i915_runtime_pm *rpm = &i915->runtime_pm;
> +	unsigned long flags, n;
> +	bool found = false;
> +
> +	if (unlikely(stack == -1))
> +		return;
> +
> +	spin_lock_irqsave(&rpm->debug_lock, flags);
> +	for (n = rpm->debug_count; n--; ) {
> +		if (rpm->debug_owners[n] == stack) {
> +			memmove(rpm->debug_owners + n,
> +				rpm->debug_owners + n + 1,
> +				(--rpm->debug_count - n) * sizeof(stack));

You could mark the released ones as a special value here? (-1).

Tho releasing from the end, should keep the size small enough
so that we dont need to care.

> +			found = true;
> +			break;
> +		}
> +	}
> +	spin_unlock_irqrestore(&rpm->debug_lock, flags);
> +
> +	if (WARN(!found,
> +		 "Unmatched wakeref (tracking %lu), count %u\n",
> +		 rpm->debug_count, atomic_read(&rpm->wakeref_count))) {
> +		unsigned long entries[STACKDEPTH];
> +		struct stack_trace trace = {
> +			.entries = entries,
> +			.max_entries = ARRAY_SIZE(entries),
> +		};
> +		char *buf;
> +
> +		buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
> +		if (!buf)
> +			return;
> +
> +		depot_fetch_stack(stack, &trace);
> +		snprint_stack_trace(buf, PAGE_SIZE, &trace, 0);
> +		DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf);
> +		kfree(buf);
> +	}
>  }
>  
>  static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> @@ -220,8 +267,10 @@ static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
>  {
>  }
>  
> -static void track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> +static depot_stack_handle_t
> +track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
>  {
> +	return -1;
>  }
>  
>  static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
> @@ -1823,7 +1872,7 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
>  	mutex_unlock(&power_domains->lock);
>  
>  	if (!is_enabled)
> -		intel_runtime_pm_put(dev_priv);
> +		intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return is_enabled;
>  }
> @@ -1857,7 +1906,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
>  
>  	mutex_unlock(&power_domains->lock);
>  
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  }
>  
>  #define I830_PIPES_POWER_DOMAINS (		\
> @@ -3965,7 +4014,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume)
>  void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv)
>  {
>  	/* Keep the power well enabled, but cancel its rpm wakeref. */
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	/* Remove the refcount we took to keep power well support disabled. */
>  	if (!i915_modparams.disable_power_well)
> @@ -4179,7 +4228,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
>   * Any runtime pm reference obtained by this function must have a symmetric
>   * call to intel_runtime_pm_put() to release the reference again.
>   */

Need to update the documentation.

> -void intel_runtime_pm_get(struct drm_i915_private *i915)
> +intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915)
>  {
>  	struct pci_dev *pdev = i915->drm.pdev;
>  	struct device *kdev = &pdev->dev;
> @@ -4191,7 +4240,7 @@ void intel_runtime_pm_get(struct drm_i915_private *i915)
>  	atomic_inc(&i915->runtime_pm.wakeref_count);
>  	assert_rpm_wakelock_held(i915);
>  
> -	track_intel_runtime_pm_wakeref(i915);
> +	return track_intel_runtime_pm_wakeref(i915);
>  }
>  
>  /**
> @@ -4207,7 +4256,7 @@ void intel_runtime_pm_get(struct drm_i915_private *i915)
>   *
>   * Returns: True if the wakeref was acquired, or False otherwise.

For practical purposes this could still be the case but please update
the return value type.

>   */
> -bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
> +intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
>  {
>  	if (IS_ENABLED(CONFIG_PM)) {
>  		struct pci_dev *pdev = i915->drm.pdev;
> @@ -4220,15 +4269,13 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
>  		 * atm to the late/early system suspend/resume handlers.
>  		 */
>  		if (pm_runtime_get_if_in_use(kdev) <= 0)
> -			return false;
> +			return 0;
>  	}
>  
>  	atomic_inc(&i915->runtime_pm.wakeref_count);
>  	assert_rpm_wakelock_held(i915);
>  
> -	track_intel_runtime_pm_wakeref(i915);
> -
> -	return true;
> +	return track_intel_runtime_pm_wakeref(i915);
>  }
>  
>  /**
> @@ -4248,7 +4295,7 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
>   * Any runtime pm reference obtained by this function must have a symmetric
>   * call to intel_runtime_pm_put() to release the reference again.
>   */

Document update needed here also.

> -void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
> +intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
>  {
>  	struct pci_dev *pdev = i915->drm.pdev;
>  	struct device *kdev = &pdev->dev;
> @@ -4258,7 +4305,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
>  
>  	atomic_inc(&i915->runtime_pm.wakeref_count);
>  
> -	track_intel_runtime_pm_wakeref(i915);
> +	return track_intel_runtime_pm_wakeref(i915);
>  }
>  
>  /**
> @@ -4269,7 +4316,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
>   * intel_runtime_pm_get() and might power down the corresponding
>   * hardware block right away if this is the last reference.
>   */

Documentation part needs updating.

-Mika

> -void intel_runtime_pm_put(struct drm_i915_private *i915)
> +void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915)
>  {
>  	struct pci_dev *pdev = i915->drm.pdev;
>  	struct device *kdev = &pdev->dev;
> @@ -4282,6 +4329,14 @@ void intel_runtime_pm_put(struct drm_i915_private *i915)
>  	pm_runtime_put_autosuspend(kdev);
>  }
>  
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
> +void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref)
> +{
> +	cancel_intel_runtime_pm_wakeref(i915, wref);
> +	intel_runtime_pm_put_unchecked(i915);
> +}
> +#endif
> +
>  /**
>   * intel_runtime_pm_enable - enable runtime pm
>   * @i915: i915 device instance
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index fff468f17d2d..8d4c76ac0e7d 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1709,7 +1709,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>  		reg->val = I915_READ8(entry->offset_ldw);
>  	else
>  		ret = -EINVAL;
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  
>  	return ret;
>  }
> diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
> index a0c7cbc212ba..731dfd3d3fc8 100644
> --- a/drivers/gpu/drm/i915/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
> @@ -1789,7 +1789,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
>  	err = i915_subtests(tests, ctx);
>  
>  out_unlock:
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
>  
>  	mock_file_free(dev_priv, file);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
> index bdcc53e15e75..762e1a7125f5 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
> @@ -32,7 +32,7 @@ static int switch_to_context(struct drm_i915_private *i915,
>  		i915_request_add(rq);
>  	}
>  
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  
>  	return err;
>  }
> @@ -76,7 +76,7 @@ static void simulate_hibernate(struct drm_i915_private *i915)
>  	 */
>  	trash_stolen(i915);
>  
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  }
>  
>  static int pm_prepare(struct drm_i915_private *i915)
> @@ -98,7 +98,7 @@ static void pm_suspend(struct drm_i915_private *i915)
>  	i915_gem_suspend_gtt_mappings(i915);
>  	i915_gem_suspend_late(i915);
>  
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  }
>  
>  static void pm_hibernate(struct drm_i915_private *i915)
> @@ -110,7 +110,7 @@ static void pm_hibernate(struct drm_i915_private *i915)
>  	i915_gem_freeze(i915);
>  	i915_gem_freeze_late(i915);
>  
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  }
>  
>  static void pm_resume(struct drm_i915_private *i915)
> @@ -125,7 +125,7 @@ static void pm_resume(struct drm_i915_private *i915)
>  	i915_gem_sanitize(i915);
>  	i915_gem_resume(i915);
>  
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  }
>  
>  static int igt_gem_suspend(void *arg)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> index f7392c1ffe75..eea4fc2445ae 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> @@ -376,7 +376,7 @@ static int igt_gem_coherency(void *arg)
>  		}
>  	}
>  unlock:
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	kfree(offsets);
>  	return err;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index d00cdf3c2939..6e1a0711d201 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -243,7 +243,7 @@ static int live_nop_switch(void *arg)
>  	}
>  
>  out_unlock:
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	mock_file_free(i915, file);
>  	return err;
> @@ -609,7 +609,7 @@ static int igt_ctx_exec(void *arg)
>  
>  			intel_runtime_pm_get(i915);
>  			err = gpu_fill(obj, ctx, engine, dw);
> -			intel_runtime_pm_put(i915);
> +			intel_runtime_pm_put_unchecked(i915);
>  			if (err) {
>  				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
>  				       ndwords, dw, max_dwords(obj),
> @@ -715,7 +715,7 @@ static int igt_ctx_readonly(void *arg)
>  
>  			intel_runtime_pm_get(i915);
>  			err = gpu_fill(obj, ctx, engine, dw);
> -			intel_runtime_pm_put(i915);
> +			intel_runtime_pm_put_unchecked(i915);
>  			if (err) {
>  				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
>  				       ndwords, dw, max_dwords(obj),
> @@ -1067,7 +1067,7 @@ static int igt_vm_isolation(void *arg)
>  		count, RUNTIME_INFO(i915)->num_rings);
>  
>  out_rpm:
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  out_unlock:
>  	if (end_live_test(&t))
>  		err = -EIO;
> @@ -1200,7 +1200,7 @@ static int igt_switch_to_kernel_context(void *arg)
>  	if (igt_flush_test(i915, I915_WAIT_LOCKED))
>  		err = -EIO;
>  
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  
>  	kernel_context_close(ctx);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> index 4365979d8222..8d22f73a9b63 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> @@ -464,7 +464,7 @@ static int igt_evict_contexts(void *arg)
>  	}
>  	if (drm_mm_node_allocated(&hole))
>  		drm_mm_remove_node(&hole);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  
>  	return err;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index a9ed0ecc94e2..87cb0602a5fc 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -295,7 +295,7 @@ static int lowlevel_hole(struct drm_i915_private *i915,
>  
>  			intel_runtime_pm_get(i915);
>  			vm->insert_entries(vm, &mock_vma, I915_CACHE_NONE, 0);
> -			intel_runtime_pm_put(i915);
> +			intel_runtime_pm_put_unchecked(i915);
>  		}
>  		count = n;
>  
> @@ -1216,7 +1216,7 @@ static int igt_ggtt_page(void *arg)
>  	kfree(order);
>  out_remove:
>  	ggtt->vm.clear_range(&ggtt->vm, tmp.start, tmp.size);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	drm_mm_remove_node(&tmp);
>  out_unpin:
>  	i915_gem_object_unpin_pages(obj);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> index be7ecb66ad11..b03890c590d7 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> @@ -444,7 +444,7 @@ next_tiling: ;
>  	}
>  
>  out_unlock:
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	i915_gem_object_unpin_pages(obj);
>  out:
> @@ -508,7 +508,7 @@ static void disable_retire_worker(struct drm_i915_private *i915)
>  	if (!i915->gt.active_requests++) {
>  		intel_runtime_pm_get(i915);
>  		i915_gem_unpark(i915);
> -		intel_runtime_pm_put(i915);
> +		intel_runtime_pm_put_unchecked(i915);
>  	}
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	cancel_delayed_work_sync(&i915->gt.retire_work);
> @@ -590,7 +590,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
>  		mutex_lock(&i915->drm.struct_mutex);
>  		intel_runtime_pm_get(i915);
>  		err = make_obj_busy(obj);
> -		intel_runtime_pm_put(i915);
> +		intel_runtime_pm_put_unchecked(i915);
>  		mutex_unlock(&i915->drm.struct_mutex);
>  		if (err) {
>  			pr_err("[loop %d] Failed to busy the object\n", loop);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index 07e557815308..e8880cabd5c7 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -403,7 +403,7 @@ static int live_nop_request(void *arg)
>  	}
>  
>  out_unlock:
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return err;
>  }
> @@ -553,7 +553,7 @@ static int live_empty_request(void *arg)
>  	i915_vma_unpin(batch);
>  	i915_vma_put(batch);
>  out_unlock:
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return err;
>  }
> @@ -731,7 +731,7 @@ static int live_all_engines(void *arg)
>  	i915_vma_unpin(batch);
>  	i915_vma_put(batch);
>  out_unlock:
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return err;
>  }
> @@ -860,7 +860,7 @@ static int live_sequential_engines(void *arg)
>  		i915_request_put(request[id]);
>  	}
>  out_unlock:
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return err;
>  }
> diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c
> index 32cba4cae31a..3590ba3d8897 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_guc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c
> @@ -225,7 +225,7 @@ static int igt_guc_clients(void *args)
>  	guc_clients_create(guc);
>  	guc_clients_enable(guc);
>  unlock:
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
>  	return err;
>  }
> @@ -337,7 +337,7 @@ static int igt_guc_doorbells(void *arg)
>  			guc_client_free(clients[i]);
>  		}
>  unlock:
> -	intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put_unchecked(dev_priv);
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
>  	return err;
>  }
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index 0aadbd9c7d56..33bd3c4b6fa3 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -402,7 +402,7 @@ static int igt_wedged_reset(void *arg)
>  	i915_reset(i915, ALL_ENGINES, NULL);
>  	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
>  
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	igt_global_reset_unlock(i915);
>  
> @@ -1636,7 +1636,7 @@ static int igt_atomic_reset(void *arg)
>  	force_reset(i915);
>  
>  unlock:
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	igt_global_reset_unlock(i915);
>  
> @@ -1679,7 +1679,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
>  	mutex_unlock(&i915->drm.struct_mutex);
>  
>  	i915_modparams.enable_hangcheck = saved_hangcheck;
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  
>  	return err;
>  }
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index 00caaa00f02f..ac1b18a17f3c 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -65,7 +65,7 @@ static int live_sanitycheck(void *arg)
>  	igt_spinner_fini(&spin);
>  err_unlock:
>  	igt_flush_test(i915, I915_WAIT_LOCKED);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return err;
>  }
> @@ -158,7 +158,7 @@ static int live_preempt(void *arg)
>  	igt_spinner_fini(&spin_hi);
>  err_unlock:
>  	igt_flush_test(i915, I915_WAIT_LOCKED);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return err;
>  }
> @@ -251,7 +251,7 @@ static int live_late_preempt(void *arg)
>  	igt_spinner_fini(&spin_hi);
>  err_unlock:
>  	igt_flush_test(i915, I915_WAIT_LOCKED);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return err;
>  
> @@ -374,7 +374,7 @@ static int live_preempt_hang(void *arg)
>  	igt_spinner_fini(&spin_hi);
>  err_unlock:
>  	igt_flush_test(i915, I915_WAIT_LOCKED);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return err;
>  }
> @@ -627,7 +627,7 @@ static int live_preempt_smoke(void *arg)
>  err_batch:
>  	i915_gem_object_put(smoke.batch);
>  err_unlock:
> -	intel_runtime_pm_put(smoke.i915);
> +	intel_runtime_pm_put_unchecked(smoke.i915);
>  	mutex_unlock(&smoke.i915->drm.struct_mutex);
>  	kfree(smoke.contexts);
>  
> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> index c2b3cd8fcc34..54f5c2de3d08 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> @@ -49,7 +49,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
>  
>  	intel_runtime_pm_get(engine->i915);
>  	rq = i915_request_alloc(engine, ctx);
> -	intel_runtime_pm_put(engine->i915);
> +	intel_runtime_pm_put_unchecked(engine->i915);
>  	if (IS_ERR(rq)) {
>  		err = PTR_ERR(rq);
>  		goto err_pin;
> @@ -196,7 +196,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
>  	else
>  		rq = i915_request_alloc(engine, ctx);
>  
> -	intel_runtime_pm_put(engine->i915);
> +	intel_runtime_pm_put_unchecked(engine->i915);
>  
>  	kernel_context_close(ctx);
>  
> @@ -255,7 +255,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
>  
>  	intel_runtime_pm_get(i915);
>  	err = reset(engine);
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  
>  	if (want_spin) {
>  		igt_spinner_end(&spin);
> @@ -364,7 +364,7 @@ live_gpu_reset_gt_engine_workarounds(void *arg)
>  	ok = verify_gt_engine_wa(i915, "after reset");
>  
>  out:
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	igt_global_reset_unlock(i915);
>  
>  	return ok ? 0 : -ESRCH;
> @@ -443,7 +443,7 @@ live_engine_reset_gt_engine_workarounds(void *arg)
>  	}
>  
>  err:
> -	intel_runtime_pm_put(i915);
> +	intel_runtime_pm_put_unchecked(i915);
>  	igt_global_reset_unlock(i915);
>  	kernel_context_close(ctx);
>  
> -- 
> 2.20.1

* Re: [PATCH 04/46] drm/i915: Markup paired operations on wakerefs
  2019-01-08 16:23   ` Mika Kuoppala
@ 2019-01-08 16:41     ` Chris Wilson
  2019-01-09  9:23       ` Mika Kuoppala
  0 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-08 16:41 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx; +Cc: Jani Nikula

Quoting Mika Kuoppala (2019-01-08 16:23:18)
> > @@ -3965,7 +4014,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume)
> >  void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv)
> >  {
> >       /* Keep the power well enabled, but cancel its rpm wakeref. */
> > -     intel_runtime_pm_put(dev_priv);
> > +     intel_runtime_pm_put_unchecked(dev_priv);
> >  
> >       /* Remove the refcount we took to keep power well support disabled. */
> >       if (!i915_modparams.disable_power_well)
> > @@ -4179,7 +4228,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
> >   * Any runtime pm reference obtained by this function must have a symmetric
> >   * call to intel_runtime_pm_put() to release the reference again.
> >   */
> 
> Need to update the documentation.

No. You are expected to pair intel_runtime_pm_get with intel_runtime_pm_put.
The _unchecked version is temporary and not expected to be used in new code.
Once the dust has settled it will be gone.

* Any runtime pm reference obtained by this function must have a symmetric
* call to intel_runtime_pm_put() to release the reference again.

is accurate.
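
i.e. the intended call-site pattern is simply (a minimal sketch,
matching the conversions elsewhere in the series):

	intel_wakeref_t wakeref;

	wakeref = intel_runtime_pm_get(i915);
	/* ... access the hardware ... */
	intel_runtime_pm_put(i915, wakeref);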

> > -void intel_runtime_pm_get(struct drm_i915_private *i915)
> > +intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915)
> >  {
> >       struct pci_dev *pdev = i915->drm.pdev;
> >       struct device *kdev = &pdev->dev;
> > @@ -4191,7 +4240,7 @@ void intel_runtime_pm_get(struct drm_i915_private *i915)
> >       atomic_inc(&i915->runtime_pm.wakeref_count);
> >       assert_rpm_wakelock_held(i915);
> >  
> > -     track_intel_runtime_pm_wakeref(i915);
> > +     return track_intel_runtime_pm_wakeref(i915);
> >  }
> >  
> >  /**
> > @@ -4207,7 +4256,7 @@ void intel_runtime_pm_get(struct drm_i915_private *i915)
> >   *
> >   * Returns: True if the wakeref was acquired, or False otherwise.
> 
> For practical purposes this could still be the case but please update
> the return value type.

Still should be used as a boolean (true/false) though.
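
e.g. callers can still treat the returned cookie as a success flag
(sketch):

	wakeref = intel_runtime_pm_get_if_in_use(i915);
	if (!wakeref)
		return; /* device not awake, nothing to do */

	/* ... */
	intel_runtime_pm_put(i915, wakeref);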

> >   */
> > -bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
> > +intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
> >  {
> >       if (IS_ENABLED(CONFIG_PM)) {
> >               struct pci_dev *pdev = i915->drm.pdev;
> > @@ -4220,15 +4269,13 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
> >                * atm to the late/early system suspend/resume handlers.
> >                */
> >               if (pm_runtime_get_if_in_use(kdev) <= 0)
> > -                     return false;
> > +                     return 0;
> >       }
> >  
> >       atomic_inc(&i915->runtime_pm.wakeref_count);
> >       assert_rpm_wakelock_held(i915);
> >  
> > -     track_intel_runtime_pm_wakeref(i915);
> > -
> > -     return true;
> > +     return track_intel_runtime_pm_wakeref(i915);
> >  }
> >  
> >  /**
> > @@ -4248,7 +4295,7 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
> >   * Any runtime pm reference obtained by this function must have a symmetric
> >   * call to intel_runtime_pm_put() to release the reference again.
> >   */
> 
> Document update needed here also.

Nope.

> > -void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
> > +intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
> >  {
> >       struct pci_dev *pdev = i915->drm.pdev;
> >       struct device *kdev = &pdev->dev;
> > @@ -4258,7 +4305,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
> >  
> >       atomic_inc(&i915->runtime_pm.wakeref_count);
> >  
> > -     track_intel_runtime_pm_wakeref(i915);
> > +     return track_intel_runtime_pm_wakeref(i915);
> >  }
> >  
> >  /**
> > @@ -4269,7 +4316,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
> >   * intel_runtime_pm_get() and might power down the corresponding
> >   * hardware block right away if this is the last reference.
> >   */
> 
> Documentation part needs updating.

I either don't get your point, or you missed the point of the wakeref
tracking? :)
-Chris

* Re: [PATCH v3] drm/i915: Track all held rpm wakerefs
  2019-01-08 12:22   ` [PATCH v3] " Chris Wilson
  2019-01-08 12:49     ` Mika Kuoppala
@ 2019-01-08 20:05     ` kbuild test robot
  1 sibling, 0 replies; 111+ messages in thread
From: kbuild test robot @ 2019-01-08 20:05 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Jani Nikula, intel-gfx, kbuild-all

Hi Chris,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on v5.0-rc1 next-20190108]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-Track-all-held-rpm-wakerefs/20190108-233439
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-randconfig-s5-01090236 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   drivers/gpu/drm/i915/intel_runtime_pm.o: In function `__print_intel_runtime_pm_wakeref':
>> drivers/gpu/drm/i915/intel_runtime_pm.c:154: undefined reference to `depot_fetch_stack'
>> drivers/gpu/drm/i915/intel_runtime_pm.c:154: undefined reference to `depot_fetch_stack'
>> drivers/gpu/drm/i915/intel_runtime_pm.c:154: undefined reference to `depot_fetch_stack'
>> drivers/gpu/drm/i915/intel_runtime_pm.c:154: undefined reference to `depot_fetch_stack'
   drivers/gpu/drm/i915/intel_runtime_pm.o: In function `track_intel_runtime_pm_wakeref':
>> drivers/gpu/drm/i915/intel_runtime_pm.c:86: undefined reference to `depot_save_stack'

vim +154 drivers/gpu/drm/i915/intel_runtime_pm.c

    64	
    65	static noinline void
    66	track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
    67	{
    68		struct i915_runtime_pm *rpm = &i915->runtime_pm;
    69		unsigned long entries[STACKDEPTH];
    70		struct stack_trace trace = {
    71			.entries = entries,
    72			.max_entries = ARRAY_SIZE(entries),
    73			.skip = 0 /* gcc is ignoring noinline for tail calls? */
    74		};
    75		unsigned long flags;
    76		depot_stack_handle_t stack, *stacks;
    77	
    78		if (!HAS_RUNTIME_PM(i915))
    79			return;
    80	
    81		save_stack_trace(&trace);
    82		if (trace.nr_entries &&
    83		    trace.entries[trace.nr_entries - 1] == ULONG_MAX)
    84			trace.nr_entries--;
    85	
  > 86		stack = depot_save_stack(&trace, GFP_NOWAIT | __GFP_NOWARN);
    87		if (!stack)
    88			return;
    89	
    90		spin_lock_irqsave(&rpm->debug_lock, flags);
    91		stacks = krealloc(rpm->debug_owners,
    92				  (rpm->debug_count + 1) * sizeof(*stacks),
    93				  GFP_NOWAIT | __GFP_NOWARN);
    94		if (stacks) {
    95			stacks[rpm->debug_count++] = stack;
    96			rpm->debug_owners = stacks;
    97		}
    98		spin_unlock_irqrestore(&rpm->debug_lock, flags);
    99	}
   100	
   101	static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
   102	{
   103		struct i915_runtime_pm *rpm = &i915->runtime_pm;
   104		depot_stack_handle_t *stacks;
   105		unsigned long flags;
   106	
   107		spin_lock_irqsave(&rpm->debug_lock, flags);
   108		stacks = fetch_and_zero(&rpm->debug_owners);
   109		rpm->debug_count = 0;
   110		spin_unlock_irqrestore(&rpm->debug_lock, flags);
   111	
   112		kfree(stacks);
   113	}
   114	
   115	static int cmphandle(const void *_a, const void *_b)
   116	{
   117		const depot_stack_handle_t * const a = _a, * const b = _b;
   118	
   119		if (*a < *b)
   120			return -1;
   121		else if (*a > *b)
   122			return 1;
   123		else
   124			return 0;
   125	}
   126	
   127	static void __print_intel_runtime_pm_wakeref(struct drm_printer *p,
   128						     depot_stack_handle_t *stacks,
   129						     unsigned long count)
   130	{
   131		unsigned long entries[STACKDEPTH];
   132		unsigned long i;
   133		char *buf;
   134	
   135		drm_printf(p, "Wakeref count: %lu\n", count);
   136	
   137		buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
   138		if (!buf)
   139			return;
   140	
   141		sort(stacks, count, sizeof(*stacks), cmphandle, NULL);
   142	
   143		for (i = 0; i < count; i++) {
   144			struct stack_trace trace = {
   145				.entries = entries,
   146				.max_entries = ARRAY_SIZE(entries),
   147			};
   148			depot_stack_handle_t stack = stacks[i];
   149			unsigned long rep;
   150	
   151			rep = 1;
   152			while (i + 1 < count && stacks[i + 1] == stack)
   153				rep++, i++;
 > 154			depot_fetch_stack(stack, &trace);
   155			snprint_stack_trace(buf, PAGE_SIZE, &trace, 2);
   156			drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
   157		}
   158	
   159		kfree(buf);
   160	}
   161	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 35206 bytes --]

* Re: [PATCH 04/46] drm/i915: Markup paired operations on wakerefs
  2019-01-08 16:41     ` Chris Wilson
@ 2019-01-09  9:23       ` Mika Kuoppala
  2019-01-09 11:51         ` Chris Wilson
  0 siblings, 1 reply; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-09  9:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-01-08 16:23:18)
>> > @@ -3965,7 +4014,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume)
>> >  void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv)
>> >  {
>> >       /* Keep the power well enabled, but cancel its rpm wakeref. */
>> > -     intel_runtime_pm_put(dev_priv);
>> > +     intel_runtime_pm_put_unchecked(dev_priv);
>> >  
>> >       /* Remove the refcount we took to keep power well support disabled. */
>> >       if (!i915_modparams.disable_power_well)
>> > @@ -4179,7 +4228,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
>> >   * Any runtime pm reference obtained by this function must have a symmetric
>> >   * call to intel_runtime_pm_put() to release the reference again.
>> >   */
>> 
>> Need to update the documentation.
>
> No. You are expected to pair intel_runtime_pm_get with intel_runtime_pm_put.
> The _unchecked version is temporary and not expected to be used in new code.
> Once the dust has settled it will be gone.
>
> * Any runtime pm reference obtained by this function must have a symmetric
> * call to intel_runtime_pm_put() to release the reference again.
>
> is accurate.

Ok.
>
>> > -void intel_runtime_pm_get(struct drm_i915_private *i915)
>> > +intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915)
>> >  {
>> >       struct pci_dev *pdev = i915->drm.pdev;
>> >       struct device *kdev = &pdev->dev;
>> > @@ -4191,7 +4240,7 @@ void intel_runtime_pm_get(struct drm_i915_private *i915)
>> >       atomic_inc(&i915->runtime_pm.wakeref_count);
>> >       assert_rpm_wakelock_held(i915);
>> >  
>> > -     track_intel_runtime_pm_wakeref(i915);
>> > +     return track_intel_runtime_pm_wakeref(i915);
>> >  }
>> >  
>> >  /**
>> > @@ -4207,7 +4256,7 @@ void intel_runtime_pm_get(struct drm_i915_private *i915)
>> >   *
>> >   * Returns: True if the wakeref was acquired, or False otherwise.
>> 
>> For practical purposes this could still be the case but please update
>> the return value type.
>
> Still should be used as a boolean (true/false) though.

Agreed, but this is the documentation for the function, and it returns a wakeref.

>
>> >   */
>> > -bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
>> > +intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
>> >  {
>> >       if (IS_ENABLED(CONFIG_PM)) {
>> >               struct pci_dev *pdev = i915->drm.pdev;
>> > @@ -4220,15 +4269,13 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
>> >                * atm to the late/early system suspend/resume handlers.
>> >                */
>> >               if (pm_runtime_get_if_in_use(kdev) <= 0)
>> > -                     return false;
>> > +                     return 0;
>> >       }
>> >  
>> >       atomic_inc(&i915->runtime_pm.wakeref_count);
>> >       assert_rpm_wakelock_held(i915);
>> >  
>> > -     track_intel_runtime_pm_wakeref(i915);
>> > -
>> > -     return true;
>> > +     return track_intel_runtime_pm_wakeref(i915);
>> >  }
>> >  
>> >  /**
>> > @@ -4248,7 +4295,7 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
>> >   * Any runtime pm reference obtained by this function must have a symmetric
>> >   * call to intel_runtime_pm_put() to release the reference again.
>> >   */
>> 
>> Document update needed here also.
>
> Nope.
>
>> > -void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
>> > +intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
>> >  {
>> >       struct pci_dev *pdev = i915->drm.pdev;
>> >       struct device *kdev = &pdev->dev;
>> > @@ -4258,7 +4305,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
>> >  
>> >       atomic_inc(&i915->runtime_pm.wakeref_count);
>> >  
>> > -     track_intel_runtime_pm_wakeref(i915);
>> > +     return track_intel_runtime_pm_wakeref(i915);
>> >  }
>> >  
>> >  /**
>> > @@ -4269,7 +4316,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
>> >   * intel_runtime_pm_get() and might power down the corresponding
>> >   * hardware block right away if this is the last reference.
>> >   */
>> 
>> Documentation part needs updating.
>
> I either don't get your point, or you missed the point of the wakeref
> tracking? :)

I should have been more specific. My concern was about documenting
the changed return values.
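
Something along the lines of (only a suggestion, not from the patch):

 * Returns: the wakeref cookie to pass to a matching
 * intel_runtime_pm_put(), or 0 if the wakeref was not acquired.

would match what the code now returns.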
-Mika


> -Chris

* Re: [PATCH 05/46] drm/i915: Track GT wakeref
  2019-01-07 11:54 ` [PATCH 05/46] drm/i915: Track GT wakeref Chris Wilson
@ 2019-01-09  9:52   ` Mika Kuoppala
  0 siblings, 0 replies; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-09  9:52 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Record the wakeref used for keeping the device awake as the GPU is
> executing requests and be sure to cancel the tracking upon parking.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_drv.h |  2 +-
>  drivers/gpu/drm/i915/i915_gem.c | 11 +++++++----
>  2 files changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 464ff89d1464..a20bd2ec48de 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1966,7 +1966,7 @@ struct drm_i915_private {
>  		 * In order to reduce the effect on performance, there
>  		 * is a slight delay before we do so.
>  		 */
> -		bool awake;
> +		intel_wakeref_t awake;
>  
>  		/**
>  		 * The number of times we have woken up.
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 5ac32ea4c8fa..27f207cbabd9 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -139,6 +139,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev)
>  
>  static u32 __i915_gem_park(struct drm_i915_private *i915)
>  {
> +	intel_wakeref_t wakeref;
> +
>  	GEM_TRACE("\n");
>  
>  	lockdep_assert_held(&i915->drm.struct_mutex);
> @@ -169,14 +171,15 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
>  	i915_pmu_gt_parked(i915);
>  	i915_vma_parked(i915);
>  
> -	i915->gt.awake = false;
> +	wakeref = fetch_and_zero(&i915->gt.awake);
> +	GEM_BUG_ON(!wakeref);
>  
>  	if (INTEL_GEN(i915) >= 6)
>  		gen6_rps_idle(i915);
>  
>  	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ);
>  
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  
>  	return i915->gt.epoch;
>  }
> @@ -205,7 +208,8 @@ void i915_gem_unpark(struct drm_i915_private *i915)
>  	if (i915->gt.awake)
>  		return;
>  
> -	intel_runtime_pm_get_noresume(i915);
> +	i915->gt.awake = intel_runtime_pm_get_noresume(i915);
> +	GEM_BUG_ON(!i915->gt.awake);
>  
>  	/*
>  	 * It seems that the DMC likes to transition between the DC states a lot
> @@ -220,7 +224,6 @@ void i915_gem_unpark(struct drm_i915_private *i915)
>  	 */
>  	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
>  
> -	i915->gt.awake = true;
>  	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
>  		i915->gt.epoch = 1;
>  
> -- 
> 2.20.1

* Re: [PATCH 06/46] drm/i915: Track the rpm wakerefs for error handling
  2019-01-07 11:54 ` [PATCH 06/46] drm/i915: Track the rpm wakerefs for error handling Chris Wilson
@ 2019-01-09 10:12   ` Mika Kuoppala
  0 siblings, 0 replies; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-09 10:12 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Keep hold of the local wakeref used in error handling, to cancel
> the tracking upon release so that leaks can be identified.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_irq.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 72b799c5e8f6..3272bd02c3cf 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -3292,6 +3292,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
>  		       const char *fmt, ...)
>  {
>  	struct intel_engine_cs *engine;
> +	intel_wakeref_t wakeref;
>  	unsigned int tmp;
>  	char error_msg[80];
>  	char *msg = NULL;
> @@ -3313,7 +3314,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
>  	 * isn't the case at least when we get here by doing a
>  	 * simulated reset via debugfs, so get an RPM reference.
>  	 */
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	engine_mask &= INTEL_INFO(dev_priv)->ring_mask;
>  
> @@ -3375,7 +3376,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
>  	wake_up_all(&dev_priv->gpu_error.reset_queue);
>  
>  out:
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  }
>  
>  /* Called from drm generic code, passed 'crtc' which
> -- 
> 2.20.1

* Re: [PATCH 07/46] drm/i915: Mark up sysfs with rpm wakeref tracking
  2019-01-07 11:54 ` [PATCH 07/46] drm/i915: Mark up sysfs with rpm wakeref tracking Chris Wilson
@ 2019-01-09 10:13   ` Mika Kuoppala
  0 siblings, 0 replies; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-09 10:13 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> As sysfs has a simple pattern of taking a rpm wakeref around the user
> access, we can track the local reference and drop it as soon as
> possible.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_sysfs.c | 24 ++++++++++++++----------
>  1 file changed, 14 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index 53c20e103d56..2cbbf165d179 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -42,11 +42,12 @@ static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
>  static u32 calc_residency(struct drm_i915_private *dev_priv,
>  			  i915_reg_t reg)
>  {
> +	intel_wakeref_t wakeref;
>  	u64 res;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  	res = intel_rc6_residency_us(dev_priv, reg);
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return DIV_ROUND_CLOSEST_ULL(res, 1000);
>  }
> @@ -258,9 +259,10 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
>  				    struct device_attribute *attr, char *buf)
>  {
>  	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
> +	intel_wakeref_t wakeref;
>  	int ret;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	mutex_lock(&dev_priv->pcu_lock);
>  	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
> @@ -274,7 +276,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
>  	}
>  	mutex_unlock(&dev_priv->pcu_lock);
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return snprintf(buf, PAGE_SIZE, "%d\n", ret);
>  }
> @@ -354,6 +356,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
>  {
>  	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
>  	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +	intel_wakeref_t wakeref;
>  	u32 val;
>  	ssize_t ret;
>  
> @@ -361,7 +364,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
>  	if (ret)
>  		return ret;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	mutex_lock(&dev_priv->pcu_lock);
>  
> @@ -371,7 +374,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
>  	    val > rps->max_freq ||
>  	    val < rps->min_freq_softlimit) {
>  		mutex_unlock(&dev_priv->pcu_lock);
> -		intel_runtime_pm_put_unchecked(dev_priv);
> +		intel_runtime_pm_put(dev_priv, wakeref);
>  		return -EINVAL;
>  	}
>  
> @@ -392,7 +395,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
>  
>  	mutex_unlock(&dev_priv->pcu_lock);
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return ret ?: count;
>  }
> @@ -412,6 +415,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
>  {
>  	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
>  	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +	intel_wakeref_t wakeref;
>  	u32 val;
>  	ssize_t ret;
>  
> @@ -419,7 +423,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
>  	if (ret)
>  		return ret;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	mutex_lock(&dev_priv->pcu_lock);
>  
> @@ -429,7 +433,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
>  	    val > rps->max_freq ||
>  	    val > rps->max_freq_softlimit) {
>  		mutex_unlock(&dev_priv->pcu_lock);
> -		intel_runtime_pm_put_unchecked(dev_priv);
> +		intel_runtime_pm_put(dev_priv, wakeref);
>  		return -EINVAL;
>  	}
>  
> @@ -446,7 +450,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
>  
>  	mutex_unlock(&dev_priv->pcu_lock);
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return ret ?: count;
>  }
> -- 
> 2.20.1

* Re: [PATCH 08/46] drm/i915: Mark up debugfs with rpm wakeref tracking
  2019-01-07 11:54 ` [PATCH 08/46] drm/i915: Mark up debugfs " Chris Wilson
@ 2019-01-09 10:20   ` Mika Kuoppala
  2019-01-09 11:49     ` Chris Wilson
  0 siblings, 1 reply; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-09 10:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> As debugfs has a simple pattern of taking a rpm wakeref around the user
> access, we can track the local reference and drop it as soon as
> possible.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c | 135 +++++++++++++++++-----------
>  1 file changed, 82 insertions(+), 53 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 6b8da14f213b..d667b05e7ca4 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -674,9 +674,10 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
> +	intel_wakeref_t wakeref;
>  	int i, pipe;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	if (IS_CHERRYVIEW(dev_priv)) {
>  		seq_printf(m, "Master Interrupt Control:\t%08x\n",
> @@ -877,7 +878,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
>  		}
>  	}
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return 0;
>  }
> @@ -950,10 +951,11 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
>  {
>  	struct drm_i915_private *i915 = inode->i_private;
>  	struct i915_gpu_state *gpu;
> +	intel_wakeref_t wakeref;
>  
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  	gpu = i915_capture_gpu_state(i915);
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	if (IS_ERR(gpu))
>  		return PTR_ERR(gpu);
>  
> @@ -1012,9 +1014,10 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>  	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +	intel_wakeref_t wakeref;
>  	int ret = 0;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	if (IS_GEN(dev_priv, 5)) {
>  		u16 rgvswctl = I915_READ16(MEMSWCTL);
> @@ -1226,7 +1229,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
>  	seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq);
>  	seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq);
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  	return ret;
>  }
>  
> @@ -1265,6 +1268,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  	u64 acthd[I915_NUM_ENGINES];
>  	u32 seqno[I915_NUM_ENGINES];
>  	struct intel_instdone instdone;
> +	intel_wakeref_t wakeref;
>  	enum intel_engine_id id;
>  
>  	if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
> @@ -1283,7 +1287,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  		return 0;
>  	}
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	for_each_engine(engine, dev_priv, id) {
>  		acthd[id] = intel_engine_get_active_head(engine);
> @@ -1292,7 +1296,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  
>  	intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer))
>  		seq_printf(m, "Hangcheck active, timer fires in %dms\n",
> @@ -1568,9 +1572,10 @@ static int gen6_drpc_info(struct seq_file *m)
>  static int i915_drpc_info(struct seq_file *m, void *unused)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> +	intel_wakeref_t wakeref;
>  	int err;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
>  		err = vlv_drpc_info(m);
> @@ -1579,7 +1584,7 @@ static int i915_drpc_info(struct seq_file *m, void *unused)
>  	else
>  		err = ironlake_drpc_info(m);
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return err;
>  }
> @@ -1601,11 +1606,12 @@ static int i915_fbc_status(struct seq_file *m, void *unused)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>  	struct intel_fbc *fbc = &dev_priv->fbc;
> +	intel_wakeref_t wakeref;
>  
>  	if (!HAS_FBC(dev_priv))
>  		return -ENODEV;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  	mutex_lock(&fbc->lock);
>  
>  	if (intel_fbc_is_active(dev_priv))
> @@ -1632,7 +1638,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused)
>  	}
>  
>  	mutex_unlock(&fbc->lock);
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return 0;
>  }
> @@ -1677,11 +1683,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_false_color_fops,
>  static int i915_ips_status(struct seq_file *m, void *unused)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> +	intel_wakeref_t wakeref;
>  
>  	if (!HAS_IPS(dev_priv))
>  		return -ENODEV;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	seq_printf(m, "Enabled by kernel parameter: %s\n",
>  		   yesno(i915_modparams.enable_ips));
> @@ -1695,7 +1702,7 @@ static int i915_ips_status(struct seq_file *m, void *unused)
>  			seq_puts(m, "Currently: disabled\n");
>  	}
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return 0;
>  }
> @@ -1703,9 +1710,10 @@ static int i915_ips_status(struct seq_file *m, void *unused)
>  static int i915_sr_status(struct seq_file *m, void *unused)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> +	intel_wakeref_t wakeref;
>  	bool sr_enabled = false;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
>  
>  	if (INTEL_GEN(dev_priv) >= 9)
> @@ -1723,7 +1731,7 @@ static int i915_sr_status(struct seq_file *m, void *unused)
>  		sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
>  
>  	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	seq_printf(m, "self-refresh: %s\n", enableddisabled(sr_enabled));
>  
> @@ -1735,29 +1743,30 @@ static int i915_emon_status(struct seq_file *m, void *unused)
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>  	struct drm_device *dev = &dev_priv->drm;
>  	unsigned long temp, chipset, gfx;
> +	intel_wakeref_t wakeref;
>  	int ret;
>  
>  	if (!IS_GEN(dev_priv, 5))
>  		return -ENODEV;
>  
> -	intel_runtime_pm_get(dev_priv);
> -
>  	ret = mutex_lock_interruptible(&dev->struct_mutex);
>  	if (ret)
>  		return ret;
>  
> +	wakeref = intel_runtime_pm_get(dev_priv);
> +
>  	temp = i915_mch_val(dev_priv);
>  	chipset = i915_chipset_val(dev_priv);
>  	gfx = i915_gfx_val(dev_priv);
>  	mutex_unlock(&dev->struct_mutex);
>  
> +	intel_runtime_pm_put(dev_priv, wakeref);
> +

I am a little surprised that this was the only call site
for tighter scoping in this file.

Nitpick for symmetry: the mutex should be released only after
the rpm reference is put.
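
i.e. an illustrative sketch (reusing the names from the hunk above):

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	wakeref = intel_runtime_pm_get(dev_priv);

	temp = i915_mch_val(dev_priv);
	chipset = i915_chipset_val(dev_priv);
	gfx = i915_gfx_val(dev_priv);

	intel_runtime_pm_put(dev_priv, wakeref);
	mutex_unlock(&dev->struct_mutex);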

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

>  	seq_printf(m, "GMCH temp: %ld\n", temp);
>  	seq_printf(m, "Chipset power: %ld\n", chipset);
>  	seq_printf(m, "GFX power: %ld\n", gfx);
>  	seq_printf(m, "Total power: %ld\n", chipset + gfx);
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> -
>  	return 0;
>  }
>  
> @@ -1766,13 +1775,14 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>  	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>  	unsigned int max_gpu_freq, min_gpu_freq;
> +	intel_wakeref_t wakeref;
>  	int gpu_freq, ia_freq;
>  	int ret;
>  
>  	if (!HAS_LLC(dev_priv))
>  		return -ENODEV;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
>  	if (ret)
> @@ -1805,7 +1815,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
>  	mutex_unlock(&dev_priv->pcu_lock);
>  
>  out:
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  	return ret;
>  }
>  
> @@ -1978,8 +1988,9 @@ static const char *swizzle_string(unsigned swizzle)
>  static int i915_swizzle_info(struct seq_file *m, void *data)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> +	intel_wakeref_t wakeref;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
>  		   swizzle_string(dev_priv->mm.bit_6_swizzle_x));
> @@ -2017,7 +2028,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
>  	if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
>  		seq_puts(m, "L-shaped memory detected\n");
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return 0;
>  }
> @@ -2054,9 +2065,11 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>  	struct drm_device *dev = &dev_priv->drm;
>  	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>  	u32 act_freq = rps->cur_freq;
> +	intel_wakeref_t wakeref;
>  	struct drm_file *file;
>  
> -	if (intel_runtime_pm_get_if_in_use(dev_priv)) {
> +	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
> +	if (wakeref) {
>  		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
>  			mutex_lock(&dev_priv->pcu_lock);
>  			act_freq = vlv_punit_read(dev_priv,
> @@ -2067,7 +2080,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>  			act_freq = intel_get_cagf(dev_priv,
>  						  I915_READ(GEN6_RPSTAT1));
>  		}
> -		intel_runtime_pm_put_unchecked(dev_priv);
> +		intel_runtime_pm_put(dev_priv, wakeref);
>  	}
>  
>  	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
> @@ -2150,6 +2163,7 @@ static int i915_llc(struct seq_file *m, void *data)
>  static int i915_huc_load_status_info(struct seq_file *m, void *data)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> +	intel_wakeref_t wakeref;
>  	struct drm_printer p;
>  
>  	if (!HAS_HUC(dev_priv))
> @@ -2158,9 +2172,9 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data)
>  	p = drm_seq_file_printer(m);
>  	intel_uc_fw_dump(&dev_priv->huc.fw, &p);
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  	seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return 0;
>  }
> @@ -2168,6 +2182,7 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data)
>  static int i915_guc_load_status_info(struct seq_file *m, void *data)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> +	intel_wakeref_t wakeref;
>  	struct drm_printer p;
>  	u32 tmp, i;
>  
> @@ -2177,7 +2192,7 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
>  	p = drm_seq_file_printer(m);
>  	intel_uc_fw_dump(&dev_priv->guc.fw, &p);
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	tmp = I915_READ(GUC_STATUS);
>  
> @@ -2192,7 +2207,7 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
>  	for (i = 0; i < 16; i++)
>  		seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i)));
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return 0;
>  }
> @@ -2550,6 +2565,7 @@ psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m)
>  static int i915_edp_psr_status(struct seq_file *m, void *data)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> +	intel_wakeref_t wakeref;
>  	u32 psrperf = 0;
>  	bool enabled = false;
>  	bool sink_support;
> @@ -2562,7 +2578,7 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
>  	if (!sink_support)
>  		return 0;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	mutex_lock(&dev_priv->psr.lock);
>  	seq_printf(m, "PSR mode: %s\n",
> @@ -2601,7 +2617,7 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
>  			   dev_priv->psr.last_exit);
>  	}
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  	return 0;
>  }
>  
> @@ -2610,6 +2626,7 @@ i915_edp_psr_debug_set(void *data, u64 val)
>  {
>  	struct drm_i915_private *dev_priv = data;
>  	struct drm_modeset_acquire_ctx ctx;
> +	intel_wakeref_t wakeref;
>  	int ret;
>  
>  	if (!CAN_PSR(dev_priv))
> @@ -2617,7 +2634,7 @@ i915_edp_psr_debug_set(void *data, u64 val)
>  
>  	DRM_DEBUG_KMS("Setting PSR debug to %llx\n", val);
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE);
>  
> @@ -2632,7 +2649,7 @@ i915_edp_psr_debug_set(void *data, u64 val)
>  	drm_modeset_drop_locks(&ctx);
>  	drm_modeset_acquire_fini(&ctx);
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return ret;
>  }
> @@ -2657,15 +2674,16 @@ static int i915_energy_uJ(struct seq_file *m, void *data)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>  	unsigned long long power;
> +	intel_wakeref_t wakeref;
>  	u32 units;
>  
>  	if (INTEL_GEN(dev_priv) < 6)
>  		return -ENODEV;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) {
> -		intel_runtime_pm_put_unchecked(dev_priv);
> +		intel_runtime_pm_put(dev_priv, wakeref);
>  		return -ENODEV;
>  	}
>  
> @@ -2673,7 +2691,7 @@ static int i915_energy_uJ(struct seq_file *m, void *data)
>  	power = I915_READ(MCH_SECP_NRG_STTS);
>  	power = (1000000 * power) >> units; /* convert to uJ */
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	seq_printf(m, "%llu", power);
>  
> @@ -2742,6 +2760,7 @@ static int i915_power_domain_info(struct seq_file *m, void *unused)
>  static int i915_dmc_info(struct seq_file *m, void *unused)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> +	intel_wakeref_t wakeref;
>  	struct intel_csr *csr;
>  
>  	if (!HAS_CSR(dev_priv))
> @@ -2749,7 +2768,7 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
>  
>  	csr = &dev_priv->csr;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	seq_printf(m, "fw loaded: %s\n", yesno(csr->dmc_payload != NULL));
>  	seq_printf(m, "path: %s\n", csr->fw_path);
> @@ -2775,7 +2794,7 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
>  	seq_printf(m, "ssp base: 0x%08x\n", I915_READ(CSR_SSP_BASE));
>  	seq_printf(m, "htp: 0x%08x\n", I915_READ(CSR_HTP_SKL));
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return 0;
>  }
> @@ -3065,8 +3084,10 @@ static int i915_display_info(struct seq_file *m, void *unused)
>  	struct intel_crtc *crtc;
>  	struct drm_connector *connector;
>  	struct drm_connector_list_iter conn_iter;
> +	intel_wakeref_t wakeref;
> +
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
> -	intel_runtime_pm_get(dev_priv);
>  	seq_printf(m, "CRTC info\n");
>  	seq_printf(m, "---------\n");
>  	for_each_intel_crtc(dev, crtc) {
> @@ -3114,7 +3135,7 @@ static int i915_display_info(struct seq_file *m, void *unused)
>  	drm_connector_list_iter_end(&conn_iter);
>  	mutex_unlock(&dev->mode_config.mutex);
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return 0;
>  }
> @@ -3123,10 +3144,11 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>  	struct intel_engine_cs *engine;
> +	intel_wakeref_t wakeref;
>  	enum intel_engine_id id;
>  	struct drm_printer p;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	seq_printf(m, "GT awake? %s (epoch %u)\n",
>  		   yesno(dev_priv->gt.awake), dev_priv->gt.epoch);
> @@ -3139,7 +3161,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>  	for_each_engine(engine, dev_priv, id)
>  		intel_engine_dump(engine, &p, "%s\n", engine->name);
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return 0;
>  }
> @@ -3252,6 +3274,7 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf,
>  {
>  	struct seq_file *m = file->private_data;
>  	struct drm_i915_private *dev_priv = m->private;
> +	intel_wakeref_t wakeref;
>  	int ret;
>  	bool enable;
>  
> @@ -3259,13 +3282,15 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf,
>  	if (ret < 0)
>  		return ret;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
> +
>  	if (!dev_priv->ipc_enabled && enable)
>  		DRM_INFO("Enabling IPC: WM will be proper only after next commit\n");
>  	dev_priv->wm.distrust_bios_wm = true;
>  	dev_priv->ipc_enabled = enable;
>  	intel_enable_ipc(dev_priv);
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return len;
>  }
> @@ -4031,11 +4056,12 @@ static int
>  i915_drop_caches_set(void *data, u64 val)
>  {
>  	struct drm_i915_private *i915 = data;
> +	intel_wakeref_t wakeref;
>  	int ret = 0;
>  
>  	DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
>  		  val, val & DROP_ALL);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	if (val & DROP_RESET_ACTIVE && !intel_engines_are_idle(i915))
>  		i915_gem_set_wedged(i915);
> @@ -4090,7 +4116,7 @@ i915_drop_caches_set(void *data, u64 val)
>  		i915_gem_drain_freed_objects(i915);
>  
>  out:
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  
>  	return ret;
>  }
> @@ -4103,16 +4129,17 @@ static int
>  i915_cache_sharing_get(void *data, u64 *val)
>  {
>  	struct drm_i915_private *dev_priv = data;
> +	intel_wakeref_t wakeref;
>  	u32 snpcr;
>  
>  	if (!(IS_GEN_RANGE(dev_priv, 6, 7)))
>  		return -ENODEV;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
>  
> @@ -4123,6 +4150,7 @@ static int
>  i915_cache_sharing_set(void *data, u64 val)
>  {
>  	struct drm_i915_private *dev_priv = data;
> +	intel_wakeref_t wakeref;
>  	u32 snpcr;
>  
>  	if (!(IS_GEN_RANGE(dev_priv, 6, 7)))
> @@ -4131,7 +4159,7 @@ i915_cache_sharing_set(void *data, u64 val)
>  	if (val > 3)
>  		return -EINVAL;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  	DRM_DEBUG_DRIVER("Manually setting uncore sharing to %llu\n", val);
>  
>  	/* Update the cache sharing policy here as well */
> @@ -4140,7 +4168,7 @@ i915_cache_sharing_set(void *data, u64 val)
>  	snpcr |= (val << GEN6_MBC_SNPCR_SHIFT);
>  	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  	return 0;
>  }
>  
> @@ -4362,6 +4390,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>  	struct sseu_dev_info sseu;
> +	intel_wakeref_t wakeref;
>  
>  	if (INTEL_GEN(dev_priv) < 8)
>  		return -ENODEV;
> @@ -4376,7 +4405,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
>  	sseu.max_eus_per_subslice =
>  		RUNTIME_INFO(dev_priv)->sseu.max_eus_per_subslice;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	if (IS_CHERRYVIEW(dev_priv)) {
>  		cherryview_sseu_device_status(dev_priv, &sseu);
> @@ -4388,7 +4417,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
>  		gen10_sseu_device_status(dev_priv, &sseu);
>  	}
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	i915_print_sseu_info(m, false, &sseu);
>  
> -- 
> 2.20.1
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* Re: [PATCH 09/46] drm/i915/perf: Track the rpm wakeref
  2019-01-07 11:54 ` [PATCH 09/46] drm/i915/perf: Track the rpm wakeref Chris Wilson
@ 2019-01-09 10:30   ` Mika Kuoppala
  2019-01-09 11:45     ` Chris Wilson
  0 siblings, 1 reply; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-09 10:30 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Keep track of our wakeref used to keep the device awake so we can catch
> any leak.
>
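
In miniature, the pattern every call site in this series is converted to
looks like this (just a sketch; intel_wakeref_t is the opaque cookie type
introduced earlier in the series):

	intel_wakeref_t wakeref;

	wakeref = intel_runtime_pm_get(i915);	/* record this acquisition */
	/* ... safe to touch the hardware here ... */
	intel_runtime_pm_put(i915, wakeref);	/* cancel exactly that one */
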
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |  2 ++
>  drivers/gpu/drm/i915/i915_perf.c | 10 +++++-----
>  2 files changed, 7 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a20bd2ec48de..bf25ae92f5de 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1320,6 +1320,8 @@ struct i915_perf_stream {
>  	 */
>  	struct list_head link;
>  
> +	intel_wakeref_t wakeref;
> +
>  	/**
>  	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
>  	 * properties given when opening a stream, representing the contents
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index e4dfd1477c78..b0cbad2e83c5 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1364,14 +1364,14 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>  
>  	free_oa_buffer(dev_priv);
>  
> +	put_oa_config(dev_priv, stream->oa_config);
> +

Hmm, you wanted to put this inside the wakeref, but
I fail to see the reason.
-Mika

>  	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, stream->wakeref);
>  
>  	if (stream->ctx)
>  		oa_put_render_ctx_id(stream);
>  
> -	put_oa_config(dev_priv, stream->oa_config);
> -
>  	if (dev_priv->perf.oa.spurious_report_rs.missed) {
>  		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
>  			 dev_priv->perf.oa.spurious_report_rs.missed);
> @@ -2087,7 +2087,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>  	 *   In our case we are expecting that taking pm + FORCEWAKE
>  	 *   references will effectively disable RC6.
>  	 */
> -	intel_runtime_pm_get(dev_priv);
> +	stream->wakeref = intel_runtime_pm_get(dev_priv);
>  	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
>  
>  	ret = alloc_oa_buffer(dev_priv);
> @@ -2123,7 +2123,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>  	put_oa_config(dev_priv, stream->oa_config);
>  
>  	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, stream->wakeref);
>  
>  err_config:
>  	if (stream->ctx)
> -- 
> 2.20.1
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* Re: [PATCH 10/46] drm/i915/pmu: Track rpm wakeref
  2019-01-07 11:54 ` [PATCH 10/46] drm/i915/pmu: Track " Chris Wilson
@ 2019-01-09 10:37   ` Mika Kuoppala
  0 siblings, 0 replies; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-09 10:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Track the wakeref used for temporary access to the device, and discard
> it upon release so that leaks can be identified.
>
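
For the sampling paths the conditional form is the interesting part, so
that the PMU never wakes the device just to sample it; condensed from the
diff below, the shape is:

	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
	if (!wakeref)
		return;	/* asleep: nothing to sample, and do not wake it */

	/* ... read the counters ... */

	intel_runtime_pm_put(dev_priv, wakeref);
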
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_pmu.c | 26 +++++++++++++++++---------
>  1 file changed, 17 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index c99fcfce79d5..3d43fc9dd25d 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -167,6 +167,7 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
>  {
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
> +	intel_wakeref_t wakeref;
>  	bool fw = false;
>  
>  	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
> @@ -175,7 +176,8 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
>  	if (!dev_priv->gt.awake)
>  		return;
>  
> -	if (!intel_runtime_pm_get_if_in_use(dev_priv))
> +	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
> +	if (!wakeref)
>  		return;
>  
>  	for_each_engine(engine, dev_priv, id) {
> @@ -210,7 +212,7 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
>  	if (fw)
>  		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  }
>  
>  static void
> @@ -227,11 +229,15 @@ frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
>  		u32 val;
>  
>  		val = dev_priv->gt_pm.rps.cur_freq;
> -		if (dev_priv->gt.awake &&
> -		    intel_runtime_pm_get_if_in_use(dev_priv)) {
> -			val = intel_get_cagf(dev_priv,
> -					     I915_READ_NOTRACE(GEN6_RPSTAT1));
> -			intel_runtime_pm_put_unchecked(dev_priv);
> +		if (dev_priv->gt.awake) {
> +			intel_wakeref_t wakeref =
> +				intel_runtime_pm_get_if_in_use(dev_priv);
> +
> +			if (wakeref) {
> +				val = intel_get_cagf(dev_priv,
> +						     I915_READ_NOTRACE(GEN6_RPSTAT1));
> +				intel_runtime_pm_put(dev_priv, wakeref);
> +			}
>  		}
>  
>  		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
> @@ -443,12 +449,14 @@ static u64 __get_rc6(struct drm_i915_private *i915)
>  static u64 get_rc6(struct drm_i915_private *i915)
>  {
>  #if IS_ENABLED(CONFIG_PM)
> +	intel_wakeref_t wakeref;
>  	unsigned long flags;
>  	u64 val;
>  
> -	if (intel_runtime_pm_get_if_in_use(i915)) {
> +	wakeref = intel_runtime_pm_get_if_in_use(i915);
> +	if (wakeref) {
>  		val = __get_rc6(i915);
> -		intel_runtime_pm_put_unchecked(i915);
> +		intel_runtime_pm_put(i915, wakeref);
>  
>  		/*
>  		 * If we are coming back from being runtime suspended we must
> -- 
> 2.20.1
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* Re: [PATCH 11/46] drm/i915/guc: Track the rpm wakeref
  2019-01-07 11:54 ` [PATCH 11/46] drm/i915/guc: Track the " Chris Wilson
@ 2019-01-09 10:53   ` Mika Kuoppala
  0 siblings, 0 replies; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-09 10:53 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Keep track of our acquired wakeref for interacting with the guc, so that
> we can cancel it upon release and so clearly identify leaks.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/intel_guc_log.c | 15 +++++++++------
>  drivers/gpu/drm/i915/intel_huc.c     |  5 +++--
>  2 files changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
> index 1b1581a42aa1..20c0b36d748e 100644
> --- a/drivers/gpu/drm/i915/intel_guc_log.c
> +++ b/drivers/gpu/drm/i915/intel_guc_log.c
> @@ -436,6 +436,7 @@ static void guc_log_capture_logs(struct intel_guc_log *log)
>  {
>  	struct intel_guc *guc = log_to_guc(log);
>  	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +	intel_wakeref_t wakeref;
>  
>  	guc_read_update_log_buffer(log);
>  
> @@ -443,9 +444,9 @@ static void guc_log_capture_logs(struct intel_guc_log *log)
>  	 * Generally device is expected to be active only at this
>  	 * time, so get/put should be really quick.
>  	 */
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  	guc_action_flush_log_complete(guc);
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  }
>  
>  int intel_guc_log_create(struct intel_guc_log *log)
> @@ -505,6 +506,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
>  {
>  	struct intel_guc *guc = log_to_guc(log);
>  	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +	intel_wakeref_t wakeref;
>  	int ret;
>  
>  	BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN != 0);
> @@ -524,11 +526,11 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
>  		goto out_unlock;
>  	}
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  	ret = guc_action_control_log(guc, GUC_LOG_LEVEL_IS_VERBOSE(level),
>  				     GUC_LOG_LEVEL_IS_ENABLED(level),
>  				     GUC_LOG_LEVEL_TO_VERBOSITY(level));
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  	if (ret) {
>  		DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret);
>  		goto out_unlock;
> @@ -601,6 +603,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
>  {
>  	struct intel_guc *guc = log_to_guc(log);
>  	struct drm_i915_private *i915 = guc_to_i915(guc);
> +	intel_wakeref_t wakeref;
>  
>  	/*
>  	 * Before initiating the forceful flush, wait for any pending/ongoing
> @@ -608,9 +611,9 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
>  	 */
>  	flush_work(&log->relay.flush_work);
>  
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  	guc_action_flush_log(guc);
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  
>  	/* GuC would have updated log buffer by now, so capture it */
>  	guc_log_capture_logs(log);
> diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c
> index c2b076e9bada..3e8c18b6a42d 100644
> --- a/drivers/gpu/drm/i915/intel_huc.c
> +++ b/drivers/gpu/drm/i915/intel_huc.c
> @@ -115,14 +115,15 @@ int intel_huc_auth(struct intel_huc *huc)
>  int intel_huc_check_status(struct intel_huc *huc)
>  {
>  	struct drm_i915_private *dev_priv = huc_to_i915(huc);
> +	intel_wakeref_t wakeref;
>  	bool status;
>  
>  	if (!HAS_HUC(dev_priv))
>  		return -ENODEV;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  	status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return status;
>  }
> -- 
> 2.20.1
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* Re: [PATCH 12/46] drm/i915/gem: Track the rpm wakerefs
  2019-01-07 11:54 ` [PATCH 12/46] drm/i915/gem: Track the rpm wakerefs Chris Wilson
@ 2019-01-09 11:16   ` Mika Kuoppala
  2019-01-09 23:45     ` John Harrison
  0 siblings, 1 reply; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-09 11:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Keep track of the temporary rpm wakerefs used for user access to the
> device, so that we can cancel them upon release and clearly identify any
> leaks.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c            | 47 +++++++++++++---------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  5 ++-
>  drivers/gpu/drm/i915/i915_gem_fence_reg.c  |  6 ++-
>  drivers/gpu/drm/i915/i915_gem_gtt.c        | 22 ++++++----
>  drivers/gpu/drm/i915/i915_gem_shrinker.c   | 32 +++++++++------
>  drivers/gpu/drm/i915/intel_engine_cs.c     | 12 ++++--
>  drivers/gpu/drm/i915/intel_uncore.c        |  5 ++-
>  7 files changed, 81 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 27f207cbabd9..e04dadeca879 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -786,6 +786,8 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
>  
>  void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
>  {
> +	intel_wakeref_t wakeref;
> +
>  	/*
>  	 * No actual flushing is required for the GTT write domain for reads
>  	 * from the GTT domain. Writes to it "immediately" go to main memory
> @@ -812,13 +814,13 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
>  
>  	i915_gem_chipset_flush(dev_priv);
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  	spin_lock_irq(&dev_priv->uncore.lock);
>  
>  	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
>  
>  	spin_unlock_irq(&dev_priv->uncore.lock);
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  }
>  
>  static void
> @@ -1070,6 +1072,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
>  {
>  	struct drm_i915_private *i915 = to_i915(obj->base.dev);
>  	struct i915_ggtt *ggtt = &i915->ggtt;
> +	intel_wakeref_t wakeref;
>  	struct drm_mm_node node;
>  	struct i915_vma *vma;
>  	void __user *user_data;
> @@ -1080,7 +1083,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
>  	if (ret)
>  		return ret;
>  
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
>  				       PIN_MAPPABLE |
>  				       PIN_NONFAULT |
> @@ -1153,7 +1156,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
>  		i915_vma_unpin(vma);
>  	}
>  out_unlock:
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  
>  	return ret;
> @@ -1254,6 +1257,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
>  {
>  	struct drm_i915_private *i915 = to_i915(obj->base.dev);
>  	struct i915_ggtt *ggtt = &i915->ggtt;
> +	intel_wakeref_t wakeref;
>  	struct drm_mm_node node;
>  	struct i915_vma *vma;
>  	u64 remain, offset;
> @@ -1272,13 +1276,14 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
>  		 * This easily dwarfs any performance advantage from
>  		 * using the cache bypass of indirect GGTT access.
>  		 */
> -		if (!intel_runtime_pm_get_if_in_use(i915)) {
> +		wakeref = intel_runtime_pm_get_if_in_use(i915);
> +		if (!wakeref) {
>  			ret = -EFAULT;
>  			goto out_unlock;
>  		}
>  	} else {
>  		/* No backing pages, no fallback, we must force GGTT access */
> -		intel_runtime_pm_get(i915);
> +		wakeref = intel_runtime_pm_get(i915);
>  	}
>  
>  	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
> @@ -1360,7 +1365,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
>  		i915_vma_unpin(vma);
>  	}
>  out_rpm:
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  out_unlock:
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return ret;
> @@ -1865,6 +1870,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct i915_ggtt *ggtt = &dev_priv->ggtt;
>  	bool write = area->vm_flags & VM_WRITE;
> +	intel_wakeref_t wakeref;
>  	struct i915_vma *vma;
>  	pgoff_t page_offset;
>  	int ret;
> @@ -1894,7 +1900,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>  	if (ret)
>  		goto err;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	ret = i915_mutex_lock_interruptible(dev);
>  	if (ret)
> @@ -1972,7 +1978,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>  err_unlock:
>  	mutex_unlock(&dev->struct_mutex);
>  err_rpm:
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  	i915_gem_object_unpin_pages(obj);
>  err:
>  	switch (ret) {
> @@ -2045,6 +2051,7 @@ void
>  i915_gem_release_mmap(struct drm_i915_gem_object *obj)
>  {
>  	struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +	intel_wakeref_t wakeref;
>  
>  	/* Serialisation between user GTT access and our code depends upon
>  	 * revoking the CPU's PTE whilst the mutex is held. The next user
> @@ -2055,7 +2062,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
>  	 * wakeref.
>  	 */
>  	lockdep_assert_held(&i915->drm.struct_mutex);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	if (!obj->userfault_count)
>  		goto out;
> @@ -2072,7 +2079,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
>  	wmb();
>  
>  out:
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  }
>  
>  void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
> @@ -4707,8 +4714,9 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
>  				    struct llist_node *freed)
>  {
>  	struct drm_i915_gem_object *obj, *on;
> +	intel_wakeref_t wakeref;
>  
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  	llist_for_each_entry_safe(obj, on, freed, freed) {
>  		struct i915_vma *vma, *vn;
>  
> @@ -4769,7 +4777,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
>  		if (on)
>  			cond_resched();
>  	}
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  }
>  
>  static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
> @@ -4878,11 +4886,13 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
>  
>  void i915_gem_sanitize(struct drm_i915_private *i915)
>  {
> +	intel_wakeref_t wakeref;
> +
>  	GEM_TRACE("\n");
>  
>  	mutex_lock(&i915->drm.struct_mutex);
>  
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
>  
>  	/*
> @@ -4905,7 +4915,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
>  	intel_engines_sanitize(i915, false);
>  
>  	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  
>  	i915_gem_contexts_lost(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
> @@ -4913,11 +4923,12 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
>  
>  int i915_gem_suspend(struct drm_i915_private *i915)
>  {
> +	intel_wakeref_t wakeref;
>  	int ret;
>  
>  	GEM_TRACE("\n");
>  
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  	intel_suspend_gt_powersave(i915);
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> @@ -4969,12 +4980,12 @@ int i915_gem_suspend(struct drm_i915_private *i915)
>  	if (WARN_ON(!intel_engines_are_idle(i915)))
>  		i915_gem_set_wedged(i915); /* no hope, discard everything */
>  
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	return 0;
>  
>  err_unlock:
>  	mutex_unlock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	return ret;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index a52fa42ed8b1..76bb1a89e530 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2203,6 +2203,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>  	struct i915_execbuffer eb;
>  	struct dma_fence *in_fence = NULL;
>  	struct sync_file *out_fence = NULL;
> +	intel_wakeref_t wakeref;
>  	int out_fence_fd = -1;
>  	int err;
>  
> @@ -2273,7 +2274,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>  	 * wakeref that we hold until the GPU has been idle for at least
>  	 * 100ms.
>  	 */
> -	intel_runtime_pm_get(eb.i915);
> +	wakeref = intel_runtime_pm_get(eb.i915);
>  
>  	err = i915_mutex_lock_interruptible(dev);
>  	if (err)
> @@ -2425,7 +2426,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>  		eb_release_vmas(&eb);
>  	mutex_unlock(&dev->struct_mutex);
>  err_rpm:
> -	intel_runtime_pm_put_unchecked(eb.i915);
> +	intel_runtime_pm_put(eb.i915, wakeref);
>  	i915_gem_context_put(eb.ctx);
>  err_destroy:
>  	eb_destroy(&eb);
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> index 1f72f5047945..e6edcd83450c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> @@ -210,6 +210,7 @@ static void fence_write(struct drm_i915_fence_reg *fence,
>  static int fence_update(struct drm_i915_fence_reg *fence,
>  			struct i915_vma *vma)
>  {
> +	intel_wakeref_t wakeref;
>  	int ret;
>  
>  	if (vma) {
> @@ -257,9 +258,10 @@ static int fence_update(struct drm_i915_fence_reg *fence,
>  	 * If the device is currently powered down, we will defer the write
>  	 * to the runtime resume, see i915_gem_restore_fences().
>  	 */
> -	if (intel_runtime_pm_get_if_in_use(fence->i915)) {
> +	wakeref = intel_runtime_pm_get_if_in_use(fence->i915);
> +	if (wakeref) {
>  		fence_write(fence, vma);
> -		intel_runtime_pm_put_unchecked(fence->i915);
> +		intel_runtime_pm_put(fence->i915, wakeref);
>  	}
>  
>  	if (vma) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 6dac9614f7ba..4bec10286487 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2528,6 +2528,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
>  {
>  	struct drm_i915_private *i915 = vma->vm->i915;
>  	struct drm_i915_gem_object *obj = vma->obj;
> +	intel_wakeref_t wakeref;
>  	u32 pte_flags;
>  
>  	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
> @@ -2535,9 +2536,9 @@ static int ggtt_bind_vma(struct i915_vma *vma,
>  	if (i915_gem_object_is_readonly(obj))
>  		pte_flags |= PTE_READ_ONLY;
>  
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  
>  	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
>  
> @@ -2554,10 +2555,11 @@ static int ggtt_bind_vma(struct i915_vma *vma,
>  static void ggtt_unbind_vma(struct i915_vma *vma)
>  {
>  	struct drm_i915_private *i915 = vma->vm->i915;
> +	intel_wakeref_t wakeref;
>  
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  }
>  
>  static int aliasing_gtt_bind_vma(struct i915_vma *vma,
> @@ -2589,9 +2591,11 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
>  	}
>  
>  	if (flags & I915_VMA_GLOBAL_BIND) {
> -		intel_runtime_pm_get(i915);
> +		intel_wakeref_t wakeref;
> +
> +		wakeref = intel_runtime_pm_get(i915);
>  		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
> -		intel_runtime_pm_put_unchecked(i915);
> +		intel_runtime_pm_put(i915, wakeref);
>  	}
>  
>  	return 0;
> @@ -2602,9 +2606,11 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
>  	struct drm_i915_private *i915 = vma->vm->i915;
>  
>  	if (vma->flags & I915_VMA_GLOBAL_BIND) {
> -		intel_runtime_pm_get(i915);
> +		intel_wakeref_t wakeref;
> +
> +		wakeref = intel_runtime_pm_get(i915);
>  		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
> -		intel_runtime_pm_put_unchecked(i915);
> +		intel_runtime_pm_put(i915, wakeref);
>  	}
>  
>  	if (vma->flags & I915_VMA_LOCAL_BIND) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 16693dd4d019..bc230e43b98f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -154,6 +154,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
>  		{ &i915->mm.bound_list, I915_SHRINK_BOUND },
>  		{ NULL, 0 },
>  	}, *phase;
> +	intel_wakeref_t wakeref = 0;
>  	unsigned long count = 0;
>  	unsigned long scanned = 0;
>  	bool unlock;
> @@ -183,9 +184,11 @@ i915_gem_shrink(struct drm_i915_private *i915,
>  	 * device just to recover a little memory. If absolutely necessary,
>  	 * we will force the wake during oom-notifier.
>  	 */
> -	if ((flags & I915_SHRINK_BOUND) &&
> -	    !intel_runtime_pm_get_if_in_use(i915))
> -		flags &= ~I915_SHRINK_BOUND;
> +	if (flags & I915_SHRINK_BOUND) {
> +		wakeref = intel_runtime_pm_get_if_in_use(i915);
> +		if (!wakeref)
> +			flags &= ~I915_SHRINK_BOUND;
> +	}
>  
>  	/*
>  	 * As we may completely rewrite the (un)bound list whilst unbinding
> @@ -266,7 +269,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
>  	}
>  
>  	if (flags & I915_SHRINK_BOUND)
> -		intel_runtime_pm_put_unchecked(i915);
> +		intel_runtime_pm_put(i915, wakeref);

This is ok but raises a question: do we have a
GEM_BUG_ON(wakeref == 0) in pm_put? Perhaps it is not strictly
needed, as we would find out anyway that no ref was taken for 0.
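
Something like this is what I had in mind (just a sketch; the untrack
helper name here is made up):

	void intel_runtime_pm_put(struct drm_i915_private *i915,
				  intel_wakeref_t wakeref)
	{
		GEM_BUG_ON(!wakeref);	/* 0 == never acquired */

		untrack_wakeref(i915, wakeref);	/* hypothetical: forget this cookie */
		intel_runtime_pm_put_unchecked(i915);
	}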

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

>  
>  	i915_retire_requests(i915);
>  
> @@ -293,14 +296,15 @@ i915_gem_shrink(struct drm_i915_private *i915,
>   */
>  unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
>  {
> +	intel_wakeref_t wakeref;
>  	unsigned long freed;
>  
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  	freed = i915_gem_shrink(i915, -1UL, NULL,
>  				I915_SHRINK_BOUND |
>  				I915_SHRINK_UNBOUND |
>  				I915_SHRINK_ACTIVE);
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  
>  	return freed;
>  }
> @@ -371,14 +375,16 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
>  					 I915_SHRINK_BOUND |
>  					 I915_SHRINK_UNBOUND);
>  	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
> -		intel_runtime_pm_get(i915);
> +		intel_wakeref_t wakeref;
> +
> +		wakeref = intel_runtime_pm_get(i915);
>  		freed += i915_gem_shrink(i915,
>  					 sc->nr_to_scan - sc->nr_scanned,
>  					 &sc->nr_scanned,
>  					 I915_SHRINK_ACTIVE |
>  					 I915_SHRINK_BOUND |
>  					 I915_SHRINK_UNBOUND);
> -		intel_runtime_pm_put_unchecked(i915);
> +		intel_runtime_pm_put(i915, wakeref);
>  	}
>  
>  	shrinker_unlock(i915, unlock);
> @@ -418,12 +424,13 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
>  		container_of(nb, struct drm_i915_private, mm.oom_notifier);
>  	struct drm_i915_gem_object *obj;
>  	unsigned long unevictable, bound, unbound, freed_pages;
> +	intel_wakeref_t wakeref;
>  
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  	freed_pages = i915_gem_shrink(i915, -1UL, NULL,
>  				      I915_SHRINK_BOUND |
>  				      I915_SHRINK_UNBOUND);
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  
>  	/* Because we may be allocating inside our own driver, we cannot
>  	 * assert that there are no objects with pinned pages that are not
> @@ -461,6 +468,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
>  		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
>  	struct i915_vma *vma, *next;
>  	unsigned long freed_pages = 0;
> +	intel_wakeref_t wakeref;
>  	bool unlock;
>  	int ret;
>  
> @@ -474,12 +482,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
>  	if (ret)
>  		goto out;
>  
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  	freed_pages += i915_gem_shrink(i915, -1UL, NULL,
>  				       I915_SHRINK_BOUND |
>  				       I915_SHRINK_UNBOUND |
>  				       I915_SHRINK_VMAPS);
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  
>  	/* We also want to clear any cached iomaps as they wrap vmap */
>  	list_for_each_entry_safe(vma, next,
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 85131166589c..bf4dae2649ab 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -913,10 +913,12 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
>  static bool ring_is_idle(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_private *dev_priv = engine->i915;
> +	intel_wakeref_t wakeref;
>  	bool idle = true;
>  
>  	/* If the whole device is asleep, the engine must be idle */
> -	if (!intel_runtime_pm_get_if_in_use(dev_priv))
> +	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
> +	if (!wakeref)
>  		return true;
>  
>  	/* First check that no commands are left in the ring */
> @@ -928,7 +930,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
>  	if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
>  		idle = false;
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return idle;
>  }
> @@ -1425,6 +1427,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>  	const struct intel_engine_execlists * const execlists = &engine->execlists;
>  	struct i915_gpu_error * const error = &engine->i915->gpu_error;
>  	struct i915_request *rq, *last;
> +	intel_wakeref_t wakeref;
>  	unsigned long flags;
>  	struct rb_node *rb;
>  	int count;
> @@ -1483,9 +1486,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>  
>  	rcu_read_unlock();
>  
> -	if (intel_runtime_pm_get_if_in_use(engine->i915)) {
> +	wakeref = intel_runtime_pm_get_if_in_use(engine->i915);
> +	if (wakeref) {
>  		intel_engine_print_registers(engine, m);
> -		intel_runtime_pm_put_unchecked(engine->i915);
> +		intel_runtime_pm_put(engine->i915, wakeref);
>  	} else {
>  		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
>  	}
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 8d4c76ac0e7d..d494d92da02c 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1670,6 +1670,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_i915_reg_read *reg = data;
>  	struct reg_whitelist const *entry;
> +	intel_wakeref_t wakeref;
>  	unsigned int flags;
>  	int remain;
>  	int ret = 0;
> @@ -1695,7 +1696,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>  
>  	flags = reg->offset & (entry->size - 1);
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  	if (entry->size == 8 && flags == I915_REG_READ_8B_WA)
>  		reg->val = I915_READ64_2x32(entry->offset_ldw,
>  					    entry->offset_udw);
> @@ -1709,7 +1710,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>  		reg->val = I915_READ8(entry->offset_ldw);
>  	else
>  		ret = -EINVAL;
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return ret;
>  }
> -- 
> 2.20.1
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* Re: [PATCH 13/46] drm/i915/fb: Track rpm wakerefs
  2019-01-07 11:54 ` [PATCH 13/46] drm/i915/fb: Track " Chris Wilson
@ 2019-01-09 11:39   ` Mika Kuoppala
  0 siblings, 0 replies; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-09 11:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Keep track of the rpm wakeref used for framebuffer access so that we can
> cancel it upon release and so more clearly identify leaks.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/intel_display.c | 5 +++--
>  drivers/gpu/drm/i915/intel_fbdev.c   | 9 +++++----
>  2 files changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index c6000aa47a8d..ea70cb8cf50a 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -2024,6 +2024,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
>  	struct drm_device *dev = fb->dev;
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> +	intel_wakeref_t wakeref;
>  	struct i915_vma *vma;
>  	unsigned int pinctl;
>  	u32 alignment;
> @@ -2047,7 +2048,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
>  	 * intel_runtime_pm_put(), so it is correct to wrap only the
>  	 * pin/unpin/fence and not more.
>  	 */
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
>  
> @@ -2102,7 +2103,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
>  err:
>  	atomic_dec(&dev_priv->gpu_error.pending_fb_pin);
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  	return vma;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> index 11d877b908e2..de14cd78aa0f 100644
> --- a/drivers/gpu/drm/i915/intel_fbdev.c
> +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> @@ -178,8 +178,9 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	const struct i915_ggtt_view view = {
>  		.type = I915_GGTT_VIEW_NORMAL,
>  	};
> -	struct fb_info *info;
>  	struct drm_framebuffer *fb;
> +	intel_wakeref_t wakeref;
> +	struct fb_info *info;
>  	struct i915_vma *vma;
>  	unsigned long flags = 0;
>  	bool prealloc = false;
> @@ -210,7 +211,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	}
>  
>  	mutex_lock(&dev->struct_mutex);
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	/* Pin the GGTT vma for our access via info->screen_base.
>  	 * This also validates that any existing fb inherited from the
> @@ -277,7 +278,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	ifbdev->vma = vma;
>  	ifbdev->vma_flags = flags;
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  	mutex_unlock(&dev->struct_mutex);
>  	vga_switcheroo_client_fb_set(pdev, info);
>  	return 0;
> @@ -285,7 +286,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  out_unpin:
>  	intel_unpin_fb_vma(vma, flags);
>  out_unlock:
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  	mutex_unlock(&dev->struct_mutex);
>  	return ret;
>  }
> -- 
> 2.20.1
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* Re: [PATCH 14/46] drm/i915/hotplug: Track temporary rpm wakeref
  2019-01-07 11:54 ` [PATCH 14/46] drm/i915/hotplug: Track temporary rpm wakeref Chris Wilson
@ 2019-01-09 11:40   ` Mika Kuoppala
  0 siblings, 0 replies; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-09 11:40 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Keep track of the temporary rpm wakeref inside hotplug detection, so
> that we can cancel it immediately upon release and so clearly identify
> leaks.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/intel_hotplug.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c
> index 067277ca7cff..6df8820b8b80 100644
> --- a/drivers/gpu/drm/i915/intel_hotplug.c
> +++ b/drivers/gpu/drm/i915/intel_hotplug.c
> @@ -227,9 +227,10 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
>  		container_of(work, typeof(*dev_priv),
>  			     hotplug.reenable_work.work);
>  	struct drm_device *dev = &dev_priv->drm;
> +	intel_wakeref_t wakeref;
>  	enum hpd_pin pin;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	spin_lock_irq(&dev_priv->irq_lock);
>  	for_each_hpd_pin(pin) {
> @@ -262,7 +263,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
>  		dev_priv->display.hpd_irq_setup(dev_priv);
>  	spin_unlock_irq(&dev_priv->irq_lock);
>  
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  }
>  
>  bool intel_encoder_hotplug(struct intel_encoder *encoder,
> -- 
> 2.20.1
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* Re: [PATCH 15/46] drm/i915/panel: Track temporary rpm wakeref
  2019-01-07 11:54 ` [PATCH 15/46] drm/i915/panel: " Chris Wilson
@ 2019-01-09 11:41   ` Mika Kuoppala
  0 siblings, 0 replies; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-09 11:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Keep track of the temporary rpm wakeref used for panel backlight access,
> so that we can cancel it immediately upon release and so more clearly
> identify leaks.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/intel_panel.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
> index c2b7455a023e..93a2e4b5c54c 100644
> --- a/drivers/gpu/drm/i915/intel_panel.c
> +++ b/drivers/gpu/drm/i915/intel_panel.c
> @@ -1203,17 +1203,18 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd)
>  	struct intel_connector *connector = bl_get_data(bd);
>  	struct drm_device *dev = connector->base.dev;
>  	struct drm_i915_private *dev_priv = to_i915(dev);
> +	intel_wakeref_t wakeref;
>  	u32 hw_level;
>  	int ret;
>  
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
>  
>  	hw_level = intel_panel_get_backlight(connector);
>  	ret = scale_hw_to_user(connector, hw_level, bd->props.max_brightness);
>  
>  	drm_modeset_unlock(&dev->mode_config.connection_mutex);
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  
>  	return ret;
>  }
> -- 
> 2.20.1
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* Re: [PATCH 09/46] drm/i915/perf: Track the rpm wakeref
  2019-01-09 10:30   ` Mika Kuoppala
@ 2019-01-09 11:45     ` Chris Wilson
  0 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-09 11:45 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx; +Cc: Jani Nikula

Quoting Mika Kuoppala (2019-01-09 10:30:56)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Keep track of our wakeref used to keep the device awake so we can catch
> > any leak.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Jani Nikula <jani.nikula@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h  |  2 ++
> >  drivers/gpu/drm/i915/i915_perf.c | 10 +++++-----
> >  2 files changed, 7 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index a20bd2ec48de..bf25ae92f5de 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1320,6 +1320,8 @@ struct i915_perf_stream {
> >        */
> >       struct list_head link;
> >  
> > +     intel_wakeref_t wakeref;
> > +
> >       /**
> >        * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
> >        * properties given when opening a stream, representing the contents
> > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> > index e4dfd1477c78..b0cbad2e83c5 100644
> > --- a/drivers/gpu/drm/i915/i915_perf.c
> > +++ b/drivers/gpu/drm/i915/i915_perf.c
> > @@ -1364,14 +1364,14 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
> >  
> >       free_oa_buffer(dev_priv);
> >  
> > +     put_oa_config(dev_priv, stream->oa_config);
> > +
> 
> Hmm you wanted to put this inside the wakeref. But
> I fail to see the reason.

I thought I undid it. Hazy memory says setup does it inside, but
teardown outside; consistency!
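
i.e. with the patch, both the error path in init and the destroy path end
up in the same order:

	put_oa_config(dev_priv, stream->oa_config);

	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	intel_runtime_pm_put(dev_priv, stream->wakeref);
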
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* Re: [PATCH 08/46] drm/i915: Mark up debugfs with rpm wakeref tracking
  2019-01-09 10:20   ` Mika Kuoppala
@ 2019-01-09 11:49     ` Chris Wilson
  0 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-09 11:49 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx; +Cc: Jani Nikula

Quoting Mika Kuoppala (2019-01-09 10:20:26)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> > @@ -1735,29 +1743,30 @@ static int i915_emon_status(struct seq_file *m, void *unused)
> >       struct drm_i915_private *dev_priv = node_to_i915(m->private);
> >       struct drm_device *dev = &dev_priv->drm;
> >       unsigned long temp, chipset, gfx;
> > +     intel_wakeref_t wakeref;
> >       int ret;
> >  
> >       if (!IS_GEN(dev_priv, 5))
> >               return -ENODEV;
> >  
> > -     intel_runtime_pm_get(dev_priv);
> > -
> >       ret = mutex_lock_interruptible(&dev->struct_mutex);
> >       if (ret)
> >               return ret;
> >  
> > +     wakeref = intel_runtime_pm_get(dev_priv);
> > +
> >       temp = i915_mch_val(dev_priv);
> >       chipset = i915_chipset_val(dev_priv);
> >       gfx = i915_gfx_val(dev_priv);
> >       mutex_unlock(&dev->struct_mutex);
> >  
> > +     intel_runtime_pm_put(dev_priv, wakeref);
> > +
> 
> I am a little surprised if this was the only callsite
> for tighter scoping in this file.

It's a recent regression. (Despite a patch to fix it correctly... Bitter,
moi?)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* Re: [PATCH 04/46] drm/i915: Markup paired operations on wakerefs
  2019-01-09  9:23       ` Mika Kuoppala
@ 2019-01-09 11:51         ` Chris Wilson
  2019-01-09 23:33           ` John Harrison
  0 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-09 11:51 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx; +Cc: Jani Nikula

Quoting Mika Kuoppala (2019-01-09 09:23:53)
> I should have been more specific. My concern was about documenting
> the changed return values.

The interface isn't documented; there's nothing in the header about these
functions. Where else would it be?
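
If we did want it spelled out, a kerneldoc comment on the declaration
would presumably be the place, something like (a sketch, not part of the
patch):

	/**
	 * intel_runtime_pm_get - grab a runtime pm reference
	 * @i915: i915 device instance
	 *
	 * Returns: an intel_wakeref_t cookie that must be passed back to the
	 * matching intel_runtime_pm_put() when the reference is released.
	 */
	intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915);
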
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* Re: [PATCH 16/46] drm/i915/selftests: Mark up rpm wakerefs
  2019-01-07 11:54 ` [PATCH 16/46] drm/i915/selftests: Mark up rpm wakerefs Chris Wilson
@ 2019-01-09 12:54   ` Mika Kuoppala
  0 siblings, 0 replies; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-09 12:54 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Track the temporary wakerefs used within the selftests so that leaks are
> clearly identified.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>
> ---
>  drivers/gpu/drm/i915/selftests/huge_pages.c   |  5 ++--
>  drivers/gpu/drm/i915/selftests/i915_gem.c     | 29 ++++++++++++-------
>  .../drm/i915/selftests/i915_gem_coherency.c   |  5 ++--
>  .../gpu/drm/i915/selftests/i915_gem_context.c | 27 ++++++++++-------
>  .../gpu/drm/i915/selftests/i915_gem_evict.c   | 11 ++++---
>  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 10 ++++---
>  .../gpu/drm/i915/selftests/i915_gem_object.c  | 18 ++++++++----
>  drivers/gpu/drm/i915/selftests/i915_request.c | 22 ++++++++------
>  drivers/gpu/drm/i915/selftests/intel_guc.c    | 10 ++++---
>  .../gpu/drm/i915/selftests/intel_hangcheck.c  | 15 ++++++----
>  drivers/gpu/drm/i915/selftests/intel_lrc.c    | 25 +++++++++-------
>  .../drm/i915/selftests/intel_workarounds.c    | 27 ++++++++++-------
>  12 files changed, 126 insertions(+), 78 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
> index 731dfd3d3fc8..c7a4599173bb 100644
> --- a/drivers/gpu/drm/i915/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
> @@ -1760,6 +1760,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
>  	};
>  	struct drm_file *file;
>  	struct i915_gem_context *ctx;
> +	intel_wakeref_t wakeref;
>  	int err;
>  
>  	if (!HAS_PPGTT(dev_priv)) {
> @@ -1775,7 +1776,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
>  		return PTR_ERR(file);
>  
>  	mutex_lock(&dev_priv->drm.struct_mutex);
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	ctx = live_context(dev_priv, file);
>  	if (IS_ERR(ctx)) {
> @@ -1789,7 +1790,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
>  	err = i915_subtests(tests, ctx);
>  
>  out_unlock:
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
>  
>  	mock_file_free(dev_priv, file);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
> index 762e1a7125f5..01a46c46fe25 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
> @@ -16,9 +16,10 @@ static int switch_to_context(struct drm_i915_private *i915,
>  {
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
> +	intel_wakeref_t wakeref;
>  	int err = 0;
>  
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	for_each_engine(engine, i915, id) {
>  		struct i915_request *rq;
> @@ -32,7 +33,7 @@ static int switch_to_context(struct drm_i915_private *i915,
>  		i915_request_add(rq);
>  	}
>  
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  
>  	return err;
>  }
> @@ -65,7 +66,9 @@ static void trash_stolen(struct drm_i915_private *i915)
>  
>  static void simulate_hibernate(struct drm_i915_private *i915)
>  {
> -	intel_runtime_pm_get(i915);
> +	intel_wakeref_t wakeref;
> +
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	/*
>  	 * As a final sting in the tail, invalidate stolen. Under a real S4,
> @@ -76,7 +79,7 @@ static void simulate_hibernate(struct drm_i915_private *i915)
>  	 */
>  	trash_stolen(i915);
>  
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  }
>  
>  static int pm_prepare(struct drm_i915_private *i915)
> @@ -93,39 +96,45 @@ static int pm_prepare(struct drm_i915_private *i915)
>  
>  static void pm_suspend(struct drm_i915_private *i915)
>  {
> -	intel_runtime_pm_get(i915);
> +	intel_wakeref_t wakeref;
> +
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	i915_gem_suspend_gtt_mappings(i915);
>  	i915_gem_suspend_late(i915);
>  
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  }
>  
>  static void pm_hibernate(struct drm_i915_private *i915)
>  {
> -	intel_runtime_pm_get(i915);
> +	intel_wakeref_t wakeref;
> +
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	i915_gem_suspend_gtt_mappings(i915);
>  
>  	i915_gem_freeze(i915);
>  	i915_gem_freeze_late(i915);
>  
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  }
>  
>  static void pm_resume(struct drm_i915_private *i915)
>  {
> +	intel_wakeref_t wakeref;
> +
>  	/*
>  	 * Both suspend and hibernate follow the same wakeup path and assume
>  	 * that runtime-pm just works.
>  	 */
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	intel_engines_sanitize(i915, false);
>  	i915_gem_sanitize(i915);
>  	i915_gem_resume(i915);
>  
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  }
>  
>  static int igt_gem_suspend(void *arg)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> index eea4fc2445ae..fd89a5a33c1a 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> @@ -279,6 +279,7 @@ static int igt_gem_coherency(void *arg)
>  	struct drm_i915_private *i915 = arg;
>  	const struct igt_coherency_mode *read, *write, *over;
>  	struct drm_i915_gem_object *obj;
> +	intel_wakeref_t wakeref;
>  	unsigned long count, n;
>  	u32 *offsets, *values;
>  	int err = 0;
> @@ -298,7 +299,7 @@ static int igt_gem_coherency(void *arg)
>  	values = offsets + ncachelines;
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  	for (over = igt_coherency_mode; over->name; over++) {
>  		if (!over->set)
>  			continue;
> @@ -376,7 +377,7 @@ static int igt_gem_coherency(void *arg)
>  		}
>  	}
>  unlock:
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	kfree(offsets);
>  	return err;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 6e1a0711d201..7a9b1f20b019 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -119,6 +119,7 @@ static int live_nop_switch(void *arg)
>  	struct intel_engine_cs *engine;
>  	struct i915_gem_context **ctx;
>  	enum intel_engine_id id;
> +	intel_wakeref_t wakeref;
>  	struct drm_file *file;
>  	struct live_test t;
>  	unsigned long n;
> @@ -140,7 +141,7 @@ static int live_nop_switch(void *arg)
>  		return PTR_ERR(file);
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
>  	if (!ctx) {
> @@ -243,7 +244,7 @@ static int live_nop_switch(void *arg)
>  	}
>  
>  out_unlock:
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	mock_file_free(i915, file);
>  	return err;
> @@ -593,6 +594,8 @@ static int igt_ctx_exec(void *arg)
>  		}
>  
>  		for_each_engine(engine, i915, id) {
> +			intel_wakeref_t wakeref;
> +
>  			if (!engine->context_size)
>  				continue; /* No logical context support in HW */
>  
> @@ -607,9 +610,9 @@ static int igt_ctx_exec(void *arg)
>  				}
>  			}
>  
> -			intel_runtime_pm_get(i915);
> +			wakeref = intel_runtime_pm_get(i915);
>  			err = gpu_fill(obj, ctx, engine, dw);
> -			intel_runtime_pm_put_unchecked(i915);
> +			intel_runtime_pm_put(i915, wakeref);
>  			if (err) {
>  				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
>  				       ndwords, dw, max_dwords(obj),
> @@ -699,6 +702,8 @@ static int igt_ctx_readonly(void *arg)
>  		unsigned int id;
>  
>  		for_each_engine(engine, i915, id) {
> +			intel_wakeref_t wakeref;
> +
>  			if (!intel_engine_can_store_dword(engine))
>  				continue;
>  
> @@ -713,9 +718,9 @@ static int igt_ctx_readonly(void *arg)
>  					i915_gem_object_set_readonly(obj);
>  			}
>  
> -			intel_runtime_pm_get(i915);
> +			wakeref = intel_runtime_pm_get(i915);
>  			err = gpu_fill(obj, ctx, engine, dw);
> -			intel_runtime_pm_put_unchecked(i915);
> +			intel_runtime_pm_put(i915, wakeref);
>  			if (err) {
>  				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
>  				       ndwords, dw, max_dwords(obj),
> @@ -976,6 +981,7 @@ static int igt_vm_isolation(void *arg)
>  	struct drm_i915_private *i915 = arg;
>  	struct i915_gem_context *ctx_a, *ctx_b;
>  	struct intel_engine_cs *engine;
> +	intel_wakeref_t wakeref;
>  	struct drm_file *file;
>  	I915_RND_STATE(prng);
>  	unsigned long count;
> @@ -1022,7 +1028,7 @@ static int igt_vm_isolation(void *arg)
>  	GEM_BUG_ON(ctx_b->ppgtt->vm.total != vm_total);
>  	vm_total -= I915_GTT_PAGE_SIZE;
>  
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	count = 0;
>  	for_each_engine(engine, i915, id) {
> @@ -1067,7 +1073,7 @@ static int igt_vm_isolation(void *arg)
>  		count, RUNTIME_INFO(i915)->num_rings);
>  
>  out_rpm:
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  out_unlock:
>  	if (end_live_test(&t))
>  		err = -EIO;
> @@ -1165,6 +1171,7 @@ static int igt_switch_to_kernel_context(void *arg)
>  	struct intel_engine_cs *engine;
>  	struct i915_gem_context *ctx;
>  	enum intel_engine_id id;
> +	intel_wakeref_t wakeref;
>  	int err;
>  
>  	/*
> @@ -1175,7 +1182,7 @@ static int igt_switch_to_kernel_context(void *arg)
>  	 */
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	ctx = kernel_context(i915);
>  	if (IS_ERR(ctx)) {
> @@ -1200,7 +1207,7 @@ static int igt_switch_to_kernel_context(void *arg)
>  	if (igt_flush_test(i915, I915_WAIT_LOCKED))
>  		err = -EIO;
>  
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  
>  	kernel_context_close(ctx);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> index 8d22f73a9b63..e1ff6a1c2cb0 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> @@ -336,6 +336,7 @@ static int igt_evict_contexts(void *arg)
>  		struct drm_mm_node node;
>  		struct reserved *next;
>  	} *reserved = NULL;
> +	intel_wakeref_t wakeref;
>  	struct drm_mm_node hole;
>  	unsigned long count;
>  	int err;
> @@ -355,7 +356,7 @@ static int igt_evict_contexts(void *arg)
>  		return 0;
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	/* Reserve a block so that we know we have enough to fit a few rq */
>  	memset(&hole, 0, sizeof(hole));
> @@ -400,8 +401,10 @@ static int igt_evict_contexts(void *arg)
>  		struct drm_file *file;
>  
>  		file = mock_file(i915);
> -		if (IS_ERR(file))
> -			return PTR_ERR(file);
> +		if (IS_ERR(file)) {
> +			err = PTR_ERR(file);
> +			break;
> +		}

Among the repetitions, this one woke me up: it looked like we break
with struct_mutex held, but we don't. And the drm mm node removal in
the unwind seems to cope as well; a rough sketch of the flow is below.

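Paraphrasing from the hunks here and the surrounding code (not all of
it is visible in this diff, so take the exact flow with a grain of
salt): the break lands before the loop body takes the lock, and the
unwind re-locks before touching the node:

	do {
		file = mock_file(i915);
		if (IS_ERR(file)) {
			err = PTR_ERR(file);
			break;	/* struct_mutex is not held here */
		}

		mutex_lock(&i915->drm.struct_mutex);
		/* ... build requests against the hole ... */
		mutex_unlock(&i915->drm.struct_mutex);
	} while (1);

	mutex_lock(&i915->drm.struct_mutex);	/* retaken for the unwind */
	if (drm_mm_node_allocated(&hole))
		drm_mm_remove_node(&hole);
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
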
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

>  
>  		count = 0;
>  		mutex_lock(&i915->drm.struct_mutex);
> @@ -464,7 +467,7 @@ static int igt_evict_contexts(void *arg)
>  	}
>  	if (drm_mm_node_allocated(&hole))
>  		drm_mm_remove_node(&hole);
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  
>  	return err;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index 87cb0602a5fc..fea8ab14e79d 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -275,6 +275,7 @@ static int lowlevel_hole(struct drm_i915_private *i915,
>  
>  		for (n = 0; n < count; n++) {
>  			u64 addr = hole_start + order[n] * BIT_ULL(size);
> +			intel_wakeref_t wakeref;
>  
>  			GEM_BUG_ON(addr + BIT_ULL(size) > vm->total);
>  
> @@ -293,9 +294,9 @@ static int lowlevel_hole(struct drm_i915_private *i915,
>  			mock_vma.node.size = BIT_ULL(size);
>  			mock_vma.node.start = addr;
>  
> -			intel_runtime_pm_get(i915);
> +			wakeref = intel_runtime_pm_get(i915);
>  			vm->insert_entries(vm, &mock_vma, I915_CACHE_NONE, 0);
> -			intel_runtime_pm_put_unchecked(i915);
> +			intel_runtime_pm_put(i915, wakeref);
>  		}
>  		count = n;
>  
> @@ -1144,6 +1145,7 @@ static int igt_ggtt_page(void *arg)
>  	struct drm_i915_private *i915 = arg;
>  	struct i915_ggtt *ggtt = &i915->ggtt;
>  	struct drm_i915_gem_object *obj;
> +	intel_wakeref_t wakeref;
>  	struct drm_mm_node tmp;
>  	unsigned int *order, n;
>  	int err;
> @@ -1169,7 +1171,7 @@ static int igt_ggtt_page(void *arg)
>  	if (err)
>  		goto out_unpin;
>  
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	for (n = 0; n < count; n++) {
>  		u64 offset = tmp.start + n * PAGE_SIZE;
> @@ -1216,7 +1218,7 @@ static int igt_ggtt_page(void *arg)
>  	kfree(order);
>  out_remove:
>  	ggtt->vm.clear_range(&ggtt->vm, tmp.start, tmp.size);
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	drm_mm_remove_node(&tmp);
>  out_unpin:
>  	i915_gem_object_unpin_pages(obj);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> index b03890c590d7..3575e1387c3f 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> @@ -308,6 +308,7 @@ static int igt_partial_tiling(void *arg)
>  	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
>  	struct drm_i915_private *i915 = arg;
>  	struct drm_i915_gem_object *obj;
> +	intel_wakeref_t wakeref;
>  	int tiling;
>  	int err;
>  
> @@ -333,7 +334,7 @@ static int igt_partial_tiling(void *arg)
>  	}
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	if (1) {
>  		IGT_TIMEOUT(end);
> @@ -444,7 +445,7 @@ next_tiling: ;
>  	}
>  
>  out_unlock:
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	i915_gem_object_unpin_pages(obj);
>  out:
> @@ -506,11 +507,14 @@ static void disable_retire_worker(struct drm_i915_private *i915)
>  
>  	mutex_lock(&i915->drm.struct_mutex);
>  	if (!i915->gt.active_requests++) {
> -		intel_runtime_pm_get(i915);
> +		intel_wakeref_t wakeref;
> +
> +		wakeref = intel_runtime_pm_get(i915);
>  		i915_gem_unpark(i915);
> -		intel_runtime_pm_put_unchecked(i915);
> +		intel_runtime_pm_put(i915, wakeref);
>  	}
>  	mutex_unlock(&i915->drm.struct_mutex);
> +
>  	cancel_delayed_work_sync(&i915->gt.retire_work);
>  	cancel_delayed_work_sync(&i915->gt.idle_work);
>  }
> @@ -578,6 +582,8 @@ static int igt_mmap_offset_exhaustion(void *arg)
>  
>  	/* Now fill with busy dead objects that we expect to reap */
>  	for (loop = 0; loop < 3; loop++) {
> +		intel_wakeref_t wakeref;
> +
>  		if (i915_terminally_wedged(&i915->gpu_error))
>  			break;
>  
> @@ -588,9 +594,9 @@ static int igt_mmap_offset_exhaustion(void *arg)
>  		}
>  
>  		mutex_lock(&i915->drm.struct_mutex);
> -		intel_runtime_pm_get(i915);
> +		wakeref = intel_runtime_pm_get(i915);
>  		err = make_obj_busy(obj);
> -		intel_runtime_pm_put_unchecked(i915);
> +		intel_runtime_pm_put(i915, wakeref);
>  		mutex_unlock(&i915->drm.struct_mutex);
>  		if (err) {
>  			pr_err("[loop %d] Failed to busy the object\n", loop);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index e8880cabd5c7..8b73a8c21377 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -332,6 +332,7 @@ static int live_nop_request(void *arg)
>  {
>  	struct drm_i915_private *i915 = arg;
>  	struct intel_engine_cs *engine;
> +	intel_wakeref_t wakeref;
>  	struct live_test t;
>  	unsigned int id;
>  	int err = -ENODEV;
> @@ -342,7 +343,7 @@ static int live_nop_request(void *arg)
>  	 */
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	for_each_engine(engine, i915, id) {
>  		struct i915_request *request = NULL;
> @@ -403,7 +404,7 @@ static int live_nop_request(void *arg)
>  	}
>  
>  out_unlock:
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return err;
>  }
> @@ -478,8 +479,9 @@ static int live_empty_request(void *arg)
>  {
>  	struct drm_i915_private *i915 = arg;
>  	struct intel_engine_cs *engine;
> -	struct live_test t;
> +	intel_wakeref_t wakeref;
>  	struct i915_vma *batch;
> +	struct live_test t;
>  	unsigned int id;
>  	int err = 0;
>  
> @@ -489,7 +491,7 @@ static int live_empty_request(void *arg)
>  	 */
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	batch = empty_batch(i915);
>  	if (IS_ERR(batch)) {
> @@ -553,7 +555,7 @@ static int live_empty_request(void *arg)
>  	i915_vma_unpin(batch);
>  	i915_vma_put(batch);
>  out_unlock:
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return err;
>  }
> @@ -637,6 +639,7 @@ static int live_all_engines(void *arg)
>  	struct drm_i915_private *i915 = arg;
>  	struct intel_engine_cs *engine;
>  	struct i915_request *request[I915_NUM_ENGINES];
> +	intel_wakeref_t wakeref;
>  	struct i915_vma *batch;
>  	struct live_test t;
>  	unsigned int id;
> @@ -648,7 +651,7 @@ static int live_all_engines(void *arg)
>  	 */
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	err = begin_live_test(&t, i915, __func__, "");
>  	if (err)
> @@ -731,7 +734,7 @@ static int live_all_engines(void *arg)
>  	i915_vma_unpin(batch);
>  	i915_vma_put(batch);
>  out_unlock:
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return err;
>  }
> @@ -742,6 +745,7 @@ static int live_sequential_engines(void *arg)
>  	struct i915_request *request[I915_NUM_ENGINES] = {};
>  	struct i915_request *prev = NULL;
>  	struct intel_engine_cs *engine;
> +	intel_wakeref_t wakeref;
>  	struct live_test t;
>  	unsigned int id;
>  	int err;
> @@ -753,7 +757,7 @@ static int live_sequential_engines(void *arg)
>  	 */
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	err = begin_live_test(&t, i915, __func__, "");
>  	if (err)
> @@ -860,7 +864,7 @@ static int live_sequential_engines(void *arg)
>  		i915_request_put(request[id]);
>  	}
>  out_unlock:
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return err;
>  }
> diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c
> index 3590ba3d8897..c5e0a0e98fcb 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_guc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c
> @@ -137,12 +137,13 @@ static bool client_doorbell_in_sync(struct intel_guc_client *client)
>  static int igt_guc_clients(void *args)
>  {
>  	struct drm_i915_private *dev_priv = args;
> +	intel_wakeref_t wakeref;
>  	struct intel_guc *guc;
>  	int err = 0;
>  
>  	GEM_BUG_ON(!HAS_GUC(dev_priv));
>  	mutex_lock(&dev_priv->drm.struct_mutex);
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	guc = &dev_priv->guc;
>  	if (!guc) {
> @@ -225,7 +226,7 @@ static int igt_guc_clients(void *args)
>  	guc_clients_create(guc);
>  	guc_clients_enable(guc);
>  unlock:
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
>  	return err;
>  }
> @@ -238,13 +239,14 @@ static int igt_guc_clients(void *args)
>  static int igt_guc_doorbells(void *arg)
>  {
>  	struct drm_i915_private *dev_priv = arg;
> +	intel_wakeref_t wakeref;
>  	struct intel_guc *guc;
>  	int i, err = 0;
>  	u16 db_id;
>  
>  	GEM_BUG_ON(!HAS_GUC(dev_priv));
>  	mutex_lock(&dev_priv->drm.struct_mutex);
> -	intel_runtime_pm_get(dev_priv);
> +	wakeref = intel_runtime_pm_get(dev_priv);
>  
>  	guc = &dev_priv->guc;
>  	if (!guc) {
> @@ -337,7 +339,7 @@ static int igt_guc_doorbells(void *arg)
>  			guc_client_free(clients[i]);
>  		}
>  unlock:
> -	intel_runtime_pm_put_unchecked(dev_priv);
> +	intel_runtime_pm_put(dev_priv, wakeref);
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
>  	return err;
>  }
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index 33bd3c4b6fa3..12550b55c42f 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -388,12 +388,13 @@ static int igt_global_reset(void *arg)
>  static int igt_wedged_reset(void *arg)
>  {
>  	struct drm_i915_private *i915 = arg;
> +	intel_wakeref_t wakeref;
>  
>  	/* Check that we can recover a wedged device with a GPU reset */
>  
>  	igt_global_reset_lock(i915);
>  	mutex_lock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	i915_gem_set_wedged(i915);
>  	GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error));
> @@ -402,7 +403,7 @@ static int igt_wedged_reset(void *arg)
>  	i915_reset(i915, ALL_ENGINES, NULL);
>  	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
>  
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	igt_global_reset_unlock(i915);
>  
> @@ -1600,6 +1601,7 @@ static int igt_atomic_reset(void *arg)
>  		{ }
>  	};
>  	struct drm_i915_private *i915 = arg;
> +	intel_wakeref_t wakeref;
>  	int err = 0;
>  
>  	/* Check that the resets are usable from atomic context */
> @@ -1609,7 +1611,7 @@ static int igt_atomic_reset(void *arg)
>  
>  	igt_global_reset_lock(i915);
>  	mutex_lock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	/* Flush any requests before we get started and check basics */
>  	force_reset(i915);
> @@ -1636,7 +1638,7 @@ static int igt_atomic_reset(void *arg)
>  	force_reset(i915);
>  
>  unlock:
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	igt_global_reset_unlock(i915);
>  
> @@ -1660,6 +1662,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
>  		SUBTEST(igt_handle_error),
>  		SUBTEST(igt_atomic_reset),
>  	};
> +	intel_wakeref_t wakeref;
>  	bool saved_hangcheck;
>  	int err;
>  
> @@ -1669,7 +1672,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
>  	if (i915_terminally_wedged(&i915->gpu_error))
>  		return -EIO; /* we're long past hope of a successful reset */
>  
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  	saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
>  
>  	err = i915_subtests(tests, i915);
> @@ -1679,7 +1682,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
>  	mutex_unlock(&i915->drm.struct_mutex);
>  
>  	i915_modparams.enable_hangcheck = saved_hangcheck;
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  
>  	return err;
>  }
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index ac1b18a17f3c..e6073cd4719c 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -18,13 +18,14 @@ static int live_sanitycheck(void *arg)
>  	struct i915_gem_context *ctx;
>  	enum intel_engine_id id;
>  	struct igt_spinner spin;
> +	intel_wakeref_t wakeref;
>  	int err = -ENOMEM;
>  
>  	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
>  		return 0;
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	if (igt_spinner_init(&spin, i915))
>  		goto err_unlock;
> @@ -65,7 +66,7 @@ static int live_sanitycheck(void *arg)
>  	igt_spinner_fini(&spin);
>  err_unlock:
>  	igt_flush_test(i915, I915_WAIT_LOCKED);
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return err;
>  }
> @@ -77,13 +78,14 @@ static int live_preempt(void *arg)
>  	struct igt_spinner spin_hi, spin_lo;
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
> +	intel_wakeref_t wakeref;
>  	int err = -ENOMEM;
>  
>  	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
>  		return 0;
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	if (igt_spinner_init(&spin_hi, i915))
>  		goto err_unlock;
> @@ -158,7 +160,7 @@ static int live_preempt(void *arg)
>  	igt_spinner_fini(&spin_hi);
>  err_unlock:
>  	igt_flush_test(i915, I915_WAIT_LOCKED);
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return err;
>  }
> @@ -171,13 +173,14 @@ static int live_late_preempt(void *arg)
>  	struct intel_engine_cs *engine;
>  	struct i915_sched_attr attr = {};
>  	enum intel_engine_id id;
> +	intel_wakeref_t wakeref;
>  	int err = -ENOMEM;
>  
>  	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
>  		return 0;
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	if (igt_spinner_init(&spin_hi, i915))
>  		goto err_unlock;
> @@ -251,7 +254,7 @@ static int live_late_preempt(void *arg)
>  	igt_spinner_fini(&spin_hi);
>  err_unlock:
>  	igt_flush_test(i915, I915_WAIT_LOCKED);
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return err;
>  
> @@ -270,6 +273,7 @@ static int live_preempt_hang(void *arg)
>  	struct igt_spinner spin_hi, spin_lo;
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
> +	intel_wakeref_t wakeref;
>  	int err = -ENOMEM;
>  
>  	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
> @@ -279,7 +283,7 @@ static int live_preempt_hang(void *arg)
>  		return 0;
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	if (igt_spinner_init(&spin_hi, i915))
>  		goto err_unlock;
> @@ -374,7 +378,7 @@ static int live_preempt_hang(void *arg)
>  	igt_spinner_fini(&spin_hi);
>  err_unlock:
>  	igt_flush_test(i915, I915_WAIT_LOCKED);
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	return err;
>  }
> @@ -562,6 +566,7 @@ static int live_preempt_smoke(void *arg)
>  		.ncontext = 1024,
>  	};
>  	const unsigned int phase[] = { 0, BATCH };
> +	intel_wakeref_t wakeref;
>  	int err = -ENOMEM;
>  	u32 *cs;
>  	int n;
> @@ -576,7 +581,7 @@ static int live_preempt_smoke(void *arg)
>  		return -ENOMEM;
>  
>  	mutex_lock(&smoke.i915->drm.struct_mutex);
> -	intel_runtime_pm_get(smoke.i915);
> +	wakeref = intel_runtime_pm_get(smoke.i915);
>  
>  	smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE);
>  	if (IS_ERR(smoke.batch)) {
> @@ -627,7 +632,7 @@ static int live_preempt_smoke(void *arg)
>  err_batch:
>  	i915_gem_object_put(smoke.batch);
>  err_unlock:
> -	intel_runtime_pm_put_unchecked(smoke.i915);
> +	intel_runtime_pm_put(smoke.i915, wakeref);
>  	mutex_unlock(&smoke.i915->drm.struct_mutex);
>  	kfree(smoke.contexts);
>  
> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> index 54f5c2de3d08..47e62e1999a9 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> @@ -15,10 +15,11 @@
>  static struct drm_i915_gem_object *
>  read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
>  {
> +	const u32 base = engine->mmio_base;
>  	struct drm_i915_gem_object *result;
> +	intel_wakeref_t wakeref;
>  	struct i915_request *rq;
>  	struct i915_vma *vma;
> -	const u32 base = engine->mmio_base;
>  	u32 srm, *cs;
>  	int err;
>  	int i;
> @@ -47,9 +48,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
>  	if (err)
>  		goto err_obj;
>  
> -	intel_runtime_pm_get(engine->i915);
> +	wakeref = intel_runtime_pm_get(engine->i915);
>  	rq = i915_request_alloc(engine, ctx);
> -	intel_runtime_pm_put_unchecked(engine->i915);
> +	intel_runtime_pm_put(engine->i915, wakeref);
>  	if (IS_ERR(rq)) {
>  		err = PTR_ERR(rq);
>  		goto err_pin;
> @@ -183,20 +184,21 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
>  {
>  	struct i915_gem_context *ctx;
>  	struct i915_request *rq;
> +	intel_wakeref_t wakeref;
>  	int err = 0;
>  
>  	ctx = kernel_context(engine->i915);
>  	if (IS_ERR(ctx))
>  		return PTR_ERR(ctx);
>  
> -	intel_runtime_pm_get(engine->i915);
> +	wakeref = intel_runtime_pm_get(engine->i915);
>  
>  	if (spin)
>  		rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP);
>  	else
>  		rq = i915_request_alloc(engine, ctx);
>  
> -	intel_runtime_pm_put_unchecked(engine->i915);
> +	intel_runtime_pm_put(engine->i915, wakeref);
>  
>  	kernel_context_close(ctx);
>  
> @@ -228,6 +230,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
>  	bool want_spin = reset == do_engine_reset;
>  	struct i915_gem_context *ctx;
>  	struct igt_spinner spin;
> +	intel_wakeref_t wakeref;
>  	int err;
>  
>  	pr_info("Checking %d whitelisted registers (RING_NONPRIV) [%s]\n",
> @@ -253,9 +256,9 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
>  	if (err)
>  		goto out;
>  
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  	err = reset(engine);
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  
>  	if (want_spin) {
>  		igt_spinner_end(&spin);
> @@ -344,6 +347,7 @@ live_gpu_reset_gt_engine_workarounds(void *arg)
>  {
>  	struct drm_i915_private *i915 = arg;
>  	struct i915_gpu_error *error = &i915->gpu_error;
> +	intel_wakeref_t wakeref;
>  	bool ok;
>  
>  	if (!intel_has_gpu_reset(i915))
> @@ -352,7 +356,7 @@ live_gpu_reset_gt_engine_workarounds(void *arg)
>  	pr_info("Verifying after GPU reset...\n");
>  
>  	igt_global_reset_lock(i915);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	ok = verify_gt_engine_wa(i915, "before reset");
>  	if (!ok)
> @@ -364,7 +368,7 @@ live_gpu_reset_gt_engine_workarounds(void *arg)
>  	ok = verify_gt_engine_wa(i915, "after reset");
>  
>  out:
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	igt_global_reset_unlock(i915);
>  
>  	return ok ? 0 : -ESRCH;
> @@ -379,6 +383,7 @@ live_engine_reset_gt_engine_workarounds(void *arg)
>  	struct igt_spinner spin;
>  	enum intel_engine_id id;
>  	struct i915_request *rq;
> +	intel_wakeref_t wakeref;
>  	int ret = 0;
>  
>  	if (!intel_has_reset_engine(i915))
> @@ -389,7 +394,7 @@ live_engine_reset_gt_engine_workarounds(void *arg)
>  		return PTR_ERR(ctx);
>  
>  	igt_global_reset_lock(i915);
> -	intel_runtime_pm_get(i915);
> +	wakeref = intel_runtime_pm_get(i915);
>  
>  	for_each_engine(engine, i915, id) {
>  		bool ok;
> @@ -443,7 +448,7 @@ live_engine_reset_gt_engine_workarounds(void *arg)
>  	}
>  
>  err:
> -	intel_runtime_pm_put_unchecked(i915);
> +	intel_runtime_pm_put(i915, wakeref);
>  	igt_global_reset_unlock(i915);
>  	kernel_context_close(ctx);
>  
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 17/46] drm/i915: Syntactic sugar for using intel_runtime_pm
  2019-01-07 11:54 ` [PATCH 17/46] drm/i915: Syntactic sugar for using intel_runtime_pm Chris Wilson
@ 2019-01-09 14:30   ` Mika Kuoppala
  2019-01-10  0:24   ` John Harrison
  1 sibling, 0 replies; 111+ messages in thread
From: Mika Kuoppala @ 2019-01-09 14:30 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Frequently, we use intel_runtime_pm_get/_put around a small block.
> Formalise that usage by providing a macro to define such a block with an
> automatic closure to scope the intel_runtime_pm wakeref to that block,
> i.e. macro abuse smelling of python.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c           | 162 ++++++++----------
>  drivers/gpu/drm/i915/i915_gem.c               |  10 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.c           |  23 ++-
>  drivers/gpu/drm/i915/i915_gem_shrinker.c      |  51 +++---
>  drivers/gpu/drm/i915/i915_pmu.c               |   7 +-
>  drivers/gpu/drm/i915/i915_sysfs.c             |   7 +-
>  drivers/gpu/drm/i915/intel_drv.h              |   8 +
>  drivers/gpu/drm/i915/intel_guc_log.c          |  26 ++-
>  drivers/gpu/drm/i915/intel_huc.c              |   7 +-
>  drivers/gpu/drm/i915/intel_panel.c            |  18 +-
>  drivers/gpu/drm/i915/intel_uncore.c           |  30 ++--
>  drivers/gpu/drm/i915/selftests/i915_gem.c     |  34 ++--
>  .../gpu/drm/i915/selftests/i915_gem_context.c |  12 +-
>  .../gpu/drm/i915/selftests/i915_gem_object.c  |  11 +-
>  .../drm/i915/selftests/intel_workarounds.c    |  28 +--
>  15 files changed, 203 insertions(+), 231 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index d667b05e7ca4..1521e08861d1 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -953,9 +953,9 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
>  	struct i915_gpu_state *gpu;
>  	intel_wakeref_t wakeref;
>  
> -	wakeref = intel_runtime_pm_get(i915);
> -	gpu = i915_capture_gpu_state(i915);
> -	intel_runtime_pm_put(i915, wakeref);
> +	gpu = NULL;
> +	with_intel_runtime_pm(i915, wakeref)
> +		gpu = i915_capture_gpu_state(i915);
>  	if (IS_ERR(gpu))
>  		return PTR_ERR(gpu);
>  
> @@ -1287,17 +1287,15 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  		return 0;
>  	}
>  
> -	wakeref = intel_runtime_pm_get(dev_priv);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		for_each_engine(engine, dev_priv, id) {
> +			acthd[id] = intel_engine_get_active_head(engine);
> +			seqno[id] = intel_engine_get_seqno(engine);
> +		}
>  
> -	for_each_engine(engine, dev_priv, id) {
> -		acthd[id] = intel_engine_get_active_head(engine);
> -		seqno[id] = intel_engine_get_seqno(engine);
> +		intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
>  	}
>  
> -	intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
> -
> -	intel_runtime_pm_put(dev_priv, wakeref);
> -
>  	if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer))
>  		seq_printf(m, "Hangcheck active, timer fires in %dms\n",
>  			   jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
> @@ -1573,18 +1571,16 @@ static int i915_drpc_info(struct seq_file *m, void *unused)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>  	intel_wakeref_t wakeref;
> -	int err;
> -
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -
> -	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> -		err = vlv_drpc_info(m);
> -	else if (INTEL_GEN(dev_priv) >= 6)
> -		err = gen6_drpc_info(m);
> -	else
> -		err = ironlake_drpc_info(m);
> +	int err = -ENODEV;
>  
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> +			err = vlv_drpc_info(m);
> +		else if (INTEL_GEN(dev_priv) >= 6)
> +			err = gen6_drpc_info(m);
> +		else
> +			err = ironlake_drpc_info(m);
> +	}
>  
>  	return err;
>  }
> @@ -2068,8 +2064,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>  	intel_wakeref_t wakeref;
>  	struct drm_file *file;
>  
> -	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
> -	if (wakeref) {
> +	with_intel_runtime_pm_if_in_use(dev_priv, wakeref) {
>  		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
>  			mutex_lock(&dev_priv->pcu_lock);
>  			act_freq = vlv_punit_read(dev_priv,
> @@ -2080,7 +2075,6 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>  			act_freq = intel_get_cagf(dev_priv,
>  						  I915_READ(GEN6_RPSTAT1));
>  		}
> -		intel_runtime_pm_put(dev_priv, wakeref);
>  	}
>  
>  	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
> @@ -2172,9 +2166,8 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data)
>  	p = drm_seq_file_printer(m);
>  	intel_uc_fw_dump(&dev_priv->huc.fw, &p);
>  
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref)
> +		seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
>  
>  	return 0;
>  }
> @@ -2184,7 +2177,6 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>  	intel_wakeref_t wakeref;
>  	struct drm_printer p;
> -	u32 tmp, i;
>  
>  	if (!HAS_GUC(dev_priv))
>  		return -ENODEV;
> @@ -2192,22 +2184,23 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
>  	p = drm_seq_file_printer(m);
>  	intel_uc_fw_dump(&dev_priv->guc.fw, &p);
>  
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -
> -	tmp = I915_READ(GUC_STATUS);
> -
> -	seq_printf(m, "\nGuC status 0x%08x:\n", tmp);
> -	seq_printf(m, "\tBootrom status = 0x%x\n",
> -		(tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
> -	seq_printf(m, "\tuKernel status = 0x%x\n",
> -		(tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
> -	seq_printf(m, "\tMIA Core status = 0x%x\n",
> -		(tmp & GS_MIA_MASK) >> GS_MIA_SHIFT);
> -	seq_puts(m, "\nScratch registers:\n");
> -	for (i = 0; i < 16; i++)
> -		seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i)));
> -
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		u32 tmp = I915_READ(GUC_STATUS);
> +		u32 i;
> +
> +		seq_printf(m, "\nGuC status 0x%08x:\n", tmp);
> +		seq_printf(m, "\tBootrom status = 0x%x\n",
> +			   (tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
> +		seq_printf(m, "\tuKernel status = 0x%x\n",
> +			   (tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
> +		seq_printf(m, "\tMIA Core status = 0x%x\n",
> +			   (tmp & GS_MIA_MASK) >> GS_MIA_SHIFT);
> +		seq_puts(m, "\nScratch registers:\n");
> +		for (i = 0; i < 16; i++) {
> +			seq_printf(m, "\t%2d: \t0x%x\n",
> +				   i, I915_READ(SOFT_SCRATCH(i)));
> +		}
> +	}
>  
>  	return 0;
>  }
> @@ -2680,19 +2673,14 @@ static int i915_energy_uJ(struct seq_file *m, void *data)
>  	if (INTEL_GEN(dev_priv) < 6)
>  		return -ENODEV;
>  
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -
> -	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) {
> -		intel_runtime_pm_put(dev_priv, wakeref);
> +	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power))
>  		return -ENODEV;
> -	}
>  
>  	units = (power & 0x1f00) >> 8;
> -	power = I915_READ(MCH_SECP_NRG_STTS);
> -	power = (1000000 * power) >> units; /* convert to uJ */
> -
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref)
> +		power = I915_READ(MCH_SECP_NRG_STTS);
>  
> +	power = (1000000 * power) >> units; /* convert to uJ */
>  	seq_printf(m, "%llu", power);
>  
>  	return 0;
> @@ -3275,22 +3263,20 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf,
>  	struct seq_file *m = file->private_data;
>  	struct drm_i915_private *dev_priv = m->private;
>  	intel_wakeref_t wakeref;
> -	int ret;
>  	bool enable;
> +	int ret;
>  
>  	ret = kstrtobool_from_user(ubuf, len, &enable);
>  	if (ret < 0)
>  		return ret;
>  
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -
> -	if (!dev_priv->ipc_enabled && enable)
> -		DRM_INFO("Enabling IPC: WM will be proper only after next commit\n");
> -	dev_priv->wm.distrust_bios_wm = true;
> -	dev_priv->ipc_enabled = enable;
> -	intel_enable_ipc(dev_priv);
> -
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		if (!dev_priv->ipc_enabled && enable)
> +			DRM_INFO("Enabling IPC: WM will be proper only after next commit\n");
> +		dev_priv->wm.distrust_bios_wm = true;
> +		dev_priv->ipc_enabled = enable;
> +		intel_enable_ipc(dev_priv);
> +	}
>  
>  	return len;
>  }
> @@ -4130,16 +4116,13 @@ i915_cache_sharing_get(void *data, u64 *val)
>  {
>  	struct drm_i915_private *dev_priv = data;
>  	intel_wakeref_t wakeref;
> -	u32 snpcr;
> +	u32 snpcr = 0;
>  
>  	if (!(IS_GEN_RANGE(dev_priv, 6, 7)))
>  		return -ENODEV;
>  
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -
> -	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
> -
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref)
> +		snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
>  
>  	*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
>  
> @@ -4151,7 +4134,6 @@ i915_cache_sharing_set(void *data, u64 val)
>  {
>  	struct drm_i915_private *dev_priv = data;
>  	intel_wakeref_t wakeref;
> -	u32 snpcr;
>  
>  	if (!(IS_GEN_RANGE(dev_priv, 6, 7)))
>  		return -ENODEV;
> @@ -4159,16 +4141,17 @@ i915_cache_sharing_set(void *data, u64 val)
>  	if (val > 3)
>  		return -EINVAL;
>  
> -	wakeref = intel_runtime_pm_get(dev_priv);
>  	DRM_DEBUG_DRIVER("Manually setting uncore sharing to %llu\n", val);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		u32 snpcr;
> +
> +		/* Update the cache sharing policy here as well */
> +		snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
> +		snpcr &= ~GEN6_MBC_SNPCR_MASK;
> +		snpcr |= val << GEN6_MBC_SNPCR_SHIFT;
> +		I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
> +	}
>  
> -	/* Update the cache sharing policy here as well */
> -	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
> -	snpcr &= ~GEN6_MBC_SNPCR_MASK;
> -	snpcr |= (val << GEN6_MBC_SNPCR_SHIFT);
> -	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
> -
> -	intel_runtime_pm_put(dev_priv, wakeref);
>  	return 0;
>  }
>  
> @@ -4405,20 +4388,17 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
>  	sseu.max_eus_per_subslice =
>  		RUNTIME_INFO(dev_priv)->sseu.max_eus_per_subslice;
>  
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -
> -	if (IS_CHERRYVIEW(dev_priv)) {
> -		cherryview_sseu_device_status(dev_priv, &sseu);
> -	} else if (IS_BROADWELL(dev_priv)) {
> -		broadwell_sseu_device_status(dev_priv, &sseu);
> -	} else if (IS_GEN(dev_priv, 9)) {
> -		gen9_sseu_device_status(dev_priv, &sseu);
> -	} else if (INTEL_GEN(dev_priv) >= 10) {
> -		gen10_sseu_device_status(dev_priv, &sseu);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		if (IS_CHERRYVIEW(dev_priv))
> +			cherryview_sseu_device_status(dev_priv, &sseu);
> +		else if (IS_BROADWELL(dev_priv))
> +			broadwell_sseu_device_status(dev_priv, &sseu);
> +		else if (IS_GEN(dev_priv, 9))
> +			gen9_sseu_device_status(dev_priv, &sseu);
> +		else if (INTEL_GEN(dev_priv) >= 10)
> +			gen10_sseu_device_status(dev_priv, &sseu);
>  	}
>  
> -	intel_runtime_pm_put(dev_priv, wakeref);
> -
>  	i915_print_sseu_info(m, false, &sseu);
>  
>  	return 0;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e04dadeca879..9dd31c3236fb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -814,13 +814,13 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
>  
>  	i915_gem_chipset_flush(dev_priv);
>  
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	spin_lock_irq(&dev_priv->uncore.lock);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		spin_lock_irq(&dev_priv->uncore.lock);
>  
> -	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
> +		POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
>  
> -	spin_unlock_irq(&dev_priv->uncore.lock);
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +		spin_unlock_irq(&dev_priv->uncore.lock);
> +	}
>  }
>  
>  static void
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 4bec10286487..9e9ce31142b1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2536,9 +2536,8 @@ static int ggtt_bind_vma(struct i915_vma *vma,
>  	if (i915_gem_object_is_readonly(obj))
>  		pte_flags |= PTE_READ_ONLY;
>  
> -	wakeref = intel_runtime_pm_get(i915);
> -	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
> -	intel_runtime_pm_put(i915, wakeref);
> +	with_intel_runtime_pm(i915, wakeref)
> +		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
>  
>  	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
>  
> @@ -2557,9 +2556,8 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
>  	struct drm_i915_private *i915 = vma->vm->i915;
>  	intel_wakeref_t wakeref;
>  
> -	wakeref = intel_runtime_pm_get(i915);
> -	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
> -	intel_runtime_pm_put(i915, wakeref);
> +	with_intel_runtime_pm(i915, wakeref)
> +		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
>  }
>  
>  static int aliasing_gtt_bind_vma(struct i915_vma *vma,
> @@ -2593,9 +2591,10 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
>  	if (flags & I915_VMA_GLOBAL_BIND) {
>  		intel_wakeref_t wakeref;
>  
> -		wakeref = intel_runtime_pm_get(i915);
> -		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
> -		intel_runtime_pm_put(i915, wakeref);
> +		with_intel_runtime_pm(i915, wakeref) {
> +			vma->vm->insert_entries(vma->vm, vma,
> +						cache_level, pte_flags);
> +		}
>  	}
>  
>  	return 0;
> @@ -2606,11 +2605,11 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
>  	struct drm_i915_private *i915 = vma->vm->i915;
>  
>  	if (vma->flags & I915_VMA_GLOBAL_BIND) {
> +		struct i915_address_space *vm = vma->vm;
>  		intel_wakeref_t wakeref;
>  
> -		wakeref = intel_runtime_pm_get(i915);
> -		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
> -		intel_runtime_pm_put(i915, wakeref);
> +		with_intel_runtime_pm(i915, wakeref)
> +			vm->clear_range(vm, vma->node.start, vma->size);
>  	}
>  
>  	if (vma->flags & I915_VMA_LOCAL_BIND) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index bc230e43b98f..e9a79059bc43 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -297,14 +297,14 @@ i915_gem_shrink(struct drm_i915_private *i915,
>  unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
>  {
>  	intel_wakeref_t wakeref;
> -	unsigned long freed;
> +	unsigned long freed = 0;
>  
> -	wakeref = intel_runtime_pm_get(i915);
> -	freed = i915_gem_shrink(i915, -1UL, NULL,
> -				I915_SHRINK_BOUND |
> -				I915_SHRINK_UNBOUND |
> -				I915_SHRINK_ACTIVE);
> -	intel_runtime_pm_put(i915, wakeref);
> +	with_intel_runtime_pm(i915, wakeref) {
> +		freed = i915_gem_shrink(i915, -1UL, NULL,
> +					I915_SHRINK_BOUND |
> +					I915_SHRINK_UNBOUND |
> +					I915_SHRINK_ACTIVE);
> +	}
>  
>  	return freed;
>  }
> @@ -377,14 +377,14 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
>  	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
>  		intel_wakeref_t wakeref;
>  
> -		wakeref = intel_runtime_pm_get(i915);
> -		freed += i915_gem_shrink(i915,
> -					 sc->nr_to_scan - sc->nr_scanned,
> -					 &sc->nr_scanned,
> -					 I915_SHRINK_ACTIVE |
> -					 I915_SHRINK_BOUND |
> -					 I915_SHRINK_UNBOUND);
> -		intel_runtime_pm_put(i915, wakeref);
> +		with_intel_runtime_pm(i915, wakeref) {
> +			freed += i915_gem_shrink(i915,
> +						 sc->nr_to_scan - sc->nr_scanned,
> +						 &sc->nr_scanned,
> +						 I915_SHRINK_ACTIVE |
> +						 I915_SHRINK_BOUND |
> +						 I915_SHRINK_UNBOUND);
> +		}
>  	}
>  
>  	shrinker_unlock(i915, unlock);
> @@ -426,11 +426,11 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
>  	unsigned long unevictable, bound, unbound, freed_pages;
>  	intel_wakeref_t wakeref;
>  
> -	wakeref = intel_runtime_pm_get(i915);
> -	freed_pages = i915_gem_shrink(i915, -1UL, NULL,
> -				      I915_SHRINK_BOUND |
> -				      I915_SHRINK_UNBOUND);
> -	intel_runtime_pm_put(i915, wakeref);
> +	freed_pages = 0;
> +	with_intel_runtime_pm(i915, wakeref)
> +		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
> +					       I915_SHRINK_BOUND |
> +					       I915_SHRINK_UNBOUND);
>  
>  	/* Because we may be allocating inside our own driver, we cannot
>  	 * assert that there are no objects with pinned pages that are not
> @@ -482,12 +482,11 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
>  	if (ret)
>  		goto out;
>  
> -	wakeref = intel_runtime_pm_get(i915);
> -	freed_pages += i915_gem_shrink(i915, -1UL, NULL,
> -				       I915_SHRINK_BOUND |
> -				       I915_SHRINK_UNBOUND |
> -				       I915_SHRINK_VMAPS);
> -	intel_runtime_pm_put(i915, wakeref);
> +	with_intel_runtime_pm(i915, wakeref)
> +		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
> +					       I915_SHRINK_BOUND |
> +					       I915_SHRINK_UNBOUND |
> +					       I915_SHRINK_VMAPS);
>  
>  	/* We also want to clear any cached iomaps as they wrap vmap */
>  	list_for_each_entry_safe(vma, next,
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 3d43fc9dd25d..b1cb2d3cae16 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -230,14 +230,11 @@ frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
>  
>  		val = dev_priv->gt_pm.rps.cur_freq;
>  		if (dev_priv->gt.awake) {
> -			intel_wakeref_t wakeref =
> -				intel_runtime_pm_get_if_in_use(dev_priv);
> +			intel_wakeref_t wakeref;
>  
> -			if (wakeref) {
> +			with_intel_runtime_pm_if_in_use(dev_priv, wakeref)
>  				val = intel_get_cagf(dev_priv,
>  						     I915_READ_NOTRACE(GEN6_RPSTAT1));
> -				intel_runtime_pm_put(dev_priv, wakeref);
> -			}
>  		}
>  
>  		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index 2cbbf165d179..41313005af42 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -43,11 +43,10 @@ static u32 calc_residency(struct drm_i915_private *dev_priv,
>  			  i915_reg_t reg)
>  {
>  	intel_wakeref_t wakeref;
> -	u64 res;
> +	u64 res = 0;
>  
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	res = intel_rc6_residency_us(dev_priv, reg);
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref)
> +		res = intel_rc6_residency_us(dev_priv, reg);
>  
>  	return DIV_ROUND_CLOSEST_ULL(res, 1000);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index a1e4e1033289..4272c260b6e1 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -2187,6 +2187,14 @@ intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915);
>  intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
>  intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
>  
> +#define with_intel_runtime_pm(i915, wf) \
> +	for (wf = intel_runtime_pm_get(i915); wf; \
> +	     intel_runtime_pm_put(i915, wf), wf = 0)
> +
> +#define with_intel_runtime_pm_if_in_use(i915, wf) \
> +	for (wf = intel_runtime_pm_get_if_in_use(i915); wf; \
> +	     intel_runtime_pm_put(i915, wf), wf = 0)
> +
Macro arguments need parentheses.

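Roughly what I mean (an untested sketch; just these two macros with
their arguments parenthesised):

	#define with_intel_runtime_pm(i915, wf) \
		for ((wf) = intel_runtime_pm_get(i915); (wf); \
		     intel_runtime_pm_put((i915), (wf)), (wf) = 0)

	#define with_intel_runtime_pm_if_in_use(i915, wf) \
		for ((wf) = intel_runtime_pm_get_if_in_use(i915); (wf); \
		     intel_runtime_pm_put((i915), (wf)), (wf) = 0)
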
It shifts code to the right, but it clearly binds the wakeref'd
regions together, adding to readability and reducing boilerplate. On
my weighted scale that compensates for the added learning burden Jani
mentioned. So I am in favour.

Didn't notice anything missing except the parentheses, so
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

Please gather acks for this from display people,
-Mika

>  void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915);
>  #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
>  void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref);
> diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
> index 20c0b36d748e..b53582c0c6c1 100644
> --- a/drivers/gpu/drm/i915/intel_guc_log.c
> +++ b/drivers/gpu/drm/i915/intel_guc_log.c
> @@ -444,9 +444,8 @@ static void guc_log_capture_logs(struct intel_guc_log *log)
>  	 * Generally device is expected to be active only at this
>  	 * time, so get/put should be really quick.
>  	 */
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	guc_action_flush_log_complete(guc);
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref)
> +		guc_action_flush_log_complete(guc);
>  }
>  
>  int intel_guc_log_create(struct intel_guc_log *log)
> @@ -507,7 +506,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
>  	struct intel_guc *guc = log_to_guc(log);
>  	struct drm_i915_private *dev_priv = guc_to_i915(guc);
>  	intel_wakeref_t wakeref;
> -	int ret;
> +	int ret = 0;
>  
>  	BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN != 0);
>  	GEM_BUG_ON(!log->vma);
> @@ -521,16 +520,14 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
>  
>  	mutex_lock(&dev_priv->drm.struct_mutex);
>  
> -	if (log->level == level) {
> -		ret = 0;
> +	if (log->level == level)
>  		goto out_unlock;
> -	}
>  
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	ret = guc_action_control_log(guc, GUC_LOG_LEVEL_IS_VERBOSE(level),
> -				     GUC_LOG_LEVEL_IS_ENABLED(level),
> -				     GUC_LOG_LEVEL_TO_VERBOSITY(level));
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref)
> +		ret = guc_action_control_log(guc,
> +					     GUC_LOG_LEVEL_IS_VERBOSE(level),
> +					     GUC_LOG_LEVEL_IS_ENABLED(level),
> +					     GUC_LOG_LEVEL_TO_VERBOSITY(level));
>  	if (ret) {
>  		DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret);
>  		goto out_unlock;
> @@ -611,9 +608,8 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
>  	 */
>  	flush_work(&log->relay.flush_work);
>  
> -	wakeref = intel_runtime_pm_get(i915);
> -	guc_action_flush_log(guc);
> -	intel_runtime_pm_put(i915, wakeref);
> +	with_intel_runtime_pm(i915, wakeref)
> +		guc_action_flush_log(guc);
>  
>  	/* GuC would have updated log buffer by now, so capture it */
>  	guc_log_capture_logs(log);
> diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c
> index 3e8c18b6a42d..9bd1c9002c2a 100644
> --- a/drivers/gpu/drm/i915/intel_huc.c
> +++ b/drivers/gpu/drm/i915/intel_huc.c
> @@ -116,14 +116,13 @@ int intel_huc_check_status(struct intel_huc *huc)
>  {
>  	struct drm_i915_private *dev_priv = huc_to_i915(huc);
>  	intel_wakeref_t wakeref;
> -	bool status;
> +	bool status = false;
>  
>  	if (!HAS_HUC(dev_priv))
>  		return -ENODEV;
>  
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref)
> +		status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
>  
>  	return status;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
> index 93a2e4b5c54c..5a39a6347a7a 100644
> --- a/drivers/gpu/drm/i915/intel_panel.c
> +++ b/drivers/gpu/drm/i915/intel_panel.c
> @@ -1204,17 +1204,19 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd)
>  	struct drm_device *dev = connector->base.dev;
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  	intel_wakeref_t wakeref;
> -	u32 hw_level;
> -	int ret;
> +	int ret = 0;
>  
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		u32 hw_level;
>  
> -	hw_level = intel_panel_get_backlight(connector);
> -	ret = scale_hw_to_user(connector, hw_level, bd->props.max_brightness);
> +		drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
>  
> -	drm_modeset_unlock(&dev->mode_config.connection_mutex);
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +		hw_level = intel_panel_get_backlight(connector);
> +		ret = scale_hw_to_user(connector,
> +				       hw_level, bd->props.max_brightness);
> +
> +		drm_modeset_unlock(&dev->mode_config.connection_mutex);
> +	}
>  
>  	return ret;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index d494d92da02c..681ea532585e 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1696,21 +1696,21 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>  
>  	flags = reg->offset & (entry->size - 1);
>  
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	if (entry->size == 8 && flags == I915_REG_READ_8B_WA)
> -		reg->val = I915_READ64_2x32(entry->offset_ldw,
> -					    entry->offset_udw);
> -	else if (entry->size == 8 && flags == 0)
> -		reg->val = I915_READ64(entry->offset_ldw);
> -	else if (entry->size == 4 && flags == 0)
> -		reg->val = I915_READ(entry->offset_ldw);
> -	else if (entry->size == 2 && flags == 0)
> -		reg->val = I915_READ16(entry->offset_ldw);
> -	else if (entry->size == 1 && flags == 0)
> -		reg->val = I915_READ8(entry->offset_ldw);
> -	else
> -		ret = -EINVAL;
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		if (entry->size == 8 && flags == I915_REG_READ_8B_WA)
> +			reg->val = I915_READ64_2x32(entry->offset_ldw,
> +						    entry->offset_udw);
> +		else if (entry->size == 8 && flags == 0)
> +			reg->val = I915_READ64(entry->offset_ldw);
> +		else if (entry->size == 4 && flags == 0)
> +			reg->val = I915_READ(entry->offset_ldw);
> +		else if (entry->size == 2 && flags == 0)
> +			reg->val = I915_READ16(entry->offset_ldw);
> +		else if (entry->size == 1 && flags == 0)
> +			reg->val = I915_READ8(entry->offset_ldw);
> +		else
> +			ret = -EINVAL;
> +	}
>  
>  	return ret;
>  }
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
> index 01a46c46fe25..e77b7ed449ae 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
> @@ -98,26 +98,22 @@ static void pm_suspend(struct drm_i915_private *i915)
>  {
>  	intel_wakeref_t wakeref;
>  
> -	wakeref = intel_runtime_pm_get(i915);
> -
> -	i915_gem_suspend_gtt_mappings(i915);
> -	i915_gem_suspend_late(i915);
> -
> -	intel_runtime_pm_put(i915, wakeref);
> +	with_intel_runtime_pm(i915, wakeref) {
> +		i915_gem_suspend_gtt_mappings(i915);
> +		i915_gem_suspend_late(i915);
> +	}
>  }
>  
>  static void pm_hibernate(struct drm_i915_private *i915)
>  {
>  	intel_wakeref_t wakeref;
>  
> -	wakeref = intel_runtime_pm_get(i915);
> -
> -	i915_gem_suspend_gtt_mappings(i915);
> -
> -	i915_gem_freeze(i915);
> -	i915_gem_freeze_late(i915);
> +	with_intel_runtime_pm(i915, wakeref) {
> +		i915_gem_suspend_gtt_mappings(i915);
>  
> -	intel_runtime_pm_put(i915, wakeref);
> +		i915_gem_freeze(i915);
> +		i915_gem_freeze_late(i915);
> +	}
>  }
>  
>  static void pm_resume(struct drm_i915_private *i915)
> @@ -128,13 +124,11 @@ static void pm_resume(struct drm_i915_private *i915)
>  	 * Both suspend and hibernate follow the same wakeup path and assume
>  	 * that runtime-pm just works.
>  	 */
> -	wakeref = intel_runtime_pm_get(i915);
> -
> -	intel_engines_sanitize(i915, false);
> -	i915_gem_sanitize(i915);
> -	i915_gem_resume(i915);
> -
> -	intel_runtime_pm_put(i915, wakeref);
> +	with_intel_runtime_pm(i915, wakeref) {
> +		intel_engines_sanitize(i915, false);
> +		i915_gem_sanitize(i915);
> +		i915_gem_resume(i915);
> +	}
>  }
>  
>  static int igt_gem_suspend(void *arg)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 7a9b1f20b019..4cba50679607 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -610,9 +610,9 @@ static int igt_ctx_exec(void *arg)
>  				}
>  			}
>  
> -			wakeref = intel_runtime_pm_get(i915);
> -			err = gpu_fill(obj, ctx, engine, dw);
> -			intel_runtime_pm_put(i915, wakeref);
> +			err = 0;
> +			with_intel_runtime_pm(i915, wakeref)
> +				err = gpu_fill(obj, ctx, engine, dw);
>  			if (err) {
>  				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
>  				       ndwords, dw, max_dwords(obj),
> @@ -718,9 +718,9 @@ static int igt_ctx_readonly(void *arg)
>  					i915_gem_object_set_readonly(obj);
>  			}
>  
> -			wakeref = intel_runtime_pm_get(i915);
> -			err = gpu_fill(obj, ctx, engine, dw);
> -			intel_runtime_pm_put(i915, wakeref);
> +			err = 0;
> +			with_intel_runtime_pm(i915, wakeref)
> +				err = gpu_fill(obj, ctx, engine, dw);
>  			if (err) {
>  				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
>  				       ndwords, dw, max_dwords(obj),
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> index 3575e1387c3f..395ae878e0f7 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> @@ -509,9 +509,8 @@ static void disable_retire_worker(struct drm_i915_private *i915)
>  	if (!i915->gt.active_requests++) {
>  		intel_wakeref_t wakeref;
>  
> -		wakeref = intel_runtime_pm_get(i915);
> -		i915_gem_unpark(i915);
> -		intel_runtime_pm_put(i915, wakeref);
> +		with_intel_runtime_pm(i915, wakeref)
> +			i915_gem_unpark(i915);
>  	}
>  	mutex_unlock(&i915->drm.struct_mutex);
>  
> @@ -593,10 +592,10 @@ static int igt_mmap_offset_exhaustion(void *arg)
>  			goto out;
>  		}
>  
> +		err = 0;
>  		mutex_lock(&i915->drm.struct_mutex);
> -		wakeref = intel_runtime_pm_get(i915);
> -		err = make_obj_busy(obj);
> -		intel_runtime_pm_put(i915, wakeref);
> +		with_intel_runtime_pm(i915, wakeref)
> +			err = make_obj_busy(obj);
>  		mutex_unlock(&i915->drm.struct_mutex);
>  		if (err) {
>  			pr_err("[loop %d] Failed to busy the object\n", loop);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> index 47e62e1999a9..be2ffc9cd38d 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> @@ -48,9 +48,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
>  	if (err)
>  		goto err_obj;
>  
> -	wakeref = intel_runtime_pm_get(engine->i915);
> -	rq = i915_request_alloc(engine, ctx);
> -	intel_runtime_pm_put(engine->i915, wakeref);
> +	rq = ERR_PTR(-ENODEV);
> +	with_intel_runtime_pm(engine->i915, wakeref)
> +		rq = i915_request_alloc(engine, ctx);
>  	if (IS_ERR(rq)) {
>  		err = PTR_ERR(rq);
>  		goto err_pin;
> @@ -191,14 +191,15 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
>  	if (IS_ERR(ctx))
>  		return PTR_ERR(ctx);
>  
> -	wakeref = intel_runtime_pm_get(engine->i915);
> -
> -	if (spin)
> -		rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP);
> -	else
> -		rq = i915_request_alloc(engine, ctx);
> -
> -	intel_runtime_pm_put(engine->i915, wakeref);
> +	rq = ERR_PTR(-ENODEV);
> +	with_intel_runtime_pm(engine->i915, wakeref) {
> +		if (spin)
> +			rq = igt_spinner_create_request(spin,
> +							ctx, engine,
> +							MI_NOOP);
> +		else
> +			rq = i915_request_alloc(engine, ctx);
> +	}
>  
>  	kernel_context_close(ctx);
>  
> @@ -256,9 +257,8 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
>  	if (err)
>  		goto out;
>  
> -	wakeref = intel_runtime_pm_get(i915);
> -	err = reset(engine);
> -	intel_runtime_pm_put(i915, wakeref);
> +	with_intel_runtime_pm(i915, wakeref)
> +		err = reset(engine);
>  
>  	if (want_spin) {
>  		igt_spinner_end(&spin);
> -- 
> 2.20.1
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 04/46] drm/i915: Markup paired operations on wakerefs
  2019-01-09 11:51         ` Chris Wilson
@ 2019-01-09 23:33           ` John Harrison
  0 siblings, 0 replies; 111+ messages in thread
From: John Harrison @ 2019-01-09 23:33 UTC (permalink / raw)
  To: Chris Wilson, Mika Kuoppala, intel-gfx; +Cc: Jani Nikula

On 1/9/2019 03:51, Chris Wilson wrote:
> Quoting Mika Kuoppala (2019-01-09 09:23:53)
>> I should have been more specific. My concern was on documenting
>> the changing return values.
> The interface isn't documented, there's nothing in the header about the
> functions? Where else would it be?

I think Mika's point is that you now have inaccurate comments at the 
start of the _get functions:

@@ -4207,7 +4256,7 @@ void intel_runtime_pm_get(struct drm_i915_private *i915)
   *
   * Returns: True if the wakeref was acquired, or False otherwise.
     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
   */
-bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
+intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
  {

The comment says 'returns: true ... or false' but should actually say 'returns: an intel_wakeref_t if the wakeref was acquired, or zero otherwise', with the assumption that zero is guaranteed to be an invalid value for an intel_wakeref_t.

The other _get functions were previously void but also now return an intel_wakeref_t. Hence, they should have their comments updated too.
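
For example, the tail of the updated kernel-doc might read something 
like this (wording is only a suggestion, not taken from the series):

	/**
	 * intel_runtime_pm_get_if_in_use - grab a runtime pm wakeref if in use
	 * @i915: i915 device instance
	 *
	 * Returns: a wakeref cookie to pass to the matching
	 * intel_runtime_pm_put(), evaluating to true if the wakeref was
	 * acquired, or 0 otherwise.
	 */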


John.


> -Chris

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 12/46] drm/i915/gem: Track the rpm wakerefs
  2019-01-09 11:16   ` Mika Kuoppala
@ 2019-01-09 23:45     ` John Harrison
  0 siblings, 0 replies; 111+ messages in thread
From: John Harrison @ 2019-01-09 23:45 UTC (permalink / raw)
  To: Mika Kuoppala, Chris Wilson, intel-gfx; +Cc: Jani Nikula

On 1/9/2019 03:16, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
>> index 16693dd4d019..bc230e43b98f 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
>> @@ -154,6 +154,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
>>   		{ &i915->mm.bound_list, I915_SHRINK_BOUND },
>>   		{ NULL, 0 },
>>   	}, *phase;
>> +	intel_wakeref_t wakeref = 0;
>>   	unsigned long count = 0;
>>   	unsigned long scanned = 0;
>>   	bool unlock;
>> @@ -183,9 +184,11 @@ i915_gem_shrink(struct drm_i915_private *i915,
>>   	 * device just to recover a little memory. If absolutely necessary,
>>   	 * we will force the wake during oom-notifier.
>>   	 */
>> -	if ((flags & I915_SHRINK_BOUND) &&
>> -	    !intel_runtime_pm_get_if_in_use(i915))
>> -		flags &= ~I915_SHRINK_BOUND;
>> +	if (flags & I915_SHRINK_BOUND) {
>> +		wakeref = intel_runtime_pm_get_if_in_use(i915);
>> +		if (!wakeref)
>> +			flags &= ~I915_SHRINK_BOUND;
>> +	}
>>   
>>   	/*
>>   	 * As we may completely rewrite the (un)bound list whilst unbinding
>> @@ -266,7 +269,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
>>   	}
>>   
>>   	if (flags & I915_SHRINK_BOUND)
>> -		intel_runtime_pm_put_unchecked(i915);
>> +		intel_runtime_pm_put(i915, wakeref);
> This is ok but raises a question that did we have
> GEM_BUG_ON(wakeref == 0) on pm_put? Perhaps not needed
> per se as we do find that we don't have ref for 0.
>
> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>

There is a WARN, not a BUG, if pm_put() is called with a zero wakeref 
(in the cancel_ function, after the search fails to find a match for 
zero). However, the flag checks mean that it can't happen from here.

John.

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 17/46] drm/i915: Syntactic sugar for using intel_runtime_pm
  2019-01-07 11:54 ` [PATCH 17/46] drm/i915: Syntactic sugar for using intel_runtime_pm Chris Wilson
  2019-01-09 14:30   ` Mika Kuoppala
@ 2019-01-10  0:24   ` John Harrison
  2019-01-10  1:10     ` John Harrison
  1 sibling, 1 reply; 111+ messages in thread
From: John Harrison @ 2019-01-10  0:24 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

On 1/7/2019 03:54, Chris Wilson wrote:
> Frequently, we use intel_runtime_pm_get/_put around a small block.
> Formalise that usage by providing a macro to define such a block with an
> automatic closure to scope the intel_runtime_pm wakeref to that block,
> i.e. macro abuse smelling of python.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>
> ---
>   
> +#define with_intel_runtime_pm(i915, wf) \
> +	for (wf = intel_runtime_pm_get(i915); wf; \
> +	     intel_runtime_pm_put(i915, wf), wf = 0)
> +
> +#define with_intel_runtime_pm_if_in_use(i915, wf) \
> +	for (wf = intel_runtime_pm_get_if_in_use(i915); wf; \
> +	     intel_runtime_pm_put(i915, wf), wf = 0)
> +
This is a potential change in behaviour. Previously the simple 'get' 
version would unconditionally execute the wrapped code. Whereas now, if 
the get function fails for some reason and returns zero, the wrapped 
code will be skipped. Currently, the get() function can't return zero - 
it returns -1 in the case of the tracking code failing to allocate or 
similar. But is that guaranteed to be the case forevermore? It would be 
a better match for the original behaviour if the 'for' loop of the 'get' 
version was unconditional and only the 'get_if_in_use' version could 
skip. E.g. something like:
    for (intel_wakeref_t loop = -1, wf = intel_runtime_pm_get(i915) ; 
loop; intel_runtime_pm_put(i915, wf), wf = loop = 0)

Although that does mean the wf becomes local to the loop. On the other 
hand, I'm also not sure why it needs to be external anyway? If it is 
guaranteed to be zero on exit and any value on entry is overwritten, 
then why have it external at all? Would it not be neater/smaller source 
to get rid of all the local instantiations?
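
To make the skip behaviour above concrete (do_work() is just a 
stand-in for the wrapped block):

	intel_wakeref_t wf;

	/* open-coded: do_work() always runs, even if _get() were
	 * ever to fail its tracking and return 0 */
	wf = intel_runtime_pm_get(i915);
	do_work(i915);
	intel_runtime_pm_put(i915, wf);

	/* macro: do_work() is skipped entirely when wf == 0, and wf
	 * is guaranteed to be 0 once the block exits */
	with_intel_runtime_pm(i915, wf)
		do_work(i915);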

John.

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 18/46] drm/i915: Markup paired operations on display power domains
  2019-01-07 11:54 ` [PATCH 18/46] drm/i915: Markup paired operations on display power domains Chris Wilson
@ 2019-01-10  0:55   ` John Harrison
  2019-01-10 10:00     ` Chris Wilson
  0 siblings, 1 reply; 111+ messages in thread
From: John Harrison @ 2019-01-10  0:55 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

On 1/7/2019 03:54, Chris Wilson wrote:
> The majority of runtime-pm operations are bounded and scoped within a
> function; these are easy to verify that the wakeref are handled
> correctly. We can employ the compiler to help us, and reduce the number
> of wakerefs tracked when debugging, by passing around cookies provided
> by the various rpm_get functions to their rpm_put counterpart. This
> makes the pairing explicit, and given the required wakeref cookie the
> compiler can verify that we pass an initialised value to the rpm_put
> (quite handy for double checking error paths).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jani Nikula <jani.nikula@intel.com>
> ---
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index b0cbad2e83c5..faff6cf1aaa1 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1364,14 +1364,14 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>   
>   	free_oa_buffer(dev_priv);
>   
> -	put_oa_config(dev_priv, stream->oa_config);
> -
>   	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>   	intel_runtime_pm_put(dev_priv, stream->wakeref);
>   
>   	if (stream->ctx)
>   		oa_put_render_ctx_id(stream);
>   
> +	put_oa_config(dev_priv, stream->oa_config);
> +
>   	if (dev_priv->perf.oa.spurious_report_rs.missed) {
>   		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
>   			 dev_priv->perf.oa.spurious_report_rs.missed);

Is this not reversing a change from patch 9/46? Is there a reason why 
the oa_config scope needs to change temporarily for some of the series? 
Or can this diff be folded down and optimised out of both patches?

John.

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 17/46] drm/i915: Syntactic sugar for using intel_runtime_pm
  2019-01-10  0:24   ` John Harrison
@ 2019-01-10  1:10     ` John Harrison
  2019-01-10  9:59       ` Chris Wilson
  0 siblings, 1 reply; 111+ messages in thread
From: John Harrison @ 2019-01-10  1:10 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula

On 1/9/2019 16:24, John Harrison wrote:
> On 1/7/2019 03:54, Chris Wilson wrote:
>> Frequently, we use intel_runtime_pm_get/_put around a small block.
>> Formalise that usage by providing a macro to define such a block with an
>> automatic closure to scope the intel_runtime_pm wakeref to that block,
>> i.e. macro abuse smelling of python.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Jani Nikula <jani.nikula@intel.com>
>> ---
>>   +#define with_intel_runtime_pm(i915, wf) \
>> +    for (wf = intel_runtime_pm_get(i915); wf; \
>> +         intel_runtime_pm_put(i915, wf), wf = 0)
>> +
>> +#define with_intel_runtime_pm_if_in_use(i915, wf) \
>> +    for (wf = intel_runtime_pm_get_if_in_use(i915); wf; \
>> +         intel_runtime_pm_put(i915, wf), wf = 0)
>> +
> This is a potential change in behaviour. Previously the simple 'get' 
> version would unconditionally execute the wrapped code. Whereas now, 
> if the get function fails for some reason and returns zero, the 
> wrapped code will be skipped. Currently, the get() function can't 
> return zero - it returns -1 in the case of the tracking code failing 
> to allocate or similar. But is that guaranteed to be the case 
> forevermore? It would be a better match for the original behaviour if 
> the 'for' loop of the 'get' version was unconditional and only the 
> 'get_if_in_use' version could skip. E.g. something like:
>    for (intel_wakeref_t loop = -1, wf = intel_runtime_pm_get(i915) ; 
> loop; intel_runtime_pm_put(i915, wf), wf = loop = 0)
>
> Although that does mean the wf becomes local to the loop. On the other 
> hand, I'm also not sure why it needs to be external anyway? If it is 
> guaranteed to be zero on exit and any value on entry is overwritten, 
> then why have it external at all? Would it not be neater/smaller 
> source to get rid of all the local instantiations?
>
> John.
>
Doh. Not sure why I was thinking C99 extensions were valid in the 
kernel. I can't think of an alternative way to fix the above issues 
without making the macro truly hideous. So maybe it's not enough of a 
worry to act on.

John.

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 17/46] drm/i915: Syntactic sugar for using intel_runtime_pm
  2019-01-10  1:10     ` John Harrison
@ 2019-01-10  9:59       ` Chris Wilson
  0 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-10  9:59 UTC (permalink / raw)
  To: John Harrison, intel-gfx; +Cc: Jani Nikula

Quoting John Harrison (2019-01-10 01:10:09)
> On 1/9/2019 16:24, John Harrison wrote:
> > On 1/7/2019 03:54, Chris Wilson wrote:
> >> Frequently, we use intel_runtime_pm_get/_put around a small block.
> >> Formalise that usage by providing a macro to define such a block with an
> >> automatic closure to scope the intel_runtime_pm wakeref to that block,
> >> i.e. macro abuse smelling of python.
> >>
> >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >> Cc: Jani Nikula <jani.nikula@intel.com>
> >> ---
> >>   +#define with_intel_runtime_pm(i915, wf) \
> >> +    for (wf = intel_runtime_pm_get(i915); wf; \
> >> +         intel_runtime_pm_put(i915, wf), wf = 0)
> >> +
> >> +#define with_intel_runtime_pm_if_in_use(i915, wf) \
> >> +    for (wf = intel_runtime_pm_get_if_in_use(i915); wf; \
> >> +         intel_runtime_pm_put(i915, wf), wf = 0)
> >> +
> > This is a potential change in behaviour. Previously the simple 'get' 
> > version would unconditionally execute the wrapped code. Whereas now, 
> > if the get function fails for some reason and returns zero, the 
> > wrapped code will be skipped. Currently, the get() function can't 
> > return zero - it returns -1 in the case of the tracking code failing 
> > to allocate or similar. But is that guaranteed to be the case 
> > forevermore? It would be a better match for the original behaviour if 
> > the 'for' loop of the 'get' version was unconditional and only the 
> > 'get_if_in_use' version could skip. E.g. something like:
> >    for (intel_wakeref_t loop = -1, wf = intel_runtime_pm_get(i915) ; 
> > loop; intel_runtime_pm_put(i915, wf), wf = loop = 0)
> >
> > Although that does mean the wf becomes local to the loop. On the other 
> > hand, I'm also not sure why it needs to be external anyway? If it is 
> > guaranteed to be zero on exit and any value on entry is overwritten, 
> > then why have it external at all? Would it not be neater/smaller 
> > source to get rid of all the local instantiations?
> >
> > John.
> >
> Doh. Not sure why I was thinking C99 extensions were valid in the 
> kernel. I can't think of an alternative way to fix the above issues 
> without making the macro truly hideous. So maybe it's not enough of a 
> worry to worry about.

Using C99 would be a nice improvement for a lot of our macros, and I
hope it comes to pass.

Yes, the whole reason we return -1 on tracking-failure-but-rpm-success
is so that we keep 0 as meaning rpm-failure so that the different cases
are identifiable required for the markup and cookie tracking. So using
-1 here just falls out of the general case.
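
As a sketch of the resulting convention for callers (illustrative 
only; do_work() is a placeholder):

	intel_wakeref_t wf;

	wf = intel_runtime_pm_get_if_in_use(i915);
	if (!wf)	/* 0: rpm was not acquired, device is asleep */
		return;

	/* non-zero: rpm acquired. Either a tracking cookie, or -1
	 * when rpm succeeded but the tracking allocation failed;
	 * both must be handed back to intel_runtime_pm_put(). */
	do_work(i915);
	intel_runtime_pm_put(i915, wf);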
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 18/46] drm/i915: Markup paired operations on display power domains
  2019-01-10  0:55   ` John Harrison
@ 2019-01-10 10:00     ` Chris Wilson
  0 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-10 10:00 UTC (permalink / raw)
  To: John Harrison, intel-gfx; +Cc: Jani Nikula

Quoting John Harrison (2019-01-10 00:55:07)
> On 1/7/2019 03:54, Chris Wilson wrote:
> > The majority of runtime-pm operations are bounded and scoped within a
> > function; these are easy to verify that the wakeref are handled
> > correctly. We can employ the compiler to help us, and reduce the number
> > of wakerefs tracked when debugging, by passing around cookies provided
> > by the various rpm_get functions to their rpm_put counterpart. This
> > makes the pairing explicit, and given the required wakeref cookie the
> > compiler can verify that we pass an initialised value to the rpm_put
> > (quite handy for double checking error paths).
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Jani Nikula <jani.nikula@intel.com>
> > ---
> > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> > index b0cbad2e83c5..faff6cf1aaa1 100644
> > --- a/drivers/gpu/drm/i915/i915_perf.c
> > +++ b/drivers/gpu/drm/i915/i915_perf.c
> > @@ -1364,14 +1364,14 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
> >   
> >       free_oa_buffer(dev_priv);
> >   
> > -     put_oa_config(dev_priv, stream->oa_config);
> > -
> >       intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> >       intel_runtime_pm_put(dev_priv, stream->wakeref);
> >   
> >       if (stream->ctx)
> >               oa_put_render_ctx_id(stream);
> >   
> > +     put_oa_config(dev_priv, stream->oa_config);
> > +
> >       if (dev_priv->perf.oa.spurious_report_rs.missed) {
> >               DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
> >                        dev_priv->perf.oa.spurious_report_rs.missed);
> 
> Is this not reversing a change from patch 9/46? Is there a reason why 
> the oa_config scope needs to change temporarily for some of the series? 
> Or can this diff be folded down and optimised out of both patches?

No, I just applied the removal to the wrong patch. That explains how it
was still in the series when I thought I had applied the review
comments!
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 39/46] drm/i915: Always allocate an object/vma for the HWSP
  2019-01-07 11:55 ` [PATCH 39/46] drm/i915: Always allocate an object/vma for the HWSP Chris Wilson
@ 2019-01-10 10:52   ` Matthew Auld
  2019-01-10 11:07     ` Chris Wilson
  2019-01-10 11:24   ` Matthew Auld
  1 sibling, 1 reply; 111+ messages in thread
From: Matthew Auld @ 2019-01-10 10:52 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development

On Mon, 7 Jan 2019 at 11:55, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Currently we only allocate an object and vma if we are using a GGTT
> virtual HWSP, and a plain struct page for a physical HWSP. For
> convenience later on with global timelines, it will be useful to always
> have the status page being tracked by a struct i915_vma. Make it so.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_engine_cs.c       | 109 ++++++++++---------
>  drivers/gpu/drm/i915/intel_guc_submission.c  |   5 +
>  drivers/gpu/drm/i915/intel_lrc.c             |  11 +-
>  drivers/gpu/drm/i915/intel_ringbuffer.c      |  20 +++-
>  drivers/gpu/drm/i915/intel_ringbuffer.h      |  23 +---
>  drivers/gpu/drm/i915/selftests/mock_engine.c |   2 +-
>  6 files changed, 90 insertions(+), 80 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 1a9de4a01b9d..ffef7f43fda3 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -506,27 +506,61 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
>
>  static void cleanup_status_page(struct intel_engine_cs *engine)
>  {
> +       struct i915_vma *vma;
> +
>         /* Prevent writes into HWSP after returning the page to the system */
>         intel_engine_set_hwsp_writemask(engine, ~0u);
>
> -       if (HWS_NEEDS_PHYSICAL(engine->i915)) {
> -               void *addr = fetch_and_zero(&engine->status_page.page_addr);
> +       vma = fetch_and_zero(&engine->status_page.vma);
> +       if (!vma)
> +               return;
>
> -               __free_page(virt_to_page(addr));
> -       }
> +       if (!HWS_NEEDS_PHYSICAL(engine->i915))
> +               i915_vma_unpin(vma);
> +
> +       i915_gem_object_unpin_map(vma->obj);
> +       __i915_gem_object_release_unless_active(vma->obj);
> +}
> +
> +static int pin_ggtt_status_page(struct intel_engine_cs *engine,
> +                               struct i915_vma *vma)
> +{
> +       unsigned int flags;
> +
> +       flags = PIN_GLOBAL;
> +       if (!HAS_LLC(engine->i915))
> +               /*
> +                * On g33, we cannot place HWS above 256MiB, so
> +                * restrict its pinning to the low mappable arena.
> +                * Though this restriction is not documented for
> +                * gen4, gen5, or byt, they also behave similarly
> +                * and hang if the HWS is placed at the top of the
> +                * GTT. To generalise, it appears that all !llc
> +                * platforms have issues with us placing the HWS
> +                * above the mappable region (even though we never
> +                * actually map it).
> +                */
> +               flags |= PIN_MAPPABLE;
> +       else
> +               flags |= PIN_HIGH;
>
> -       i915_vma_unpin_and_release(&engine->status_page.vma,
> -                                  I915_VMA_RELEASE_MAP);
> +       return i915_vma_pin(vma, 0, 0, flags);
>  }
>
>  static int init_status_page(struct intel_engine_cs *engine)
>  {
>         struct drm_i915_gem_object *obj;
>         struct i915_vma *vma;
> -       unsigned int flags;
>         void *vaddr;
>         int ret;
>
> +       /*
> +        * Though the HWS register does support 36bit addresses, historically
> +        * we have had hangs and corruption reported due to wild writes if
> +        * the HWS is placed above 4G. We only allow objects to be allocated
> +        * in GFP_DMA32 for i965, and no earlier physical address users had
> +        * access to more than 4G.
> +        */
>         obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
>         if (IS_ERR(obj)) {
>                 DRM_ERROR("Failed to allocate status page\n");
> @@ -543,61 +577,30 @@ static int init_status_page(struct intel_engine_cs *engine)
>                 goto err;
>         }
>
> -       flags = PIN_GLOBAL;
> -       if (!HAS_LLC(engine->i915))
> -               /* On g33, we cannot place HWS above 256MiB, so
> -                * restrict its pinning to the low mappable arena.
> -                * Though this restriction is not documented for
> -                * gen4, gen5, or byt, they also behave similarly
> -                * and hang if the HWS is placed at the top of the
> -                * GTT. To generalise, it appears that all !llc
> -                * platforms have issues with us placing the HWS
> -                * above the mappable region (even though we never
> -                * actually map it).
> -                */
> -               flags |= PIN_MAPPABLE;
> -       else
> -               flags |= PIN_HIGH;
> -       ret = i915_vma_pin(vma, 0, 0, flags);
> -       if (ret)
> -               goto err;
> -
>         vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
>         if (IS_ERR(vaddr)) {
>                 ret = PTR_ERR(vaddr);
> -               goto err_unpin;
> +               goto err;
>         }
>
> +       engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
>         engine->status_page.vma = vma;
> -       engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
> -       engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
> +
> +       if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
> +               ret = pin_ggtt_status_page(engine, vma);
> +               if (ret)
> +                       goto err_unpin;
> +       }

Don't we now need special casing in gem_record_rings, since the error
capture will now try to iterate over vma->pages for the status page?
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 39/46] drm/i915: Always allocate an object/vma for the HWSP
  2019-01-10 10:52   ` Matthew Auld
@ 2019-01-10 11:07     ` Chris Wilson
  0 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-10 11:07 UTC (permalink / raw)
  To: Matthew Auld; +Cc: Intel Graphics Development

Quoting Matthew Auld (2019-01-10 10:52:48)
> On Mon, 7 Jan 2019 at 11:55, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >
> > Currently we only allocate an object and vma if we are using a GGTT
> > virtual HWSP, and a plain struct page for a physical HWSP. For
> > convenience later on with global timelines, it will be useful to always
> > have the status page being tracked by a struct i915_vma. Make it so.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> > +       engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
> >         engine->status_page.vma = vma;
> > -       engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
> > -       engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
> > +
> > +       if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
> > +               ret = pin_ggtt_status_page(engine, vma);
> > +               if (ret)
> > +                       goto err_unpin;
> > +       }
> 
> Don't we now need special casing in gem_record_rings, since the error
> capture will now try to iterate over vma->pages for the status page?

The weird part is that I spent several hours debugging hangs on
Crestline with this patch enabled, and we even trigger GPU capture in
CI; neither died.

No idea, as that for_each_sgt_dma(vma->pages) ought to be a NULL deref.
Whatever,
-       if (!vma)
+       if (!vma || !vma->pages)
                return NULL;
suffices.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 39/46] drm/i915: Always allocate an object/vma for the HWSP
  2019-01-07 11:55 ` [PATCH 39/46] drm/i915: Always allocate an object/vma for the HWSP Chris Wilson
  2019-01-10 10:52   ` Matthew Auld
@ 2019-01-10 11:24   ` Matthew Auld
  1 sibling, 0 replies; 111+ messages in thread
From: Matthew Auld @ 2019-01-10 11:24 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development

On Mon, 7 Jan 2019 at 11:55, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Currently we only allocate an object and vma if we are using a GGTT
> virtual HWSP, and a plain struct page for a physical HWSP. For
> convenience later on with global timelines, it will be useful to always
> have the status page being tracked by a struct i915_vma. Make it so.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 41/46] drm/i915: Introduce concept of per-timeline (context) HWSP
  2019-01-07 11:55 ` [PATCH 41/46] drm/i915: Introduce concept of per-timeline (context) HWSP Chris Wilson
@ 2019-01-15  0:55   ` John Harrison
  2019-01-15  9:14     ` Chris Wilson
  0 siblings, 1 reply; 111+ messages in thread
From: John Harrison @ 2019-01-15  0:55 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 1/7/2019 03:55, Chris Wilson wrote:
> Supplement the per-engine HWSP with a per-timeline HWSP. That is a
> per-request pointer through which we can check a local seqno,
> abstracting away the presumption of a global seqno. In this first step,
> we point each request back into the engine's HWSP so everything
> continues to work with the global timeline.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_request.c | 16 +++++++++++-----
>   drivers/gpu/drm/i915/i915_request.h | 16 +++++++++-------
>   drivers/gpu/drm/i915/intel_lrc.c    |  9 ++++++---
>   3 files changed, 26 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index c467392f62d7..3b69c62d040f 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -182,10 +182,11 @@ static void free_capture_list(struct i915_request *request)
>   static void __retire_engine_request(struct intel_engine_cs *engine,
>   				    struct i915_request *rq)
>   {
> -	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n",
> +	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n",
>   		  __func__, engine->name,
>   		  rq->fence.context, rq->fence.seqno,
>   		  rq->global_seqno,
> +		  i915_request_hwsp(rq),
>   		  intel_engine_get_seqno(engine));
>   
>   	GEM_BUG_ON(!i915_request_completed(rq));
> @@ -244,10 +245,11 @@ static void i915_request_retire(struct i915_request *request)
>   {
>   	struct i915_gem_active *active, *next;
>   
> -	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
> +	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
>   		  request->engine->name,
>   		  request->fence.context, request->fence.seqno,
>   		  request->global_seqno,
> +		  i915_request_hwsp(request),
>   		  intel_engine_get_seqno(request->engine));
>   
>   	lockdep_assert_held(&request->i915->drm.struct_mutex);
> @@ -307,10 +309,11 @@ void i915_request_retire_upto(struct i915_request *rq)
>   	struct intel_ring *ring = rq->ring;
>   	struct i915_request *tmp;
>   
> -	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
> +	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
>   		  rq->engine->name,
>   		  rq->fence.context, rq->fence.seqno,
>   		  rq->global_seqno,
> +		  i915_request_hwsp(rq),
>   		  intel_engine_get_seqno(rq->engine));
>   
>   	lockdep_assert_held(&rq->i915->drm.struct_mutex);
> @@ -348,10 +351,11 @@ void __i915_request_submit(struct i915_request *request)
>   	struct intel_engine_cs *engine = request->engine;
>   	u32 seqno;
>   
> -	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n",
> +	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n",
>   		  engine->name,
>   		  request->fence.context, request->fence.seqno,
>   		  engine->timeline.seqno + 1,
> +		  i915_request_hwsp(request),
>   		  intel_engine_get_seqno(engine));
>   
>   	GEM_BUG_ON(!irqs_disabled());
> @@ -398,10 +402,11 @@ void __i915_request_unsubmit(struct i915_request *request)
>   {
>   	struct intel_engine_cs *engine = request->engine;
>   
> -	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n",
> +	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n",
>   		  engine->name,
>   		  request->fence.context, request->fence.seqno,
>   		  request->global_seqno,
> +		  i915_request_hwsp(request),
>   		  intel_engine_get_seqno(engine));
>   
>   	GEM_BUG_ON(!irqs_disabled());
> @@ -585,6 +590,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>   	rq->ring = ce->ring;
>   	rq->timeline = ce->ring->timeline;
>   	GEM_BUG_ON(rq->timeline == &engine->timeline);
> +	rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX];
>   
>   	spin_lock_init(&rq->lock);
>   	dma_fence_init(&rq->fence,
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index d014b0605445..e2b209a26a8e 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -130,6 +130,8 @@ struct i915_request {
>   	struct i915_sched_node sched;
>   	struct i915_dependency dep;
>   
> +	const u32 *hwsp_seqno;
> +
>   	/**
>   	 * GEM sequence number associated with this request on the
>   	 * global execution timeline. It is zero when the request is not
> @@ -280,11 +282,6 @@ long i915_request_wait(struct i915_request *rq,
>   #define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
>   #define I915_WAIT_FOR_IDLE_BOOST BIT(4)
>   
> -static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
> -					    u32 seqno);
> -static inline bool intel_engine_has_completed(struct intel_engine_cs *engine,
> -					      u32 seqno);
> -
>   /**
>    * Returns true if seq1 is later than seq2.
>    */
> @@ -293,6 +290,11 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
>   	return (s32)(seq1 - seq2) >= 0;
>   }
>   
> +static inline u32 i915_request_hwsp(const struct i915_request *rq)
> +{
> +	return READ_ONCE(*rq->hwsp_seqno);
> +}
> +
Shouldn't the function name have an _seqno as well? Just 
'i915_request_hwsp()' is fairly ambiguous; there could be many 
different things stored in the HWSP.
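
E.g., keeping the body from the patch and only renaming:

	static inline u32 i915_request_hwsp_seqno(const struct i915_request *rq)
	{
		return READ_ONCE(*rq->hwsp_seqno);
	}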

>   /**
>    * i915_request_started - check if the request has begun being executed
>    * @rq: the request
> @@ -310,14 +312,14 @@ static inline bool i915_request_started(const struct i915_request *rq)
>   	if (!seqno) /* not yet submitted to HW */
>   		return false;
>   
> -	return intel_engine_has_started(rq->engine, seqno);
> +	return i915_seqno_passed(i915_request_hwsp(rq), seqno - 1);
>   }
>   
>   static inline bool
>   __i915_request_completed(const struct i915_request *rq, u32 seqno)
>   {
>   	GEM_BUG_ON(!seqno);
> -	return intel_engine_has_completed(rq->engine, seqno) &&
> +	return i915_seqno_passed(i915_request_hwsp(rq), seqno) &&
>   		seqno == i915_request_global_seqno(rq);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 3b512a54aacb..1df2a1868622 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -445,11 +445,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
>   			desc = execlists_update_context(rq);
>   			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
>   
> -			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
> +			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
>   				  engine->name, n,
>   				  port[n].context_id, count,
>   				  rq->global_seqno,
>   				  rq->fence.context, rq->fence.seqno,
> +				  i915_request_hwsp(rq),
>   				  intel_engine_get_seqno(engine),
>   				  rq_prio(rq));
>   		} else {
> @@ -738,11 +739,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
>   	while (num_ports-- && port_isset(port)) {
>   		struct i915_request *rq = port_request(port);
>   
> -		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n",
> +		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n",
>   			  rq->engine->name,
>   			  (unsigned int)(port - execlists->port),
>   			  rq->global_seqno,
>   			  rq->fence.context, rq->fence.seqno,
> +			  i915_request_hwsp(rq),
>   			  intel_engine_get_seqno(rq->engine));
>   
>   		GEM_BUG_ON(!execlists->active);
> @@ -966,12 +968,13 @@ static void process_csb(struct intel_engine_cs *engine)
>   						EXECLISTS_ACTIVE_USER));
>   
>   		rq = port_unpack(port, &count);
> -		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
> +		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
>   			  engine->name,
>   			  port->context_id, count,
>   			  rq ? rq->global_seqno : 0,
>   			  rq ? rq->fence.context : 0,
>   			  rq ? rq->fence.seqno : 0,
> +			  rq ? i915_request_hwsp(rq) : 0,
>   			  intel_engine_get_seqno(engine),
>   			  rq ? rq_prio(rq) : 0);
>   

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 111+ messages in thread

* Re: [PATCH 43/46] drm/i915: Allocate a status page for each timeline
  2019-01-07 11:55 ` [PATCH 43/46] drm/i915: Allocate a status page for each timeline Chris Wilson
@ 2019-01-15  0:56   ` John Harrison
  2019-01-15  9:50     ` Chris Wilson
  0 siblings, 1 reply; 111+ messages in thread
From: John Harrison @ 2019-01-15  0:56 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 1/7/2019 03:55, Chris Wilson wrote:
> Allocate a page for use as a status page by a group of timelines, as we
> only need a dword of storage for each (rounded up to the cacheline for
> safety) we can pack multiple timelines into the same page. Each timeline
> will then be able to track its own HW seqno.
>
> v2: Reuse the common per-engine HWSP for the solitary ringbuffer
> timeline, so that we do not have to emit (using per-gen specialised
> vfuncs) the breadcrumb into the distinct timeline HWSP and instead can
> keep on using the common MI_STORE_DWORD_INDEX. However, to maintain the
> sleight-of-hand for the global/per-context seqno switchover, we will
> store both temporarily (and so use a custom offset for the shared timeline
> HWSP until the switch over).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h               |   4 +
>   drivers/gpu/drm/i915/i915_timeline.c          | 159 +++++++-
>   drivers/gpu/drm/i915/i915_timeline.h          |  21 +-
>   drivers/gpu/drm/i915/intel_engine_cs.c        |  64 +--
>   drivers/gpu/drm/i915/intel_lrc.c              |  22 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.c       |  10 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.h       |   6 +-
>   .../drm/i915/selftests/i915_live_selftests.h  |   1 +
>   .../drm/i915/selftests/i915_mock_selftests.h  |   2 +-
>   .../gpu/drm/i915/selftests/i915_timeline.c    | 373 +++++++++++++++++-
>   drivers/gpu/drm/i915/selftests/mock_engine.c  |  17 +-
>   11 files changed, 626 insertions(+), 53 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4a8c45949c4d..a0009e7fe05a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1961,6 +1961,10 @@ struct drm_i915_private {
>   		struct mutex timeline_lock;
>   		struct list_head timelines;
>   
> +		/* Pack multiple timelines' seqnos into the same page */
> +		struct i915_vma *timeline_hwsp;
> +		u64 timeline_free;
> +
>   		struct list_head active_rings;
>   		struct list_head closed_vma;
>   		u32 active_requests;
> diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
> index 0434b5e0d3e1..73ad951c74d1 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.c
> +++ b/drivers/gpu/drm/i915/i915_timeline.c
> @@ -9,10 +9,75 @@
>   #include "i915_timeline.h"
>   #include "i915_syncmap.h"
>   
> -void i915_timeline_init(struct drm_i915_private *i915,
> -			struct i915_timeline *timeline,
> -			const char *name)
> +#define NBITS BITS_PER_TYPE(typeof(i915->gt.timeline_free))
> +
> +static int find_first_cacheline(struct drm_i915_private *i915)
> +{
> +	return find_first_bit((unsigned long *)&i915->gt.timeline_free, NBITS);
> +}
> +
> +static int alloc_hwsp(struct i915_timeline *timeline)
> +{
> +	struct drm_i915_private *i915 = timeline->i915;
> +	struct i915_vma *vma;
> +	int offset;
> +
> +	mutex_lock(&i915->gt.timeline_lock);
> +
> +restart:
> +	offset = find_first_cacheline(i915);
> +	if (offset == NBITS && i915->gt.timeline_hwsp) {
> +		i915_vma_put(i915->gt.timeline_hwsp);
> +		i915->gt.timeline_hwsp = NULL;
> +	}
> +
> +	vma = i915->gt.timeline_hwsp;
> +	if (!vma) {
> +		struct drm_i915_gem_object *bo;
> +
> +		/* Drop the lock before allocations */
> +		mutex_unlock(&i915->gt.timeline_lock);
> +
> +		BUILD_BUG_ON(NBITS * CACHELINE_BYTES > PAGE_SIZE);
> +		bo = i915_gem_object_create_internal(i915, PAGE_SIZE);
> +		if (IS_ERR(bo))
> +			return PTR_ERR(bo);
> +
> +		i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
> +
> +		vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL);
> +		if (IS_ERR(vma))
> +			return PTR_ERR(vma);
> +
> +		mutex_lock(&i915->gt.timeline_lock);
> +		if (i915->gt.timeline_hwsp) {
> +			i915_gem_object_put(bo);
> +			goto restart;
> +		}
> +
> +		i915->gt.timeline_hwsp = vma;
> +		i915->gt.timeline_free = ~0ull;
> +		offset = 0;
> +	}
> +
> +	i915->gt.timeline_free &= ~BIT_ULL(offset);
> +
> +	timeline->hwsp_ggtt = i915_vma_get(vma);
> +	timeline->hwsp_offset = offset * CACHELINE_BYTES;
> +
> +	mutex_unlock(&i915->gt.timeline_lock);
> +
> +	return 0;
> +}
If I'm reading this correctly, then gt.timeline_hwsp/free is a cached 
copy of the most recently allocated but not yet filled bank of seqno 
locations. When it gets full, the i915->gt reference gets dropped and a 
new page is allocated and used up cacheline by cacheline. Meanwhile, 
each timeline has its own private reference to the page, so dropping 
the i915->gt reference is safe. And once the last timeline using a 
given page is freed, the last reference to that page will be dropped 
and so the page itself will also be freed. If a timeline is freed 
before the currently cached page is filled, then that timeline's slot 
will be released and re-used by the next timeline to be created.

But what about the scenario of a long-running system with a small but 
growing number of persistent tasks interspersed with many short-lived 
tasks? In that case, you would end up with many sparsely populated 
pages whose free slots will not get re-used. You could have a linked 
list of cached pages (a rough sketch follows below). When a page is 
filled, move it to a 'full' list. When a timeline is freed, if its 
page was on the 'full' list, clear the slot and move it back to the 
'available' list.
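
A minimal sketch of that two-list scheme; every structure and function 
name here is invented for illustration, none of it is from the patch:

	struct hwsp_lists {			/* hypothetical container */
		struct list_head free_list;	/* pages with spare slots */
		struct list_head full_list;	/* fully allocated pages */
	};

	struct hwsp_page {
		struct list_head link;	/* on free_list or full_list */
		struct i915_vma *vma;	/* the backing GGTT page */
		u64 free_bitmap;	/* one bit per CACHELINE_BYTES slot */
	};

	static int hwsp_alloc_slot(struct hwsp_lists *lists,
				   struct hwsp_page **page,
				   unsigned int *slot)
	{
		struct hwsp_page *p;

		p = list_first_entry_or_null(&lists->free_list,
					     typeof(*p), link);
		if (!p)
			return -ENOSPC; /* caller allocates a fresh page */

		*slot = find_first_bit((unsigned long *)&p->free_bitmap,
				       BITS_PER_TYPE(p->free_bitmap));
		p->free_bitmap &= ~BIT_ULL(*slot);
		if (!p->free_bitmap) /* no slots left, park it */
			list_move(&p->link, &lists->full_list);

		*page = p;
		return 0;
	}

	static void hwsp_free_slot(struct hwsp_lists *lists,
				   struct hwsp_page *p,
				   unsigned int slot)
	{
		if (!p->free_bitmap) /* was full, now allocatable again */
			list_move(&p->link, &lists->free_list);
		p->free_bitmap |= BIT_ULL(slot);
	}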

Or is the idea that a worst case of a single page vma allocation per 
timeline is the least of our worries if there is an ever growing number 
of timelines/contexts/users in the system?


> +
> +int i915_timeline_init(struct drm_i915_private *i915,
> +		       struct i915_timeline *timeline,
> +		       const char *name,
> +		       struct i915_vma *global_hwsp)
>   {
> +	void *vaddr;
> +	int err;
> +
>   	/*
>   	 * Ideally we want a set of engines on a single leaf as we expect
>   	 * to mostly be tracking synchronisation between engines. It is not
> @@ -23,10 +88,27 @@ void i915_timeline_init(struct drm_i915_private *i915,
>   
>   	timeline->i915 = i915;
>   	timeline->name = name;
> +	timeline->pin_count = 0;
>   
> -	mutex_lock(&i915->gt.timeline_lock);
> -	list_add(&timeline->link, &i915->gt.timelines);
> -	mutex_unlock(&i915->gt.timeline_lock);
> +	if (global_hwsp) {
> +		timeline->hwsp_ggtt = i915_vma_get(global_hwsp);
> +		timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
> +	} else {
> +		err = alloc_hwsp(timeline);
> +		if (err)
> +			return err;
> +	}
> +
> +	vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB);
> +	if (IS_ERR(vaddr)) { /* leak the cacheline, but will clean up later */
Can you explain this comment more? Where/when is the 'later'?

> +		i915_vma_put(timeline->hwsp_ggtt);
> +		return PTR_ERR(vaddr);
> +	}
> +
> +	timeline->hwsp_seqno =
> +		memset(vaddr + timeline->hwsp_offset,
> +		       0,
> +		       sizeof(*timeline->hwsp_seqno));
>   
>   	/* Called during early_init before we know how many engines there are */
>   
> @@ -38,6 +120,12 @@ void i915_timeline_init(struct drm_i915_private *i915,
>   	INIT_LIST_HEAD(&timeline->requests);
>   
>   	i915_syncmap_init(&timeline->sync);
> +
> +	mutex_lock(&i915->gt.timeline_lock);
> +	list_add(&timeline->link, &i915->gt.timelines);
> +	mutex_unlock(&i915->gt.timeline_lock);
> +
> +	return 0;
>   }
>   
>   void i915_timelines_init(struct drm_i915_private *i915)
> @@ -78,30 +166,75 @@ void i915_timelines_park(struct drm_i915_private *i915)
>   
>   void i915_timeline_fini(struct i915_timeline *timeline)
>   {
> +	struct drm_i915_private *i915 = timeline->i915;
> +
> +	GEM_BUG_ON(timeline->pin_count);
>   	GEM_BUG_ON(!list_empty(&timeline->requests));
>   
>   	i915_syncmap_free(&timeline->sync);
>   
> -	mutex_lock(&timeline->i915->gt.timeline_lock);
> +	mutex_lock(&i915->gt.timeline_lock);
>   	list_del(&timeline->link);
> -	mutex_unlock(&timeline->i915->gt.timeline_lock);
> +	if (timeline->hwsp_ggtt == i915->gt.timeline_hwsp)
> +		i915->gt.timeline_free |=
> +			BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
> +	mutex_unlock(&i915->gt.timeline_lock);
> +
> +	i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
> +	i915_vma_put(timeline->hwsp_ggtt);
>   }
>   
>   struct i915_timeline *
> -i915_timeline_create(struct drm_i915_private *i915, const char *name)
> +i915_timeline_create(struct drm_i915_private *i915,
> +		     const char *name,
> +		     struct i915_vma *global_hwsp)
>   {
>   	struct i915_timeline *timeline;
> +	int err;
>   
>   	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
>   	if (!timeline)
>   		return ERR_PTR(-ENOMEM);
>   
> -	i915_timeline_init(i915, timeline, name);
> +	err = i915_timeline_init(i915, timeline, name, global_hwsp);
> +	if (err) {
> +		kfree(timeline);
> +		return ERR_PTR(err);
> +	}
> +
>   	kref_init(&timeline->kref);
>   
>   	return timeline;
>   }
>   
> +int i915_timeline_pin(struct i915_timeline *tl)
> +{
> +	int err;
> +
> +	if (tl->pin_count++)
> +		return 0;
> +	GEM_BUG_ON(!tl->pin_count);
> +
> +	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
> +	if (err)
> +		goto unpin;
> +
> +	return 0;
> +
> +unpin:
> +	tl->pin_count = 0;
> +	return err;
> +}
> +
> +void i915_timeline_unpin(struct i915_timeline *tl)
> +{
> +	GEM_BUG_ON(!tl->pin_count);
> +	if (--tl->pin_count)
> +		return;
> +
> +	__i915_vma_unpin(tl->hwsp_ggtt);
> +}
> +
>   void __i915_timeline_free(struct kref *kref)
>   {
>   	struct i915_timeline *timeline =
> @@ -113,8 +246,14 @@ void __i915_timeline_free(struct kref *kref)
>   
>   void i915_timelines_fini(struct drm_i915_private *i915)
>   {
> +	struct i915_vma *vma;
> +
>   	GEM_BUG_ON(!list_empty(&i915->gt.timelines));
>   
> +	vma = fetch_and_zero(&i915->gt.timeline_hwsp);
> +	if (vma)
> +		i915_vma_put(vma);
> +
>   	mutex_destroy(&i915->gt.timeline_lock);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
> index 87ad2dd31c20..0c3739d53d79 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_timeline.h
> @@ -32,6 +32,8 @@
>   #include "i915_syncmap.h"
>   #include "i915_utils.h"
>   
> +struct i915_vma;
> +
>   struct i915_timeline {
>   	u64 fence_context;
>   	u32 seqno;
> @@ -40,6 +42,11 @@ struct i915_timeline {
>   #define TIMELINE_CLIENT 0 /* default subclass */
>   #define TIMELINE_ENGINE 1
>   
> +	unsigned int pin_count;
> +	const u32 *hwsp_seqno;
> +	struct i915_vma *hwsp_ggtt;
> +	u32 hwsp_offset;
> +
>   	/**
>   	 * List of breadcrumbs associated with GPU requests currently
>   	 * outstanding.
> @@ -71,9 +78,10 @@ struct i915_timeline {
>   	struct kref kref;
>   };
>   
> -void i915_timeline_init(struct drm_i915_private *i915,
> -			struct i915_timeline *tl,
> -			const char *name);
> +int i915_timeline_init(struct drm_i915_private *i915,
> +		       struct i915_timeline *tl,
> +		       const char *name,
> +		       struct i915_vma *hwsp);
>   void i915_timeline_fini(struct i915_timeline *tl);
>   
>   static inline void
> @@ -96,7 +104,9 @@ i915_timeline_set_subclass(struct i915_timeline *timeline,
>   }
>   
>   struct i915_timeline *
> -i915_timeline_create(struct drm_i915_private *i915, const char *name);
> +i915_timeline_create(struct drm_i915_private *i915,
> +		     const char *name,
> +		     struct i915_vma *global_hwsp);
>   
>   static inline struct i915_timeline *
>   i915_timeline_get(struct i915_timeline *timeline)
> @@ -135,6 +145,9 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
>   	return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno);
>   }
>   
> +int i915_timeline_pin(struct i915_timeline *tl);
> +void i915_timeline_unpin(struct i915_timeline *tl);
> +
>   void i915_timelines_init(struct drm_i915_private *i915);
>   void i915_timelines_park(struct drm_i915_private *i915);
>   void i915_timelines_fini(struct drm_i915_private *i915);
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index ffef7f43fda3..f168f2fee979 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -484,26 +484,6 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
>   	execlists->queue = RB_ROOT_CACHED;
>   }
>   
> -/**
> - * intel_engines_setup_common - setup engine state not requiring hw access
> - * @engine: Engine to setup.
> - *
> - * Initializes @engine@ structure members shared between legacy and execlists
> - * submission modes which do not require hardware access.
> - *
> - * Typically done early in the submission mode specific engine setup stage.
> - */
> -void intel_engine_setup_common(struct intel_engine_cs *engine)
> -{
> -	i915_timeline_init(engine->i915, &engine->timeline, engine->name);
> -	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
> -
> -	intel_engine_init_execlist(engine);
> -	intel_engine_init_hangcheck(engine);
> -	intel_engine_init_batch_pool(engine);
> -	intel_engine_init_cmd_parser(engine);
> -}
> -
>   static void cleanup_status_page(struct intel_engine_cs *engine)
>   {
>   	struct i915_vma *vma;
> @@ -601,6 +581,44 @@ static int init_status_page(struct intel_engine_cs *engine)
>   	return ret;
>   }
>   
> +/**
> + * intel_engines_setup_common - setup engine state not requiring hw access
> + * @engine: Engine to setup.
> + *
> + * Initializes @engine@ structure members shared between legacy and execlists
> + * submission modes which do not require hardware access.
> + *
> + * Typically done early in the submission mode specific engine setup stage.
> + */
> +int intel_engine_setup_common(struct intel_engine_cs *engine)
> +{
> +	int err;
> +
> +	err = init_status_page(engine);
> +	if (err)
> +		return err;
> +
> +	err = i915_timeline_init(engine->i915,
> +				 &engine->timeline,
> +				 engine->name,
> +				 engine->status_page.vma);
> +	if (err)
> +		goto err_hwsp;
> +
> +	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
> +
> +	intel_engine_init_execlist(engine);
> +	intel_engine_init_hangcheck(engine);
> +	intel_engine_init_batch_pool(engine);
> +	intel_engine_init_cmd_parser(engine);
> +
> +	return 0;
> +
> +err_hwsp:
> +	cleanup_status_page(engine);
> +	return err;
> +}
> +
>   static void __intel_context_unpin(struct i915_gem_context *ctx,
>   				  struct intel_engine_cs *engine)
>   {
> @@ -653,14 +671,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>   	if (ret)
>   		goto err_unpin_preempt;
>   
> -	ret = init_status_page(engine);
> -	if (ret)
> -		goto err_breadcrumbs;
> -
>   	return 0;
>   
> -err_breadcrumbs:
> -	intel_engine_fini_breadcrumbs(engine);
>   err_unpin_preempt:
>   	if (i915->preempt_context)
>   		__intel_context_unpin(i915->preempt_context, engine);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 1df2a1868622..5927ef124bf9 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2188,10 +2188,14 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
>   	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
>   }
>   
> -static void
> +static int
>   logical_ring_setup(struct intel_engine_cs *engine)
>   {
> -	intel_engine_setup_common(engine);
> +	int err;
> +
> +	err = intel_engine_setup_common(engine);
> +	if (err)
> +		return err;
>   
>   	/* Intentionally left blank. */
>   	engine->buffer = NULL;
> @@ -2201,6 +2205,8 @@ logical_ring_setup(struct intel_engine_cs *engine)
>   
>   	logical_ring_default_vfuncs(engine);
>   	logical_ring_default_irqs(engine);
> +
> +	return 0;
>   }
>   
>   static int logical_ring_init(struct intel_engine_cs *engine)
> @@ -2248,7 +2254,9 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
>   	struct drm_i915_private *dev_priv = engine->i915;
>   	int ret;
>   
> -	logical_ring_setup(engine);
> +	ret = logical_ring_setup(engine);
> +	if (ret)
> +		return ret;
>   
>   	if (HAS_L3_DPF(dev_priv))
>   		engine->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
> @@ -2282,7 +2290,11 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
>   
>   int logical_xcs_ring_init(struct intel_engine_cs *engine)
>   {
> -	logical_ring_setup(engine);
> +	int err;
> +
> +	err = logical_ring_setup(engine);
> +	if (err)
> +		return err;
>   
>   	return logical_ring_init(engine);
>   }
> @@ -2616,7 +2628,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>   		goto error_deref_obj;
>   	}
>   
> -	timeline = i915_timeline_create(ctx->i915, ctx->name);
> +	timeline = i915_timeline_create(ctx->i915, ctx->name, NULL);
Why does this use the global HWSP rather than a per context one?

>   	if (IS_ERR(timeline)) {
>   		ret = PTR_ERR(timeline);
>   		goto error_deref_obj;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 8700f102f669..24bd6f5b4f57 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1540,9 +1540,13 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
>   	struct intel_ring *ring;
>   	int err;
>   
> -	intel_engine_setup_common(engine);
> +	err = intel_engine_setup_common(engine);
> +	if (err)
> +		return err;
>   
> -	timeline = i915_timeline_create(engine->i915, engine->name);
> +	timeline = i915_timeline_create(engine->i915,
> +					engine->name,
> +					engine->status_page.vma);
>   	if (IS_ERR(timeline)) {
>   		err = PTR_ERR(timeline);
>   		goto err;
> @@ -1566,6 +1570,8 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
>   	if (err)
>   		goto err_unpin;
>   
> +	GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma);
> +
>   	return 0;
>   
>   err_unpin:
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c232549c188e..e6ec96e0ab56 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -716,7 +716,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
>   #define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
>   #define I915_GEM_HWS_PREEMPT_INDEX	0x32
>   #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
> -#define I915_GEM_HWS_SCRATCH_INDEX	0x40
> +#define I915_GEM_HWS_SEQNO		0x40
> +#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO << MI_STORE_DWORD_INDEX_SHIFT)
> +#define I915_GEM_HWS_SCRATCH_INDEX	0x80
>   #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
>   
>   #define I915_HWS_CSB_BUF0_INDEX		0x10
> @@ -822,7 +824,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
>   
>   void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);
>   
> -void intel_engine_setup_common(struct intel_engine_cs *engine);
> +int intel_engine_setup_common(struct intel_engine_cs *engine);
>   int intel_engine_init_common(struct intel_engine_cs *engine);
>   void intel_engine_cleanup_common(struct intel_engine_cs *engine);
>   
> diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> index a15713cae3b3..76b4f87fc853 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> @@ -13,6 +13,7 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */
>   selftest(uncore, intel_uncore_live_selftests)
>   selftest(workarounds, intel_workarounds_live_selftests)
>   selftest(requests, i915_request_live_selftests)
> +selftest(timelines, i915_timeline_live_selftests)
>   selftest(objects, i915_gem_object_live_selftests)
>   selftest(dmabuf, i915_gem_dmabuf_live_selftests)
>   selftest(coherency, i915_gem_coherency_live_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> index 1b70208eeea7..4a83a1c6c406 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> @@ -16,7 +16,7 @@ selftest(syncmap, i915_syncmap_mock_selftests)
>   selftest(uncore, intel_uncore_mock_selftests)
>   selftest(engine, intel_engine_cs_mock_selftests)
>   selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
> -selftest(timelines, i915_gem_timeline_mock_selftests)
> +selftest(timelines, i915_timeline_mock_selftests)
>   selftest(requests, i915_request_mock_selftests)
>   selftest(objects, i915_gem_object_mock_selftests)
>   selftest(dmabuf, i915_gem_dmabuf_mock_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c
> index 19f1c6a5c8fb..d13779808200 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c
> @@ -256,7 +256,7 @@ static int bench_sync(void *arg)
>   	return 0;
>   }
>   
> -int i915_gem_timeline_mock_selftests(void)
> +int i915_timeline_mock_selftests(void)
>   {
>   	static const struct i915_subtest tests[] = {
>   		SUBTEST(igt_sync),
> @@ -265,3 +265,374 @@ int i915_gem_timeline_mock_selftests(void)
>   
>   	return i915_subtests(tests, NULL);
>   }
> +
> +static int live_hwsp_engine(void *arg)
> +{
> +#define NUM_TIMELINES 4096
> +	struct drm_i915_private *i915 = arg;
> +	struct i915_timeline **timelines;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	intel_wakeref_t wakeref;
> +	unsigned long count, n;
> +	int err = 0;
> +
> +	/*
> +	 * Create a bunch of timelines and check we can write
> +	 * independently to each of their breadcrumb slots.
> +	 */
> +
> +	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
> +				   sizeof(*timelines),
> +				   GFP_KERNEL);
> +	if (!timelines)
> +		return -ENOMEM;
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +	wakeref = intel_runtime_pm_get(i915);
> +
> +	count = 0;
> +	for_each_engine(engine, i915, id) {
> +		if (!intel_engine_can_store_dword(engine))
> +			continue;
> +
> +		for (n = 0; n < NUM_TIMELINES; n++) {
> +			struct i915_timeline *tl;
> +			struct i915_request *rq;
> +			u32 addr;
> +			u32 *cs;
> +
> +			tl = i915_timeline_create(i915, "live", NULL);
> +			if (IS_ERR(tl)) {
> +				err = PTR_ERR(tl);
> +				goto out;
> +			}
> +
> +			if (*tl->hwsp_seqno) {
> +				pr_err("Timeline %lu created with non-zero breadcrumb, found %x\n",
> +				       count, *tl->hwsp_seqno);
> +				err = -EINVAL;
> +				i915_timeline_put(tl);
> +				goto out;
> +			}
> +
> +			err = i915_timeline_pin(tl);
> +			if (err) {
> +				i915_timeline_put(tl);
> +				goto out;
> +			}
> +
> +			rq = i915_request_alloc(engine, i915->kernel_context);
> +			if (IS_ERR(rq)) {
> +				i915_timeline_unpin(tl);
> +				i915_timeline_put(tl);
> +				err = PTR_ERR(rq);
> +				goto out;
> +			}
> +
> +			cs = intel_ring_begin(rq, 4);
> +			if (IS_ERR(cs)) {
> +				i915_request_add(rq);
> +				i915_timeline_unpin(tl);
> +				i915_timeline_put(tl);
> +				err = PTR_ERR(cs);
> +				goto out;
> +			}
> +
> +			addr = i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset;
> +
> +			if (INTEL_GEN(i915) >= 8) {
> +				*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> +				*cs++ = addr;
> +				*cs++ = 0;
> +				*cs++ = count;
> +			} else if (INTEL_GEN(i915) >= 4) {
> +				*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> +				*cs++ = 0;
> +				*cs++ = addr;
> +				*cs++ = count;
> +			} else {
> +				*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
> +				*cs++ = addr;
> +				*cs++ = count;
> +				*cs++ = MI_NOOP;
> +			}
> +			intel_ring_advance(rq, cs);
> +
> +			i915_request_add(rq);
> +			i915_timeline_unpin(tl);
> +
> +			timelines[count++] = tl;
> +		}
> +	}
> +
> +	err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT);
> +
> +out:
> +	for (n = 0; n < count; n++) {
> +		struct i915_timeline *tl = timelines[n];
> +
> +		if (!err && *tl->hwsp_seqno != n) {
> +			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
> +			       n, *tl->hwsp_seqno);
> +			err = -EINVAL;
> +		}
> +		i915_timeline_put(tl);
> +	}
> +
> +	intel_runtime_pm_put(i915, wakeref);
> +	mutex_unlock(&i915->drm.struct_mutex);
> +
> +	kvfree(timelines);
> +
> +	return err;
> +#undef NUM_TIMELINES
> +}
> +
> +static int live_hwsp_alternate(void *arg)
> +{
> +#define NUM_TIMELINES 4096
> +	struct drm_i915_private *i915 = arg;
> +	struct i915_timeline **timelines;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	intel_wakeref_t wakeref;
> +	unsigned long count, n;
> +	int err = 0;
> +
> +	/*
> +	 * Create a bunch of timelines and check we can write
> +	 * independently to each of their breadcrumb slots with adjacent
> +	 * engines.
> +	 */
> +
> +	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
> +				   sizeof(*timelines),
> +				   GFP_KERNEL);
> +	if (!timelines)
> +		return -ENOMEM;
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +	wakeref = intel_runtime_pm_get(i915);
> +
> +	count = 0;
> +	for (n = 0; n < NUM_TIMELINES; n++) {
> +		for_each_engine(engine, i915, id) {
> +			struct i915_timeline *tl;
> +			struct i915_request *rq;
> +			u32 addr;
> +			u32 *cs;
> +
> +			if (!intel_engine_can_store_dword(engine))
> +				continue;
> +
> +			tl = i915_timeline_create(i915, "live", NULL);
> +			if (IS_ERR(tl)) {
> +				err = PTR_ERR(tl);
> +				goto out;
> +			}
> +
> +			if (*tl->hwsp_seqno) {
> +				pr_err("Timeline %lu created with non-zero breadcrumb, found %x\n",
> +				       count, *tl->hwsp_seqno);
> +				err = -EINVAL;
> +				i915_timeline_put(tl);
> +				goto out;
> +			}
> +
> +			err = i915_timeline_pin(tl);
> +			if (err) {
> +				i915_timeline_put(tl);
> +				goto out;
> +			}
> +
> +			rq = i915_request_alloc(engine, i915->kernel_context);
> +			if (IS_ERR(rq)) {
> +				i915_timeline_unpin(tl);
> +				i915_timeline_put(tl);
> +				err = PTR_ERR(rq);
> +				goto out;
> +			}
> +
> +			cs = intel_ring_begin(rq, 4);
> +			if (IS_ERR(cs)) {
> +				i915_request_add(rq);
> +				i915_timeline_unpin(tl);
> +				i915_timeline_put(tl);
> +				err = PTR_ERR(cs);
> +				goto out;
> +			}
> +
> +			addr = i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset;
> +
> +			if (INTEL_GEN(i915) >= 8) {
> +				*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> +				*cs++ = addr;
> +				*cs++ = 0;
> +				*cs++ = count;
> +			} else if (INTEL_GEN(i915) >= 4) {
> +				*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> +				*cs++ = 0;
> +				*cs++ = addr;
> +				*cs++ = count;
> +			} else {
> +				*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
> +				*cs++ = addr;
> +				*cs++ = count;
> +				*cs++ = MI_NOOP;
> +			}
> +			intel_ring_advance(rq, cs);
> +
> +			i915_request_add(rq);
> +			i915_timeline_unpin(tl);
> +
> +			timelines[count++] = tl;
> +		}
> +	}
> +
> +	err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT);
> +
> +out:
> +	for (n = 0; n < count; n++) {
> +		struct i915_timeline *tl = timelines[n];
> +
> +		if (!err && *tl->hwsp_seqno != n) {
> +			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
> +			       n, *tl->hwsp_seqno);
> +			err = -EINVAL;
> +		}
> +		i915_timeline_put(tl);
> +	}
> +
> +	intel_runtime_pm_put(i915, wakeref);
> +	mutex_unlock(&i915->drm.struct_mutex);
> +
> +	kvfree(timelines);
> +
> +	return err;
> +#undef NUM_TIMELINES
> +}
> +
> +static int live_hwsp_recycle(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	intel_wakeref_t wakeref;
> +	unsigned long count;
> +	int err = 0;
> +
> +	/*
> +	 * Check seqno writes into one timeline at a time. We expect to
> +	 * recycle the breadcrumb slot between iterations and neither
> +	 * want to confuse ourselves or the GPU.
> +	 */
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +	wakeref = intel_runtime_pm_get(i915);
> +
> +	count = 0;
> +	for_each_engine(engine, i915, id) {
> +		IGT_TIMEOUT(end_time);
> +
> +		if (!intel_engine_can_store_dword(engine))
> +			continue;
> +
> +		do {
> +			struct i915_timeline *tl;
> +			struct i915_request *rq;
> +			u32 addr;
> +			u32 *cs;
> +
> +			tl = i915_timeline_create(i915, "live", NULL);
> +			if (IS_ERR(tl)) {
> +				err = PTR_ERR(tl);
> +				goto out;
> +			}
> +
> +			if (*tl->hwsp_seqno) {
> +				pr_err("Timeline %lu created with non-zero breadcrumb, found %x\n",
> +				       count, *tl->hwsp_seqno);
> +				err = -EINVAL;
> +				i915_timeline_put(tl);
> +				goto out;
> +			}
> +
> +			err = i915_timeline_pin(tl);
> +			if (err) {
> +				i915_timeline_put(tl);
> +				goto out;
> +			}
> +
> +			rq = i915_request_alloc(engine, i915->kernel_context);
> +			if (IS_ERR(rq)) {
> +				i915_timeline_unpin(tl);
> +				i915_timeline_put(tl);
> +				err = PTR_ERR(rq);
> +				goto out;
> +			}
> +
> +			cs = intel_ring_begin(rq, 4);
> +			if (IS_ERR(cs)) {
> +				i915_request_add(rq);
> +				i915_timeline_unpin(tl);
> +				i915_timeline_put(tl);
> +				err = PTR_ERR(cs);
> +				goto out;
> +			}
> +
> +			addr = i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset;
> +
> +			if (INTEL_GEN(i915) >= 8) {
> +				*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> +				*cs++ = addr;
> +				*cs++ = 0;
> +				*cs++ = count;
> +			} else if (INTEL_GEN(i915) >= 4) {
> +				*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> +				*cs++ = 0;
> +				*cs++ = addr;
> +				*cs++ = count;
> +			} else {
> +				*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
> +				*cs++ = addr;
> +				*cs++ = count;
> +				*cs++ = MI_NOOP;
> +			}
> +			intel_ring_advance(rq, cs);
> +
> +			i915_request_add(rq);
> +			i915_timeline_unpin(tl);
> +
> +			i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT);
> +			if (*tl->hwsp_seqno != count) {
> +				pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
> +				       count, *tl->hwsp_seqno);
> +				err = -EINVAL;
> +			}
> +
> +			i915_timeline_put(tl);
> +			count++;
> +
> +			if (err)
> +				goto out;
> +		} while (!__igt_timeout(end_time, NULL));
> +	}
> +
> +out:
> +	intel_runtime_pm_put(i915, wakeref);
> +	mutex_unlock(&i915->drm.struct_mutex);
> +
> +	return err;
> +}
> +
> +int i915_timeline_live_selftests(struct drm_i915_private *i915)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(live_hwsp_recycle),
> +		SUBTEST(live_hwsp_engine),
> +		SUBTEST(live_hwsp_alternate),
> +	};
> +
> +	return i915_subtests(tests, i915);
> +}
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
> index 968a7e139a67..acd27c7e807b 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
> @@ -34,12 +34,20 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
>   {
>   	const unsigned long sz = PAGE_SIZE / 2;
>   	struct mock_ring *ring;
> +	int err;
>   
>   	ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
>   	if (!ring)
>   		return NULL;
>   
> -	i915_timeline_init(engine->i915, &ring->timeline, engine->name);
> +	err = i915_timeline_init(engine->i915,
> +				 &ring->timeline,
> +				 engine->name,
> +				 NULL);
> +	if (err) {
> +		kfree(ring);
> +		return NULL;
> +	}
>   
>   	ring->base.size = sz;
>   	ring->base.effective_size = sz;
> @@ -209,7 +217,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
>   	engine->base.emit_breadcrumb = mock_emit_breadcrumb;
>   	engine->base.submit_request = mock_submit_request;
>   
> -	i915_timeline_init(i915, &engine->base.timeline, engine->base.name);
> +	if (i915_timeline_init(i915,
> +			       &engine->base.timeline,
> +			       engine->base.name,
> +			       NULL))
> +		goto err_free;
>   	i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE);
>   
>   	intel_engine_init_breadcrumbs(&engine->base);
> @@ -227,6 +239,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
>   err_breadcrumbs:
>   	intel_engine_fini_breadcrumbs(&engine->base);
>   	i915_timeline_fini(&engine->base.timeline);
> +err_free:
>   	kfree(engine);
>   	return NULL;
>   }


* Re: [PATCH 41/46] drm/i915: Introduce concept of per-timeline (context) HWSP
  2019-01-15  0:55   ` John Harrison
@ 2019-01-15  9:14     ` Chris Wilson
  2019-01-15 15:40       ` Chris Wilson
  0 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-15  9:14 UTC (permalink / raw)
  To: John Harrison, intel-gfx

Quoting John Harrison (2019-01-15 00:55:39)
> On 1/7/2019 03:55, Chris Wilson wrote:
> > Supplement the per-engine HWSP with a per-timeline HWSP. That is a
> > per-request pointer through which we can check a local seqno,
> > abstracting away the presumption of a global seqno. In this first step,
> > we point each request back into the engine's HWSP so everything
> > continues to work with the global timeline.
> > ---
> > +static inline u32 i915_request_hwsp(const struct i915_request *rq)
> > +{
> > +     return READ_ONCE(*rq->hwsp_seqno);
> > +}
> > +
> Shouldn't the function name have an _seqno as well? Just 
> 'i915_request_hwsp()' is fairly ambiguous, there could be many different 
> things stored in the HWSP.

It's not even necessarily the HWSP! :)

i915_request_hw_seqno() // dissatisfying
-> i915_request_hwsp_seqno() // but rq only stores one element in HWSP!
-> i915_request_hwsp()

Was the evolution of names I chose.

Of that mix, i915_request_hwsp_seqno(). hw_seqno just feels nondescript.

i915_request_current_[hw]_seqno() maybe, but because we start with
i915_request I find it confusing and expect the seqno to be tied to the
request. So maybe just drop i915_request here, and go with something
like hwsp_breadcrumb(), that just happens to take i915_request as a
convenience.
-Chris

* Re: [PATCH 43/46] drm/i915: Allocate a status page for each timeline
  2019-01-15  0:56   ` John Harrison
@ 2019-01-15  9:50     ` Chris Wilson
  2019-01-15 18:17       ` John Harrison
  0 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-15  9:50 UTC (permalink / raw)
  To: John Harrison, intel-gfx

Quoting John Harrison (2019-01-15 00:56:13)
> On 1/7/2019 03:55, Chris Wilson wrote:
> > +static int alloc_hwsp(struct i915_timeline *timeline)
> > +{
> > +     struct drm_i915_private *i915 = timeline->i915;
> > +     struct i915_vma *vma;
> > +     int offset;
> > +
> > +     mutex_lock(&i915->gt.timeline_lock);
> > +
> > +restart:
> > +     offset = find_first_cacheline(i915);
> > +     if (offset == NBITS && i915->gt.timeline_hwsp) {
> > +             i915_vma_put(i915->gt.timeline_hwsp);
> > +             i915->gt.timeline_hwsp = NULL;
> > +     }
> > +
> > +     vma = i915->gt.timeline_hwsp;
> > +     if (!vma) {
> > +             struct drm_i915_gem_object *bo;
> > +
> > +             /* Drop the lock before allocations */
> > +             mutex_unlock(&i915->gt.timeline_lock);
> > +
> > +             BUILD_BUG_ON(NBITS * CACHELINE_BYTES > PAGE_SIZE);
> > +             bo = i915_gem_object_create_internal(i915, PAGE_SIZE);
> > +             if (IS_ERR(bo))
> > +                     return PTR_ERR(bo);
> > +
> > +             i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
> > +
> > +             vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL);
> > +             if (IS_ERR(vma))
> > +                     return PTR_ERR(vma);
> > +
> > +             mutex_lock(&i915->gt.timeline_lock);
> > +             if (i915->gt.timeline_hwsp) {
> > +                     i915_gem_object_put(bo);
> > +                     goto restart;
> > +             }
> > +
> > +             i915->gt.timeline_hwsp = vma;
> > +             i915->gt.timeline_free = ~0ull;
> > +             offset = 0;
> > +     }
> > +
> > +     i915->gt.timeline_free &= ~BIT_ULL(offset);
> > +
> > +     timeline->hwsp_ggtt = i915_vma_get(vma);
> > +     timeline->hwsp_offset = offset * CACHELINE_BYTES;
> > +
> > +     mutex_unlock(&i915->gt.timeline_lock);
> > +
> > +     return 0;
> > +}
> If I'm reading this correctly then gt.timeline_hwsp/free is a cached 
> copy of the most recently allocated but not yet filled bank of seqno 
> locations. When it gets full, the i915->gt reference gets dropped and a 
> new page is allocated and used up line by line. Meanwhile, each timeline 
> has its own private reference to the page so dropping the i915->gt 
> reference is safe. And once the last timeline using a given page is 
> freed, the last reference to that page will be dropped and so the page 
> itself will also be freed. If a timeline is freed before the currently 
> cached page is filled, then that timeline's slot will be released and 
> re-used by the next timeline to be created.
> 
> But what about the scenario of a long running system with a small but 
> growing number of persistent tasks interspersed with many short lived 
> tasks? In that case, you would end up with many sparsely populated pages 
> whose free slots will not get re-used. You could have a linked list 
> of cached pages. When a page is filled, move it to a 'full' list. When a 
> timeline is freed, if its page was on the 'full' list, clear the slot 
> and move it back to the 'available' list.

Yes. My thinking was a plain slab cache was a quick-and-dirty
improvement over a page-per-timeline. And a freelist would be the next 
step.

> Or is the idea that a worst case of a single page vma allocation per 
> timeline is the least of our worries if there is an ever growing number 
> of timelines/contexts/users in the system?

Nah, it was just an attempt to quickly reduce the number of allocations,
where the worst case of one page+vma per timeline was the starting
point.

We should break this patch down into 1) one-page-per-timeline, 2) slab
cache, 3) free list 4) profit.

At other times we have been wanting to be able to suballocate pages;
something to keep in mind would be extending this to arbitrary cacheline
allocations.
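
For illustration, the free-list step might look something like this rough
sketch (hwsp_page and the helpers are hypothetical names, not what the
series implements; it assumes the caller holds the timeline lock):

struct hwsp_page {
	struct list_head link;	/* on the 'available' list while slots remain */
	struct i915_vma *vma;	/* PAGE_SIZE / CACHELINE_BYTES seqno slots */
	u64 free_bits;		/* one bit per cacheline-sized slot */
};

static unsigned int hwsp_page_grab(struct hwsp_page *p)
{
	/* caller has already checked p->free_bits != 0 */
	unsigned int slot = __ffs64(p->free_bits);

	p->free_bits &= ~BIT_ULL(slot);
	if (!p->free_bits)
		list_del(&p->link); /* page is full: park it off the free list */

	return slot * CACHELINE_BYTES;
}

static void hwsp_page_return(struct hwsp_page *p,
			     unsigned int offset,
			     struct list_head *available)
{
	if (!p->free_bits)
		list_add(&p->link, available); /* page has space again */

	p->free_bits |= BIT_ULL(offset / CACHELINE_BYTES);
}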

> > +     if (global_hwsp) {
> > +             timeline->hwsp_ggtt = i915_vma_get(global_hwsp);
> > +             timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
> > +     } else {
> > +             err = alloc_hwsp(timeline);
> > +             if (err)
> > +                     return err;
> > +     }
> > +
> > +     vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB);
> > +     if (IS_ERR(vaddr)) { /* leak the cacheline, but will clean up later */
> Can you explain this comment more? Where/when is the later?

On failure here, the cacheline is still marked as allocated in the slab,
but the reference to the page is released. So the backing page will be
released when everyone else finally drops their reference.

Just laziness, since we have the ability to return the cacheline later
on...

> > @@ -2616,7 +2628,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
> >               goto error_deref_obj;
> >       }
> >   
> > -     timeline = i915_timeline_create(ctx->i915, ctx->name);
> > +     timeline = i915_timeline_create(ctx->i915, ctx->name, NULL);
> Why does this use the global HWSP rather than a per context one?

.global_hwsp = NULL => it allocates its own HWSP.

Were you thinking of intel_engine_setup_common() which is still using
the global HWSP at this point in time?
-Chris

* Re: [PATCH 41/46] drm/i915: Introduce concept of per-timeline (context) HWSP
  2019-01-15  9:14     ` Chris Wilson
@ 2019-01-15 15:40       ` Chris Wilson
  2019-01-15 17:56         ` John Harrison
  0 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-15 15:40 UTC (permalink / raw)
  To: John Harrison, intel-gfx

Quoting Chris Wilson (2019-01-15 09:14:17)
> Quoting John Harrison (2019-01-15 00:55:39)
> > On 1/7/2019 03:55, Chris Wilson wrote:
> > > Supplement the per-engine HWSP with a per-timeline HWSP. That is a
> > > per-request pointer through which we can check a local seqno,
> > > abstracting away the presumption of a global seqno. In this first step,
> > > we point each request back into the engine's HWSP so everything
> > > continues to work with the global timeline.
> > > ---
> > > +static inline u32 i915_request_hwsp(const struct i915_request *rq)
> > > +{
> > > +     return READ_ONCE(*rq->hwsp_seqno);
> > > +}
> > > +
> > Shouldn't the function name have an _seqno as well? Just 
> > 'i915_request_hwsp()' is fairly ambiguous, there could be many different 
> > things stored in the HWSP.
> 
> It's not even necessarily the HWSP! :)
> 
> i915_request_hw_seqno() // dissatisfying
> -> i915_request_hwsp_seqno() // but rq only stores one element in HWSP!
> -> i915_request_hwsp()
> 
> Was the evolution of names I chose.
> 
> Of that mix, i915_request_hwsp_seqno(). hw_seqno just feels nondescript.
> 
> i915_request_current_[hw]_seqno() maybe, but because we start with
> i915_request I find it confusing and expect the seqno to be tied to the
> request. So maybe just drop i915_request here, and go with something
> like hwsp_breadcrumb(), that just happens to take i915_request as a
> convenience.

Alternatively,

static inline u32 i915_request_hwsp(struct i915_request *rq, int index)
{
	return READ_ONCE(rq->hwsp_seqno[index]);
}

And probably rename s/rq->hwsp_seqno/rq->hwsp/. That should compile away
the argument, but you'll still probably want a

static inline u32 i915_request_hwsp_seqno(struct i915_request *rq)
{
	return i915_request_hwsp(rq, 0);
}

I can't win! But it does look more methodical.
-Chris

* Re: [PATCH 41/46] drm/i915: Introduce concept of per-timeline (context) HWSP
  2019-01-15 15:40       ` Chris Wilson
@ 2019-01-15 17:56         ` John Harrison
  0 siblings, 0 replies; 111+ messages in thread
From: John Harrison @ 2019-01-15 17:56 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 1/15/2019 07:40, Chris Wilson wrote:
> Quoting Chris Wilson (2019-01-15 09:14:17)
>> Quoting John Harrison (2019-01-15 00:55:39)
>>> On 1/7/2019 03:55, Chris Wilson wrote:
>>>> Supplement the per-engine HWSP with a per-timeline HWSP. That is a
>>>> per-request pointer through which we can check a local seqno,
>>>> abstracting away the presumption of a global seqno. In this first step,
>>>> we point each request back into the engine's HWSP so everything
>>>> continues to work with the global timeline.
>>>> ---
>>>> +static inline u32 i915_request_hwsp(const struct i915_request *rq)
>>>> +{
>>>> +     return READ_ONCE(*rq->hwsp_seqno);
>>>> +}
>>>> +
>>> Shouldn't the function name have an _seqno as well? Just
>>> 'i915_request_hwsp()' is fairly ambiguous, there could be many different
>>> things stored in the HWSP.
>> It's not even necessarily the HWSP! :)
>>
>> i915_request_hw_seqno() // dissatisfying
>> -> i915_request_hwsp_seqno() // but rq only stores one element in HWSP!
>> -> i915_request_hwsp()
>>
>> Was the evolution of names I chose.
>>
>> Of that mix, i915_request_hwsp_seqno(). hw_seqno just feels nondescript.
>>
>> i915_request_current_[hw]_seqno() maybe, but because we start with
>> i915_request I find it confusing and expect the seqno to be tied to the
>> request. So maybe just drop i915_request here, and go with something
>> like hwsp_breadcrumb(), that just happens to take i915_request as a
>> convenience.
My vote would be 'hwsp_breadcrumb()' or similar. As you say, the seqno 
in the HWSP isn't actually tied to the request. Quite the opposite in 
fact - you are generally comparing multiple requests' seqnos to the HWSP 
seqno to see which have or have not completed. It should really be tied 
to the timeline (or more accurately, to the context as that is what 
dictates the timeline). The code is generally starting from a request 
structure so it makes sense to have a shortcut via the request. But 
logically, it should be req->timeline->hwsp[SEQNO]. Maybe even something 
like i915_timeline_out_seqno(rq)? Or i915_timeline_done_seqno(rq)?
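
For concreteness, a sketch of that shape (the name and the route through
rq->timeline are illustrative; it assumes the timeline keeps its own
hwsp_seqno pointer, as the timelines in this series do):

static inline u32 i915_timeline_out_seqno(const struct i915_request *rq)
{
	return READ_ONCE(*rq->timeline->hwsp_seqno);
}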


> Alternatively,
>
> static inline u32 i915_request_hwsp(struct i915_request *rq, int index)
> {
> 	return READ_ONCE(rq->hwsp_seqno[index]);
> }
>
> And probably rename s/rq->hwsp_seqno/rq->hwsp/. That should compile away
> the argument, but you'll still probably want a
>
> static inline u32 i915_request_hwsp_seqno(struct i915_request *rq)
> {
> 	return i915_request_hwsp(rq, 0);
> }
Given that there is only a single per context element in the HWSP at 
present, this version does seem overkill. It might be useful to move to 
that later when there are more entries, if that ever happens. For now, 
keep things simple I think.

>
> I can't win! But it does look more methodical.
> -Chris


* Re: [PATCH 43/46] drm/i915: Allocate a status page for each timeline
  2019-01-15  9:50     ` Chris Wilson
@ 2019-01-15 18:17       ` John Harrison
  2019-01-15 18:43         ` Chris Wilson
  0 siblings, 1 reply; 111+ messages in thread
From: John Harrison @ 2019-01-15 18:17 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 1/15/2019 01:50, Chris Wilson wrote:
> Quoting John Harrison (2019-01-15 00:56:13)
>> On 1/7/2019 03:55, Chris Wilson wrote:
>>> +static int alloc_hwsp(struct i915_timeline *timeline)
>>> +{
>>> +     struct drm_i915_private *i915 = timeline->i915;
>>> +     struct i915_vma *vma;
>>> +     int offset;
>>> +
>>> +     mutex_lock(&i915->gt.timeline_lock);
>>> +
>>> +restart:
>>> +     offset = find_first_cacheline(i915);
>>> +     if (offset == NBITS && i915->gt.timeline_hwsp) {
>>> +             i915_vma_put(i915->gt.timeline_hwsp);
>>> +             i915->gt.timeline_hwsp = NULL;
>>> +     }
>>> +
>>> +     vma = i915->gt.timeline_hwsp;
>>> +     if (!vma) {
>>> +             struct drm_i915_gem_object *bo;
>>> +
>>> +             /* Drop the lock before allocations */
>>> +             mutex_unlock(&i915->gt.timeline_lock);
>>> +
>>> +             BUILD_BUG_ON(NBITS * CACHELINE_BYTES > PAGE_SIZE);
>>> +             bo = i915_gem_object_create_internal(i915, PAGE_SIZE);
>>> +             if (IS_ERR(bo))
>>> +                     return PTR_ERR(bo);
>>> +
>>> +             i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
>>> +
>>> +             vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL);
>>> +             if (IS_ERR(vma))
>>> +                     return PTR_ERR(vma);
>>> +
>>> +             mutex_lock(&i915->gt.timeline_lock);
>>> +             if (i915->gt.timeline_hwsp) {
>>> +                     i915_gem_object_put(bo);
>>> +                     goto restart;
>>> +             }
>>> +
>>> +             i915->gt.timeline_hwsp = vma;
>>> +             i915->gt.timeline_free = ~0ull;
>>> +             offset = 0;
>>> +     }
>>> +
>>> +     i915->gt.timeline_free &= ~BIT_ULL(offset);
>>> +
>>> +     timeline->hwsp_ggtt = i915_vma_get(vma);
>>> +     timeline->hwsp_offset = offset * CACHELINE_BYTES;
>>> +
>>> +     mutex_unlock(&i915->gt.timeline_lock);
>>> +
>>> +     return 0;
>>> +}
>> If I'm reading this correctly then gt.timeline_hwsp/free is a cached
>> copy of the most recently allocated but not yet filled bank of seqno
>> locations. When it gets full, the i915->gt reference gets dropped and a
>> new page is allocated and used up line by line. Meanwhile, each timeline
>> has its own private reference to the page so dropping the i915->gt
>> reference is safe. And once the last timeline using a given page is
>> freed, the last reference to that page will be dropped and so the page
>> itself will also be freed. If a timeline is freed before the currently
>> cached page is filled, then that timeline's slot will be released and
>> re-used by the next timeline to be created.
>>
>> But what about the scenario of a long running system with a small but
>> growing number of persistent tasks interspersed with many short lived
>> tasks? In that case, you would end up with many sparsely populated pages
>> whose free slots will not get re-used. You could have a linked list
>> of cached pages. When a page is filled, move it to a 'full' list. When a
>> timeline is freed, if its page was on the 'full' list, clear the slot
>> and move it back to the 'available' list.
> Yes. My thinking was a plain slab cache was a quick-and-dirty
> improvement over a page-per-timeline. And a freelist would be the next
> step.
>
>> Or is the idea that a worst case of a single page vma allocation per
>> timeline is the least of our worries if there is an ever growing number
>> of timelines/contexts/users in the system?
> Nah, it was just an attempt to quickly reduce the number of allocations,
> where the worst case of one page+vma per timeline was the starting
> point.
>
> We should break this patch down into 1) one-page-per-timeline, 2) slab
> cache, 3) free list 4) profit.
>
> At other times we have been wanting to be able to suballocate pages;
> something to keep in mind would be extending this to arbitrary cacheline
> allocations.
The multi-stage approach sounds good. Keep things simple in this patch 
and then improve the situation later. One thing to be careful of with a 
cacheline allocator would be to make sure whatever is being converted 
wasn't using full pages for security reasons. I.e. a page can be private 
to a process, whereas a cacheline will be shared by many. I guess that would 
only really apply to allocations being passed to user land as the kernel 
is considered secure? Or can a user batch buffer write to arbitrary 
locations within the ppHWSP and thereby splat someone else's seqno?

>>> +     if (global_hwsp) {
>>> +             timeline->hwsp_ggtt = i915_vma_get(global_hwsp);
>>> +             timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
>>> +     } else {
>>> +             err = alloc_hwsp(timeline);
>>> +             if (err)
>>> +                     return err;
>>> +     }
>>> +
>>> +     vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB);
>>> +     if (IS_ERR(vaddr)) { /* leak the cacheline, but will clean up later */
>> Can you explain this comment more? Where/when is the later?
> On failure here, the cacheline is still marked as allocated in the slab,
> but the reference to the page is released. So the backing page will be
> released when everyone else finally drops their reference.
>
> Just laziness, since we have the ability to return the cacheline later
> on...
Meaning the actual leak is the bit in 'i915->gt.timeline_free' that says 
this cacheline can or can't be used for the next allocation? Presumably 
you could do the bitmap munging in the case that 'global_hwsp' is null, 
but the code would certainly be messier for not a lot of gain.
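
Presumably the munging in question is little more than handing the bit
back on the error path, e.g. (a sketch against the patch's bitmap, not
the refactor that followed; note it is only valid while hwsp_ggtt is
still the cached page, hence the mess):

	mutex_lock(&i915->gt.timeline_lock);
	if (timeline->hwsp_ggtt == i915->gt.timeline_hwsp)
		i915->gt.timeline_free |=
			BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
	mutex_unlock(&i915->gt.timeline_lock);
	i915_vma_put(timeline->hwsp_ggtt);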

>
>>> @@ -2616,7 +2628,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>>>                goto error_deref_obj;
>>>        }
>>>    
>>> -     timeline = i915_timeline_create(ctx->i915, ctx->name);
>>> +     timeline = i915_timeline_create(ctx->i915, ctx->name, NULL);
>> Why does this use the global HWSP rather than a per context one?
> .global_hwsp = NULL => it allocates its own HWSP.
>
> Were you thinking of intel_engine_setup_common() which is still using
> the global HWSP at this point in time?
Doh. Brain fart. Presumably the engine one will disappear completely? Or 
is it still needed for legacy mode?


> -Chris


* Re: [PATCH 43/46] drm/i915: Allocate a status page for each timeline
  2019-01-15 18:17       ` John Harrison
@ 2019-01-15 18:43         ` Chris Wilson
  2019-01-16 21:06           ` John Harrison
  0 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-15 18:43 UTC (permalink / raw)
  To: John Harrison, intel-gfx

Quoting John Harrison (2019-01-15 18:17:21)
> On 1/15/2019 01:50, Chris Wilson wrote:
> > Quoting John Harrison (2019-01-15 00:56:13)
> >> On 1/7/2019 03:55, Chris Wilson wrote:
> >>> +static int alloc_hwsp(struct i915_timeline *timeline)
> >>> +{
> >>> +     struct drm_i915_private *i915 = timeline->i915;
> >>> +     struct i915_vma *vma;
> >>> +     int offset;
> >>> +
> >>> +     mutex_lock(&i915->gt.timeline_lock);
> >>> +
> >>> +restart:
> >>> +     offset = find_first_cacheline(i915);
> >>> +     if (offset == NBITS && i915->gt.timeline_hwsp) {
> >>> +             i915_vma_put(i915->gt.timeline_hwsp);
> >>> +             i915->gt.timeline_hwsp = NULL;
> >>> +     }
> >>> +
> >>> +     vma = i915->gt.timeline_hwsp;
> >>> +     if (!vma) {
> >>> +             struct drm_i915_gem_object *bo;
> >>> +
> >>> +             /* Drop the lock before allocations */
> >>> +             mutex_unlock(&i915->gt.timeline_lock);
> >>> +
> >>> +             BUILD_BUG_ON(NBITS * CACHELINE_BYTES > PAGE_SIZE);
> >>> +             bo = i915_gem_object_create_internal(i915, PAGE_SIZE);
> >>> +             if (IS_ERR(bo))
> >>> +                     return PTR_ERR(bo);
> >>> +
> >>> +             i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
> >>> +
> >>> +             vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL);
> >>> +             if (IS_ERR(vma))
> >>> +                     return PTR_ERR(vma);
> >>> +
> >>> +             mutex_lock(&i915->gt.timeline_lock);
> >>> +             if (i915->gt.timeline_hwsp) {
> >>> +                     i915_gem_object_put(bo);
> >>> +                     goto restart;
> >>> +             }
> >>> +
> >>> +             i915->gt.timeline_hwsp = vma;
> >>> +             i915->gt.timeline_free = ~0ull;
> >>> +             offset = 0;
> >>> +     }
> >>> +
> >>> +     i915->gt.timeline_free &= ~BIT_ULL(offset);
> >>> +
> >>> +     timeline->hwsp_ggtt = i915_vma_get(vma);
> >>> +     timeline->hwsp_offset = offset * CACHELINE_BYTES;
> >>> +
> >>> +     mutex_unlock(&i915->gt.timeline_lock);
> >>> +
> >>> +     return 0;
> >>> +}
> >> If I'm reading this correctly then gt.timeline_hwsp/free is a cached
> >> copy of the most recently allocated but not yet filled bank of seqno
> >> locations. When it gets full, the i915->gt reference gets dropped and a
> >> new page is allocated and used up line by line. Meanwhile, each timeline
> >> has its own private reference to the page so dropping the i915->gt
> >> reference is safe. And once the last timeline using a given page is
> >> freed, the last reference to that page will be dropped and so the page
> >> itself will also be freed. If a timeline is freed before the currently
> >> cached page is filled, then that timeline's slot will be released and
> >> re-used by the next timeline to be created.
> >>
> >> But what about the scenario of a long running system with a small but
> >> growing number of persistent tasks interspersed with many short lived
> >> tasks? In that case, you would end up with many sparsely populated pages
> >> whose free slots will not get re-used. You could have a linked list
> >> of cached pages. When a page is filled, move it to a 'full' list. When a
> >> timeline is freed, if its page was on the 'full' list, clear the slot
> >> and move it back to the 'available' list.
> > Yes. My thinking was a plain slab cache was a quick-and-dirty
> > improvement over a page-per-timeline. And a freelist would be the next
> > step.
> >
> >> Or is the idea that a worst case of a single page vma allocation per
> >> timeline is the least of our worries if there is an ever growing number
> >> of timelines/contexts/users in the system?
> > Nah, it was just an attempt to quickly reduce the number of allocations,
> > where the worst case of one page+vma per timeline was the starting
> > point.
> >
> > We should break this patch down into 1) one-page-per-timeline, 2) slab
> > cache, 3) free list 4) profit.
> >
> > At other times we have been wanting to be able to suballocate pages;
> > something to keep in mind would be extending this to arbitrary cacheline
> > allocations.
> The multi-stage approach sounds good. Keep things simple in this patch 
> and then improve the situation later. One thing to be careful of with a 
> cacheline allocator would be to make sure whatever is being converted 
> wasn't using full pages for security reasons. I.e. a page can be private 
> to a process, whereas a cacheline will be shared by many. I guess that would 
> only really apply to allocations being passed to user land as the kernel 
> is considered secure? Or can a user batch buffer write to arbitrary 
> locations within the ppHWSP and thereby splat someone else's seqno?

ppHWSP, yes. But for internal allocations, only accessible via the ring
+ GGTT, there should be no problem. I agree that we definitely don't want to
expose subpage sharing across the userspace boundary (all isolation
controls are only on pages and above).

If userspace wants suballocations, it can (and does) do them for itself
and should regulate its own sharing.

> >>> +     if (global_hwsp) {
> >>> +             timeline->hwsp_ggtt = i915_vma_get(global_hwsp);
> >>> +             timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
> >>> +     } else {
> >>> +             err = alloc_hwsp(timeline);
> >>> +             if (err)
> >>> +                     return err;
> >>> +     }
> >>> +
> >>> +     vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB);
> >>> +     if (IS_ERR(vaddr)) { /* leak the cacheline, but will clean up later */
> >> Can you explain this comment more? Where/when is the later?
> > On failure here, the cacheline is still marked as allocated in the slab,
> > but the reference to the page is released. So the backing page will be
> > released when everyone else finally drops their reference.
> >
> > Just laziness, since we have the ability to return the cacheline later
> > on...
> Meaning the actual leak is the bit in 'i915->gt.timeline_free' that says 
> this cacheline can or can't be used for the next allocation? Presumably 
> you could do the bitmap munging in the case that 'global_hwsp' is null, 
> but the code would certainly be messier for not a lot of gain.

Having had it pointed out that I was being lazy, a bit of refactoring
later showed how lazy I was.

> >>> @@ -2616,7 +2628,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
> >>>                goto error_deref_obj;
> >>>        }
> >>>    
> >>> -     timeline = i915_timeline_create(ctx->i915, ctx->name);
> >>> +     timeline = i915_timeline_create(ctx->i915, ctx->name, NULL);
> >> Why does this use the global HWSP rather than a per context one?
> > .global_hwsp = NULL => it allocates its own HWSP.
> >
> > Were you thinking of intel_engine_setup_common() which is still using
> > the global HWSP at this point in time?
> Doh. Brain fart. Presumably the engine one will disappear completely? Or 
> is it still needed for legacy mode?

It (the timeline embedded inside the engine) is killed later, once
the internal clients (perf/pmu, hangcheck and idling at the last count)
are ready for the lack of a globally ordered execution queue. The single
ringbuffer + timeline persists for legacy. (Multiple timelines for gen7,
coming later!)
-Chris

* Re: [PATCH 42/46] drm/i915: Enlarge vma->pin_count
  2019-01-07 11:55 ` [PATCH 42/46] drm/i915: Enlarge vma->pin_count Chris Wilson
@ 2019-01-15 19:57   ` John Harrison
  2019-01-15 20:17     ` Chris Wilson
  0 siblings, 1 reply; 111+ messages in thread
From: John Harrison @ 2019-01-15 19:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 1/7/2019 03:55, Chris Wilson wrote:
> Previously we only accommodated having a vma pinned by a small number of
> users, with the maximum being pinned for use by the display engine. As
> such, we used a small bitfield only large enough to allow the vma to
> be pinned twice (for back/front buffers) in each scanout plane. Keeping
> the maximum permissible pin_count small allows us to quickly catch a
> potential leak. However, as we want to split a 4096B page into 64
> different cachelines and pin each cacheline for use by a different
> timeline, we will exceed the current maximum permissible vma->pin_count
> and so time has come to enlarge it.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_gtt.h | 26 +++++++++++++-------------
>   drivers/gpu/drm/i915/i915_vma.h     | 28 +++++++++-------------------
>   2 files changed, 22 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index bd679c8c56dd..03ade71b8d9a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -642,19 +642,19 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
>   
>   /* Flags used by pin/bind&friends. */
>   #define PIN_NONBLOCK		BIT_ULL(0)
> -#define PIN_MAPPABLE		BIT_ULL(1)
> -#define PIN_ZONE_4G		BIT_ULL(2)
> -#define PIN_NONFAULT		BIT_ULL(3)
> -#define PIN_NOEVICT		BIT_ULL(4)
> -
> -#define PIN_MBZ			BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */
> -#define PIN_GLOBAL		BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */
> -#define PIN_USER		BIT_ULL(7) /* I915_VMA_LOCAL_BIND */
> -#define PIN_UPDATE		BIT_ULL(8)
> -
> -#define PIN_HIGH		BIT_ULL(9)
> -#define PIN_OFFSET_BIAS		BIT_ULL(10)
> -#define PIN_OFFSET_FIXED	BIT_ULL(11)
> +#define PIN_NONFAULT		BIT_ULL(1)
> +#define PIN_NOEVICT		BIT_ULL(2)
> +#define PIN_MAPPABLE		BIT_ULL(3)
> +#define PIN_ZONE_4G		BIT_ULL(4)
> +#define PIN_HIGH		BIT_ULL(5)
> +#define PIN_OFFSET_BIAS		BIT_ULL(6)
> +#define PIN_OFFSET_FIXED	BIT_ULL(7)
> +
> +#define PIN_MBZ			BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */
> +#define PIN_GLOBAL		BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */
> +#define PIN_USER		BIT_ULL(10) /* I915_VMA_LOCAL_BIND */
> +#define PIN_UPDATE		BIT_ULL(11)
> +
The upper bits need moving to accommodate the larger count. And the 
HIGH/OFFSET_* fields are not shared with vma-flags so can be moved down 
with the other pin only flags. But I don't see a reason to shuffle the 
lower bits around? MAPPABLE to NOEVICT were 1,2,3,4 but are now 3,4,1,2. 
Is there some semantic meaning to the new order?


>   #define PIN_OFFSET_MASK		(-I915_GTT_PAGE_SIZE)
>   
>   #endif
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 7252abc73d3e..266b226ebef2 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -70,30 +70,20 @@ struct i915_vma {
>   	 */
>   	unsigned int open_count;
>   	unsigned long flags;
> -	/**
> -	 * How many users have pinned this object in GTT space. The following
> -	 * users can each hold at most one reference: pwrite/pread, execbuffer
> -	 * (objects are not allowed multiple times for the same batchbuffer),
> -	 * and the framebuffer code. When switching/pageflipping, the
> -	 * framebuffer code has at most two buffers pinned per crtc.
> -	 *
> -	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
> -	 * bits with absolutely no headroom. So use 4 bits.
> -	 */
Is it not worth keeping some comment about the maximum pin count being 
bounded so 8 bits are guaranteed to be sufficient? Also, is the old 
comment actually valid? Surely modern hardware has more than two CRTCs 
so the limit of 7 was wrong anyway? Maybe even have a compile-time 
assert that the mask size is greater than max(1 + 1 + 1 + 1 + 
2*MAX_CRTC, PAGE_SIZE/CACHELINE_SIZE)?
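
Something along these lines, perhaps (two asserts since BUILD_BUG_ON
wants constant expressions and the kernel's max() is not one;
I915_MAX_PIPES is standing in for the CRTC count here):

	/* somewhere in an init path */
	BUILD_BUG_ON(I915_VMA_PIN_MASK < 1 + 1 + 1 + 1 + 2 * I915_MAX_PIPES);
	BUILD_BUG_ON(I915_VMA_PIN_MASK < PAGE_SIZE / CACHELINE_BYTES);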


> -#define I915_VMA_PIN_MASK 0xf
> -#define I915_VMA_PIN_OVERFLOW	BIT(5)
> +#define I915_VMA_PIN_MASK 0xff
> +#define I915_VMA_PIN_OVERFLOW	BIT(8)
>   
>   	/** Flags and address space this VMA is bound to */
> -#define I915_VMA_GLOBAL_BIND	BIT(6)
> -#define I915_VMA_LOCAL_BIND	BIT(7)
> +#define I915_VMA_GLOBAL_BIND	BIT(9)
> +#define I915_VMA_LOCAL_BIND	BIT(10)
>   #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
>   
> -#define I915_VMA_GGTT		BIT(8)
> -#define I915_VMA_CAN_FENCE	BIT(9)
> -#define I915_VMA_CLOSED		BIT(10)
> -#define I915_VMA_USERFAULT_BIT	11
> +#define I915_VMA_GGTT		BIT(11)
> +#define I915_VMA_CAN_FENCE	BIT(12)
> +#define I915_VMA_CLOSED		BIT(13)
> +#define I915_VMA_USERFAULT_BIT	14
>   #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
> -#define I915_VMA_GGTT_WRITE	BIT(12)
> +#define I915_VMA_GGTT_WRITE	BIT(15)
>   
>   	unsigned int active_count;
>   	struct rb_root active;


* Re: [PATCH 42/46] drm/i915: Enlarge vma->pin_count
  2019-01-15 19:57   ` John Harrison
@ 2019-01-15 20:17     ` Chris Wilson
  2019-01-16  0:18       ` John Harrison
  0 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-15 20:17 UTC (permalink / raw)
  To: John Harrison, intel-gfx

Quoting John Harrison (2019-01-15 19:57:19)
> On 1/7/2019 03:55, Chris Wilson wrote:
> > Previously we only accommodated having a vma pinned by a small number of
> > users, with the maximum being pinned for use by the display engine. As
> > such, we used a small bitfield only large enough to allow the vma to
> > be pinned twice (for back/front buffers) in each scanout plane. Keeping
> > the maximum permissible pin_count small allows us to quickly catch a
> > potential leak. However, as we want to split a 4096B page into 64
> > different cachelines and pin each cacheline for use by a different
> > timeline, we will exceed the current maximum permissible vma->pin_count
> > and so time has come to enlarge it.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_gtt.h | 26 +++++++++++++-------------
> >   drivers/gpu/drm/i915/i915_vma.h     | 28 +++++++++-------------------
> >   2 files changed, 22 insertions(+), 32 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > index bd679c8c56dd..03ade71b8d9a 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > @@ -642,19 +642,19 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
> >   
> >   /* Flags used by pin/bind&friends. */
> >   #define PIN_NONBLOCK                BIT_ULL(0)
> > -#define PIN_MAPPABLE         BIT_ULL(1)
> > -#define PIN_ZONE_4G          BIT_ULL(2)
> > -#define PIN_NONFAULT         BIT_ULL(3)
> > -#define PIN_NOEVICT          BIT_ULL(4)
> > -
> > -#define PIN_MBZ                      BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */
> > -#define PIN_GLOBAL           BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */
> > -#define PIN_USER             BIT_ULL(7) /* I915_VMA_LOCAL_BIND */
> > -#define PIN_UPDATE           BIT_ULL(8)
> > -
> > -#define PIN_HIGH             BIT_ULL(9)
> > -#define PIN_OFFSET_BIAS              BIT_ULL(10)
> > -#define PIN_OFFSET_FIXED     BIT_ULL(11)
> > +#define PIN_NONFAULT         BIT_ULL(1)
> > +#define PIN_NOEVICT          BIT_ULL(2)
> > +#define PIN_MAPPABLE         BIT_ULL(3)
> > +#define PIN_ZONE_4G          BIT_ULL(4)
> > +#define PIN_HIGH             BIT_ULL(5)
> > +#define PIN_OFFSET_BIAS              BIT_ULL(6)
> > +#define PIN_OFFSET_FIXED     BIT_ULL(7)
> > +
> > +#define PIN_MBZ                      BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */
> > +#define PIN_GLOBAL           BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */
> > +#define PIN_USER             BIT_ULL(10) /* I915_VMA_LOCAL_BIND */
> > +#define PIN_UPDATE           BIT_ULL(11)
> > +
> The upper bits need moving to accommodate the larger count. And the 
> HIGH/OFFSET_* fields are not shared with vma-flags so can be moved down 
> with the other pin only flags. But I don't see a reason to shuffle the 
> lower bits around? MAPPABLE to NOEVICT were 1,2,3,4 but are now 3,4,1,2. 
> Is there some semantic meaning to the new order?

Just that:
 - bias, mappable, zone_4g: address limit specifiers
   + high: not strictly an address limit, but an address direction to search
   + fixed: address override, limits still apply though
 - nonblock, nonfault, noevict: search specifiers

I just hadn't had an excuse to reorder them for a while.

> >   #define PIN_OFFSET_MASK             (-I915_GTT_PAGE_SIZE)
> >   
> >   #endif
> > diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> > index 7252abc73d3e..266b226ebef2 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.h
> > +++ b/drivers/gpu/drm/i915/i915_vma.h
> > @@ -70,30 +70,20 @@ struct i915_vma {
> >        */
> >       unsigned int open_count;
> >       unsigned long flags;
> > -     /**
> > -      * How many users have pinned this object in GTT space. The following
> > -      * users can each hold at most one reference: pwrite/pread, execbuffer
> > -      * (objects are not allowed multiple times for the same batchbuffer),
> > -      * and the framebuffer code. When switching/pageflipping, the
> > -      * framebuffer code has at most two buffers pinned per crtc.
> > -      *
> > -      * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
> > -      * bits with absolutely no headroom. So use 4 bits.
> > -      */
> Is it not worth keeping some comment about the maximum pin count being 
> bounded so 8 bits are guaranteed to be sufficient? Also, is the old 
> comment actually valid? Surely modern hardware has more than two CRTCs 
> so the limit of 7 was wrong anyway? Maybe even have a compile-time 
> assert that the mask size is greater than max(1 + 1 + 1 + 1 + 
> 2*MAX_CRTC, PAGE_SIZE/CACHELINE_SIZE)?

Is a comment accurate? rotfl

Also I think we are up to 3*NUM_PLANES*NUM_CRTCS, but can't be quite sure
with the atomic state tracking, so it might still just be 2 (but just
wait until we have an actual flip queue).

I was also wondering whether using 7b + a negative byte for the overflow
would generate better code.
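
Sketched out, that idea is presumably just:

	#define I915_VMA_PIN_MASK	0x7f
	#define I915_VMA_PIN_OVERFLOW	BIT(7)

i.e. let an overflowing increment spill into the sign bit of the byte so
that the overflow test can be a cheap signed comparison rather than a
mask-and-test.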

Still, a comment noting that this should be bounded to a small number,
hence the "validity" of checking for overflow as part of the flags, might
be in order.
-Chris

* Re: [PATCH 42/46] drm/i915: Enlarge vma->pin_count
  2019-01-15 20:17     ` Chris Wilson
@ 2019-01-16  0:18       ` John Harrison
  0 siblings, 0 replies; 111+ messages in thread
From: John Harrison @ 2019-01-16  0:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 1/15/2019 12:17, Chris Wilson wrote:
> Quoting John Harrison (2019-01-15 19:57:19)
>> On 1/7/2019 03:55, Chris Wilson wrote:
>>> Previously we only accommodated having a vma pinned by a small number of
>>> users, with the maximum being pinned for use by the display engine. As
>>> such, we used a small bitfield only large enough to allow the vma to
>>> be pinned twice (for back/front buffers) in each scanout plane. Keeping
>>> the maximum permissible pin_count small allows us to quickly catch a
>>> potential leak. However, as we want to split a 4096B page into 64
>>> different cachelines and pin each cacheline for use by a different
>>> timeline, we will exceed the current maximum permissible vma->pin_count
>>> and so time has come to enlarge it.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    drivers/gpu/drm/i915/i915_gem_gtt.h | 26 +++++++++++++-------------
>>>    drivers/gpu/drm/i915/i915_vma.h     | 28 +++++++++-------------------
>>>    2 files changed, 22 insertions(+), 32 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
>>> index bd679c8c56dd..03ade71b8d9a 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
>>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
>>> @@ -642,19 +642,19 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
>>>    
>>>    /* Flags used by pin/bind&friends. */
>>>    #define PIN_NONBLOCK                BIT_ULL(0)
>>> -#define PIN_MAPPABLE         BIT_ULL(1)
>>> -#define PIN_ZONE_4G          BIT_ULL(2)
>>> -#define PIN_NONFAULT         BIT_ULL(3)
>>> -#define PIN_NOEVICT          BIT_ULL(4)
>>> -
>>> -#define PIN_MBZ                      BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */
>>> -#define PIN_GLOBAL           BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */
>>> -#define PIN_USER             BIT_ULL(7) /* I915_VMA_LOCAL_BIND */
>>> -#define PIN_UPDATE           BIT_ULL(8)
>>> -
>>> -#define PIN_HIGH             BIT_ULL(9)
>>> -#define PIN_OFFSET_BIAS              BIT_ULL(10)
>>> -#define PIN_OFFSET_FIXED     BIT_ULL(11)
>>> +#define PIN_NONFAULT         BIT_ULL(1)
>>> +#define PIN_NOEVICT          BIT_ULL(2)
>>> +#define PIN_MAPPABLE         BIT_ULL(3)
>>> +#define PIN_ZONE_4G          BIT_ULL(4)
>>> +#define PIN_HIGH             BIT_ULL(5)
>>> +#define PIN_OFFSET_BIAS              BIT_ULL(6)
>>> +#define PIN_OFFSET_FIXED     BIT_ULL(7)
>>> +
>>> +#define PIN_MBZ                      BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */
>>> +#define PIN_GLOBAL           BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */
>>> +#define PIN_USER             BIT_ULL(10) /* I915_VMA_LOCAL_BIND */
>>> +#define PIN_UPDATE           BIT_ULL(11)
>>> +
>> The upper bits need moving to accommodate the larger count. And the
>> HIGH/OFFSET_* fields are not shared with vma-flags so can be moved down
>> with the other pin only flags. But I don't see a reason to shuffle the
>> lower bits around? MAPPABLE to NOEVICT were 1,2,3,4 but are now 3,4,1,2.
>> Is there some semantic meaning to the new order?
> Just that:
>   - bias, mappable, zone_4g: address limit specifiers
>     + high: not strictly an address limit, but an address direction to search
>     + fixed: address override, limits still apply though
>   - nonblock, nonfault, noevict: search specifiers
>
> I just hadn't had an excuse to reorder them for a while.
Fair enough. I just wanted to check it was deliberate and not some 
accidental remnant of some other change that was dropped along the way.

>
>>>    #define PIN_OFFSET_MASK             (-I915_GTT_PAGE_SIZE)
>>>    
>>>    #endif
>>> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
>>> index 7252abc73d3e..266b226ebef2 100644
>>> --- a/drivers/gpu/drm/i915/i915_vma.h
>>> +++ b/drivers/gpu/drm/i915/i915_vma.h
>>> @@ -70,30 +70,20 @@ struct i915_vma {
>>>         */
>>>        unsigned int open_count;
>>>        unsigned long flags;
>>> -     /**
>>> -      * How many users have pinned this object in GTT space. The following
>>> -      * users can each hold at most one reference: pwrite/pread, execbuffer
>>> -      * (objects are not allowed multiple times for the same batchbuffer),
>>> -      * and the framebuffer code. When switching/pageflipping, the
>>> -      * framebuffer code has at most two buffers pinned per crtc.
>>> -      *
>>> -      * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
>>> -      * bits with absolutely no headroom. So use 4 bits.
>>> -      */
>> Is it not worth keeping some comment about the maximum pin count being
>> bounded so 8-bits is guaranteed to be sufficient? Also, is the old
>> comment actually valid? Surely modern hardware has more than two CRTCs
>> so the limit of 7 was wrong anyway? Maybe even have a compile time
>> assert that the mask size is greater than max(1 + 1 + 1 + 1 +
>> 2*MAX_CRTC, PAGE_SIZE/CACHELINE_SIZE)?
> Is a comment accurate? rotfl
One can but dream...

> Also, I think we are up to 3*NUM_PLANES*NUM_CRTCS, but I can't be quite sure
> with the atomic state tracking, so it might still just be 2 (but just
> wait until we have an actual flip queue).
That sounds like we should already be overflowing the current limit of 16?!

>
> I was also wondering whether using 7b + a negative byte for the overflow
> would generate better code.
Meaning limit the count to 127 and use a signed char cast? Thus test for 
-ve rather than BIT(x)? Maybe. Although the above comment makes me 
nervous that even 127 might not be sufficient for very much longer. New 
hardware always brings more planes and heads!
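
For what it's worth, the signed-byte test being mooted might look
roughly like the sketch below (illustrative only; the mask name and
helper are made up, not the series' code):

	/* 7b count + sign bit: overflow into bit 7 makes the byte negative */
	#define PIN_COUNT_MASK 0xff

	static inline bool pin_count_overflows(unsigned long flags)
	{
		/* equivalent to testing flags & BIT(7), but lets the
		 * compiler use the sign flag after a byte load */
		return (s8)(flags & PIN_COUNT_MASK) < 0;
	}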

>
> Still, a comment noting that this should be bounded to a small number,
> hence the "validity" of checking for overflow as part of the flags,
> might be in order.
> -Chris

I think some kind of comment along those lines would be worth having.

With that comment added:
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
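
A compile-time bound along the lines discussed might be sketched as
below; the worst-case arithmetic is illustrative rather than the
series' actual code (I915_MAX_PIPES, CACHELINE_BYTES and
I915_VMA_PIN_MASK are existing i915 definitions; the helper is made
up):

	/* sanity-check that the enlarged pin count cannot overflow under
	 * either worst case: display pinning, or one pin per cacheline
	 * of a shared page */
	static inline void assert_pin_count_bound(void)
	{
		BUILD_BUG_ON(1 + 1 + 1 + 1 + 2 * I915_MAX_PIPES >
			     I915_VMA_PIN_MASK);
		BUILD_BUG_ON(PAGE_SIZE / CACHELINE_BYTES >
			     I915_VMA_PIN_MASK);
	}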


* Re: [PATCH 31/46] drm/i915: Stop tracking MRU activity on VMA
  2019-01-07 11:54 ` [PATCH 31/46] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
@ 2019-01-16 16:27   ` Tvrtko Ursulin
  2019-01-16 16:37     ` Chris Wilson
  0 siblings, 1 reply; 111+ messages in thread
From: Tvrtko Ursulin @ 2019-01-16 16:27 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/01/2019 11:54, Chris Wilson wrote:
> Our goal is to remove struct_mutex and replace it with fine grained
> locking. One of the thorny issues is our eviction logic for reclaiming
> space for an execbuffer (or GTT mmaping, among a few other examples).
> While eviction itself is easy to move under a per-VM mutex, performing
> the activity tracking is less agreeable. One solution is not to do any
> MRU tracking and do a simple coarse evaluation during eviction of
> active/inactive, with a loose temporal ordering of last
> insertion/evaluation. That keeps all the locking constrained to when we
> are manipulating the VM itself, neatly avoiding the tricky handling of
> possible recursive locking during execbuf and elsewhere.
> 
> Note that discarding the MRU is unlikely to impact upon our efficiency
> to reclaim VM space (where we think a LRU model is best) as our
> current strategy is to use random idle replacement first before doing
> a search, and over time the use of softpinned 48b per-ppGTT is growing
> (thereby eliminating any need to perform any eviction searches, in
> theory at least).

I've noticed you made some changes since I last reviewed it, but there is 
no changelog so I have to find them manually. Also, for the ones you did 
not make, I suppose that means you disagree?

On the commit message my comment was that I think you should mention the 
removal of active/inactive lists in favour of a single list.

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem.c               | 10 +--
>   drivers/gpu/drm/i915/i915_gem_evict.c         | 71 ++++++++++++-------
>   drivers/gpu/drm/i915/i915_gem_gtt.c           | 15 ++--
>   drivers/gpu/drm/i915/i915_gem_gtt.h           | 26 +------
>   drivers/gpu/drm/i915/i915_gem_shrinker.c      |  8 ++-
>   drivers/gpu/drm/i915/i915_gem_stolen.c        |  3 +-
>   drivers/gpu/drm/i915/i915_gpu_error.c         | 37 +++++-----
>   drivers/gpu/drm/i915/i915_vma.c               |  9 +--
>   .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
>   drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  2 +-
>   10 files changed, 84 insertions(+), 101 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 83fb02dab18c..6ed44aeee583 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -254,10 +254,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>   
>   	pinned = ggtt->vm.reserved;
>   	mutex_lock(&dev->struct_mutex);
> -	list_for_each_entry(vma, &ggtt->vm.active_list, vm_link)
> -		if (i915_vma_is_pinned(vma))
> -			pinned += vma->node.size;
> -	list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link)
> +	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
>   		if (i915_vma_is_pinned(vma))
>   			pinned += vma->node.size;
>   	mutex_unlock(&dev->struct_mutex);
> @@ -1540,13 +1537,10 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
>   	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
>   
>   	for_each_ggtt_vma(vma, obj) {
> -		if (i915_vma_is_active(vma))
> -			continue;
> -
>   		if (!drm_mm_node_allocated(&vma->node))
>   			continue;
>   
> -		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
> +		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
>   	}
>   
>   	i915 = to_i915(obj->base.dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 02b83a5ed96c..a76f65fe86be 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -127,14 +127,10 @@ i915_gem_evict_something(struct i915_address_space *vm,
>   	struct drm_i915_private *dev_priv = vm->i915;
>   	struct drm_mm_scan scan;
>   	struct list_head eviction_list;
> -	struct list_head *phases[] = {
> -		&vm->inactive_list,
> -		&vm->active_list,
> -		NULL,
> -	}, **phase;
>   	struct i915_vma *vma, *next;
>   	struct drm_mm_node *node;
>   	enum drm_mm_insert_mode mode;
> +	struct i915_vma *active;
>   	int ret;
>   
>   	lockdep_assert_held(&vm->i915->drm.struct_mutex);

There is a comment around here, not shown in the diff, which talks 
about the active and inactive lists. Plus it is misleading about the 
list ordering now.

> @@ -170,17 +166,46 @@ i915_gem_evict_something(struct i915_address_space *vm,
>   	 */
>   	if (!(flags & PIN_NONBLOCK))
>   		i915_retire_requests(dev_priv);
> -	else
> -		phases[1] = NULL;
>   
>   search_again:
> +	active = NULL;
>   	INIT_LIST_HEAD(&eviction_list);
> -	phase = phases;
> -	do {
> -		list_for_each_entry(vma, *phase, vm_link)
> -			if (mark_free(&scan, vma, flags, &eviction_list))
> -				goto found;
> -	} while (*++phase);
> +	list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
> +		/*
> +		 * We keep this list in a rough least-recently scanned order
> +		 * of active elements (inactive elements are cheap to reap).
> +		 * New entries are added to the end, and we move anything we
> +		 * scan to the end. The assumption is that the working set
> +		 * of applications is either steady state (and thanks to the
> +		 * userspace bo cache it almost always is) or volatile and
> +		 * frequently replaced after a frame, which are self-evicting!
> +		 * Given that assumption, the MRU order of the scan list is
> +		 * fairly static, and keeping it in least-recently scan order
> +		 * is suitable.
> +		 *
> +		 * To notice when we complete one full cycle, we record the
> +		 * first active element seen, before moving it to the tail.
> +		 */

This is one change since v1 I spotted and it is a good one.

> +		if (i915_vma_is_active(vma)) {
> +			if (vma == active) {
> +				if (flags & PIN_NONBLOCK)
> +					break;
> +
> +				active = ERR_PTR(-EAGAIN);
> +			}
> +
> +			if (active != ERR_PTR(-EAGAIN)) {
> +				if (!active)
> +					active = vma;
> +
> +				list_move_tail(&vma->vm_link, &vm->bound_list);
> +				continue;
> +			}
> +		}
> +
> +		if (mark_free(&scan, vma, flags, &eviction_list))
> +			goto found;
> +	}
>   
>   	/* Nothing found, clean up and bail out! */
>   	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
> @@ -389,11 +414,6 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
>    */
>   int i915_gem_evict_vm(struct i915_address_space *vm)
>   {
> -	struct list_head *phases[] = {
> -		&vm->inactive_list,
> -		&vm->active_list,
> -		NULL
> -	}, **phase;
>   	struct list_head eviction_list;
>   	struct i915_vma *vma, *next;
>   	int ret;
> @@ -413,16 +433,13 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
>   	}
>   
>   	INIT_LIST_HEAD(&eviction_list);
> -	phase = phases;
> -	do {
> -		list_for_each_entry(vma, *phase, vm_link) {
> -			if (i915_vma_is_pinned(vma))
> -				continue;
> +	list_for_each_entry(vma, &vm->bound_list, vm_link) {
> +		if (i915_vma_is_pinned(vma))
> +			continue;
>   
> -			__i915_vma_pin(vma);
> -			list_add(&vma->evict_link, &eviction_list);
> -		}
> -	} while (*++phase);
> +		__i915_vma_pin(vma);
> +		list_add(&vma->evict_link, &eviction_list);
> +	}
>   
>   	ret = 0;
>   	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 45c7c8b6c7c8..ad4ef8980b97 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -492,9 +492,8 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)
>   
>   	stash_init(&vm->free_pages);
>   
> -	INIT_LIST_HEAD(&vm->active_list);
> -	INIT_LIST_HEAD(&vm->inactive_list);
>   	INIT_LIST_HEAD(&vm->unbound_list);
> +	INIT_LIST_HEAD(&vm->bound_list);
>   }
>   
>   static void i915_address_space_fini(struct i915_address_space *vm)
> @@ -2112,8 +2111,7 @@ void i915_ppgtt_close(struct i915_address_space *vm)
>   static void ppgtt_destroy_vma(struct i915_address_space *vm)
>   {
>   	struct list_head *phases[] = {
> -		&vm->active_list,
> -		&vm->inactive_list,
> +		&vm->bound_list,
>   		&vm->unbound_list,
>   		NULL,
>   	}, **phase;
> @@ -2136,8 +2134,7 @@ void i915_ppgtt_release(struct kref *kref)
>   
>   	ppgtt_destroy_vma(&ppgtt->vm);
>   
> -	GEM_BUG_ON(!list_empty(&ppgtt->vm.active_list));
> -	GEM_BUG_ON(!list_empty(&ppgtt->vm.inactive_list));
> +	GEM_BUG_ON(!list_empty(&ppgtt->vm.bound_list));
>   	GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list));
>   
>   	ppgtt->vm.cleanup(&ppgtt->vm);
> @@ -2802,8 +2799,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
>   	mutex_lock(&dev_priv->drm.struct_mutex);
>   	i915_gem_fini_aliasing_ppgtt(dev_priv);
>   
> -	GEM_BUG_ON(!list_empty(&ggtt->vm.active_list));
> -	list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link)
> +	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
>   		WARN_ON(i915_vma_unbind(vma));
>   
>   	if (drm_mm_node_allocated(&ggtt->error_capture))
> @@ -3514,8 +3510,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
>   	ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
>   
>   	/* clflush objects bound into the GGTT and rebind them. */
> -	GEM_BUG_ON(!list_empty(&ggtt->vm.active_list));
> -	list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) {
> +	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
>   		struct drm_i915_gem_object *obj = vma->obj;
>   
>   		if (!(vma->flags & I915_VMA_GLOBAL_BIND))
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index a0039ea97cdc..bd679c8c56dd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -299,32 +299,12 @@ struct i915_address_space {
>   	struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */
>   
>   	/**
> -	 * List of objects currently involved in rendering.
> -	 *
> -	 * Includes buffers having the contents of their GPU caches
> -	 * flushed, not necessarily primitives. last_read_req
> -	 * represents when the rendering involved will be completed.
> -	 *
> -	 * A reference is held on the buffer while on this list.
> +	 * List of vma currently bound.
>   	 */
> -	struct list_head active_list;
> +	struct list_head bound_list;
>   
>   	/**
> -	 * LRU list of objects which are not in the ringbuffer and
> -	 * are ready to unbind, but are still in the GTT.
> -	 *
> -	 * last_read_req is NULL while an object is in this list.
> -	 *
> -	 * A reference is not held on the buffer while on this list,
> -	 * as merely being GTT-bound shouldn't prevent its being
> -	 * freed, and we'll pull it off the list in the free path.
> -	 */
> -	struct list_head inactive_list;
> -
> -	/**
> -	 * List of vma that have been unbound.
> -	 *
> -	 * A reference is not held on the buffer while on this list.
> +	 * List of vma that are not unbound.
>   	 */
>   	struct list_head unbound_list;
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index e9a79059bc43..1531534eea02 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -490,9 +490,13 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
>   
>   	/* We also want to clear any cached iomaps as they wrap vmap */
>   	list_for_each_entry_safe(vma, next,
> -				 &i915->ggtt.vm.inactive_list, vm_link) {
> +				 &i915->ggtt.vm.bound_list, vm_link) {
>   		unsigned long count = vma->node.size >> PAGE_SHIFT;
> -		if (vma->iomap && i915_vma_unbind(vma) == 0)
> +
> +		if (!vma->iomap || i915_vma_is_active(vma))
> +			continue;
> +
> +		if (i915_vma_unbind(vma) == 0)
>   			freed_pages += count;
>   	}
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 2f756a97689a..75b97d71f072 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -702,7 +702,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
>   	vma->pages = obj->mm.pages;
>   	vma->flags |= I915_VMA_GLOBAL_BIND;
>   	__i915_vma_set_map_and_fenceable(vma);
> -	list_move_tail(&vma->vm_link, &ggtt->vm.inactive_list);
> +
> +	list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
>   
>   	spin_lock(&dev_priv->mm.obj_lock);
>   	list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 5533a741abeb..6e975c43dae9 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1124,7 +1124,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>   
>   static u32 capture_error_bo(struct drm_i915_error_buffer *err,
>   			    int count, struct list_head *head,
> -			    bool pinned_only)
> +			    bool active_only, bool pinned_only)

Here I suggested that having flags instead of two booleans would be more 
readable at the call sites.
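
For instance, the flags variant might look roughly like this (names
illustrative, not the series' code):

	#define CAPTURE_ACTIVE_ONLY BIT(0)
	#define CAPTURE_PINNED_ONLY BIT(1)

	static u32 capture_error_bo(struct drm_i915_error_buffer *err,
				    int count, struct list_head *head,
				    unsigned int flags);

	/* the call site then reads as intent rather than bare booleans:
	 *   capture_error_bo(bo, count, &vm->bound_list, CAPTURE_PINNED_ONLY);
	 */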

>   {
>   	struct i915_vma *vma;
>   	int i = 0;
> @@ -1133,6 +1133,9 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err,
>   		if (!vma->obj)
>   			continue;
>   
> +		if (active_only && !i915_vma_is_active(vma))
> +			continue;
> +
>   		if (pinned_only && !i915_vma_is_pinned(vma))
>   			continue;
>   
> @@ -1610,14 +1613,16 @@ static void gem_capture_vm(struct i915_gpu_state *error,
>   	int count;
>   
>   	count = 0;
> -	list_for_each_entry(vma, &vm->active_list, vm_link)
> -		count++;
> +	list_for_each_entry(vma, &vm->bound_list, vm_link)
> +		if (i915_vma_is_active(vma))
> +			count++;
>   
>   	active_bo = NULL;
>   	if (count)
>   		active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);
>   	if (active_bo)
> -		count = capture_error_bo(active_bo, count, &vm->active_list, false);
> +		count = capture_error_bo(active_bo, count, &vm->bound_list,
> +					 true, false);
>   	else
>   		count = 0;
>   
> @@ -1655,28 +1660,20 @@ static void capture_pinned_buffers(struct i915_gpu_state *error)
>   	struct i915_address_space *vm = &error->i915->ggtt.vm;
>   	struct drm_i915_error_buffer *bo;
>   	struct i915_vma *vma;
> -	int count_inactive, count_active;
> -
> -	count_inactive = 0;
> -	list_for_each_entry(vma, &vm->inactive_list, vm_link)
> -		count_inactive++;
> +	int count;
>   
> -	count_active = 0;
> -	list_for_each_entry(vma, &vm->active_list, vm_link)
> -		count_active++;
> +	count = 0;
> +	list_for_each_entry(vma, &vm->bound_list, vm_link)
> +		count++;
>   
>   	bo = NULL;
> -	if (count_inactive + count_active)
> -		bo = kcalloc(count_inactive + count_active,
> -			     sizeof(*bo), GFP_ATOMIC);
> +	if (count)
> +		bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
>   	if (!bo)
>   		return;
>   
> -	count_inactive = capture_error_bo(bo, count_inactive,
> -					  &vm->active_list, true);
> -	count_active = capture_error_bo(bo + count_inactive, count_active,
> -					&vm->inactive_list, true);
> -	error->pinned_bo_count = count_inactive + count_active;
> +	error->pinned_bo_count =
> +		capture_error_bo(bo, count, &vm->bound_list, false, true);
>   	error->pinned_bo = bo;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 5b4d78cdb4ca..7de28baffb8f 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -79,9 +79,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
>   	if (--vma->active_count)
>   		return;
>   
> -	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> -	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
> -
>   	GEM_BUG_ON(!i915_gem_object_is_active(obj));
>   	if (--obj->active_count)
>   		return;
> @@ -659,7 +656,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>   	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level));
>   
> -	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
> +	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
>   
>   	if (vma->obj) {
>   		struct drm_i915_gem_object *obj = vma->obj;
> @@ -1003,10 +1000,8 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>   	 * add the active reference first and queue for it to be dropped
>   	 * *last*.
>   	 */
> -	if (!i915_gem_active_isset(active) && !vma->active_count++) {
> -		list_move_tail(&vma->vm_link, &vma->vm->active_list);
> +	if (!i915_gem_active_isset(active) && !vma->active_count++)
>   		obj->active_count++;
> -	}
>   	i915_gem_active_set(active, rq);
>   	GEM_BUG_ON(!i915_vma_is_active(vma));
>   	GEM_BUG_ON(!obj->active_count);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> index e1ff6a1c2cb0..9d0fe8aac219 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> @@ -57,7 +57,7 @@ static int populate_ggtt(struct drm_i915_private *i915)
>   		return -EINVAL;
>   	}
>   
> -	if (list_empty(&i915->ggtt.vm.inactive_list)) {
> +	if (list_empty(&i915->ggtt.vm.bound_list)) {
>   		pr_err("No objects on the GGTT inactive list!\n");
>   		return -EINVAL;
>   	}
> @@ -69,7 +69,7 @@ static void unpin_ggtt(struct drm_i915_private *i915)
>   {
>   	struct i915_vma *vma;
>   
> -	list_for_each_entry(vma, &i915->ggtt.vm.inactive_list, vm_link)
> +	list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)
>   		i915_vma_unpin(vma);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index fea8ab14e79d..852b06cb50a0 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -1237,7 +1237,7 @@ static void track_vma_bind(struct i915_vma *vma)
>   	__i915_gem_object_pin_pages(obj);
>   
>   	vma->pages = obj->mm.pages;
> -	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
> +	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
>   }
>   
>   static int exercise_mock(struct drm_i915_private *i915,
> 

The rest looks okay.

Regards,

Tvrtko

* Re: [PATCH 31/46] drm/i915: Stop tracking MRU activity on VMA
  2019-01-16 16:27   ` Tvrtko Ursulin
@ 2019-01-16 16:37     ` Chris Wilson
  0 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-16 16:37 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-01-16 16:27:16)
> 
> On 07/01/2019 11:54, Chris Wilson wrote:
> > Our goal is to remove struct_mutex and replace it with fine grained
> > locking. One of the thorny issues is our eviction logic for reclaiming
> > space for an execbuffer (or GTT mmaping, among a few other examples).
> > While eviction itself is easy to move under a per-VM mutex, performing
> > the activity tracking is less agreeable. One solution is not to do any
> > MRU tracking and do a simple coarse evaluation during eviction of
> > active/inactive, with a loose temporal ordering of last
> > insertion/evaluation. That keeps all the locking constrained to when we
> > are manipulating the VM itself, neatly avoiding the tricky handling of
> > possible recursive locking during execbuf and elsewhere.
> > 
> > Note that discarding the MRU is unlikely to impact upon our efficiency
> > to reclaim VM space (where we think a LRU model is best) as our
> > current strategy is to use random idle replacement first before doing
> > a search, and over time the use of softpinned 48b per-ppGTT is growing
> > (thereby eliminating any need to perform any eviction searches, in
> > theory at least).
> 
> I've noticed you made some changes since I last reviewed it, but there is 
> no changelog so I have to find them manually. Also, for the ones you did 
> not make, I suppose that means you disagree?

I updated the commit msg wrt the changes.

> On the commit message my comment was that I think you should mention the 
> removal of active/inactive lists in favour of a single list.

I did, did I not?

> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_gem.c               | 10 +--
> >   drivers/gpu/drm/i915/i915_gem_evict.c         | 71 ++++++++++++-------
> >   drivers/gpu/drm/i915/i915_gem_gtt.c           | 15 ++--
> >   drivers/gpu/drm/i915/i915_gem_gtt.h           | 26 +------
> >   drivers/gpu/drm/i915/i915_gem_shrinker.c      |  8 ++-
> >   drivers/gpu/drm/i915/i915_gem_stolen.c        |  3 +-
> >   drivers/gpu/drm/i915/i915_gpu_error.c         | 37 +++++-----
> >   drivers/gpu/drm/i915/i915_vma.c               |  9 +--
> >   .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
> >   drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  2 +-
> >   10 files changed, 84 insertions(+), 101 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 83fb02dab18c..6ed44aeee583 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -254,10 +254,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
> >   
> >       pinned = ggtt->vm.reserved;
> >       mutex_lock(&dev->struct_mutex);
> > -     list_for_each_entry(vma, &ggtt->vm.active_list, vm_link)
> > -             if (i915_vma_is_pinned(vma))
> > -                     pinned += vma->node.size;
> > -     list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link)
> > +     list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
> >               if (i915_vma_is_pinned(vma))
> >                       pinned += vma->node.size;
> >       mutex_unlock(&dev->struct_mutex);
> > @@ -1540,13 +1537,10 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
> >       GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
> >   
> >       for_each_ggtt_vma(vma, obj) {
> > -             if (i915_vma_is_active(vma))
> > -                     continue;
> > -
> >               if (!drm_mm_node_allocated(&vma->node))
> >                       continue;
> >   
> > -             list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
> > +             list_move_tail(&vma->vm_link, &vma->vm->bound_list);
> >       }
> >   
> >       i915 = to_i915(obj->base.dev);
> > diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> > index 02b83a5ed96c..a76f65fe86be 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> > @@ -127,14 +127,10 @@ i915_gem_evict_something(struct i915_address_space *vm,
> >       struct drm_i915_private *dev_priv = vm->i915;
> >       struct drm_mm_scan scan;
> >       struct list_head eviction_list;
> > -     struct list_head *phases[] = {
> > -             &vm->inactive_list,
> > -             &vm->active_list,
> > -             NULL,
> > -     }, **phase;
> >       struct i915_vma *vma, *next;
> >       struct drm_mm_node *node;
> >       enum drm_mm_insert_mode mode;
> > +     struct i915_vma *active;
> >       int ret;
> >   
> >       lockdep_assert_held(&vm->i915->drm.struct_mutex);
> 
> There is a comment around here, not shown in the diff, which talks 
> about the active and inactive lists. Plus it is misleading about the 
> list ordering now.

The sequence is still intact. Just a minor mental adjustment that we
scan both in the same list, with softer ordering. Par for the course as
comments go.
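
For reference, an adjusted comment might read something like this
(wording illustrative, not the series' text):

	/*
	 * The single bound_list is kept in rough least-recently-scanned
	 * order: inactive vma are cheap to reap wherever they sit, while
	 * active vma are rotated to the tail as we scan them, so one
	 * full pass visits every element once.
	 */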

> > @@ -170,17 +166,46 @@ i915_gem_evict_something(struct i915_address_space *vm,
> >        */
> >       if (!(flags & PIN_NONBLOCK))
> >               i915_retire_requests(dev_priv);
> > -     else
> > -             phases[1] = NULL;
> >   
> >   search_again:
> > +     active = NULL;
> >       INIT_LIST_HEAD(&eviction_list);
> > -     phase = phases;
> > -     do {
> > -             list_for_each_entry(vma, *phase, vm_link)
> > -                     if (mark_free(&scan, vma, flags, &eviction_list))
> > -                             goto found;
> > -     } while (*++phase);
> > +     list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
> > +             /*
> > +              * We keep this list in a rough least-recently scanned order
> > +              * of active elements (inactive elements are cheap to reap).
> > +              * New entries are added to the end, and we move anything we
> > +              * scan to the end. The assumption is that the working set
> > +              * of applications is either steady state (and thanks to the
> > +              * userspace bo cache it almost always is) or volatile and
> > +              * frequently replaced after a frame, which are self-evicting!
> > +              * Given that assumption, the MRU order of the scan list is
> > +              * fairly static, and keeping it in least-recently scan order
> > +              * is suitable.
> > +              *
> > +              * To notice when we complete one full cycle, we record the
> > +              * first active element seen, before moving it to the tail.
> > +              */
> 
> This is one change since v1 I spotted and it is a good one.

The intent was to highlight it in the commitmsg by rewriting the
commitmsg...
-Chris

* Re: [PATCH 32/46] drm/i915: Pull VM lists under the VM mutex.
  2019-01-07 11:54 ` [PATCH 32/46] drm/i915: Pull VM lists under the VM mutex Chris Wilson
@ 2019-01-16 16:47   ` Tvrtko Ursulin
  2019-01-16 17:01     ` Chris Wilson
  0 siblings, 1 reply; 111+ messages in thread
From: Tvrtko Ursulin @ 2019-01-16 16:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/01/2019 11:54, Chris Wilson wrote:
> A starting point to counter the pervasive struct_mutex. For the goal of
> avoiding (or at least blocking under them!) global locks during user
> request submission, a simple but important step is being able to manage
> each clients GTT separately. For which, we want to replace using the
> struct_mutex as the guard for all things GTT/VM and switch instead to a
> specific mutex inside i915_address_space.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem.c                 | 14 ++++++++------
>   drivers/gpu/drm/i915/i915_gem_evict.c           |  2 ++
>   drivers/gpu/drm/i915/i915_gem_gtt.c             | 15 +++++++++++++--
>   drivers/gpu/drm/i915/i915_gem_shrinker.c        |  4 ++++
>   drivers/gpu/drm/i915/i915_gem_stolen.c          |  2 ++
>   drivers/gpu/drm/i915/i915_vma.c                 | 11 +++++++++++
>   drivers/gpu/drm/i915/selftests/i915_gem_evict.c |  3 +++
>   drivers/gpu/drm/i915/selftests/i915_gem_gtt.c   |  3 +++
>   8 files changed, 46 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 6ed44aeee583..5141a8ba4836 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -246,18 +246,19 @@ int
>   i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *file)
>   {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> +	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
>   	struct drm_i915_gem_get_aperture *args = data;
>   	struct i915_vma *vma;
>   	u64 pinned;
>   
> +	mutex_lock(&ggtt->vm.mutex);
> +
>   	pinned = ggtt->vm.reserved;
> -	mutex_lock(&dev->struct_mutex);
>   	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
>   		if (i915_vma_is_pinned(vma))
>   			pinned += vma->node.size;
> -	mutex_unlock(&dev->struct_mutex);
> +
> +	mutex_unlock(&ggtt->vm.mutex);
>   
>   	args->aper_size = ggtt->vm.total;
>   	args->aper_available_size = args->aper_size - pinned;
> @@ -1530,20 +1531,21 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>   
>   static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
>   {
> -	struct drm_i915_private *i915;
> +	struct drm_i915_private *i915 = to_i915(obj->base.dev);
>   	struct list_head *list;
>   	struct i915_vma *vma;
>   
>   	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
>   
> +	mutex_lock(&i915->ggtt.vm.mutex);
>   	for_each_ggtt_vma(vma, obj) {
>   		if (!drm_mm_node_allocated(&vma->node))
>   			continue;
>   
>   		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
>   	}
> +	mutex_unlock(&i915->ggtt.vm.mutex);

This is now struct_mutex -> vm->mutex nesting, which we would preferably 
want to avoid? There are only two callers of the function.

It looks like we could remove the nesting from i915_gem_set_domain_ioctl 
by just moving the call to after the mutex unlock.

The i915_gem_object_unpin_from_display_plane callers are not as easy, so 
maybe at least do the one above?
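
A minimal sketch of the reordering suggested for
i915_gem_set_domain_ioctl (shapes only, not the actual function body):

	/* before: ggtt.vm.mutex nests inside struct_mutex */
	mutex_lock(&dev->struct_mutex);
	i915_gem_object_bump_inactive_ggtt(obj); /* takes ggtt.vm.mutex */
	mutex_unlock(&dev->struct_mutex);

	/* after: the call is made once struct_mutex has been dropped,
	 * so the two locks are never held together */
	mutex_lock(&dev->struct_mutex);
	/* ... domain change under struct_mutex ... */
	mutex_unlock(&dev->struct_mutex);
	i915_gem_object_bump_inactive_ggtt(obj);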

>   
> -	i915 = to_i915(obj->base.dev);
>   	spin_lock(&i915->mm.obj_lock);
>   	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
>   	list_move_tail(&obj->mm.link, list);
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index a76f65fe86be..4a0c6830659d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -433,6 +433,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
>   	}
>   
>   	INIT_LIST_HEAD(&eviction_list);
> +	mutex_lock(&vm->mutex);
>   	list_for_each_entry(vma, &vm->bound_list, vm_link) {
>   		if (i915_vma_is_pinned(vma))
>   			continue;
> @@ -440,6 +441,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
>   		__i915_vma_pin(vma);
>   		list_add(&vma->evict_link, &eviction_list);
>   	}
> +	mutex_unlock(&vm->mutex);

This is another nesting so I suppose you leave all this fun for later?

>   
>   	ret = 0;
>   	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index ad4ef8980b97..c3363a9b586b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1932,7 +1932,10 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
>   	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
>   
>   	INIT_LIST_HEAD(&vma->obj_link);
> +
> +	mutex_lock(&vma->vm->mutex);
>   	list_add(&vma->vm_link, &vma->vm->unbound_list);
> +	mutex_unlock(&vma->vm->mutex);
>   
>   	return vma;
>   }
> @@ -3504,9 +3507,10 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
>   
>   	i915_check_and_clear_faults(dev_priv);
>   
> +	mutex_lock(&ggtt->vm.mutex);
> +
>   	/* First fill our portion of the GTT with scratch pages */
>   	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
> -
>   	ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
>   
>   	/* clflush objects bound into the GGTT and rebind them. */
> @@ -3516,19 +3520,26 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
>   		if (!(vma->flags & I915_VMA_GLOBAL_BIND))
>   			continue;
>   
> +		mutex_unlock(&ggtt->vm.mutex);
> +
>   		if (!i915_vma_unbind(vma))
> -			continue;
> +			goto lock;
>   
>   		WARN_ON(i915_vma_bind(vma,
>   				      obj ? obj->cache_level : 0,
>   				      PIN_UPDATE));
>   		if (obj)
>   			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
> +
> +lock:
> +		mutex_lock(&ggtt->vm.mutex);
>   	}
>   
>   	ggtt->vm.closed = false;
>   	i915_ggtt_invalidate(dev_priv);
>   
> +	mutex_unlock(&ggtt->vm.mutex);
> +
>   	if (INTEL_GEN(dev_priv) >= 8) {
>   		struct intel_ppat *ppat = &dev_priv->ppat;
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 1531534eea02..786121609016 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -489,6 +489,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
>   					       I915_SHRINK_VMAPS);
>   
>   	/* We also want to clear any cached iomaps as they wrap vmap */
> +	mutex_lock(&i915->ggtt.vm.mutex);
>   	list_for_each_entry_safe(vma, next,
>   				 &i915->ggtt.vm.bound_list, vm_link) {
>   		unsigned long count = vma->node.size >> PAGE_SHIFT;
> @@ -496,9 +497,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
>   		if (!vma->iomap || i915_vma_is_active(vma))
>   			continue;
>   
> +		mutex_unlock(&i915->ggtt.vm.mutex);
>   		if (i915_vma_unbind(vma) == 0)
>   			freed_pages += count;
> +		mutex_lock(&i915->ggtt.vm.mutex);
>   	}
> +	mutex_unlock(&i915->ggtt.vm.mutex);
>   
>   out:
>   	shrinker_unlock(i915, unlock);
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 75b97d71f072..21de3a5e9910 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -703,7 +703,9 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
>   	vma->flags |= I915_VMA_GLOBAL_BIND;
>   	__i915_vma_set_map_and_fenceable(vma);
>   
> +	mutex_lock(&ggtt->vm.mutex);
>   	list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
> +	mutex_unlock(&ggtt->vm.mutex);
>   
>   	spin_lock(&dev_priv->mm.obj_lock);
>   	list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list);
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 7de28baffb8f..dcbd0d345c72 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -213,7 +213,10 @@ vma_create(struct drm_i915_gem_object *obj,
>   	}
>   	rb_link_node(&vma->obj_node, rb, p);
>   	rb_insert_color(&vma->obj_node, &obj->vma_tree);
> +
> +	mutex_lock(&vm->mutex);
>   	list_add(&vma->vm_link, &vm->unbound_list);
> +	mutex_unlock(&vm->mutex);
>   
>   	return vma;
>   
> @@ -656,7 +659,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>   	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level));
>   
> +	mutex_lock(&vma->vm->mutex);
>   	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
> +	mutex_unlock(&vma->vm->mutex);
>   
>   	if (vma->obj) {
>   		struct drm_i915_gem_object *obj = vma->obj;
> @@ -689,8 +694,10 @@ i915_vma_remove(struct i915_vma *vma)
>   
>   	vma->ops->clear_pages(vma);
>   
> +	mutex_lock(&vma->vm->mutex);
>   	drm_mm_remove_node(&vma->node);

This is by design also protected by the vm->mutex? But insertion is not 
AFAICT.

>   	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
> +	mutex_unlock(&vma->vm->mutex);
>   
>   	/*
>   	 * Since the unbound list is global, only move to that list if
> @@ -802,7 +809,11 @@ static void __i915_vma_destroy(struct i915_vma *vma)
>   	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
>   
>   	list_del(&vma->obj_link);
> +
> +	mutex_lock(&vma->vm->mutex);
>   	list_del(&vma->vm_link);
> +	mutex_unlock(&vma->vm->mutex);
> +
>   	if (vma->obj)
>   		rb_erase(&vma->obj_node, &vma->obj->vma_tree);
>   
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> index 9d0fe8aac219..eaefba7470f7 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> @@ -67,10 +67,13 @@ static int populate_ggtt(struct drm_i915_private *i915)
>   
>   static void unpin_ggtt(struct drm_i915_private *i915)
>   {
> +	struct i915_ggtt *ggtt = &i915->ggtt;
>   	struct i915_vma *vma;
>   
> +	mutex_lock(&ggtt->vm.mutex);
>   	list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)
>   		i915_vma_unpin(vma);
> +	mutex_unlock(&ggtt->vm.mutex);
>   }
>   
>   static void cleanup_objects(struct drm_i915_private *i915)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index 852b06cb50a0..35eb40e5de91 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -1237,7 +1237,10 @@ static void track_vma_bind(struct i915_vma *vma)
>   	__i915_gem_object_pin_pages(obj);
>   
>   	vma->pages = obj->mm.pages;
> +
> +	mutex_lock(&vma->vm->mutex);
>   	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
> +	mutex_unlock(&vma->vm->mutex);
>   }
>   
>   static int exercise_mock(struct drm_i915_private *i915,
> 

Regards,

Tvrtko

* Re: [PATCH 32/46] drm/i915: Pull VM lists under the VM mutex.
  2019-01-16 16:47   ` Tvrtko Ursulin
@ 2019-01-16 17:01     ` Chris Wilson
  2019-01-17 16:23       ` Tvrtko Ursulin
  0 siblings, 1 reply; 111+ messages in thread
From: Chris Wilson @ 2019-01-16 17:01 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-01-16 16:47:43)
> 
> On 07/01/2019 11:54, Chris Wilson wrote:
> > @@ -1530,20 +1531,21 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> >   
> >   static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
> >   {
> > -     struct drm_i915_private *i915;
> > +     struct drm_i915_private *i915 = to_i915(obj->base.dev);
> >       struct list_head *list;
> >       struct i915_vma *vma;
> >   
> >       GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
> >   
> > +     mutex_lock(&i915->ggtt.vm.mutex);
> >       for_each_ggtt_vma(vma, obj) {
> >               if (!drm_mm_node_allocated(&vma->node))
> >                       continue;
> >   
> >               list_move_tail(&vma->vm_link, &vma->vm->bound_list);
> >       }
> > +     mutex_unlock(&i915->ggtt.vm.mutex);
> 
> This is now struct_mutex -> vm->mutex nesting, which we would preferably 
> want to avoid? There are only two callers of the function.
> 
> It looks like we could remove the nesting from i915_gem_set_domain_ioctl 
> by just moving the call to after the mutex unlock.
> 
> The i915_gem_object_unpin_from_display_plane callers are not as easy, so 
> maybe at least do the one above?

unpin_from_display_plane is the goal here tbh.

> > -     i915 = to_i915(obj->base.dev);
> >       spin_lock(&i915->mm.obj_lock);
> >       list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
> >       list_move_tail(&obj->mm.link, list);
> > diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> > index a76f65fe86be..4a0c6830659d 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> > @@ -433,6 +433,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
> >       }
> >   
> >       INIT_LIST_HEAD(&eviction_list);
> > +     mutex_lock(&vm->mutex);
> >       list_for_each_entry(vma, &vm->bound_list, vm_link) {
> >               if (i915_vma_is_pinned(vma))
> >                       continue;
> > @@ -440,6 +441,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
> >               __i915_vma_pin(vma);
> >               list_add(&vma->evict_link, &eviction_list);
> >       }
> > +     mutex_unlock(&vm->mutex);
> 
> This is another nesting so I suppose you leave all this fun for later?

Yes, the intent was to put the locks in place (gradually) then peel back
struct_mutex (gradually).

> > @@ -689,8 +694,10 @@ i915_vma_remove(struct i915_vma *vma)
> >   
> >       vma->ops->clear_pages(vma);
> >   
> > +     mutex_lock(&vma->vm->mutex);
> >       drm_mm_remove_node(&vma->node);
> 
> This is by design also protected by the vm->mutex? But insertion is not 
> AFAICT.

Not yet. Can you guess which bit proved tricky? ;) Getting the right
point to lock for execbuf, and for eviction. At the same time, over
there, there is the fuss with ww_mutex, as well as contexts et al., and
it all gets confusing quickly.

...(tries to remember why this patch is actually here; this set was
picked so that I could do obj->vma_list without struct_mutex (which
was used for timeline allocation) and I pulled in anything required to
resolve conflicts, but why this one)...
-Chris

* Re: [PATCH 43/46] drm/i915: Allocate a status page for each timeline
  2019-01-15 18:43         ` Chris Wilson
@ 2019-01-16 21:06           ` John Harrison
  2019-01-16 21:15             ` Chris Wilson
  0 siblings, 1 reply; 111+ messages in thread
From: John Harrison @ 2019-01-16 21:06 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 1/15/2019 10:43, Chris Wilson wrote:
> Quoting John Harrison (2019-01-15 18:17:21)
>> On 1/15/2019 01:50, Chris Wilson wrote:
>>> Quoting John Harrison (2019-01-15 00:56:13)
>>>> On 1/7/2019 03:55, Chris Wilson wrote:
>>>>> +static int alloc_hwsp(struct i915_timeline *timeline)
>>>>> +{
>>>>> +     struct drm_i915_private *i915 = timeline->i915;
>>>>> +     struct i915_vma *vma;
>>>>> +     int offset;
>>>>> +
>>>>> +     mutex_lock(&i915->gt.timeline_lock);
>>>>> +
>>>>> +restart:
>>>>> +     offset = find_first_cacheline(i915);
>>>>> +     if (offset == NBITS && i915->gt.timeline_hwsp) {
>>>>> +             i915_vma_put(i915->gt.timeline_hwsp);
>>>>> +             i915->gt.timeline_hwsp = NULL;
>>>>> +     }
>>>>> +
>>>>> +     vma = i915->gt.timeline_hwsp;
>>>>> +     if (!vma) {
>>>>> +             struct drm_i915_gem_object *bo;
>>>>> +
>>>>> +             /* Drop the lock before allocations */
>>>>> +             mutex_unlock(&i915->gt.timeline_lock);
>>>>> +
>>>>> +             BUILD_BUG_ON(NBITS * CACHELINE_BYTES > PAGE_SIZE);
>>>>> +             bo = i915_gem_object_create_internal(i915, PAGE_SIZE);
>>>>> +             if (IS_ERR(bo))
>>>>> +                     return PTR_ERR(bo);
>>>>> +
>>>>> +             i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
>>>>> +
>>>>> +             vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL);
>>>>> +             if (IS_ERR(vma))
>>>>> +                     return PTR_ERR(vma);
>>>>> +
>>>>> +             mutex_lock(&i915->gt.timeline_lock);
>>>>> +             if (i915->gt.timeline_hwsp) {
>>>>> +                     i915_gem_object_put(bo);
>>>>> +                     goto restart;
>>>>> +             }
>>>>> +
>>>>> +             i915->gt.timeline_hwsp = vma;
>>>>> +             i915->gt.timeline_free = ~0ull;
>>>>> +             offset = 0;
>>>>> +     }
>>>>> +
>>>>> +     i915->gt.timeline_free &= ~BIT_ULL(offset);
>>>>> +
>>>>> +     timeline->hwsp_ggtt = i915_vma_get(vma);
>>>>> +     timeline->hwsp_offset = offset * CACHELINE_BYTES;
>>>>> +
>>>>> +     mutex_unlock(&i915->gt.timeline_lock);
>>>>> +
>>>>> +     return 0;
>>>>> +}
>>>> If I'm reading this correctly then gt.timeline_hwsp/free is a cached
>>>> copy of the most recently allocated but not yet filled bank of seqno
>>>> locations. When it gets full, the i915->gt reference gets dropped and a
>>>> new page is allocated and used up line by line. Meanwhile, each timeline
>>>> has it's own private reference to the page so dropping the i915->gt
>>>> reference is safe. And once the last timeline using a given page is
>>>> freed, the last reference to that page will be dropped and so the page
>>>> itself will also be freed. If a timeline is freed before the currently
>>>> cached page is filled, then that timeline's slot will be released and
>>>> re-used by the next timeline to be created.
>>>>
>>>> But what about the scenario of a long running system with a small but
>>>> growing number of persistent tasks interspersed with many short lived
>>>> tasks? In that case, you would end up with many sparsely populated pages
>>>> whose free slots will not get re-used. You could have a linked list
>>>> of cached pages. When a page is filled, move it to a 'full' list. When a
>>>> timeline is freed, if its page was on the 'full' list, clear the slot
>>>> and move it back to the 'available' list.
>>> Yes. My thinking was a plain slab cache was a quick-and-dirty
>>> improvement over a page-per-timeline. And a freelist would be the next
>>> step.
>>>
>>>> Or is the idea that a worst case of a single page vma allocation per
>>>> timeline is the least of our worries if there is an ever growing number
>>>> of timelines/contexts/users in the system?
>>> Nah, it was just an attempt to quickly reduce the number of allocations,
>>> where the worst case of one page+vma per timeline was the starting
>>> point.
>>>
>>> We should break this patch down into 1) one-page-per-timeline, 2) slab
>>> cache, 3) free list 4) profit.
>>>
>>> At other times we have been wanting to be able to suballocate pages,
>>> something to keep in mind would be extending this to arbitrary cacheline
>>> allocations.
>> The multi-stage approach sounds good. Keep things simple in this patch
>> and then improve the situation later. One thing to be careful of with a
>> cacheline allocator would be to make sure whatever is being converted
>> wasn't using full pages for security reasons. I.e. a page can be private
>> to a process, a cacheline will be shared by many. I guess that would
>> only really apply to allocations being passed to user land as the kernel
>> is considered secure? Or can a user batch buffer write to arbitrary
>> locations within the ppHWSP and thereby splat someone else's seqno?
> ppHWSP, yes. But for internal allocations, only accessible via the ring
> + GGTT, should be no problem. I agree that we definitely don't want to
> expose subpage sharing across the userspace boundary (all isolation
> controls are only on pages and above).
>
> If userspace wants suballocations, it can (and does) do them for itself
> and should regulate its own sharing.

I'm a little confused. Are you saying that a rogue batch buffer could 
splat some other context's ppHWSP seqno or that it can't? It would be 
bad if one dodgy user could cause hangchecks in another user's batch by 
splatting their seqnos.

>
>>>>> +     if (global_hwsp) {
>>>>> +             timeline->hwsp_ggtt = i915_vma_get(global_hwsp);
>>>>> +             timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
>>>>> +     } else {
>>>>> +             err = alloc_hwsp(timeline);
>>>>> +             if (err)
>>>>> +                     return err;
>>>>> +     }
>>>>> +
>>>>> +     vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB);
>>>>> +     if (IS_ERR(vaddr)) { /* leak the cacheline, but will clean up later */
>>>> Can you explain this comment more? Where/when is the later?
>>> On failure here, the cacheline is still marked as allocated in the slab,
>>> but the reference to the page is released. So the backing page will be
>>> released when everyone else finally drops their reference.
>>>
>>> Just laziness, since we have the ability to return the cacheline later
>>> on...
>> Meaning the actual leak is the bit in 'i915->gt.timeline_free' that says
>> this cacheline can or can't be used for the next allocation? Presumably
>> you could do the bit map munging in the case that 'global_hwsp' is null,
>> but the code would certainly be messier for not a lot of gain.
> Having been pointed out that I was being lazy, a bit of refactoring
> later showed how lazy I was.
Does that mean you are going to re-work this patch or follow it up with 
a subsequent one?

>
>>>>> @@ -2616,7 +2628,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>>>>>                 goto error_deref_obj;
>>>>>         }
>>>>>     
>>>>> -     timeline = i915_timeline_create(ctx->i915, ctx->name);
>>>>> +     timeline = i915_timeline_create(ctx->i915, ctx->name, NULL);
>>>> Why does this use the global HWSP rather than a per context one?
>>> .global_hwsp = NULL => it allocates its own HWSP.
>>>
>>> Were you thinking of intel_engine_setup_common() which is still using
>>> the global HWSP at this point in time?
>> Doh. Brain fart. Presumably the engine one will disappear completely? Or
>> is it still needed for legacy mode?
> It (the timeline embedded inside the engine) is killed later, once
> the internal clients (perf/pmu, hangcheck and idling at the last count)
> are ready for the lack of a globally ordered execution queue. The single
> ringbuffer + timeline persists for legacy. (Multiple timelines for gen7,
> coming later!)
> -Chris
Sounds good :).

John.
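
As an aside, the 'full'/'available' free-list scheme sketched in the
quoted discussion could be shaped roughly as below (all names are
hypothetical, not the series' code; the caller is assumed to hold
i915->gt.timeline_lock):

	/* one shared page carved into per-timeline cachelines */
	struct hwsp_page {
		struct list_head link;	/* on the 'free' or 'full' list */
		struct i915_vma *vma;	/* backing page in the GGTT */
		u64 free_bitmap;	/* one bit per free cacheline */
	};

	static int hwsp_alloc_cacheline(struct list_head *free_pages,
					struct list_head *full_pages)
	{
		struct hwsp_page *p;
		int slot;

		p = list_first_entry_or_null(free_pages,
					     struct hwsp_page, link);
		if (!p)
			return -ENOSPC; /* caller allocates a fresh page */

		/* pages on the free list always have a bit set */
		slot = __ffs64(p->free_bitmap);
		p->free_bitmap &= ~BIT_ULL(slot);
		if (!p->free_bitmap)
			list_move(&p->link, full_pages); /* page now full */

		return slot;
	}

	static void hwsp_free_cacheline(struct hwsp_page *p, int slot,
					struct list_head *free_pages)
	{
		if (!p->free_bitmap) /* was on the 'full' list */
			list_move(&p->link, free_pages);
		p->free_bitmap |= BIT_ULL(slot);
	}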


* Re: [PATCH 43/46] drm/i915: Allocate a status page for each timeline
  2019-01-16 21:06           ` John Harrison
@ 2019-01-16 21:15             ` Chris Wilson
  0 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-16 21:15 UTC (permalink / raw)
  To: John Harrison, intel-gfx

Quoting John Harrison (2019-01-16 21:06:36)
> On 1/15/2019 10:43, Chris Wilson wrote:
> > Quoting John Harrison (2019-01-15 18:17:21)
> >> On 1/15/2019 01:50, Chris Wilson wrote:
> >>> Quoting John Harrison (2019-01-15 00:56:13)
> >>>> On 1/7/2019 03:55, Chris Wilson wrote:
> >>>>> +static int alloc_hwsp(struct i915_timeline *timeline)
> >>>>> +{
> >>>>> +     struct drm_i915_private *i915 = timeline->i915;
> >>>>> +     struct i915_vma *vma;
> >>>>> +     int offset;
> >>>>> +
> >>>>> +     mutex_lock(&i915->gt.timeline_lock);
> >>>>> +
> >>>>> +restart:
> >>>>> +     offset = find_first_cacheline(i915);
> >>>>> +     if (offset == NBITS && i915->gt.timeline_hwsp) {
> >>>>> +             i915_vma_put(i915->gt.timeline_hwsp);
> >>>>> +             i915->gt.timeline_hwsp = NULL;
> >>>>> +     }
> >>>>> +
> >>>>> +     vma = i915->gt.timeline_hwsp;
> >>>>> +     if (!vma) {
> >>>>> +             struct drm_i915_gem_object *bo;
> >>>>> +
> >>>>> +             /* Drop the lock before allocations */
> >>>>> +             mutex_unlock(&i915->gt.timeline_lock);
> >>>>> +
> >>>>> +             BUILD_BUG_ON(NBITS * CACHELINE_BYTES > PAGE_SIZE);
> >>>>> +             bo = i915_gem_object_create_internal(i915, PAGE_SIZE);
> >>>>> +             if (IS_ERR(bo))
> >>>>> +                     return PTR_ERR(bo);
> >>>>> +
> >>>>> +             i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
> >>>>> +
> >>>>> +             vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL);
> >>>>> +             if (IS_ERR(vma))
> >>>>> +                     return PTR_ERR(vma);
> >>>>> +
> >>>>> +             mutex_lock(&i915->gt.timeline_lock);
> >>>>> +             if (i915->gt.timeline_hwsp) {
> >>>>> +                     i915_gem_object_put(bo);
> >>>>> +                     goto restart;
> >>>>> +             }
> >>>>> +
> >>>>> +             i915->gt.timeline_hwsp = vma;
> >>>>> +             i915->gt.timeline_free = ~0ull;
> >>>>> +             offset = 0;
> >>>>> +     }
> >>>>> +
> >>>>> +     i915->gt.timeline_free &= ~BIT_ULL(offset);
> >>>>> +
> >>>>> +     timeline->hwsp_ggtt = i915_vma_get(vma);
> >>>>> +     timeline->hwsp_offset = offset * CACHELINE_BYTES;
> >>>>> +
> >>>>> +     mutex_unlock(&i915->gt.timeline_lock);
> >>>>> +
> >>>>> +     return 0;
> >>>>> +}
> >>>> If I'm reading this correctly then gt.timeline_hwsp/free is a cached
> >>>> copy of the most recently allocated but not yet filled bank of seqno
> >>>> locations. When it gets full, the i915->gt reference gets dropped and a
> >>>> new page is allocated and used up line by line. Meanwhile, each timeline
> >>>> has its own private reference to the page so dropping the i915->gt
> >>>> reference is safe. And once the last timeline using a given page is
> >>>> freed, the last reference to that page will be dropped and so the page
> >>>> itself will also be freed. If a timeline is freed before the currently
> >>>> cached page is filled, then that timeline's slot will be released and
> >>>> re-used by the next timeline to be created.
> >>>>
> >>>> But what about the scenario of a long-running system with a small but
> >>>> growing number of persistent tasks interspersed with many short-lived
> >>>> tasks? In that case, you would end up with many sparsely populated pages
> >>>> whose free slots will not get re-used. You could have a linked list
> >>>> of cached pages. When a page is filled, move it to a 'full' list. When a
> >>>> timeline is freed, if its page was on the 'full' list, clear the slot
> >>>> and move it back to the 'available' list.
> >>> Yes. My thinking was that a plain slab cache was a quick-and-dirty
> >>> improvement over a page-per-timeline. And a freelist would be the next
> >>> step.
> >>>
> >>>> Or is the idea that a worst case of a single page vma allocation per
> >>>> timeline is the least of our worries if there is an ever growing number
> >>>> of timelines/contexts/users in the system?
> >>> Nah, it was just an attempt to quickly reduce the number of allocations,
> >>> where the worst case of one page+vma per timeline was the starting
> >>> point.
> >>>
> >>> We should break this patch down into 1) one-page-per-timeline, 2) slab
> >>> cache, 3) free list, 4) profit.
> >>>
> >>> At other times we have been wanting to be able to suballocate pages,
> >>> something to keep in mind would be extending this to arbitrary cacheline
> >>> allocations.
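
A minimal sketch of the page free-list being discussed, assuming
hypothetical types, list heads and locking; this is not the patch code:

#include <linux/bitops.h>
#include <linux/list.h>

struct hwsp_page {
	struct list_head link;	/* on either an 'avail' or a 'full' list */
	struct i915_vma *vma;	/* one page of HWSP cachelines in the GGTT */
	u64 free_bitmap;	/* one bit per free cacheline (64 per page) */
};

/*
 * Allocate one cacheline; the page must have a free bit, and the
 * caller holds the (hypothetical) hwsp lock.
 */
static int hwsp_cacheline_alloc(struct hwsp_page *page, struct list_head *full)
{
	int slot = __ffs64(page->free_bitmap);

	page->free_bitmap &= ~BIT_ULL(slot);
	if (!page->free_bitmap)
		list_move(&page->link, full); /* exhausted, park on 'full' */

	return slot * CACHELINE_BYTES;
}

/* Release one cacheline; a previously full page becomes reusable. */
static void hwsp_cacheline_free(struct hwsp_page *page, int offset,
				struct list_head *avail)
{
	if (!page->free_bitmap)
		list_move(&page->link, avail); /* was full, available again */

	page->free_bitmap |= BIT_ULL(offset / CACHELINE_BYTES);
}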
> >> The multi-stage approach sounds good. Keep things simple in this patch
> >> and then improve the situation later. One thing to be careful of with a
> >> cacheline allocator would be to make sure whatever is being converted
> >> wasn't using full pages for security reasons. I.e. a page can be private
> >> to a process, whereas a cacheline will be shared by many. I guess that would
> >> only really apply to allocations being passed to user land as the kernel
> >> is considered secure? Or can a user batch buffer write to arbitrary
> >> locations within the ppHWSP and thereby splat someone else's seqno?
> > ppHWSP, yes. But for internal allocations, only accessible via the ring
> > + GGTT, there should be no problem. I agree that we definitely don't want to
> > expose subpage sharing across the userspace boundary (all isolation
> > controls are only on pages and above).
> >
> > If userspace wants suballocations, it can (and does) do them for itself
> > and should regulate its own sharing.
> 
> I'm a little confused. Are you saying that a rogue batch buffer could 
> splat some other context's ppHWSP seqno or that it can't? It would be 
> bad if one dodgy user could cause hangchecks in another user's batch by 
> splatting their seqnos.

It can't access another context's ppHWSP, only the kernel doing its bit
in the ring between batches. So not without a kernel bug, as with a
great many things.
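
To make "the kernel doing its bit in the ring" concrete, a sketch of the
seqno write emitted between batches (gen8 GGTT-write command layout; the
rq->timeline fields are assumptions based on this series):

u32 *cs;

cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs))
	return PTR_ERR(cs);

*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = i915_ggtt_offset(rq->timeline->hwsp_ggtt) +
	rq->timeline->hwsp_offset;	/* this timeline's cacheline */
*cs++ = 0;				/* upper address dword */
*cs++ = rq->fence.seqno;		/* only the kernel writes here */
intel_ring_advance(rq, cs);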

> >>>>> +     if (global_hwsp) {
> >>>>> +             timeline->hwsp_ggtt = i915_vma_get(global_hwsp);
> >>>>> +             timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
> >>>>> +     } else {
> >>>>> +             err = alloc_hwsp(timeline);
> >>>>> +             if (err)
> >>>>> +                     return err;
> >>>>> +     }
> >>>>> +
> >>>>> +     vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB);
> >>>>> +     if (IS_ERR(vaddr)) { /* leak the cacheline, but will clean up later */
> >>>> Can you explain this comment more? Where/when is the later?
> >>> On failure here, the cacheline is still marked as allocated in the slab,
> >>> but the reference to the page is released. So the backing page will be
> >>> released when everyone else finally drops their reference.
> >>>
> >>> Just laziness, since we have the ability to return the cacheline later
> >>> on...
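
A sketch of what returning the cacheline on failure could look like,
reusing the hypothetical free helper from the allocator sketch above
(timeline->hwsp_page and 'avail' are likewise hypothetical):

vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB);
if (IS_ERR(vaddr)) {
	/* hand the cacheline back to the bitmap instead of leaking it */
	hwsp_cacheline_free(timeline->hwsp_page, timeline->hwsp_offset, avail);
	i915_vma_put(timeline->hwsp_ggtt);
	return PTR_ERR(vaddr);
}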
> >> Meaning the actual leak is the bit in 'i915->gt.timeline_free' that says
> >> this cacheline can or can't be used for the next allocation? Presumably
> >> you could do the bitmap munging in the case that 'global_hwsp' is null,
> >> but the code would certainly be messier for not a lot of gain.
> > Having been pointed out that I was being lazy, a bit of refactoring
> > later showed how lazy I was.
> Does that mean you are going to re-work this patch or follow it up with 
> a subsequent one?

I've new patches, just CI has been a little dead. First it didn't like
series with more than 10 patches, then it didn't like utf8 patches, and
now it is not responding...

https://patchwork.freedesktop.org/patch/277702/
https://patchwork.freedesktop.org/patch/277691/
https://patchwork.freedesktop.org/patch/277692/
-Chris

* Re: [PATCH 32/46] drm/i915: Pull VM lists under the VM mutex.
  2019-01-16 17:01     ` Chris Wilson
@ 2019-01-17 16:23       ` Tvrtko Ursulin
  2019-01-17 23:20         ` Chris Wilson
  0 siblings, 1 reply; 111+ messages in thread
From: Tvrtko Ursulin @ 2019-01-17 16:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 16/01/2019 17:01, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-01-16 16:47:43)
>>
>> On 07/01/2019 11:54, Chris Wilson wrote:
>>> @@ -1530,20 +1531,21 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>>>    
>>>    static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
>>>    {
>>> -     struct drm_i915_private *i915;
>>> +     struct drm_i915_private *i915 = to_i915(obj->base.dev);
>>>        struct list_head *list;
>>>        struct i915_vma *vma;
>>>    
>>>        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
>>>    
>>> +     mutex_lock(&i915->ggtt.vm.mutex);
>>>        for_each_ggtt_vma(vma, obj) {
>>>                if (!drm_mm_node_allocated(&vma->node))
>>>                        continue;
>>>    
>>>                list_move_tail(&vma->vm_link, &vma->vm->bound_list);
>>>        }
>>> +     mutex_unlock(&i915->ggtt.vm.mutex);
>>
>> This is now struct_mutex -> vm->mutex nesting, which we would preferably
>> want to avoid? There are only two callers of the function.
>>
>> It looks like we could remove the nesting from i915_gem_set_domain_ioctl by
>> just moving the call to after the mutex unlock.
>>
>> i915_gem_object_unpin_from_display_plane callers are not as easy, so
>> maybe at least do the one above?
> 
> unpin_from_display_plane is the goal here tbh.
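
A sketch of the reordering suggested above for i915_gem_set_domain_ioctl;
the surrounding ioctl body is paraphrased, not verbatim:

err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
mutex_unlock(&dev->struct_mutex);

/* vm->mutex is now taken without struct_mutex held */
if (!err)
	i915_gem_object_bump_inactive_ggtt(obj);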
> 
>>> -     i915 = to_i915(obj->base.dev);
>>>        spin_lock(&i915->mm.obj_lock);
>>>        list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
>>>        list_move_tail(&obj->mm.link, list);
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
>>> index a76f65fe86be..4a0c6830659d 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
>>> @@ -433,6 +433,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
>>>        }
>>>    
>>>        INIT_LIST_HEAD(&eviction_list);
>>> +     mutex_lock(&vm->mutex);
>>>        list_for_each_entry(vma, &vm->bound_list, vm_link) {
>>>                if (i915_vma_is_pinned(vma))
>>>                        continue;
>>> @@ -440,6 +441,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
>>>                __i915_vma_pin(vma);
>>>                list_add(&vma->evict_link, &eviction_list);
>>>        }
>>> +     mutex_unlock(&vm->mutex);
>>
>> This is another nesting so I suppose you leave all this fun for later?
> 
> Yes, the intent was to put the locks in place (gradually) then peel back
> struct_mutex (gradually).
> 
>>> @@ -689,8 +694,10 @@ i915_vma_remove(struct i915_vma *vma)
>>>    
>>>        vma->ops->clear_pages(vma);
>>>    
>>> +     mutex_lock(&vma->vm->mutex);
>>>        drm_mm_remove_node(&vma->node);
>>
>> This is by design also protected by the vm->mutex? But insertion is not
>> AFAICT.
> 
> Not yet. Can you guess which bit proved tricky? ;) Getting the right
> point to lock for execbuf, and eviction. At the same time over there is
> the fuss with ww_mutex, as well as contexts et al, and it all gets
> confusing quickly.
> 
> ...(tries to remember why this patch is actually here; this set was
> picked so that I could do obj->vma_list without struct_mutex (which
> was used for timeline allocation) and I pulled in anything required to
> resolve conflicts, but why this one)...

Have you figured it out in the meantime?

Regards,

Tvrtko

* Re: [PATCH 32/46] drm/i915: Pull VM lists under the VM mutex.
  2019-01-17 16:23       ` Tvrtko Ursulin
@ 2019-01-17 23:20         ` Chris Wilson
  0 siblings, 0 replies; 111+ messages in thread
From: Chris Wilson @ 2019-01-17 23:20 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-01-17 16:23:48)
> 
> On 16/01/2019 17:01, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-01-16 16:47:43)
> >>
> >> On 07/01/2019 11:54, Chris Wilson wrote:
> >>> @@ -1530,20 +1531,21 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> >>>    
> >>>    static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
> >>>    {
> >>> -     struct drm_i915_private *i915;
> >>> +     struct drm_i915_private *i915 = to_i915(obj->base.dev);
> >>>        struct list_head *list;
> >>>        struct i915_vma *vma;
> >>>    
> >>>        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
> >>>    
> >>> +     mutex_lock(&i915->ggtt.vm.mutex);
> >>>        for_each_ggtt_vma(vma, obj) {
> >>>                if (!drm_mm_node_allocated(&vma->node))
> >>>                        continue;
> >>>    
> >>>                list_move_tail(&vma->vm_link, &vma->vm->bound_list);
> >>>        }
> >>> +     mutex_unlock(&i915->ggtt.vm.mutex);
> >>
> >> This is now struct_mutex -> vm->mutex nesting, which we would preferably
> >> want to avoid? There are only two callers of the function.
> >>
> >> It looks like we could remove the nesting from i915_gem_set_domain_ioctl by
> >> just moving the call to after the mutex unlock.
> >>
> >> i915_gem_object_unpin_from_display_plane callers are not as easy, so
> >> maybe at least do the one above?
> > 
> > unpin_from_display_plane is the goal here tbh.
> > 
> >>> -     i915 = to_i915(obj->base.dev);
> >>>        spin_lock(&i915->mm.obj_lock);
> >>>        list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
> >>>        list_move_tail(&obj->mm.link, list);
> >>> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> >>> index a76f65fe86be..4a0c6830659d 100644
> >>> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> >>> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> >>> @@ -433,6 +433,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
> >>>        }
> >>>    
> >>>        INIT_LIST_HEAD(&eviction_list);
> >>> +     mutex_lock(&vm->mutex);
> >>>        list_for_each_entry(vma, &vm->bound_list, vm_link) {
> >>>                if (i915_vma_is_pinned(vma))
> >>>                        continue;
> >>> @@ -440,6 +441,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
> >>>                __i915_vma_pin(vma);
> >>>                list_add(&vma->evict_link, &eviction_list);
> >>>        }
> >>> +     mutex_unlock(&vm->mutex);
> >>
> >> This is another nesting so I suppose you leave all this fun for later?

Yes, I remember some more of the fun that made me defer this task -- and
that was the random waits we could hit, requiring that the GPU reset
dilemma be resolved first (i.e. reworking reset to avoid taking any of
these locks, which also prevents us from hitting those waits from the
shrinker).

> > Yes, the intent was to put the locks in place (gradually) then peel back
> > struct_mutex (gradually).
> > 
> >>> @@ -689,8 +694,10 @@ i915_vma_remove(struct i915_vma *vma)
> >>>    
> >>>        vma->ops->clear_pages(vma);
> >>>    
> >>> +     mutex_lock(&vma->vm->mutex);
> >>>        drm_mm_remove_node(&vma->node);
> >>
> >> This is by design also protected by the vm->mutex? But insertion is not
> >> AFAICT.
> > 
> > Not yet. Can you guess which bit proved tricky? ;) Getting the right
> > point to lock for execbuf, and eviction. At the same time over there is
> > the fuss with ww_mutex, as well as contexts et al, and it all gets
> > confusing quickly.
> > 
> > ...(tries to remember why this patch is actually here; this set was
> > picked so that I could do obj->vma_list without struct_mutex (which
> > was used for timeline allocation) and I pulled in anything required to
> > resolve conflicts, but why this one)...
> 
> Have you figured it out in the meantime?

The patch does as it says and protects the vma->vm_link/vm->*_list. You
start to look at trying to decide if i915_vma_pin() does atomic magic or
if it should require the caller to take vm->mutex, and I quickly descend
into wanting to do the domain+fence management of
ww_mutex_lock(obj->resv.lock) instead.

Bah, make the caller take vm->mutex and then we can see if that is
better than atomic magic later.
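
Something like the following, say, with i915_vma_pin_locked() as a
hypothetical name for a variant that asserts, rather than takes, the
lock:

err = mutex_lock_interruptible(&vma->vm->mutex);
if (err)
	return err;

/* would lockdep_assert_held(&vma->vm->mutex) internally */
err = i915_vma_pin_locked(vma, size, alignment, flags);

mutex_unlock(&vma->vm->mutex);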
-Chris

end of thread

Thread overview: 111+ messages
2019-01-07 11:54 [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Chris Wilson
2019-01-07 11:54 ` [PATCH 02/46] drm/i915: Report the number of closed vma held by each context in debugfs Chris Wilson
2019-01-07 12:35   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 03/46] drm/i915: Track all held rpm wakerefs Chris Wilson
2019-01-07 13:14   ` Mika Kuoppala
2019-01-07 13:22     ` Chris Wilson
2019-01-08 11:45   ` [PATCH v2] " Chris Wilson
2019-01-08 12:22   ` [PATCH v3] " Chris Wilson
2019-01-08 12:49     ` Mika Kuoppala
2019-01-08 20:05     ` kbuild test robot
2019-01-07 11:54 ` [PATCH 04/46] drm/i915: Markup paired operations on wakerefs Chris Wilson
2019-01-08 16:23   ` Mika Kuoppala
2019-01-08 16:41     ` Chris Wilson
2019-01-09  9:23       ` Mika Kuoppala
2019-01-09 11:51         ` Chris Wilson
2019-01-09 23:33           ` John Harrison
2019-01-07 11:54 ` [PATCH 05/46] drm/i915: Track GT wakeref Chris Wilson
2019-01-09  9:52   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 06/46] drm/i915: Track the rpm wakerefs for error handling Chris Wilson
2019-01-09 10:12   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 07/46] drm/i915: Mark up sysfs with rpm wakeref tracking Chris Wilson
2019-01-09 10:13   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 08/46] drm/i915: Mark up debugfs " Chris Wilson
2019-01-09 10:20   ` Mika Kuoppala
2019-01-09 11:49     ` Chris Wilson
2019-01-07 11:54 ` [PATCH 09/46] drm/i915/perf: Track the rpm wakeref Chris Wilson
2019-01-09 10:30   ` Mika Kuoppala
2019-01-09 11:45     ` Chris Wilson
2019-01-07 11:54 ` [PATCH 10/46] drm/i915/pmu: Track " Chris Wilson
2019-01-09 10:37   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 11/46] drm/i915/guc: Track the " Chris Wilson
2019-01-09 10:53   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 12/46] drm/i915/gem: Track the rpm wakerefs Chris Wilson
2019-01-09 11:16   ` Mika Kuoppala
2019-01-09 23:45     ` John Harrison
2019-01-07 11:54 ` [PATCH 13/46] drm/i915/fb: Track " Chris Wilson
2019-01-09 11:39   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 14/46] drm/i915/hotplug: Track temporary rpm wakeref Chris Wilson
2019-01-09 11:40   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 15/46] drm/i915/panel: " Chris Wilson
2019-01-09 11:41   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 16/46] drm/i915/selftests: Mark up rpm wakerefs Chris Wilson
2019-01-09 12:54   ` Mika Kuoppala
2019-01-07 11:54 ` [PATCH 17/46] drm/i915: Syntatic sugar for using intel_runtime_pm Chris Wilson
2019-01-09 14:30   ` Mika Kuoppala
2019-01-10  0:24   ` John Harrison
2019-01-10  1:10     ` John Harrison
2019-01-10  9:59       ` Chris Wilson
2019-01-07 11:54 ` [PATCH 18/46] drm/i915: Markup paired operations on display power domains Chris Wilson
2019-01-10  0:55   ` John Harrison
2019-01-10 10:00     ` Chris Wilson
2019-01-07 11:54 ` [PATCH 19/46] drm/i915: Track the wakeref used to initialise " Chris Wilson
2019-01-07 11:54 ` [PATCH 20/46] drm/i915: Combined gt.awake/gt.power wakerefs Chris Wilson
2019-01-07 11:54 ` [PATCH 21/46] drm/i915/dp: Markup pps lock power well Chris Wilson
2019-01-07 11:54 ` [PATCH 22/46] drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice Chris Wilson
2019-01-07 11:54 ` [PATCH 23/46] drm/i915: Mark up Ironlake ips with rpm wakerefs Chris Wilson
2019-01-07 11:54 ` [PATCH 24/46] drm/i915: Serialise concurrent calls to i915_gem_set_wedged() Chris Wilson
2019-01-07 11:54 ` [PATCH 25/46] drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex Chris Wilson
2019-01-08  9:00   ` Tvrtko Ursulin
2019-01-07 11:54 ` [PATCH 26/46] drm/i915: Pull all the reset functionality together into i915_reset.c Chris Wilson
2019-01-07 11:54 ` [PATCH 27/46] drm/i915: Make all GPU resets atomic Chris Wilson
2019-01-07 11:54 ` [PATCH 28/46] drm/i915/guc: Disable global reset Chris Wilson
2019-01-07 11:54 ` [PATCH 29/46] drm/i915: Remove GPU reset dependence on struct_mutex Chris Wilson
2019-01-07 11:54 ` [PATCH 30/46] drm/i915: Issue engine resets onto idle engines Chris Wilson
2019-01-07 11:54 ` [PATCH 31/46] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
2019-01-16 16:27   ` Tvrtko Ursulin
2019-01-16 16:37     ` Chris Wilson
2019-01-07 11:54 ` [PATCH 32/46] drm/i915: Pull VM lists under the VM mutex Chris Wilson
2019-01-16 16:47   ` Tvrtko Ursulin
2019-01-16 17:01     ` Chris Wilson
2019-01-17 16:23       ` Tvrtko Ursulin
2019-01-17 23:20         ` Chris Wilson
2019-01-07 11:54 ` [PATCH 33/46] drm/i915: Move vma lookup to its own lock Chris Wilson
2019-01-07 11:54 ` [PATCH 34/46] drm/i915: Move intel_execlists_show_requests() aside Chris Wilson
2019-01-07 11:54 ` [PATCH 35/46] drm/i915: Use b->irq_enable() as predicate for mock engine Chris Wilson
2019-01-07 11:54 ` [PATCH 36/46] drm/i915/selftests: Allocate mock ring/timeline per context Chris Wilson
2019-01-07 11:55 ` [PATCH 37/46] drm/i915/selftests: Make evict tolerant of foreign objects Chris Wilson
2019-01-07 11:55 ` [PATCH 38/46] drm/i915: Remove the intel_engine_notify tracepoint Chris Wilson
2019-01-07 11:55 ` [PATCH 39/46] drm/i915: Always allocate an object/vma for the HWSP Chris Wilson
2019-01-10 10:52   ` Matthew Auld
2019-01-10 11:07     ` Chris Wilson
2019-01-10 11:24   ` Matthew Auld
2019-01-07 11:55 ` [PATCH 40/46] drm/i915: Move list of timelines under its own lock Chris Wilson
2019-01-07 11:55 ` [PATCH 41/46] drm/i915: Introduce concept of per-timeline (context) HWSP Chris Wilson
2019-01-15  0:55   ` John Harrison
2019-01-15  9:14     ` Chris Wilson
2019-01-15 15:40       ` Chris Wilson
2019-01-15 17:56         ` John Harrison
2019-01-07 11:55 ` [PATCH 42/46] drm/i915: Enlarge vma->pin_count Chris Wilson
2019-01-15 19:57   ` John Harrison
2019-01-15 20:17     ` Chris Wilson
2019-01-16  0:18       ` John Harrison
2019-01-07 11:55 ` [PATCH 43/46] drm/i915: Allocate a status page for each timeline Chris Wilson
2019-01-15  0:56   ` John Harrison
2019-01-15  9:50     ` Chris Wilson
2019-01-15 18:17       ` John Harrison
2019-01-15 18:43         ` Chris Wilson
2019-01-16 21:06           ` John Harrison
2019-01-16 21:15             ` Chris Wilson
2019-01-07 11:55 ` [PATCH 44/46] drm/i915: Track the context's seqno in its own timeline HWSP Chris Wilson
2019-01-07 11:55 ` [PATCH 45/46] drm/i915: Identify active requests Chris Wilson
2019-01-07 11:55 ` [PATCH 46/46] drm/i915: Replace global breadcrumbs with per-context interrupt tracking Chris Wilson
2019-01-07 12:45 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim Patchwork
2019-01-07 13:02 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-01-07 13:05 ` ✓ Fi.CI.BAT: success " Patchwork
2019-01-07 14:00 ` [PATCH 01/46] " Tvrtko Ursulin
2019-01-07 14:07   ` Chris Wilson
2019-01-08  8:58     ` Tvrtko Ursulin
2019-01-07 17:10 ` ✗ Fi.CI.IGT: failure for series starting with [01/46] " Patchwork
2019-01-07 17:19   ` Chris Wilson
2019-01-08 13:50 ` ✗ Fi.CI.BAT: failure for series starting with [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim (rev3) Patchwork
