* [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj
@ 2018-09-19 19:55 Chris Wilson
  2018-09-19 19:55 ` [PATCH 02/40] drm: Fix syncobj handling of schedule() returning 0 Chris Wilson
                   ` (42 more replies)
  0 siblings, 43 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Both the .enable_signaling and .release of the null syncobj fence
can be replaced by the default callbacks for a small reduction in code
size.
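
With the default hooks, the dma_fence core falls back to dma_fence_free()
(a kfree_rcu of the fence) when .release is absent and treats a missing
.enable_signaling as trivially successful, so the stub only needs its name
callbacks (my reading of the current core; the hunk below shows the
resulting table):

	static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
		.get_driver_name = drm_syncobj_stub_fence_get_name,
		.get_timeline_name = drm_syncobj_stub_fence_get_name,
	};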

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/drm_syncobj.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 497729202bfe..e254f97fed7d 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -66,20 +66,9 @@ static const char *drm_syncobj_stub_fence_get_name(struct dma_fence *fence)
         return "syncobjstub";
 }
 
-static bool drm_syncobj_stub_fence_enable_signaling(struct dma_fence *fence)
-{
-    return !dma_fence_is_signaled(fence);
-}
-
-static void drm_syncobj_stub_fence_release(struct dma_fence *f)
-{
-	kfree(f);
-}
 static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
 	.get_driver_name = drm_syncobj_stub_fence_get_name,
 	.get_timeline_name = drm_syncobj_stub_fence_get_name,
-	.enable_signaling = drm_syncobj_stub_fence_enable_signaling,
-	.release = drm_syncobj_stub_fence_release,
 };
 
 
-- 
2.19.0

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 02/40] drm: Fix syncobj handling of schedule() returning 0
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-20 14:13   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 03/40] drm/i915/selftests: Live tests emit requests and so require rpm Chris Wilson
                   ` (41 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

After schedule_timeout() returns with 0 jiffies remaining, we must do one
last check of COND to determine the reason for the wakeup before reporting
a timeout -- otherwise we may lose the signal to scheduler delays.
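
To make the ordering concrete, the wait loop after this patch boils down to
roughly the following (a condensed sketch of the hunk below, returning the
error code in place of the remaining jiffies):

	do {
		if (COND)			/* re-check even with no time left */
			break;

		if (timeout == 0) {		/* only now report the timeout */
			timeout = -ETIME;
			break;
		}

		if (signal_pending(current)) {
			timeout = -ERESTARTSYS;
			break;
		}

		timeout = schedule_timeout(timeout);
	} while (1);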

References: https://bugs.freedesktop.org/show_bug.cgi?id=106690
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/drm_syncobj.c | 41 +++++++++++++----------------------
 1 file changed, 15 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index e254f97fed7d..5bcb3ef9b256 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -672,7 +672,6 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 {
 	struct syncobj_wait_entry *entries;
 	struct dma_fence *fence;
-	signed long ret;
 	uint32_t signaled_count, i;
 
 	entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
@@ -692,7 +691,7 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 			if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
 				continue;
 			} else {
-				ret = -EINVAL;
+				timeout = -EINVAL;
 				goto cleanup_entries;
 			}
 		}
@@ -704,12 +703,6 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 		}
 	}
 
-	/* Initialize ret to the max of timeout and 1.  That way, the
-	 * default return value indicates a successful wait and not a
-	 * timeout.
-	 */
-	ret = max_t(signed long, timeout, 1);
-
 	if (signaled_count == count ||
 	    (signaled_count > 0 &&
 	     !(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)))
@@ -760,18 +753,17 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 			goto done_waiting;
 
 		if (timeout == 0) {
-			/* If we are doing a 0 timeout wait and we got
-			 * here, then we just timed out.
-			 */
-			ret = 0;
+			timeout = -ETIME;
 			goto done_waiting;
 		}
 
-		ret = schedule_timeout(ret);
+		if (signal_pending(current)) {
+			timeout = -ERESTARTSYS;
+			goto done_waiting;
+		}
 
-		if (ret > 0 && signal_pending(current))
-			ret = -ERESTARTSYS;
-	} while (ret > 0);
+		timeout = schedule_timeout(timeout);
+	} while (1);
 
 done_waiting:
 	__set_current_state(TASK_RUNNING);
@@ -788,7 +780,7 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 	}
 	kfree(entries);
 
-	return ret;
+	return timeout;
 }
 
 /**
@@ -829,19 +821,16 @@ static int drm_syncobj_array_wait(struct drm_device *dev,
 				  struct drm_syncobj **syncobjs)
 {
 	signed long timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec);
-	signed long ret = 0;
 	uint32_t first = ~0;
 
-	ret = drm_syncobj_array_wait_timeout(syncobjs,
-					     wait->count_handles,
-					     wait->flags,
-					     timeout, &first);
-	if (ret < 0)
-		return ret;
+	timeout = drm_syncobj_array_wait_timeout(syncobjs,
+						 wait->count_handles,
+						 wait->flags,
+						 timeout, &first);
+	if (timeout < 0)
+		return timeout;
 
 	wait->first_signaled = first;
-	if (ret == 0)
-		return -ETIME;
 	return 0;
 }
 
-- 
2.19.0

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 03/40] drm/i915/selftests: Live tests emit requests and so require rpm
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
  2018-09-19 19:55 ` [PATCH 02/40] drm: Fix syncobj handling of schedule() returning 0 Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-20 14:01   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 04/40] drm/i915: Park the GPU on module load Chris Wilson
                   ` (40 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

As we emit requests or touch the HW directly in some of the live tests, we
are required to hold the rpm wakeref before doing so. Since we will also
want to test runtime suspend, we want a mix of granularity, so try to mark
up only the critical sections where we need rpm for the live test.
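
The markup itself is mechanical; each annotated section follows the pattern
below (a sketch of the shape, matching the hunks that follow):

	mutex_lock(&i915->drm.struct_mutex);
	intel_runtime_pm_get(i915);

	/* emit and wait upon test requests while holding the wakeref */

	intel_runtime_pm_put(i915);
	mutex_unlock(&i915->drm.struct_mutex);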

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/selftests/i915_gem_coherency.c | 2 ++
 drivers/gpu/drm/i915/selftests/i915_gem_context.c   | 4 ++++
 drivers/gpu/drm/i915/selftests/i915_request.c       | 8 ++++++++
 drivers/gpu/drm/i915/selftests/intel_guc.c          | 4 ++++
 drivers/gpu/drm/i915/selftests/intel_lrc.c          | 8 ++++++++
 drivers/gpu/drm/i915/selftests/intel_workarounds.c  | 5 +++++
 6 files changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index 4e6a221063ac..f7392c1ffe75 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -298,6 +298,7 @@ static int igt_gem_coherency(void *arg)
 	values = offsets + ncachelines;
 
 	mutex_lock(&i915->drm.struct_mutex);
+	intel_runtime_pm_get(i915);
 	for (over = igt_coherency_mode; over->name; over++) {
 		if (!over->set)
 			continue;
@@ -375,6 +376,7 @@ static int igt_gem_coherency(void *arg)
 		}
 	}
 unlock:
+	intel_runtime_pm_put(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	kfree(offsets);
 	return err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 1c92560d35da..79f9bcaf0c05 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -636,6 +636,8 @@ static int igt_switch_to_kernel_context(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
+	intel_runtime_pm_get(i915);
+
 	ctx = kernel_context(i915);
 	if (IS_ERR(ctx)) {
 		mutex_unlock(&i915->drm.struct_mutex);
@@ -658,6 +660,8 @@ static int igt_switch_to_kernel_context(void *arg)
 	GEM_TRACE_DUMP_ON(err);
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
+
+	intel_runtime_pm_put(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	kernel_context_close(ctx);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index c4aac6141e04..07e557815308 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -342,6 +342,7 @@ static int live_nop_request(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
+	intel_runtime_pm_get(i915);
 
 	for_each_engine(engine, i915, id) {
 		struct i915_request *request = NULL;
@@ -402,6 +403,7 @@ static int live_nop_request(void *arg)
 	}
 
 out_unlock:
+	intel_runtime_pm_put(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -487,6 +489,7 @@ static int live_empty_request(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
+	intel_runtime_pm_get(i915);
 
 	batch = empty_batch(i915);
 	if (IS_ERR(batch)) {
@@ -550,6 +553,7 @@ static int live_empty_request(void *arg)
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
 out_unlock:
+	intel_runtime_pm_put(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -644,6 +648,7 @@ static int live_all_engines(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
+	intel_runtime_pm_get(i915);
 
 	err = begin_live_test(&t, i915, __func__, "");
 	if (err)
@@ -726,6 +731,7 @@ static int live_all_engines(void *arg)
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
 out_unlock:
+	intel_runtime_pm_put(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -747,6 +753,7 @@ static int live_sequential_engines(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
+	intel_runtime_pm_get(i915);
 
 	err = begin_live_test(&t, i915, __func__, "");
 	if (err)
@@ -853,6 +860,7 @@ static int live_sequential_engines(void *arg)
 		i915_request_put(request[id]);
 	}
 out_unlock:
+	intel_runtime_pm_put(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c
index 90ba88c972cf..0c0ab82b6228 100644
--- a/drivers/gpu/drm/i915/selftests/intel_guc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_guc.c
@@ -142,6 +142,7 @@ static int igt_guc_clients(void *args)
 
 	GEM_BUG_ON(!HAS_GUC(dev_priv));
 	mutex_lock(&dev_priv->drm.struct_mutex);
+	intel_runtime_pm_get(dev_priv);
 
 	guc = &dev_priv->guc;
 	if (!guc) {
@@ -269,6 +270,7 @@ static int igt_guc_clients(void *args)
 	guc_clients_create(guc);
 	guc_clients_doorbell_init(guc);
 unlock:
+	intel_runtime_pm_put(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
 }
@@ -287,6 +289,7 @@ static int igt_guc_doorbells(void *arg)
 
 	GEM_BUG_ON(!HAS_GUC(dev_priv));
 	mutex_lock(&dev_priv->drm.struct_mutex);
+	intel_runtime_pm_get(dev_priv);
 
 	guc = &dev_priv->guc;
 	if (!guc) {
@@ -379,6 +382,7 @@ static int igt_guc_doorbells(void *arg)
 			guc_client_free(clients[i]);
 		}
 unlock:
+	intel_runtime_pm_put(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 582566faef09..1aea7a8f2224 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -221,6 +221,7 @@ static int live_sanitycheck(void *arg)
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
+	intel_runtime_pm_get(i915);
 
 	if (spinner_init(&spin, i915))
 		goto err_unlock;
@@ -261,6 +262,7 @@ static int live_sanitycheck(void *arg)
 	spinner_fini(&spin);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
+	intel_runtime_pm_put(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -278,6 +280,7 @@ static int live_preempt(void *arg)
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
+	intel_runtime_pm_get(i915);
 
 	if (spinner_init(&spin_hi, i915))
 		goto err_unlock;
@@ -350,6 +353,7 @@ static int live_preempt(void *arg)
 	spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
+	intel_runtime_pm_put(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -368,6 +372,7 @@ static int live_late_preempt(void *arg)
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
+	intel_runtime_pm_get(i915);
 
 	if (spinner_init(&spin_hi, i915))
 		goto err_unlock;
@@ -440,6 +445,7 @@ static int live_late_preempt(void *arg)
 	spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
+	intel_runtime_pm_put(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
@@ -467,6 +473,7 @@ static int live_preempt_hang(void *arg)
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
+	intel_runtime_pm_get(i915);
 
 	if (spinner_init(&spin_hi, i915))
 		goto err_unlock;
@@ -561,6 +568,7 @@ static int live_preempt_hang(void *arg)
 	spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
+	intel_runtime_pm_put(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index 0d39b3bf0c0d..d1a0923d2f38 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -44,7 +44,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	if (err)
 		goto err_obj;
 
+	intel_runtime_pm_get(engine->i915);
 	rq = i915_request_alloc(engine, ctx);
+	intel_runtime_pm_put(engine->i915);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_pin;
@@ -175,7 +177,10 @@ static int switch_to_scratch_context(struct intel_engine_cs *engine)
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
+	intel_runtime_pm_get(engine->i915);
 	rq = i915_request_alloc(engine, ctx);
+	intel_runtime_pm_put(engine->i915);
+
 	kernel_context_close(ctx);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
-- 
2.19.0

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 04/40] drm/i915: Park the GPU on module load
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
  2018-09-19 19:55 ` [PATCH 02/40] drm: Fix syncobj handling of schedule() returning 0 Chris Wilson
  2018-09-19 19:55 ` [PATCH 03/40] drm/i915/selftests: Live tests emit requests and so require rpm Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-20 14:02   ` Tvrtko Ursulin
  2018-09-19 19:55   ` Chris Wilson
                   ` (39 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Once we have flushed the first request through the system to both load a
context and record the default state, tell the GPU to park and idle itself,
putting it immediately (hopefully, at least) into a powersaving state and
allowing us to start from a known state after setting up all our
bookkeeping.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a94d5a308c4d..6b347ffb996b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5417,6 +5417,14 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 
 	assert_kernel_context_is_current(i915);
 
+	/*
+	 * Immediately park the GPU so that we enable powersaving and
+	 * treat it as idle. The next time we issue a request, we will
+	 * unpark and start using the engine->pinned_default_state, otherwise
+	 * it is in limbo and an early reset may fail.
+	 */
+	__i915_gem_park(i915);
+
 	for_each_engine(engine, i915, id) {
 		struct i915_vma *state;
 		void *vaddr;
-- 
2.19.0

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 05/40] drm/i915: Handle incomplete Z_FINISH for compressed error states
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
@ 2018-09-19 19:55   ` Chris Wilson
  2018-09-19 19:55 ` [PATCH 03/40] drm/i915/selftests: Live tests emit requests and so require rpm Chris Wilson
                     ` (41 subsequent siblings)
  42 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson, Joonas Lahtinen, stable

The final call to zlib_deflate(Z_FINISH) may require more output
space to be allocated and so needs to be re-invoked. Failure to do so in
the current code leads to incomplete zlib streams (albeit intact due to
the use of Z_SYNC_FLUSH) resulting in the occasional short object
capture.
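
The fix follows the standard zlib finish sequence: keep calling
zlib_deflate(Z_FINISH), supplying a fresh output page whenever it returns
Z_OK, and stop only on Z_STREAM_END. In sketch form (get_output_page() is a
hypothetical stand-in for the page allocation done in compress_flush()
below):

	do {
		switch (zlib_deflate(zstream, Z_FINISH)) {
		case Z_OK:	/* more output space requested */
			get_output_page(zstream);	/* hypothetical helper */
			break;
		case Z_STREAM_END:	/* everything flushed */
			return 0;
		default:	/* any error */
			return -EIO;
		}
	} while (1);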

Testcase: igt/i915-error-capture.js
Fixes: 0a97015d45ee ("drm/i915: Compress GPU objects in error state")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.10+
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 60 +++++++++++++++++++++------
 1 file changed, 47 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 2835cacd0d08..9789f4ff8c32 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -237,6 +237,7 @@ static int compress_page(struct compress *c,
 			 struct drm_i915_error_object *dst)
 {
 	struct z_stream_s *zstream = &c->zstream;
+	int flush = Z_NO_FLUSH;
 
 	zstream->next_in = src;
 	if (c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
@@ -257,8 +258,11 @@ static int compress_page(struct compress *c,
 			zstream->avail_out = PAGE_SIZE;
 		}
 
-		if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK)
+		if (zlib_deflate(zstream, flush) != Z_OK)
 			return -EIO;
+
+		if (zstream->avail_out)
+			flush = Z_SYNC_FLUSH;
 	} while (zstream->avail_in);
 
 	/* Fallback to uncompressed if we increase size? */
@@ -268,19 +272,43 @@ static int compress_page(struct compress *c,
 	return 0;
 }
 
-static void compress_fini(struct compress *c,
+static int compress_flush(struct compress *c,
 			  struct drm_i915_error_object *dst)
 {
 	struct z_stream_s *zstream = &c->zstream;
+	unsigned long page;
 
-	if (dst) {
-		zlib_deflate(zstream, Z_FINISH);
-		dst->unused = zstream->avail_out;
-	}
+	do {
+		switch (zlib_deflate(zstream, Z_FINISH)) {
+		case Z_OK: /* more space requested */
+			page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+			if (!page)
+				return -ENOMEM;
+
+			dst->pages[dst->page_count++] = (void *)page;
+			zstream->next_out = (void *)page;
+			zstream->avail_out = PAGE_SIZE;
+			break;
+		case Z_STREAM_END:
+			goto end;
+		default: /* any error */
+			return -EIO;
+		}
+	} while (1);
+
+end:
+	memset(zstream->next_out, 0, zstream->avail_out);
+	dst->unused = zstream->avail_out;
+	return 0;
+}
+
+static void compress_fini(struct compress *c,
+			  struct drm_i915_error_object *dst)
+{
+	struct z_stream_s *zstream = &c->zstream;
 
 	zlib_deflateEnd(zstream);
 	kfree(zstream->workspace);
-
 	if (c->tmp)
 		free_page((unsigned long)c->tmp);
 }
@@ -319,6 +347,12 @@ static int compress_page(struct compress *c,
 	return 0;
 }
 
+static int compress_flush(struct compress *c,
+			  struct drm_i915_error_object *dst)
+{
+	return 0;
+}
+
 static void compress_fini(struct compress *c,
 			  struct drm_i915_error_object *dst)
 {
@@ -951,15 +985,15 @@ i915_error_object_create(struct drm_i915_private *i915,
 		if (ret)
 			goto unwind;
 	}
-	goto out;
 
+	if (compress_flush(&compress, dst)) {
 unwind:
-	while (dst->page_count--)
-		free_page((unsigned long)dst->pages[dst->page_count]);
-	kfree(dst);
-	dst = NULL;
+		while (dst->page_count--)
+			free_page((unsigned long)dst->pages[dst->page_count]);
+		kfree(dst);
+		dst = NULL;
+	}
 
-out:
 	compress_fini(&compress, dst);
 	ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE);
 	return dst;
-- 
2.19.0

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 06/40] drm/i915: Clear the error PTE just once on finish
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (3 preceding siblings ...)
  2018-09-19 19:55   ` Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-19 19:55 ` [PATCH 07/40] drm/i915: Cache the error string Chris Wilson
                   ` (37 subsequent siblings)
  42 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

We do not need to continually clear our dedicated PTE for error capture,
as it will simply be invalidated and rewritten for the next object. Only at
the end do we need to be sure that the PTE doesn't point back to any
buffer.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9789f4ff8c32..08fdf4dc200f 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -995,7 +995,6 @@ i915_error_object_create(struct drm_i915_private *i915,
 	}
 
 	compress_fini(&compress, dst);
-	ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE);
 	return dst;
 }
 
@@ -1781,6 +1780,14 @@ static unsigned long capture_find_epoch(const struct i915_gpu_state *error)
 	return epoch;
 }
 
+static void capture_finish(struct i915_gpu_state *error)
+{
+	struct i915_ggtt *ggtt = &error->i915->ggtt;
+	const u64 slot = ggtt->error_capture.start;
+
+	ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE);
+}
+
 static int capture(void *data)
 {
 	struct i915_gpu_state *error = data;
@@ -1805,6 +1812,7 @@ static int capture(void *data)
 
 	error->epoch = capture_find_epoch(error);
 
+	capture_finish(error);
 	return 0;
 }
 
-- 
2.19.0

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 07/40] drm/i915: Cache the error string
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (4 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 06/40] drm/i915: Clear the error PTE just once on finish Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-19 19:55 ` [PATCH 08/40] drm/i915/execlists: Avoid kicking priority on the current context Chris Wilson
                   ` (36 subsequent siblings)
  42 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Currently, we convert the error state into a string every time we read
from sysfs (and sysfs reads in page size (4KiB) chunks). We do try to
window the string and only capture the portion that is being read, but
that means that we must always convert up to the window to find the
start. For a very large error state bordering on EXEC_OBJECT_CAPTURE
abuse, this is noticeable as it degrades to O(N^2)!

As we do not have a convenient hook for sysfs open(), and we would like
to keep the lazy conversion into a string, do the conversion of the
whole string on the first read and keep the string until the error state
is freed.
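
As a rough worked example of the quadratic cost: a hypothetical 100MiB error
state read in 4KiB sysfs chunks takes ~25600 reads, and each read re-renders
on average ~50MiB of text just to reach its window, which is on the order of
a terabyte of string formatting for a single dump. Converting once and
caching the result reduces that to one 100MiB conversion plus a plain memcpy
per window.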

v2: Don't double advance simple_read_from_buffer
v3: Due to extreme pain of lack of vrealloc, use a scatterlist
v4: Keep the forward iterator loosely cached

Reported-by: Jason Ekstrand <jason@jlekstrand.net>
Testcase: igt/gem_exec_capture/many*
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c   |  32 +-
 drivers/gpu/drm/i915/i915_gpu_error.c | 402 +++++++++++++++-----------
 drivers/gpu/drm/i915/i915_gpu_error.h |  28 +-
 drivers/gpu/drm/i915/i915_sysfs.c     |  27 +-
 4 files changed, 273 insertions(+), 216 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index b4744a68cd88..2ac75bc10afa 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -943,30 +943,28 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
 			      size_t count, loff_t *pos)
 {
-	struct i915_gpu_state *error = file->private_data;
-	struct drm_i915_error_state_buf str;
+	struct i915_gpu_state *error;
 	ssize_t ret;
-	loff_t tmp;
+	void *buf;
 
+	error = file->private_data;
 	if (!error)
 		return 0;
 
-	ret = i915_error_state_buf_init(&str, error->i915, count, *pos);
-	if (ret)
-		return ret;
-
-	ret = i915_error_state_to_str(&str, error);
-	if (ret)
-		goto out;
+	/* Bounce buffer required because of kernfs __user API convenience. */
+	buf = kmalloc(count, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
 
-	tmp = 0;
-	ret = simple_read_from_buffer(ubuf, count, &tmp, str.buf, str.bytes);
-	if (ret < 0)
-		goto out;
+	ret = i915_gpu_state_copy_to_buffer(error, buf, *pos, count);
+	if (ret > 0) {
+		if (!copy_to_user(ubuf, buf, ret))
+			*pos += ret;
+		else
+			ret = -EFAULT;
+	}
 
-	*pos = str.start + ret;
-out:
-	i915_error_state_buf_release(&str);
+	kfree(buf);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 08fdf4dc200f..4d0090328c12 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -28,6 +28,8 @@
  */
 
 #include <generated/utsrelease.h>
+#include <linux/nmi.h>
+#include <linux/scatterlist.h>
 #include <linux/stop_machine.h>
 #include <linux/zlib.h>
 #include <drm/drm_print.h>
@@ -77,112 +79,110 @@ static const char *purgeable_flag(int purgeable)
 	return purgeable ? " purgeable" : "";
 }
 
-static bool __i915_error_ok(struct drm_i915_error_state_buf *e)
+static void __sg_set_buf(struct scatterlist *sg,
+			 void *addr, unsigned int len, loff_t it)
 {
-
-	if (!e->err && WARN(e->bytes > (e->size - 1), "overflow")) {
-		e->err = -ENOSPC;
-		return false;
-	}
-
-	if (e->bytes == e->size - 1 || e->err)
-		return false;
-
-	return true;
+	sg->page_link = (unsigned long)virt_to_page(addr);
+	sg->offset = offset_in_page(addr);
+	sg->length = len;
+	sg->dma_address = it;
 }
 
-static bool __i915_error_seek(struct drm_i915_error_state_buf *e,
-			      unsigned len)
+static bool __i915_error_grow(struct drm_i915_error_state_buf *e, size_t len)
 {
-	if (e->pos + len <= e->start) {
-		e->pos += len;
+	if (!len)
 		return false;
-	}
 
-	/* First vsnprintf needs to fit in its entirety for memmove */
-	if (len >= e->size) {
-		e->err = -EIO;
-		return false;
-	}
+	if (e->bytes + len + 1 > e->size) {
+		if (e->bytes) {
+			__sg_set_buf(e->cur++, e->buf, e->bytes, e->iter);
+			e->iter += e->bytes;
+			e->buf = NULL;
+			e->bytes = 0;
+		}
 
-	return true;
-}
+		if (e->cur == e->end) {
+			struct scatterlist *sgl;
 
-static void __i915_error_advance(struct drm_i915_error_state_buf *e,
-				 unsigned len)
-{
-	/* If this is first printf in this window, adjust it so that
-	 * start position matches start of the buffer
-	 */
+			sgl = (typeof(sgl))__get_free_page(GFP_KERNEL);
+			if (!sgl) {
+				e->err = -ENOMEM;
+				return false;
+			}
 
-	if (e->pos < e->start) {
-		const size_t off = e->start - e->pos;
+			if (e->cur) {
+				e->cur->offset = 0;
+				e->cur->length = 0;
+				e->cur->page_link =
+					(unsigned long)sgl | SG_CHAIN;
+			} else {
+				e->sgl = sgl;
+			}
 
-		/* Should not happen but be paranoid */
-		if (off > len || e->bytes) {
-			e->err = -EIO;
-			return;
+			e->cur = sgl;
+			e->end = sgl + SG_MAX_SINGLE_ALLOC - 1;
 		}
 
-		memmove(e->buf, e->buf + off, len - off);
-		e->bytes = len - off;
-		e->pos = e->start;
-		return;
+		e->size = ALIGN(len + 1, SZ_64K);
+		e->buf = kmalloc(e->size,
+				 GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+		if (!e->buf) {
+			e->size = PAGE_ALIGN(len + 1);
+			e->buf = kmalloc(e->size, GFP_KERNEL);
+		}
+		if (!e->buf) {
+			e->err = -ENOMEM;
+			return false;
+		}
 	}
 
-	e->bytes += len;
-	e->pos += len;
+	return true;
 }
 
 __printf(2, 0)
 static void i915_error_vprintf(struct drm_i915_error_state_buf *e,
-			       const char *f, va_list args)
+			       const char *fmt, va_list args)
 {
-	unsigned len;
+	va_list ap;
+	int len;
 
-	if (!__i915_error_ok(e))
+	if (e->err)
 		return;
 
-	/* Seek the first printf which is hits start position */
-	if (e->pos < e->start) {
-		va_list tmp;
-
-		va_copy(tmp, args);
-		len = vsnprintf(NULL, 0, f, tmp);
-		va_end(tmp);
-
-		if (!__i915_error_seek(e, len))
-			return;
+	va_copy(ap, args);
+	len = vsnprintf(NULL, 0, fmt, ap);
+	va_end(ap);
+	if (len <= 0) {
+		e->err = len;
+		return;
 	}
 
-	len = vsnprintf(e->buf + e->bytes, e->size - e->bytes, f, args);
-	if (len >= e->size - e->bytes)
-		len = e->size - e->bytes - 1;
+	if (!__i915_error_grow(e, len))
+		return;
 
-	__i915_error_advance(e, len);
+	GEM_BUG_ON(e->bytes >= e->size);
+	len = vscnprintf(e->buf + e->bytes, e->size - e->bytes, fmt, args);
+	if (len < 0) {
+		e->err = len;
+		return;
+	}
+	e->bytes += len;
 }
 
-static void i915_error_puts(struct drm_i915_error_state_buf *e,
-			    const char *str)
+static void i915_error_puts(struct drm_i915_error_state_buf *e, const char *str)
 {
 	unsigned len;
 
-	if (!__i915_error_ok(e))
+	if (e->err || !str)
 		return;
 
 	len = strlen(str);
+	if (!__i915_error_grow(e, len))
+		return;
 
-	/* Seek the first printf which is hits start position */
-	if (e->pos < e->start) {
-		if (!__i915_error_seek(e, len))
-			return;
-	}
-
-	if (len >= e->size - e->bytes)
-		len = e->size - e->bytes - 1;
+	GEM_BUG_ON(e->bytes + len > e->size);
 	memcpy(e->buf + e->bytes, str, len);
-
-	__i915_error_advance(e, len);
+	e->bytes += len;
 }
 
 #define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
@@ -263,6 +263,8 @@ static int compress_page(struct compress *c,
 
 		if (zstream->avail_out)
 			flush = Z_SYNC_FLUSH;
+
+		touch_nmi_watchdog();
 	} while (zstream->avail_in);
 
 	/* Fallback to uncompressed if we increase size? */
@@ -631,33 +633,50 @@ static void err_print_uc(struct drm_i915_error_state_buf *m,
 	print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log);
 }
 
-int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
-			    const struct i915_gpu_state *error)
+static void err_free_sgl(struct scatterlist *sgl)
 {
-	struct drm_i915_private *dev_priv = m->i915;
+	while (sgl) {
+		struct scatterlist *sg;
+
+		for (sg = sgl; !sg_is_chain(sg); sg++) {
+			kfree(sg_virt(sg));
+			if (sg_is_last(sg))
+				break;
+		}
+
+		sg = sg_is_last(sg) ? NULL : sg_chain_ptr(sg);
+		free_page((unsigned long)sgl);
+		sgl = sg;
+	}
+}
+
+static int err_print_to_sgl(struct i915_gpu_state *error)
+{
+	struct drm_i915_error_state_buf m;
 	struct drm_i915_error_object *obj;
 	struct timespec64 ts;
 	int i, j;
 
-	if (!error) {
-		err_printf(m, "No error state collected\n");
+	if (READ_ONCE(error->sgl))
 		return 0;
-	}
+
+	memset(&m, 0, sizeof(m));
+	m.i915 = error->i915;
 
 	if (*error->error_msg)
-		err_printf(m, "%s\n", error->error_msg);
-	err_printf(m, "Kernel: " UTS_RELEASE "\n");
+		err_printf(&m, "%s\n", error->error_msg);
+	err_printf(&m, "Kernel: " UTS_RELEASE "\n");
 	ts = ktime_to_timespec64(error->time);
-	err_printf(m, "Time: %lld s %ld us\n",
+	err_printf(&m, "Time: %lld s %ld us\n",
 		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
 	ts = ktime_to_timespec64(error->boottime);
-	err_printf(m, "Boottime: %lld s %ld us\n",
+	err_printf(&m, "Boottime: %lld s %ld us\n",
 		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
 	ts = ktime_to_timespec64(error->uptime);
-	err_printf(m, "Uptime: %lld s %ld us\n",
+	err_printf(&m, "Uptime: %lld s %ld us\n",
 		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
-	err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ);
-	err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n",
+	err_printf(&m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ);
+	err_printf(&m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n",
 		   error->capture,
 		   jiffies_to_msecs(jiffies - error->capture),
 		   jiffies_to_msecs(error->capture - error->epoch));
@@ -665,63 +684,63 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		if (error->engine[i].hangcheck_stalled &&
 		    error->engine[i].context.pid) {
-			err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n",
-				   engine_name(m->i915, i),
+			err_printf(&m, "Active process (on ring %s): %s [%d], score %d%s\n",
+				   engine_name(m.i915, i),
 				   error->engine[i].context.comm,
 				   error->engine[i].context.pid,
 				   error->engine[i].context.ban_score,
 				   bannable(&error->engine[i].context));
 		}
 	}
-	err_printf(m, "Reset count: %u\n", error->reset_count);
-	err_printf(m, "Suspend count: %u\n", error->suspend_count);
-	err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform));
-	err_print_pciid(m, error->i915);
+	err_printf(&m, "Reset count: %u\n", error->reset_count);
+	err_printf(&m, "Suspend count: %u\n", error->suspend_count);
+	err_printf(&m, "Platform: %s\n", intel_platform_name(error->device_info.platform));
+	err_print_pciid(&m, m.i915);
 
-	err_printf(m, "IOMMU enabled?: %d\n", error->iommu);
+	err_printf(&m, "IOMMU enabled?: %d\n", error->iommu);
 
-	if (HAS_CSR(dev_priv)) {
-		struct intel_csr *csr = &dev_priv->csr;
+	if (HAS_CSR(m.i915)) {
+		struct intel_csr *csr = &m.i915->csr;
 
-		err_printf(m, "DMC loaded: %s\n",
+		err_printf(&m, "DMC loaded: %s\n",
 			   yesno(csr->dmc_payload != NULL));
-		err_printf(m, "DMC fw version: %d.%d\n",
+		err_printf(&m, "DMC fw version: %d.%d\n",
 			   CSR_VERSION_MAJOR(csr->version),
 			   CSR_VERSION_MINOR(csr->version));
 	}
 
-	err_printf(m, "GT awake: %s\n", yesno(error->awake));
-	err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock));
-	err_printf(m, "PM suspended: %s\n", yesno(error->suspended));
-	err_printf(m, "EIR: 0x%08x\n", error->eir);
-	err_printf(m, "IER: 0x%08x\n", error->ier);
+	err_printf(&m, "GT awake: %s\n", yesno(error->awake));
+	err_printf(&m, "RPM wakelock: %s\n", yesno(error->wakelock));
+	err_printf(&m, "PM suspended: %s\n", yesno(error->suspended));
+	err_printf(&m, "EIR: 0x%08x\n", error->eir);
+	err_printf(&m, "IER: 0x%08x\n", error->ier);
 	for (i = 0; i < error->ngtier; i++)
-		err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]);
-	err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
-	err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
-	err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
-	err_printf(m, "CCID: 0x%08x\n", error->ccid);
-	err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings);
+		err_printf(&m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]);
+	err_printf(&m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
+	err_printf(&m, "FORCEWAKE: 0x%08x\n", error->forcewake);
+	err_printf(&m, "DERRMR: 0x%08x\n", error->derrmr);
+	err_printf(&m, "CCID: 0x%08x\n", error->ccid);
+	err_printf(&m, "Missed interrupts: 0x%08lx\n", m.i915->gpu_error.missed_irq_rings);
 
 	for (i = 0; i < error->nfence; i++)
-		err_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);
+		err_printf(&m, "  fence[%d] = %08llx\n", i, error->fence[i]);
 
-	if (INTEL_GEN(dev_priv) >= 6) {
-		err_printf(m, "ERROR: 0x%08x\n", error->error);
+	if (INTEL_GEN(m.i915) >= 6) {
+		err_printf(&m, "ERROR: 0x%08x\n", error->error);
 
-		if (INTEL_GEN(dev_priv) >= 8)
-			err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n",
+		if (INTEL_GEN(m.i915) >= 8)
+			err_printf(&m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n",
 				   error->fault_data1, error->fault_data0);
 
-		err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg);
+		err_printf(&m, "DONE_REG: 0x%08x\n", error->done_reg);
 	}
 
-	if (IS_GEN7(dev_priv))
-		err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
+	if (IS_GEN7(m.i915))
+		err_printf(&m, "ERR_INT: 0x%08x\n", error->err_int);
 
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		if (error->engine[i].engine_id != -1)
-			error_print_engine(m, &error->engine[i], error->epoch);
+			error_print_engine(&m, &error->engine[i], error->epoch);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
@@ -738,16 +757,16 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 
 			len += scnprintf(buf + len, sizeof(buf), "%s%s",
 					 first ? "" : ", ",
-					 dev_priv->engine[j]->name);
+					 m.i915->engine[j]->name);
 			first = 0;
 		}
 		scnprintf(buf + len, sizeof(buf), ")");
-		print_error_buffers(m, buf,
+		print_error_buffers(&m, buf,
 				    error->active_bo[i],
 				    error->active_bo_count[i]);
 	}
 
-	print_error_buffers(m, "Pinned (global)",
+	print_error_buffers(&m, "Pinned (global)",
 			    error->pinned_bo,
 			    error->pinned_bo_count);
 
@@ -756,115 +775,163 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 
 		obj = ee->batchbuffer;
 		if (obj) {
-			err_puts(m, dev_priv->engine[i]->name);
+			err_puts(&m, m.i915->engine[i]->name);
 			if (ee->context.pid)
-				err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d%s)",
+				err_printf(&m, " (submitted by %s [%d], ctx %d [%d], score %d%s)",
 					   ee->context.comm,
 					   ee->context.pid,
 					   ee->context.handle,
 					   ee->context.hw_id,
 					   ee->context.ban_score,
 					   bannable(&ee->context));
-			err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
+			err_printf(&m, " --- gtt_offset = 0x%08x %08x\n",
 				   upper_32_bits(obj->gtt_offset),
 				   lower_32_bits(obj->gtt_offset));
-			print_error_obj(m, dev_priv->engine[i], NULL, obj);
+			print_error_obj(&m, m.i915->engine[i], NULL, obj);
 		}
 
 		for (j = 0; j < ee->user_bo_count; j++)
-			print_error_obj(m, dev_priv->engine[i],
+			print_error_obj(&m, m.i915->engine[i],
 					"user", ee->user_bo[j]);
 
 		if (ee->num_requests) {
-			err_printf(m, "%s --- %d requests\n",
-				   dev_priv->engine[i]->name,
+			err_printf(&m, "%s --- %d requests\n",
+				   m.i915->engine[i]->name,
 				   ee->num_requests);
 			for (j = 0; j < ee->num_requests; j++)
-				error_print_request(m, " ",
+				error_print_request(&m, " ",
 						    &ee->requests[j],
 						    error->epoch);
 		}
 
 		if (IS_ERR(ee->waiters)) {
-			err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n",
-				   dev_priv->engine[i]->name);
+			err_printf(&m, "%s --- ? waiters [unable to acquire spinlock]\n",
+				   m.i915->engine[i]->name);
 		} else if (ee->num_waiters) {
-			err_printf(m, "%s --- %d waiters\n",
-				   dev_priv->engine[i]->name,
+			err_printf(&m, "%s --- %d waiters\n",
+				   m.i915->engine[i]->name,
 				   ee->num_waiters);
 			for (j = 0; j < ee->num_waiters; j++) {
-				err_printf(m, " seqno 0x%08x for %s [%d]\n",
+				err_printf(&m, " seqno 0x%08x for %s [%d]\n",
 					   ee->waiters[j].seqno,
 					   ee->waiters[j].comm,
 					   ee->waiters[j].pid);
 			}
 		}
 
-		print_error_obj(m, dev_priv->engine[i],
+		print_error_obj(&m, m.i915->engine[i],
 				"ringbuffer", ee->ringbuffer);
 
-		print_error_obj(m, dev_priv->engine[i],
+		print_error_obj(&m, m.i915->engine[i],
 				"HW Status", ee->hws_page);
 
-		print_error_obj(m, dev_priv->engine[i],
+		print_error_obj(&m, m.i915->engine[i],
 				"HW context", ee->ctx);
 
-		print_error_obj(m, dev_priv->engine[i],
+		print_error_obj(&m, m.i915->engine[i],
 				"WA context", ee->wa_ctx);
 
-		print_error_obj(m, dev_priv->engine[i],
+		print_error_obj(&m, m.i915->engine[i],
 				"WA batchbuffer", ee->wa_batchbuffer);
 
-		print_error_obj(m, dev_priv->engine[i],
+		print_error_obj(&m, m.i915->engine[i],
 				"NULL context", ee->default_state);
 	}
 
 	if (error->overlay)
-		intel_overlay_print_error_state(m, error->overlay);
+		intel_overlay_print_error_state(&m, error->overlay);
 
 	if (error->display)
-		intel_display_print_error_state(m, error->display);
+		intel_display_print_error_state(&m, error->display);
 
-	err_print_capabilities(m, &error->device_info, &error->driver_caps);
-	err_print_params(m, &error->params);
-	err_print_uc(m, &error->uc);
+	err_print_capabilities(&m, &error->device_info, &error->driver_caps);
+	err_print_params(&m, &error->params);
+	err_print_uc(&m, &error->uc);
 
-	if (m->bytes == 0 && m->err)
-		return m->err;
+	if (m.buf) {
+		__sg_set_buf(m.cur++, m.buf, m.bytes, m.iter);
+		m.bytes = 0;
+		m.buf = 0;
+	}
+	if (m.cur) {
+		GEM_BUG_ON(m.end < m.cur);
+		sg_mark_end(m.cur - 1);
+	}
+	GEM_BUG_ON(m.sgl && !m.cur);
+
+	if (m.err) {
+		err_free_sgl(m.sgl);
+		return m.err;
+	}
+
+	if (cmpxchg(&error->sgl, NULL, m.sgl))
+		err_free_sgl(m.sgl);
 
 	return 0;
 }
 
-int i915_error_state_buf_init(struct drm_i915_error_state_buf *ebuf,
-			      struct drm_i915_private *i915,
-			      size_t count, loff_t pos)
+ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error,
+				      char *buf, loff_t off, size_t rem)
 {
-	memset(ebuf, 0, sizeof(*ebuf));
-	ebuf->i915 = i915;
+	struct scatterlist *sg;
+	size_t count;
+	loff_t pos;
+	int err;
 
-	/* We need to have enough room to store any i915_error_state printf
-	 * so that we can move it to start position.
-	 */
-	ebuf->size = count + 1 > PAGE_SIZE ? count + 1 : PAGE_SIZE;
-	ebuf->buf = kmalloc(ebuf->size,
-				GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+	if (!error || !rem)
+		return 0;
 
-	if (ebuf->buf == NULL) {
-		ebuf->size = PAGE_SIZE;
-		ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL);
-	}
+	err = err_print_to_sgl(error);
+	if (err)
+		return err;
 
-	if (ebuf->buf == NULL) {
-		ebuf->size = 128;
-		ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL);
-	}
+	sg = READ_ONCE(error->fit);
+	if (!sg || off < sg->dma_address)
+		sg = error->sgl;
+	if (!sg)
+		return 0;
 
-	if (ebuf->buf == NULL)
-		return -ENOMEM;
+	pos = sg->dma_address;
+	count = 0;
+	do {
+		size_t len, start;
 
-	ebuf->start = pos;
+		if (sg_is_chain(sg)) {
+			sg = sg_chain_ptr(sg);
+			GEM_BUG_ON(sg_is_chain(sg));
+		}
 
-	return 0;
+		len = sg->length;
+		if (pos + len <= off) {
+			pos += len;
+			continue;
+		}
+
+		start = sg->offset;
+		if (pos < off) {
+			GEM_BUG_ON(off - pos > len);
+			len -= off - pos;
+			start += off - pos;
+			pos = off;
+		}
+
+		len = min(len, rem);
+		GEM_BUG_ON(!len || len > sg->length);
+
+		memcpy(buf, page_address(sg_page(sg)) + start, len);
+
+		count += len;
+		pos += len;
+
+		buf += len;
+		rem -= len;
+		if (!rem) {
+			WRITE_ONCE(error->fit, sg);
+			break;
+		}
+	} while (!sg_is_last(sg++));
+
+	return count;
 }
 
 static void i915_error_object_free(struct drm_i915_error_object *obj)
@@ -937,6 +1004,7 @@ void __i915_gpu_state_free(struct kref *error_ref)
 	cleanup_params(error);
 	cleanup_uc_state(error);
 
+	err_free_sgl(error->sgl);
 	kfree(error);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index f893a4e8b783..1c1bc0c23468 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -191,6 +191,8 @@ struct i915_gpu_state {
 	} *active_bo[I915_NUM_ENGINES], *pinned_bo;
 	u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count;
 	struct i915_address_space *active_vm[I915_NUM_ENGINES];
+
+	struct scatterlist *sgl, *fit;
 };
 
 struct i915_gpu_error {
@@ -297,29 +299,20 @@ struct i915_gpu_error {
 
 struct drm_i915_error_state_buf {
 	struct drm_i915_private *i915;
-	unsigned int bytes;
-	unsigned int size;
+	struct scatterlist *sgl, *cur, *end;
+
+	char *buf;
+	size_t bytes;
+	size_t size;
+	loff_t iter;
+
 	int err;
-	u8 *buf;
-	loff_t start;
-	loff_t pos;
 };
 
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
 
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
-int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
-			    const struct i915_gpu_state *gpu);
-int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb,
-			      struct drm_i915_private *i915,
-			      size_t count, loff_t pos);
-
-static inline void
-i915_error_state_buf_release(struct drm_i915_error_state_buf *eb)
-{
-	kfree(eb->buf);
-}
 
 struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);
 void i915_capture_error_state(struct drm_i915_private *dev_priv,
@@ -333,6 +326,9 @@ i915_gpu_state_get(struct i915_gpu_state *gpu)
 	return gpu;
 }
 
+ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error,
+				      char *buf, loff_t offset, size_t count);
+
 void __i915_gpu_state_free(struct kref *kref);
 static inline void i915_gpu_state_put(struct i915_gpu_state *gpu)
 {
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index e5e6f6bb2b05..ae63a7d0f51d 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -516,26 +516,21 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
 {
 
 	struct device *kdev = kobj_to_dev(kobj);
-	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-	struct drm_i915_error_state_buf error_str;
+	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
 	struct i915_gpu_state *gpu;
 	ssize_t ret;
 
-	ret = i915_error_state_buf_init(&error_str, dev_priv, count, off);
-	if (ret)
-		return ret;
-
-	gpu = i915_first_error_state(dev_priv);
-	ret = i915_error_state_to_str(&error_str, gpu);
-	if (ret)
-		goto out;
-
-	ret = count < error_str.bytes ? count : error_str.bytes;
-	memcpy(buf, error_str.buf, ret);
+	gpu = i915_first_error_state(i915);
+	if (gpu) {
+		ret = i915_gpu_state_copy_to_buffer(gpu, buf, off, count);
+		i915_gpu_state_put(gpu);
+	} else {
+		const char *str = "No error state collected\n";
+		size_t len = strlen(str);
 
-out:
-	i915_gpu_state_put(gpu);
-	i915_error_state_buf_release(&error_str);
+		ret = min_t(size_t, count, len - off);
+		memcpy(buf, str + off, ret);
+	}
 
 	return ret;
 }
-- 
2.19.0

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 08/40] drm/i915/execlists: Avoid kicking priority on the current context
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (5 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 07/40] drm/i915: Cache the error string Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-19 19:55 ` [PATCH 09/40] drm/i915/selftests: Free the batch along the contexts error path Chris Wilson
                   ` (35 subsequent siblings)
  42 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

If the request is currently on the HW (in port 0), then we do not need to
kick the submission tasklet to evaluate whether we should preempt ourselves
in order to execute it again.

In the case that was annoying me:

   execlists_schedule: rq(18:211173).prio=0 -> 2
   need_preempt: last(18:211174).prio=0, queue.prio=2

We are bumping the priority of the first of a pair of requests running
in the current context. Then, when evaluating preemption, we would see
that our boosted request has a higher priority than the last executing
request in ELSP[0] and so trigger preemption, not realising that our
intended request was already executing.
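
The heart of the change, condensed from the execlists hunk below (a sketch;
rq stands for the request behind the node being rescheduled), is to compare
the boosted request against whatever is currently in ELSP[0] before kicking
the tasklet:

	if (prio <= engine->execlists.queue_priority)
		continue;

	/* Already on the HW? Then there is nothing for us to preempt. */
	if (rq->global_seqno &&
	    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
			      rq->global_seqno))
		continue;

	__update_queue(engine, prio);
	tasklet_hi_schedule(&engine->execlists.tasklet);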

v2: As we assume state of the execlists->port[] that is only valid while
we hold the timeline lock, we have to repeat some earlier tests on the
validity of the node.
v3: Wrap guc submission under the timeline.lock as is now the way of all
things.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_guc_submission.c | 18 +++------
 drivers/gpu/drm/i915/intel_lrc.c            | 41 +++++++++++++++------
 2 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 07b9d313b019..6f693ef62c64 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -771,19 +771,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 
 static void guc_dequeue(struct intel_engine_cs *engine)
 {
-	unsigned long flags;
-	bool submit;
-
-	local_irq_save(flags);
-
-	spin_lock(&engine->timeline.lock);
-	submit = __guc_dequeue(engine);
-	spin_unlock(&engine->timeline.lock);
-
-	if (submit)
+	if (__guc_dequeue(engine))
 		guc_submit(engine);
-
-	local_irq_restore(flags);
 }
 
 static void guc_submission_tasklet(unsigned long data)
@@ -792,6 +781,9 @@ static void guc_submission_tasklet(unsigned long data)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
 	struct i915_request *rq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	rq = port_request(port);
 	while (rq && i915_request_completed(rq)) {
@@ -815,6 +807,8 @@ static void guc_submission_tasklet(unsigned long data)
 
 	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
 		guc_dequeue(engine);
+
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static struct i915_request *
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a51be16ddaac..b4448b05d78a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -355,13 +355,8 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
 {
 	struct intel_engine_cs *engine =
 		container_of(execlists, typeof(*engine), execlists);
-	unsigned long flags;
-
-	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	__unwind_incomplete_requests(engine);
-
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static inline void
@@ -1233,9 +1228,13 @@ static void execlists_schedule(struct i915_request *request,
 
 		engine = sched_lock_engine(node, engine);
 
+		/* Recheck after acquiring the engine->timeline.lock */
 		if (prio <= node->attr.priority)
 			continue;
 
+		if (i915_sched_node_signaled(node))
+			continue;
+
 		node->attr.priority = prio;
 		if (!list_empty(&node->link)) {
 			if (last != engine) {
@@ -1244,14 +1243,34 @@ static void execlists_schedule(struct i915_request *request,
 			}
 			GEM_BUG_ON(pl->priority != prio);
 			list_move_tail(&node->link, &pl->requests);
+		} else {
+			/*
+			 * If the request is not in the priolist queue because
+			 * it is not yet runnable, then it doesn't contribute
+			 * to our preemption decisions. On the other hand,
+			 * if the request is on the HW, it too is not in the
+			 * queue; but in that case we may still need to reorder
+			 * the inflight requests.
+			 */
+			if (!i915_sw_fence_done(&sched_to_request(node)->submit))
+				continue;
 		}
 
-		if (prio > engine->execlists.queue_priority &&
-		    i915_sw_fence_done(&sched_to_request(node)->submit)) {
-			/* defer submission until after all of our updates */
-			__update_queue(engine, prio);
-			tasklet_hi_schedule(&engine->execlists.tasklet);
-		}
+		if (prio <= engine->execlists.queue_priority)
+			continue;
+
+		/*
+		 * If we are already the currently executing context, don't
+		 * bother evaluating if we should preempt ourselves.
+		 */
+		if (sched_to_request(node)->global_seqno &&
+		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
+				      sched_to_request(node)->global_seqno))
+			continue;
+
+		/* Defer (tasklet) submission until after all of our updates. */
+		__update_queue(engine, prio);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
 	}
 
 	spin_unlock_irq(&engine->timeline.lock);
-- 
2.19.0

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 09/40] drm/i915/selftests: Free the batch along the contexts error path
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (6 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 08/40] drm/i915/execlists: Avoid kicking priority on the current context Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-20  8:30   ` Mika Kuoppala
  2018-09-19 19:55 ` [PATCH 10/40] drm/i915/selftests: Basic stress test for rapid context switching Chris Wilson
                   ` (34 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Remember to release the batch bo reference if we hit an error trying to
submit our MI_STORE_DWORD_IMM.

References: https://bugs.freedesktop.org/show_bug.cgi?id=107979
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/selftests/i915_gem_context.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 79f9bcaf0c05..a89d31075c7a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -195,6 +195,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	i915_request_add(rq);
 err_batch:
 	i915_vma_unpin(batch);
+	i915_vma_put(batch);
 err_vma:
 	i915_vma_unpin(vma);
 	return err;
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 10/40] drm/i915/selftests: Basic stress test for rapid context switching
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (7 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 09/40] drm/i915/selftests: Free the batch along the contexts error path Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-20 10:38   ` Mika Kuoppala
  2018-09-19 19:55 ` [PATCH 11/40] drm/i915/execlists: Onion unwind for logical_ring_init() failure Chris Wilson
                   ` (33 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

We need to exercise the HW and submission paths for switching contexts
rapidly to check that features such as execlists' wa_tail are adequate.
Plus it's an interesting baseline latency metric.

v2: Check the initial request for allocation errors

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/selftests/i915_gem_context.c | 188 ++++++++++++++++++
 1 file changed, 188 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index a89d31075c7a..f1a57e3c3820 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -22,6 +22,8 @@
  *
  */
 
+#include <linux/prime_numbers.h>
+
 #include "../i915_selftest.h"
 #include "i915_random.h"
 #include "igt_flush_test.h"
@@ -32,6 +34,191 @@
 
 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
 
+struct live_test {
+	struct drm_i915_private *i915;
+	const char *func;
+	const char *name;
+
+	unsigned int reset_count;
+};
+
+static int begin_live_test(struct live_test *t,
+			   struct drm_i915_private *i915,
+			   const char *func,
+			   const char *name)
+{
+	int err;
+
+	t->i915 = i915;
+	t->func = func;
+	t->name = name;
+
+	err = i915_gem_wait_for_idle(i915,
+				     I915_WAIT_LOCKED,
+				     MAX_SCHEDULE_TIMEOUT);
+	if (err) {
+		pr_err("%s(%s): failed to idle before, with err=%d!",
+		       func, name, err);
+		return err;
+	}
+
+	i915->gpu_error.missed_irq_rings = 0;
+	t->reset_count = i915_reset_count(&i915->gpu_error);
+
+	return 0;
+}
+
+static int end_live_test(struct live_test *t)
+{
+	struct drm_i915_private *i915 = t->i915;
+
+	i915_retire_requests(i915);
+
+	if (wait_for(intel_engines_are_idle(i915), 10)) {
+		pr_err("%s(%s): GPU not idle\n", t->func, t->name);
+		return -EIO;
+	}
+
+	if (t->reset_count != i915_reset_count(&i915->gpu_error)) {
+		pr_err("%s(%s): GPU was reset %d times!\n",
+		       t->func, t->name,
+		       i915_reset_count(&i915->gpu_error) - t->reset_count);
+		return -EIO;
+	}
+
+	if (i915->gpu_error.missed_irq_rings) {
+		pr_err("%s(%s): Missed interrupts on engines %lx\n",
+		       t->func, t->name, i915->gpu_error.missed_irq_rings);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int live_nop_switch(void *arg)
+{
+	const unsigned int nctx = 1024;
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct i915_gem_context **ctx;
+	enum intel_engine_id id;
+	struct drm_file *file;
+	struct live_test t;
+	unsigned long n;
+	int err = -ENODEV;
+
+	/*
+	 * Create as many contexts as we can feasibly get away with
+	 * and check we can switch between them rapidly.
+	 *
+	 * Serves as very simple stress test for submission and HW switching
+	 * between contexts.
+	 */
+
+	if (!DRIVER_CAPS(i915)->has_logical_contexts)
+		return 0;
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx) {
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+
+	for (n = 0; n < nctx; n++) {
+		ctx[n] = i915_gem_create_context(i915, file->driver_priv);
+		if (IS_ERR(ctx[n])) {
+			err = PTR_ERR(ctx[n]);
+			goto out_unlock;
+		}
+	}
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *rq;
+		unsigned long end_time, prime;
+		ktime_t times[2] = {};
+
+		times[0] = ktime_get_raw();
+		for (n = 0; n < nctx; n++) {
+			rq = i915_request_alloc(engine, ctx[n]);
+			if (IS_ERR(rq)) {
+				err = PTR_ERR(rq);
+				goto out_unlock;
+			}
+			i915_request_add(rq);
+		}
+		i915_request_wait(rq,
+				  I915_WAIT_LOCKED,
+				  MAX_SCHEDULE_TIMEOUT);
+		times[1] = ktime_get_raw();
+
+		pr_info("Populated %d contexts on %s in %lluns\n",
+			nctx, engine->name, ktime_to_ns(times[1] - times[0]));
+
+		err = begin_live_test(&t, i915, __func__, engine->name);
+		if (err)
+			goto out_unlock;
+
+		end_time = jiffies + i915_selftest.timeout_jiffies;
+		for_each_prime_number_from(prime, 2, 8192) {
+			times[1] = ktime_get_raw();
+
+			for (n = 0; n < prime; n++) {
+				rq = i915_request_alloc(engine, ctx[n % nctx]);
+				if (IS_ERR(rq)) {
+					err = PTR_ERR(rq);
+					goto out_unlock;
+				}
+
+				/*
+				 * This space is left intentionally blank.
+				 *
+				 * We do not actually want to perform any
+				 * action with this request, we just want
+				 * to measure the latency in allocation
+				 * and submission of our breadcrumbs -
+				 * ensuring that the bare request is sufficient
+				 * for the system to work (i.e. proper HEAD
+				 * tracking of the rings, interrupt handling,
+				 * etc). It also gives us the lowest bounds
+				 * for latency.
+				 */
+
+				i915_request_add(rq);
+			}
+			i915_request_wait(rq,
+					  I915_WAIT_LOCKED,
+					  MAX_SCHEDULE_TIMEOUT);
+
+			times[1] = ktime_sub(ktime_get_raw(), times[1]);
+			if (prime == 2)
+				times[0] = times[1];
+
+			if (__igt_timeout(end_time, NULL))
+				break;
+		}
+
+		err = end_live_test(&t);
+		if (err)
+			goto out_unlock;
+
+		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
+			engine->name,
+			ktime_to_ns(times[0]),
+			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	mock_file_free(i915, file);
+	return err;
+}
+
 static struct i915_vma *
 gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
 {
@@ -718,6 +905,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_switch_to_kernel_context),
+		SUBTEST(live_nop_switch),
 		SUBTEST(igt_ctx_exec),
 		SUBTEST(igt_ctx_readonly),
 	};
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 11/40] drm/i915/execlists: Onion unwind for logical_ring_init() failure
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (8 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 10/40] drm/i915/selftests: Basic stress test for rapid context switching Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-20 14:18   ` Mika Kuoppala
  2018-09-20 14:21   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 12/40] drm/i915/execlists: Assert the queue is non-empty on unsubmitting Chris Wilson
                   ` (32 subsequent siblings)
  42 siblings, 2 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Fix up the error unwind for logical_ring_init() failing by moving the
cleanup into the callers who own the various bits of state during
initialisation.
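
As a stand-alone illustration (not part of the patch itself), the sketch
below shows the "onion" unwind pattern being adopted here: each error label
undoes only the steps that succeeded before it, in reverse order, so every
caller cleans up exactly the state it owns. The setup/teardown names are
invented for the example.

#include <stdio.h>

static int setup_a(void) { return 0; }   /* pretend these can fail */
static void teardown_a(void) { }
static int setup_b(void) { return -1; }  /* force an error for the demo */

static int init(void)
{
	int ret;

	ret = setup_a();
	if (ret)
		return ret;              /* nothing to unwind yet */

	ret = setup_b();
	if (ret)
		goto err_teardown_a;     /* undo only what we own */

	return 0;

err_teardown_a:
	teardown_a();
	return ret;
}

int main(void)
{
	printf("init() = %d\n", init());
	return 0;
}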

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b4448b05d78a..3edb417caa7b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2415,7 +2415,7 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 
 	ret = intel_engine_init_common(engine);
 	if (ret)
-		goto error;
+		return ret;
 
 	if (HAS_LOGICAL_RING_ELSQ(i915)) {
 		execlists->submit_reg = i915->regs +
@@ -2457,10 +2457,6 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 	reset_csb_pointers(execlists);
 
 	return 0;
-
-error:
-	intel_logical_ring_cleanup(engine);
-	return ret;
 }
 
 int logical_render_ring_init(struct intel_engine_cs *engine)
@@ -2483,10 +2479,14 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 	engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs;
 	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz;
 
-	ret = intel_engine_create_scratch(engine, PAGE_SIZE);
+	ret = logical_ring_init(engine);
 	if (ret)
 		return ret;
 
+	ret = intel_engine_create_scratch(engine, PAGE_SIZE);
+	if (ret)
+		goto err_cleanup_common;
+
 	ret = intel_init_workaround_bb(engine);
 	if (ret) {
 		/*
@@ -2498,7 +2498,11 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 			  ret);
 	}
 
-	return logical_ring_init(engine);
+	return 0;
+
+err_cleanup_common:
+	intel_engine_cleanup_common(engine);
+	return ret;
 }
 
 int logical_xcs_ring_init(struct intel_engine_cs *engine)
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 12/40] drm/i915/execlists: Assert the queue is non-empty on unsubmitting
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (9 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 11/40] drm/i915/execlists: Onion unwind for logical_ring_init() failure Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-24  9:07   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 13/40] drm/i915: Reserve some priority bits for internal use Chris Wilson
                   ` (31 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

In the sequence

<0>[  531.960431] drv_self-4806    7.... 527402570us : intel_gpu_reset: engine_mask=1, ret=0, retry=0
<0>[  531.960431] drv_self-4806    7.... 527402571us : execlists_reset: rcs0 request global=115de, current=71133
<0>[  531.960431] drv_self-4806    7d..1 527402571us : execlists_cancel_port_requests: rcs0:port0 global=71134 (fence 826b:198), (current 71133)
<0>[  531.960431] drv_self-4806    7d..1 527402572us : execlists_cancel_port_requests: rcs0:port1 global=71135 (fence 826c:53), (current 71133)
<0>[  531.960431] drv_self-4806    7d..1 527402572us : __i915_request_unsubmit: rcs0 fence 826c:53 <- global=71135, current 71133
<0>[  531.960431] drv_self-4806    7d..1 527402579us : __i915_request_unsubmit: rcs0 fence 826b:198 <- global=71134, current 71133
<0>[  531.960431] drv_self-4806    7.... 527402613us : intel_engine_cancel_stop_cs: rcs0
<0>[  531.960431] drv_self-4806    7.... 527402624us : execlists_reset_finish: rcs0

we are missing the execlists_submission_tasklet() invocation before
execlists_reset_finish(), implying that either the queue is empty or we
failed to schedule and run the tasklet on finish. Add an assert so that
we are sure that, on unsubmitting the incomplete request after reset,
the queue is indeed populated.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3edb417caa7b..e8de250c3413 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -344,6 +344,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 			last_prio = rq_prio(rq);
 			p = lookup_priolist(engine, last_prio);
 		}
+		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
 		GEM_BUG_ON(p->priority != rq_prio(rq));
 		list_add(&rq->sched.link, &p->requests);
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 13/40] drm/i915: Reserve some priority bits for internal use
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (10 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 12/40] drm/i915/execlists: Assert the queue is non-empty on unsubmitting Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-24  9:12   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 14/40] drm/i915: Combine multiple internal plists into the same i915_priolist bucket Chris Wilson
                   ` (30 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

In the next few patches, we will want to give a small priority boost to
some requests/queues, but not so much that we perturb the user-controlled
order. As such, we shift the user priority bits higher, leaving ourselves
a few low-priority bits for our bumping.
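
For illustration only, the stand-alone sketch below shows how a user
priority packed above a reserved low bit combines with an internal bump
without disturbing the user-visible ordering. The macro values here are
examples, not the driver's definitions (this patch keeps the shift at 0;
a later patch raises it to make room for the internal bits).

#include <stdio.h>

#define USER_PRIORITY_SHIFT 1                 /* example value only */
#define USER_PRIORITY(x)    ((x) << USER_PRIORITY_SHIFT)
#define PRIORITY_NEWCLIENT  (1 << 0)          /* an internal bump bit */

int main(void)
{
	int user = 2;                           /* user-visible level */
	int prio = USER_PRIORITY(user);         /* shifted user bits */
	int bumped = prio | PRIORITY_NEWCLIENT; /* internal boost */

	/*
	 * The user-visible value is recovered by shifting back down, so
	 * an internal bump never reorders two different user levels.
	 */
	printf("user=%d prio=%d bumped=%d user-visible=%d\n",
	       user, prio, bumped, bumped >> USER_PRIORITY_SHIFT);
	return 0;
}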

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            | 2 +-
 drivers/gpu/drm/i915/i915_gem_context.c    | 9 +++++----
 drivers/gpu/drm/i915/i915_scheduler.h      | 6 ++++++
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 8 +++++---
 4 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7d4daa7412f1..b902bb96e0be 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3230,7 +3230,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj,
 int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 				  unsigned int flags,
 				  const struct i915_sched_attr *attr);
-#define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX
+#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
 
 int __must_check
 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index f772593b99ab..150d7a6b2bd3 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -337,7 +337,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	kref_init(&ctx->ref);
 	list_add_tail(&ctx->link, &dev_priv->contexts.list);
 	ctx->i915 = dev_priv;
-	ctx->sched.priority = I915_PRIORITY_NORMAL;
+	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
 
 	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
 		struct intel_context *ce = &ctx->__engine[n];
@@ -504,7 +504,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	}
 
 	i915_gem_context_clear_bannable(ctx);
-	ctx->sched.priority = prio;
+	ctx->sched.priority = I915_USER_PRIORITY(prio);
 	ctx->ring_size = PAGE_SIZE;
 
 	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
@@ -879,7 +879,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		args->value = i915_gem_context_is_bannable(ctx);
 		break;
 	case I915_CONTEXT_PARAM_PRIORITY:
-		args->value = ctx->sched.priority;
+		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
 		break;
 	default:
 		ret = -EINVAL;
@@ -948,7 +948,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				 !capable(CAP_SYS_NICE))
 				ret = -EPERM;
 			else
-				ctx->sched.priority = priority;
+				ctx->sched.priority =
+					I915_USER_PRIORITY(priority);
 		}
 		break;
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 70a42220358d..7edfad0abfd7 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -19,6 +19,12 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
+#define I915_USER_PRIORITY_SHIFT 0
+#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
+
+#define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
+#define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
+
 struct i915_sched_attr {
 	/**
 	 * @priority: execution and service priority
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 1aea7a8f2224..94ceb5f6c507 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -291,12 +291,14 @@ static int live_preempt(void *arg)
 	ctx_hi = kernel_context(i915);
 	if (!ctx_hi)
 		goto err_spin_lo;
-	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
+	ctx_hi->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
 	ctx_lo = kernel_context(i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
-	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
+	ctx_lo->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
@@ -417,7 +419,7 @@ static int live_late_preempt(void *arg)
 			goto err_wedged;
 		}
 
-		attr.priority = I915_PRIORITY_MAX;
+		attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
 		engine->schedule(rq, &attr);
 
 		if (!wait_for_spinner(&spin_hi, rq)) {
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 14/40] drm/i915: Combine multiple internal plists into the same i915_priolist bucket
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (11 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 13/40] drm/i915: Reserve some priority bits for internal use Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-24 10:25   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 15/40] drm/i915: Priority boost for new clients Chris Wilson
                   ` (29 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

As we are about to allow ourselves to slightly bump the user priority
into a few different sublevels, pack those internal priority lists
into the same i915_priolist to keep the rbtree compact and avoid having
to allocate the default user priority even after the internal bumping.
The downside to having a requests[] array rather than a node per active
list is that we then have to walk over the empty higher priority lists. To
compensate, we track the active buckets and use a small bitmap to skip
over any inactive ones.
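
As a rough stand-alone sketch (not the driver's data structures), the
following shows the bucket idea: one node per user priority, an array
slot per internal sublevel, and a used-bitmap consulted with ffs() so
consumption skips the empty sublevels. The names and fixed-size arrays
are invented purely for the example.

#include <stdio.h>
#include <strings.h>   /* ffs() */

#define COUNT 4                    /* number of internal sublevels */

struct bucket {
	int requests[COUNT][8];    /* stand-in for the per-sublevel lists */
	int nr[COUNT];
	unsigned int used;         /* one bit per non-empty sublevel */
};

static void enqueue(struct bucket *b, int sublevel, int value)
{
	b->requests[sublevel][b->nr[sublevel]++] = value;
	b->used |= 1u << sublevel;
}

static void consume(struct bucket *b)
{
	int idx;

	/* Slot 0 is the most important sublevel, so take lowest set bit first. */
	while ((idx = ffs(b->used))) {
		idx -= 1;
		for (int i = 0; i < b->nr[idx]; i++)
			printf("submit request %d (sublevel %d)\n",
			       b->requests[idx][i], idx);
		b->nr[idx] = 0;
		b->used &= ~(1u << idx);
	}
}

int main(void)
{
	struct bucket b = { 0 };

	enqueue(&b, 2, 100);	/* lower sublevel */
	enqueue(&b, 0, 200);	/* boosted sublevel */
	consume(&b);		/* 200 is submitted before 100 */
	return 0;
}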

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c      |  6 +-
 drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++-
 drivers/gpu/drm/i915/intel_lrc.c            | 87 ++++++++++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.h     | 13 ++-
 4 files changed, 80 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 217ed3ee1cab..83f2f7774c1f 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1534,10 +1534,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	count = 0;
 	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
 	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
-		struct i915_priolist *p =
-			rb_entry(rb, typeof(*p), node);
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		int i;
 
-		list_for_each_entry(rq, &p->requests, sched.link) {
+		priolist_for_each_request(rq, p, i) {
 			if (count++ < MAX_REQUESTS_TO_SHOW - 1)
 				print_request(m, rq, "\t\tQ ");
 			else
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 6f693ef62c64..8531bd917ec3 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -726,30 +726,28 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
+		int i;
 
-		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
+		priolist_for_each_request_consume(rq, rn, p, i) {
 			if (last && rq->hw_context != last->hw_context) {
-				if (port == last_port) {
-					__list_del_many(&p->requests,
-							&rq->sched.link);
+				if (port == last_port)
 					goto done;
-				}
 
 				if (submit)
 					port_assign(port, last);
 				port++;
 			}
 
-			INIT_LIST_HEAD(&rq->sched.link);
+			list_del_init(&rq->sched.link);
 
 			__i915_request_submit(rq);
 			trace_i915_request_in(rq, port_index(port, execlists));
+
 			last = rq;
 			submit = true;
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
-		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e8de250c3413..aeae82b5223c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -259,14 +259,49 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
 	ce->lrc_desc = desc;
 }
 
-static struct i915_priolist *
+static void assert_priolists(struct intel_engine_execlists * const execlists,
+			     int queue_priority)
+{
+	struct rb_node *rb;
+	int last_prio, i;
+
+	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+		return;
+
+	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
+		   rb_first(&execlists->queue.rb_root));
+
+	last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
+	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
+		struct i915_priolist *p = to_priolist(rb);
+
+		GEM_BUG_ON(p->priority >= last_prio);
+		last_prio = p->priority;
+
+		GEM_BUG_ON(!p->used);
+		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
+			if (list_empty(&p->requests[i]))
+				continue;
+
+			GEM_BUG_ON(!(p->used & BIT(i)));
+		}
+	}
+}
+
+static struct list_head *
 lookup_priolist(struct intel_engine_cs *engine, int prio)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_priolist *p;
 	struct rb_node **parent, *rb;
 	bool first = true;
+	int idx, i;
+
+	assert_priolists(execlists, INT_MAX);
 
+	/* buckets sorted from highest [in slot 0] to lowest priority */
+	idx = I915_PRIORITY_COUNT - (prio & ~I915_PRIORITY_MASK) - 1;
+	prio >>= I915_USER_PRIORITY_SHIFT;
 	if (unlikely(execlists->no_priolist))
 		prio = I915_PRIORITY_NORMAL;
 
@@ -283,7 +318,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
 			parent = &rb->rb_right;
 			first = false;
 		} else {
-			return p;
+			goto out;
 		}
 	}
 
@@ -309,11 +344,15 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
 	}
 
 	p->priority = prio;
-	INIT_LIST_HEAD(&p->requests);
+	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
+		INIT_LIST_HEAD(&p->requests[i]);
 	rb_link_node(&p->node, rb, parent);
 	rb_insert_color_cached(&p->node, &execlists->queue, first);
+	p->used = 0;
 
-	return p;
+out:
+	p->used |= BIT(idx);
+	return &p->requests[idx];
 }
 
 static void unwind_wa_tail(struct i915_request *rq)
@@ -325,7 +364,7 @@ static void unwind_wa_tail(struct i915_request *rq)
 static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq, *rn;
-	struct i915_priolist *uninitialized_var(p);
+	struct list_head *uninitialized_var(pl);
 	int last_prio = I915_PRIORITY_INVALID;
 
 	lockdep_assert_held(&engine->timeline.lock);
@@ -342,12 +381,11 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 		if (rq_prio(rq) != last_prio) {
 			last_prio = rq_prio(rq);
-			p = lookup_priolist(engine, last_prio);
+			pl = lookup_priolist(engine, last_prio);
 		}
 		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
-		GEM_BUG_ON(p->priority != rq_prio(rq));
-		list_add(&rq->sched.link, &p->requests);
+		list_add(&rq->sched.link, pl);
 	}
 }
 
@@ -665,8 +703,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
+		int i;
 
-		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
+		priolist_for_each_request_consume(rq, rn, p, i) {
 			/*
 			 * Can we combine this request with the current port?
 			 * It has to be the same context/ringbuffer and not
@@ -685,11 +724,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * combine this request with the last, then we
 				 * are done.
 				 */
-				if (port == last_port) {
-					__list_del_many(&p->requests,
-							&rq->sched.link);
+				if (port == last_port)
 					goto done;
-				}
 
 				/*
 				 * If GVT overrides us we only ever submit
@@ -699,11 +735,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * request) to the second port.
 				 */
 				if (ctx_single_port_submission(last->hw_context) ||
-				    ctx_single_port_submission(rq->hw_context)) {
-					__list_del_many(&p->requests,
-							&rq->sched.link);
+				    ctx_single_port_submission(rq->hw_context))
 					goto done;
-				}
 
 				GEM_BUG_ON(last->hw_context == rq->hw_context);
 
@@ -714,15 +747,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				GEM_BUG_ON(port_isset(port));
 			}
 
-			INIT_LIST_HEAD(&rq->sched.link);
+			list_del_init(&rq->sched.link);
+
 			__i915_request_submit(rq);
 			trace_i915_request_in(rq, port_index(port, execlists));
+
 			last = rq;
 			submit = true;
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
-		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
@@ -746,6 +780,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 */
 	execlists->queue_priority =
 		port != execlists->port ? rq_prio(last) : INT_MIN;
+	assert_priolists(execlists, execlists->queue_priority);
 
 	if (submit) {
 		port_assign(port, last);
@@ -857,16 +892,16 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	/* Flush the queued requests to the timeline list (for retiring). */
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
+		int i;
 
-		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
-			INIT_LIST_HEAD(&rq->sched.link);
+		priolist_for_each_request_consume(rq, rn, p, i) {
+			list_del_init(&rq->sched.link);
 
 			dma_fence_set_error(&rq->fence, -EIO);
 			__i915_request_submit(rq);
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
-		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
@@ -1072,8 +1107,7 @@ static void queue_request(struct intel_engine_cs *engine,
 			  struct i915_sched_node *node,
 			  int prio)
 {
-	list_add_tail(&node->link,
-		      &lookup_priolist(engine, prio)->requests);
+	list_add_tail(&node->link, lookup_priolist(engine, prio));
 }
 
 static void __update_queue(struct intel_engine_cs *engine, int prio)
@@ -1143,7 +1177,7 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 static void execlists_schedule(struct i915_request *request,
 			       const struct i915_sched_attr *attr)
 {
-	struct i915_priolist *uninitialized_var(pl);
+	struct list_head *uninitialized_var(pl);
 	struct intel_engine_cs *engine, *last;
 	struct i915_dependency *dep, *p;
 	struct i915_dependency stack;
@@ -1242,8 +1276,7 @@ static void execlists_schedule(struct i915_request *request,
 				pl = lookup_priolist(engine, prio);
 				last = engine;
 			}
-			GEM_BUG_ON(pl->priority != prio);
-			list_move_tail(&node->link, &pl->requests);
+			list_move_tail(&node->link, pl);
 		} else {
 			/*
 			 * If the request is not in the priolist queue because
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2dfa585712c2..1534de5bb852 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -190,11 +190,22 @@ enum intel_engine_id {
 };
 
 struct i915_priolist {
+	struct list_head requests[I915_PRIORITY_COUNT];
 	struct rb_node node;
-	struct list_head requests;
+	unsigned long used;
 	int priority;
 };
 
+#define priolist_for_each_request(it, plist, idx) \
+	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
+		list_for_each_entry(it, &(plist)->requests[idx], sched.link)
+
+#define priolist_for_each_request_consume(it, n, plist, idx) \
+	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
+		list_for_each_entry_safe(it, n, \
+					 &(plist)->requests[idx - 1], \
+					 sched.link)
+
 struct st_preempt_hang {
 	struct completion completion;
 	bool inject_hang;
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 15/40] drm/i915: Priority boost for new clients
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (12 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 14/40] drm/i915: Combine multiple internal plists into the same i915_priolist bucket Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-24 10:29   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 16/40] drm/i915: Pull scheduling under standalone lock Chris Wilson
                   ` (28 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Taken from an idea used for FQ_CODEL, we give the first request of a
new request flow a small priority boost. These flows are likely to
correspond with short, interactive tasks and so be more latency-sensitive
than the longer free-running queues. As soon as the client has more than
one request in the queue, further requests are not boosted and it settles
down into ordinary steady state behaviour.  Such small kicks dramatically
help combat the starvation issue, by allowing each client the opportunity
to run even when the system is under heavy throughput load (within the
constraints of the user selected priority).
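
For illustration, here is a stand-alone sketch of the decision rule
described above; the flow bookkeeping is invented for the example, and
only the rule mirrors the patch: boost the first request of an idle flow,
never a flow that already has work queued.

#include <stdbool.h>
#include <stdio.h>

#define PRIORITY_NEWCLIENT (1 << 0)	/* reserved low bit from the previous patches */
#define USER_PRIORITY(x)   ((x) << 1)

struct flow {
	bool prev_completed;	/* no outstanding request from this client */
	int user_priority;
};

static int effective_priority(const struct flow *f)
{
	int prio = USER_PRIORITY(f->user_priority);

	/*
	 * The first request of a new/idle flow gets the small kick; a busy
	 * flow keeps its plain user priority and so cannot starve others.
	 */
	if (f->prev_completed)
		prio |= PRIORITY_NEWCLIENT;

	return prio;
}

int main(void)
{
	struct flow interactive = { .prev_completed = true,  .user_priority = 0 };
	struct flow batch       = { .prev_completed = false, .user_priority = 0 };

	printf("interactive=%d batch=%d\n",
	       effective_priority(&interactive),
	       effective_priority(&batch));
	return 0;
}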

v2: Mark the preempted request as the start of a new flow, to prevent a
single client being continually gazumped by its peers.

Testcase: igt/benchmarks/rrul
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++--
 drivers/gpu/drm/i915/i915_scheduler.h |  4 +++-
 drivers/gpu/drm/i915/intel_lrc.c      | 25 +++++++++++++++++++------
 3 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a492385b2089..56140ca054e8 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1127,8 +1127,20 @@ void i915_request_add(struct i915_request *request)
 	 */
 	local_bh_disable();
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
-	if (engine->schedule)
-		engine->schedule(request, &request->gem_context->sched);
+	if (engine->schedule) {
+		struct i915_sched_attr attr = request->gem_context->sched;
+
+		/*
+		 * Boost priorities to new clients (new request flows).
+		 *
+		 * Allow interactive/synchronous clients to jump ahead of
+		 * the bulk clients. (FQ_CODEL)
+		 */
+		if (!prev || i915_request_completed(prev))
+			attr.priority |= I915_PRIORITY_NEWCLIENT;
+
+		engine->schedule(request, &attr);
+	}
 	rcu_read_unlock();
 	i915_sw_fence_commit(&request->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 7edfad0abfd7..93e43e263d8c 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -19,12 +19,14 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
-#define I915_USER_PRIORITY_SHIFT 0
+#define I915_USER_PRIORITY_SHIFT 1
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
 
 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
 
+#define I915_PRIORITY_NEWCLIENT	((u8)BIT(0))
+
 struct i915_sched_attr {
 	/**
 	 * @priority: execution and service priority
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index aeae82b5223c..ee9a656e549c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -363,9 +363,9 @@ static void unwind_wa_tail(struct i915_request *rq)
 
 static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
-	struct i915_request *rq, *rn;
+	struct i915_request *rq, *rn, *active = NULL;
 	struct list_head *uninitialized_var(pl);
-	int last_prio = I915_PRIORITY_INVALID;
+	int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
 
 	lockdep_assert_held(&engine->timeline.lock);
 
@@ -373,19 +373,32 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 					 &engine->timeline.requests,
 					 link) {
 		if (i915_request_completed(rq))
-			return;
+			break;
 
 		__i915_request_unsubmit(rq);
 		unwind_wa_tail(rq);
 
 		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-		if (rq_prio(rq) != last_prio) {
-			last_prio = rq_prio(rq);
-			pl = lookup_priolist(engine, last_prio);
+		if (rq_prio(rq) != prio) {
+			prio = rq_prio(rq);
+			pl = lookup_priolist(engine, prio);
 		}
 		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
 		list_add(&rq->sched.link, pl);
+
+		active = rq;
+	}
+
+	/*
+	 * The active request is now effectively the start of a new client
+	 * stream, so give it the equivalent small priority bump to prevent
+	 * it being gazumped a second time by another peer.
+	 */
+	if (!(prio & I915_PRIORITY_NEWCLIENT)) {
+		prio |= I915_PRIORITY_NEWCLIENT;
+		list_move_tail(&active->sched.link,
+			       lookup_priolist(engine, prio));
 	}
 }
 
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 16/40] drm/i915: Pull scheduling under standalone lock
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (13 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 15/40] drm/i915: Priority boost for new clients Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-24 11:19   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 17/40] drm/i915: Priority boost for waiting clients Chris Wilson
                   ` (27 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Currently, the backend scheduling code abuses struct_mutex in order to
have a global lock with which to manipulate a temporary list (without
widespread allocation) and to protect against list modifications. This is
an extraneous coupling to struct_mutex and furthermore cannot extend
beyond the local device.

Pull all the code that needs to be under the one true lock into
i915_scheduler.c, and make it so.
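
As a rough stand-alone sketch of the locking split (using a pthread
spinlock and invented node names purely as stand-ins), the dependency
edges are manipulated under one dedicated lock rather than any
device-wide mutex:

#include <pthread.h>
#include <stdio.h>

static pthread_spinlock_t schedule_lock;

struct node {
	struct node *signaler;	/* stand-in for a dependency edge */
};

static void add_dependency(struct node *waiter, struct node *signal)
{
	/* Only the graph lock is needed; no global device mutex. */
	pthread_spin_lock(&schedule_lock);
	waiter->signaler = signal;
	pthread_spin_unlock(&schedule_lock);
}

int main(void)
{
	struct node a = { 0 }, b = { 0 };

	pthread_spin_init(&schedule_lock, PTHREAD_PROCESS_PRIVATE);
	add_dependency(&b, &a);
	printf("b depends on a: %s\n", b.signaler == &a ? "yes" : "no");
	pthread_spin_destroy(&schedule_lock);
	return 0;
}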

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile           |   1 +
 drivers/gpu/drm/i915/i915_request.c     |  85 ------
 drivers/gpu/drm/i915/i915_request.h     |   8 -
 drivers/gpu/drm/i915/i915_scheduler.c   | 377 ++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.h   |  25 ++
 drivers/gpu/drm/i915/intel_display.c    |   3 +-
 drivers/gpu/drm/i915/intel_lrc.c        | 268 +----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |   5 +-
 8 files changed, 411 insertions(+), 361 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_scheduler.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 5794f102f9b8..ef1480c14e4e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -75,6 +75,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_gemfs.o \
 	  i915_query.o \
 	  i915_request.o \
+	  i915_scheduler.o \
 	  i915_timeline.o \
 	  i915_trace_points.o \
 	  i915_vma.o \
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 56140ca054e8..d73ad490a261 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -111,91 +111,6 @@ i915_request_remove_from_client(struct i915_request *request)
 	spin_unlock(&file_priv->mm.lock);
 }
 
-static struct i915_dependency *
-i915_dependency_alloc(struct drm_i915_private *i915)
-{
-	return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
-}
-
-static void
-i915_dependency_free(struct drm_i915_private *i915,
-		     struct i915_dependency *dep)
-{
-	kmem_cache_free(i915->dependencies, dep);
-}
-
-static void
-__i915_sched_node_add_dependency(struct i915_sched_node *node,
-				 struct i915_sched_node *signal,
-				 struct i915_dependency *dep,
-				 unsigned long flags)
-{
-	INIT_LIST_HEAD(&dep->dfs_link);
-	list_add(&dep->wait_link, &signal->waiters_list);
-	list_add(&dep->signal_link, &node->signalers_list);
-	dep->signaler = signal;
-	dep->flags = flags;
-}
-
-static int
-i915_sched_node_add_dependency(struct drm_i915_private *i915,
-			       struct i915_sched_node *node,
-			       struct i915_sched_node *signal)
-{
-	struct i915_dependency *dep;
-
-	dep = i915_dependency_alloc(i915);
-	if (!dep)
-		return -ENOMEM;
-
-	__i915_sched_node_add_dependency(node, signal, dep,
-					 I915_DEPENDENCY_ALLOC);
-	return 0;
-}
-
-static void
-i915_sched_node_fini(struct drm_i915_private *i915,
-		     struct i915_sched_node *node)
-{
-	struct i915_dependency *dep, *tmp;
-
-	GEM_BUG_ON(!list_empty(&node->link));
-
-	/*
-	 * Everyone we depended upon (the fences we wait to be signaled)
-	 * should retire before us and remove themselves from our list.
-	 * However, retirement is run independently on each timeline and
-	 * so we may be called out-of-order.
-	 */
-	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
-		GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler));
-		GEM_BUG_ON(!list_empty(&dep->dfs_link));
-
-		list_del(&dep->wait_link);
-		if (dep->flags & I915_DEPENDENCY_ALLOC)
-			i915_dependency_free(i915, dep);
-	}
-
-	/* Remove ourselves from everyone who depends upon us */
-	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
-		GEM_BUG_ON(dep->signaler != node);
-		GEM_BUG_ON(!list_empty(&dep->dfs_link));
-
-		list_del(&dep->signal_link);
-		if (dep->flags & I915_DEPENDENCY_ALLOC)
-			i915_dependency_free(i915, dep);
-	}
-}
-
-static void
-i915_sched_node_init(struct i915_sched_node *node)
-{
-	INIT_LIST_HEAD(&node->signalers_list);
-	INIT_LIST_HEAD(&node->waiters_list);
-	INIT_LIST_HEAD(&node->link);
-	node->attr.priority = I915_PRIORITY_INVALID;
-}
-
 static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
 {
 	struct intel_engine_cs *engine;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 7fa94b024968..5f7361e0fca6 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -332,14 +332,6 @@ static inline bool i915_request_completed(const struct i915_request *rq)
 	return __i915_request_completed(rq, seqno);
 }
 
-static inline bool i915_sched_node_signaled(const struct i915_sched_node *node)
-{
-	const struct i915_request *rq =
-		container_of(node, const struct i915_request, sched);
-
-	return i915_request_completed(rq);
-}
-
 void i915_retire_requests(struct drm_i915_private *i915);
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
new file mode 100644
index 000000000000..910ac7089596
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -0,0 +1,377 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include <linux/mutex.h>
+
+#include "i915_drv.h"
+#include "i915_request.h"
+#include "i915_scheduler.h"
+
+static DEFINE_SPINLOCK(schedule_lock);
+
+static const struct i915_request *
+node_to_request(const struct i915_sched_node *node)
+{
+	return container_of(node, const struct i915_request, sched);
+}
+
+static inline bool node_signaled(const struct i915_sched_node *node)
+{
+	return i915_request_completed(node_to_request(node));
+}
+
+void i915_sched_node_init(struct i915_sched_node *node)
+{
+	INIT_LIST_HEAD(&node->signalers_list);
+	INIT_LIST_HEAD(&node->waiters_list);
+	INIT_LIST_HEAD(&node->link);
+	node->attr.priority = I915_PRIORITY_INVALID;
+}
+
+static struct i915_dependency *
+i915_dependency_alloc(struct drm_i915_private *i915)
+{
+	return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
+}
+
+static void
+i915_dependency_free(struct drm_i915_private *i915,
+		     struct i915_dependency *dep)
+{
+	kmem_cache_free(i915->dependencies, dep);
+}
+
+bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
+				      struct i915_sched_node *signal,
+				      struct i915_dependency *dep,
+				      unsigned long flags)
+{
+	bool ret = false;
+
+	spin_lock(&schedule_lock);
+
+	if (!node_signaled(signal)) {
+		INIT_LIST_HEAD(&dep->dfs_link);
+		list_add(&dep->wait_link, &signal->waiters_list);
+		list_add(&dep->signal_link, &node->signalers_list);
+		dep->signaler = signal;
+		dep->flags = flags;
+
+		ret = true;
+	}
+
+	spin_unlock(&schedule_lock);
+
+	return ret;
+}
+
+int i915_sched_node_add_dependency(struct drm_i915_private *i915,
+				   struct i915_sched_node *node,
+				   struct i915_sched_node *signal)
+{
+	struct i915_dependency *dep;
+
+	dep = i915_dependency_alloc(i915);
+	if (!dep)
+		return -ENOMEM;
+
+	if (!__i915_sched_node_add_dependency(node, signal, dep,
+					      I915_DEPENDENCY_ALLOC))
+		i915_dependency_free(i915, dep);
+
+	return 0;
+}
+
+void i915_sched_node_fini(struct drm_i915_private *i915,
+			  struct i915_sched_node *node)
+{
+	struct i915_dependency *dep, *tmp;
+
+	GEM_BUG_ON(!list_empty(&node->link));
+
+	spin_lock(&schedule_lock);
+
+	/*
+	 * Everyone we depended upon (the fences we wait to be signaled)
+	 * should retire before us and remove themselves from our list.
+	 * However, retirement is run independently on each timeline and
+	 * so we may be called out-of-order.
+	 */
+	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
+		GEM_BUG_ON(!node_signaled(dep->signaler));
+		GEM_BUG_ON(!list_empty(&dep->dfs_link));
+
+		list_del(&dep->wait_link);
+		if (dep->flags & I915_DEPENDENCY_ALLOC)
+			i915_dependency_free(i915, dep);
+	}
+
+	/* Remove ourselves from everyone who depends upon us */
+	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
+		GEM_BUG_ON(dep->signaler != node);
+		GEM_BUG_ON(!list_empty(&dep->dfs_link));
+
+		list_del(&dep->signal_link);
+		if (dep->flags & I915_DEPENDENCY_ALLOC)
+			i915_dependency_free(i915, dep);
+	}
+
+	spin_unlock(&schedule_lock);
+}
+
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+	return rb_entry(rb, struct i915_priolist, node);
+}
+
+static void assert_priolists(struct intel_engine_execlists * const execlists,
+			     int queue_priority)
+{
+	struct rb_node *rb;
+	int last_prio, i;
+
+	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+		return;
+
+	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
+		   rb_first(&execlists->queue.rb_root));
+
+	last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
+	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
+		const struct i915_priolist *p = to_priolist(rb);
+
+		GEM_BUG_ON(p->priority >= last_prio);
+		last_prio = p->priority;
+
+		GEM_BUG_ON(!p->used);
+		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
+			if (list_empty(&p->requests[i]))
+				continue;
+
+			GEM_BUG_ON(!(p->used & BIT(i)));
+		}
+	}
+}
+
+struct list_head *
+i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct i915_priolist *p;
+	struct rb_node **parent, *rb;
+	bool first = true;
+	int idx, i;
+
+	lockdep_assert_held(&engine->timeline.lock);
+	assert_priolists(execlists, INT_MAX);
+
+	/* buckets sorted from highest [in slot 0] to lowest priority */
+	idx = I915_PRIORITY_COUNT - (prio & ~I915_PRIORITY_MASK) - 1;
+	prio >>= I915_USER_PRIORITY_SHIFT;
+	if (unlikely(execlists->no_priolist))
+		prio = I915_PRIORITY_NORMAL;
+
+find_priolist:
+	/* most positive priority is scheduled first, equal priorities fifo */
+	rb = NULL;
+	parent = &execlists->queue.rb_root.rb_node;
+	while (*parent) {
+		rb = *parent;
+		p = to_priolist(rb);
+		if (prio > p->priority) {
+			parent = &rb->rb_left;
+		} else if (prio < p->priority) {
+			parent = &rb->rb_right;
+			first = false;
+		} else {
+			goto out;
+		}
+	}
+
+	if (prio == I915_PRIORITY_NORMAL) {
+		p = &execlists->default_priolist;
+	} else {
+		p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
+		/* Convert an allocation failure to a priority bump */
+		if (unlikely(!p)) {
+			prio = I915_PRIORITY_NORMAL; /* recurses just once */
+
+			/* To maintain ordering with all rendering, after an
+			 * allocation failure we have to disable all scheduling.
+			 * Requests will then be executed in fifo, and schedule
+			 * will ensure that dependencies are emitted in fifo.
+			 * There will be still some reordering with existing
+			 * requests, so if userspace lied about their
+			 * dependencies that reordering may be visible.
+			 */
+			execlists->no_priolist = true;
+			goto find_priolist;
+		}
+	}
+
+	p->priority = prio;
+	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
+		INIT_LIST_HEAD(&p->requests[i]);
+	rb_link_node(&p->node, rb, parent);
+	rb_insert_color_cached(&p->node, &execlists->queue, first);
+	p->used = 0;
+
+out:
+	p->used |= BIT(idx);
+	return &p->requests[idx];
+}
+
+static struct intel_engine_cs *
+sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
+{
+	struct intel_engine_cs *engine = node_to_request(node)->engine;
+
+	GEM_BUG_ON(!locked);
+
+	if (engine != locked) {
+		spin_unlock(&locked->timeline.lock);
+		spin_lock(&engine->timeline.lock);
+	}
+
+	return engine;
+}
+
+void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+{
+	struct list_head *uninitialized_var(pl);
+	struct intel_engine_cs *engine, *last;
+	struct i915_dependency *dep, *p;
+	struct i915_dependency stack;
+	const int prio = attr->priority;
+	LIST_HEAD(dfs);
+
+	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
+
+	if (i915_request_completed(rq))
+		return;
+
+	if (prio <= READ_ONCE(rq->sched.attr.priority))
+		return;
+
+	/* Needed in order to use the temporary link inside i915_dependency */
+	spin_lock(&schedule_lock);
+
+	stack.signaler = &rq->sched;
+	list_add(&stack.dfs_link, &dfs);
+
+	/*
+	 * Recursively bump all dependent priorities to match the new request.
+	 *
+	 * A naive approach would be to use recursion:
+	 * static void update_priorities(struct i915_sched_node *node, prio) {
+	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
+	 *		update_priorities(dep->signal, prio)
+	 *	queue_request(node);
+	 * }
+	 * but that may have unlimited recursion depth and so runs a very
+	 * real risk of overunning the kernel stack. Instead, we build
+	 * a flat list of all dependencies starting with the current request.
+	 * As we walk the list of dependencies, we add all of its dependencies
+	 * to the end of the list (this may include an already visited
+	 * request) and continue to walk onwards onto the new dependencies. The
+	 * end result is a topological list of requests in reverse order, the
+	 * last element in the list is the request we must execute first.
+	 */
+	list_for_each_entry(dep, &dfs, dfs_link) {
+		struct i915_sched_node *node = dep->signaler;
+
+		/*
+		 * Within an engine, there can be no cycle, but we may
+		 * refer to the same dependency chain multiple times
+		 * (redundant dependencies are not eliminated) and across
+		 * engines.
+		 */
+		list_for_each_entry(p, &node->signalers_list, signal_link) {
+			GEM_BUG_ON(p == dep); /* no cycles! */
+
+			if (node_signaled(p->signaler))
+				continue;
+
+			GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority);
+			if (prio > READ_ONCE(p->signaler->attr.priority))
+				list_move_tail(&p->dfs_link, &dfs);
+		}
+	}
+
+	/*
+	 * If we didn't need to bump any existing priorities, and we haven't
+	 * yet submitted this request (i.e. there is no potential race with
+	 * execlists_submit_request()), we can set our own priority and skip
+	 * acquiring the engine locks.
+	 */
+	if (rq->sched.attr.priority == I915_PRIORITY_INVALID) {
+		GEM_BUG_ON(!list_empty(&rq->sched.link));
+		rq->sched.attr = *attr;
+
+		if (stack.dfs_link.next == stack.dfs_link.prev)
+			goto out_unlock;
+
+		__list_del_entry(&stack.dfs_link);
+	}
+
+	last = NULL;
+	engine = rq->engine;
+	spin_lock_irq(&engine->timeline.lock);
+
+	/* Fifo and depth-first replacement ensure our deps execute before us */
+	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
+		struct i915_sched_node *node = dep->signaler;
+
+		INIT_LIST_HEAD(&dep->dfs_link);
+
+		engine = sched_lock_engine(node, engine);
+
+		/* Recheck after acquiring the engine->timeline.lock */
+		if (prio <= node->attr.priority || node_signaled(node))
+			continue;
+
+		node->attr.priority = prio;
+		if (!list_empty(&node->link)) {
+			if (last != engine) {
+				pl = i915_sched_lookup_priolist(engine, prio);
+				last = engine;
+			}
+			list_move_tail(&node->link, pl);
+		} else {
+			/*
+			 * If the request is not in the priolist queue because
+			 * it is not yet runnable, then it doesn't contribute
+			 * to our preemption decisions. On the other hand,
+			 * if the request is on the HW, it too is not in the
+			 * queue; but in that case we may still need to reorder
+			 * the inflight requests.
+			 */
+			if (!i915_sw_fence_done(&node_to_request(node)->submit))
+				continue;
+		}
+
+		if (prio <= engine->execlists.queue_priority)
+			continue;
+
+		/*
+		 * If we are already the currently executing context, don't
+		 * bother evaluating if we should preempt ourselves.
+		 */
+		if (node_to_request(node)->global_seqno &&
+		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
+				      node_to_request(node)->global_seqno))
+			continue;
+
+		/* Defer (tasklet) submission until after all of our updates. */
+		engine->execlists.queue_priority = prio;
+		tasklet_hi_schedule(&engine->execlists.tasklet);
+	}
+
+	spin_unlock_irq(&engine->timeline.lock);
+
+out_unlock:
+	spin_unlock(&schedule_lock);
+}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 93e43e263d8c..8058c17ae96a 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -8,9 +8,14 @@
 #define _I915_SCHEDULER_H_
 
 #include <linux/bitops.h>
+#include <linux/kernel.h>
 
 #include <uapi/drm/i915_drm.h>
 
+struct drm_i915_private;
+struct i915_request;
+struct intel_engine_cs;
+
 enum {
 	I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
 	I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
@@ -77,4 +82,24 @@ struct i915_dependency {
 #define I915_DEPENDENCY_ALLOC BIT(0)
 };
 
+void i915_sched_node_init(struct i915_sched_node *node);
+
+bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
+				      struct i915_sched_node *signal,
+				      struct i915_dependency *dep,
+				      unsigned long flags);
+
+int i915_sched_node_add_dependency(struct drm_i915_private *i915,
+				   struct i915_sched_node *node,
+				   struct i915_sched_node *signal);
+
+void i915_sched_node_fini(struct drm_i915_private *i915,
+			  struct i915_sched_node *node);
+
+void i915_schedule(struct i915_request *request,
+		   const struct i915_sched_attr *attr);
+
+struct list_head *
+i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
+
 #endif /* _I915_SCHEDULER_H_ */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index fbcc56caffb6..31a3fdf8f051 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -13169,13 +13169,12 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 
 	ret = intel_plane_pin_fb(to_intel_plane_state(new_state));
 
-	fb_obj_bump_render_priority(obj);
-
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 	if (ret)
 		return ret;
 
+	fb_obj_bump_render_priority(obj);
 	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
 
 	if (!new_state->fence) { /* implicit fencing */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ee9a656e549c..74be9a49ef9e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -259,102 +259,6 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
 	ce->lrc_desc = desc;
 }
 
-static void assert_priolists(struct intel_engine_execlists * const execlists,
-			     int queue_priority)
-{
-	struct rb_node *rb;
-	int last_prio, i;
-
-	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
-		return;
-
-	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
-		   rb_first(&execlists->queue.rb_root));
-
-	last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
-	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
-		struct i915_priolist *p = to_priolist(rb);
-
-		GEM_BUG_ON(p->priority >= last_prio);
-		last_prio = p->priority;
-
-		GEM_BUG_ON(!p->used);
-		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
-			if (list_empty(&p->requests[i]))
-				continue;
-
-			GEM_BUG_ON(!(p->used & BIT(i)));
-		}
-	}
-}
-
-static struct list_head *
-lookup_priolist(struct intel_engine_cs *engine, int prio)
-{
-	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct i915_priolist *p;
-	struct rb_node **parent, *rb;
-	bool first = true;
-	int idx, i;
-
-	assert_priolists(execlists, INT_MAX);
-
-	/* buckets sorted from highest [in slot 0] to lowest priority */
-	idx = I915_PRIORITY_COUNT - (prio & ~I915_PRIORITY_MASK) - 1;
-	prio >>= I915_USER_PRIORITY_SHIFT;
-	if (unlikely(execlists->no_priolist))
-		prio = I915_PRIORITY_NORMAL;
-
-find_priolist:
-	/* most positive priority is scheduled first, equal priorities fifo */
-	rb = NULL;
-	parent = &execlists->queue.rb_root.rb_node;
-	while (*parent) {
-		rb = *parent;
-		p = to_priolist(rb);
-		if (prio > p->priority) {
-			parent = &rb->rb_left;
-		} else if (prio < p->priority) {
-			parent = &rb->rb_right;
-			first = false;
-		} else {
-			goto out;
-		}
-	}
-
-	if (prio == I915_PRIORITY_NORMAL) {
-		p = &execlists->default_priolist;
-	} else {
-		p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
-		/* Convert an allocation failure to a priority bump */
-		if (unlikely(!p)) {
-			prio = I915_PRIORITY_NORMAL; /* recurses just once */
-
-			/* To maintain ordering with all rendering, after an
-			 * allocation failure we have to disable all scheduling.
-			 * Requests will then be executed in fifo, and schedule
-			 * will ensure that dependencies are emitted in fifo.
-			 * There will be still some reordering with existing
-			 * requests, so if userspace lied about their
-			 * dependencies that reordering may be visible.
-			 */
-			execlists->no_priolist = true;
-			goto find_priolist;
-		}
-	}
-
-	p->priority = prio;
-	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
-		INIT_LIST_HEAD(&p->requests[i]);
-	rb_link_node(&p->node, rb, parent);
-	rb_insert_color_cached(&p->node, &execlists->queue, first);
-	p->used = 0;
-
-out:
-	p->used |= BIT(idx);
-	return &p->requests[idx];
-}
-
 static void unwind_wa_tail(struct i915_request *rq)
 {
 	rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
@@ -381,7 +285,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 		if (rq_prio(rq) != prio) {
 			prio = rq_prio(rq);
-			pl = lookup_priolist(engine, prio);
+			pl = i915_sched_lookup_priolist(engine, prio);
 		}
 		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
@@ -398,7 +302,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	if (!(prio & I915_PRIORITY_NEWCLIENT)) {
 		prio |= I915_PRIORITY_NEWCLIENT;
 		list_move_tail(&active->sched.link,
-			       lookup_priolist(engine, prio));
+			       i915_sched_lookup_priolist(engine, prio));
 	}
 }
 
@@ -793,7 +697,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 */
 	execlists->queue_priority =
 		port != execlists->port ? rq_prio(last) : INT_MIN;
-	assert_priolists(execlists, execlists->queue_priority);
 
 	if (submit) {
 		port_assign(port, last);
@@ -1120,12 +1023,7 @@ static void queue_request(struct intel_engine_cs *engine,
 			  struct i915_sched_node *node,
 			  int prio)
 {
-	list_add_tail(&node->link, lookup_priolist(engine, prio));
-}
-
-static void __update_queue(struct intel_engine_cs *engine, int prio)
-{
-	engine->execlists.queue_priority = prio;
+	list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
 }
 
 static void __submit_queue_imm(struct intel_engine_cs *engine)
@@ -1144,7 +1042,7 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
 static void submit_queue(struct intel_engine_cs *engine, int prio)
 {
 	if (prio > engine->execlists.queue_priority) {
-		__update_queue(engine, prio);
+		engine->execlists.queue_priority = prio;
 		__submit_queue_imm(engine);
 	}
 }
@@ -1167,162 +1065,6 @@ static void execlists_submit_request(struct i915_request *request)
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
-static struct i915_request *sched_to_request(struct i915_sched_node *node)
-{
-	return container_of(node, struct i915_request, sched);
-}
-
-static struct intel_engine_cs *
-sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
-{
-	struct intel_engine_cs *engine = sched_to_request(node)->engine;
-
-	GEM_BUG_ON(!locked);
-
-	if (engine != locked) {
-		spin_unlock(&locked->timeline.lock);
-		spin_lock(&engine->timeline.lock);
-	}
-
-	return engine;
-}
-
-static void execlists_schedule(struct i915_request *request,
-			       const struct i915_sched_attr *attr)
-{
-	struct list_head *uninitialized_var(pl);
-	struct intel_engine_cs *engine, *last;
-	struct i915_dependency *dep, *p;
-	struct i915_dependency stack;
-	const int prio = attr->priority;
-	LIST_HEAD(dfs);
-
-	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
-
-	if (i915_request_completed(request))
-		return;
-
-	if (prio <= READ_ONCE(request->sched.attr.priority))
-		return;
-
-	/* Need BKL in order to use the temporary link inside i915_dependency */
-	lockdep_assert_held(&request->i915->drm.struct_mutex);
-
-	stack.signaler = &request->sched;
-	list_add(&stack.dfs_link, &dfs);
-
-	/*
-	 * Recursively bump all dependent priorities to match the new request.
-	 *
-	 * A naive approach would be to use recursion:
-	 * static void update_priorities(struct i915_sched_node *node, prio) {
-	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
-	 *		update_priorities(dep->signal, prio)
-	 *	queue_request(node);
-	 * }
-	 * but that may have unlimited recursion depth and so runs a very
-	 * real risk of overunning the kernel stack. Instead, we build
-	 * a flat list of all dependencies starting with the current request.
-	 * As we walk the list of dependencies, we add all of its dependencies
-	 * to the end of the list (this may include an already visited
-	 * request) and continue to walk onwards onto the new dependencies. The
-	 * end result is a topological list of requests in reverse order, the
-	 * last element in the list is the request we must execute first.
-	 */
-	list_for_each_entry(dep, &dfs, dfs_link) {
-		struct i915_sched_node *node = dep->signaler;
-
-		/*
-		 * Within an engine, there can be no cycle, but we may
-		 * refer to the same dependency chain multiple times
-		 * (redundant dependencies are not eliminated) and across
-		 * engines.
-		 */
-		list_for_each_entry(p, &node->signalers_list, signal_link) {
-			GEM_BUG_ON(p == dep); /* no cycles! */
-
-			if (i915_sched_node_signaled(p->signaler))
-				continue;
-
-			GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority);
-			if (prio > READ_ONCE(p->signaler->attr.priority))
-				list_move_tail(&p->dfs_link, &dfs);
-		}
-	}
-
-	/*
-	 * If we didn't need to bump any existing priorities, and we haven't
-	 * yet submitted this request (i.e. there is no potential race with
-	 * execlists_submit_request()), we can set our own priority and skip
-	 * acquiring the engine locks.
-	 */
-	if (request->sched.attr.priority == I915_PRIORITY_INVALID) {
-		GEM_BUG_ON(!list_empty(&request->sched.link));
-		request->sched.attr = *attr;
-		if (stack.dfs_link.next == stack.dfs_link.prev)
-			return;
-		__list_del_entry(&stack.dfs_link);
-	}
-
-	last = NULL;
-	engine = request->engine;
-	spin_lock_irq(&engine->timeline.lock);
-
-	/* Fifo and depth-first replacement ensure our deps execute before us */
-	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
-		struct i915_sched_node *node = dep->signaler;
-
-		INIT_LIST_HEAD(&dep->dfs_link);
-
-		engine = sched_lock_engine(node, engine);
-
-		/* Recheck after acquiring the engine->timeline.lock */
-		if (prio <= node->attr.priority)
-			continue;
-
-		if (i915_sched_node_signaled(node))
-			continue;
-
-		node->attr.priority = prio;
-		if (!list_empty(&node->link)) {
-			if (last != engine) {
-				pl = lookup_priolist(engine, prio);
-				last = engine;
-			}
-			list_move_tail(&node->link, pl);
-		} else {
-			/*
-			 * If the request is not in the priolist queue because
-			 * it is not yet runnable, then it doesn't contribute
-			 * to our preemption decisions. On the other hand,
-			 * if the request is on the HW, it too is not in the
-			 * queue; but in that case we may still need to reorder
-			 * the inflight requests.
-			 */
-			if (!i915_sw_fence_done(&sched_to_request(node)->submit))
-				continue;
-		}
-
-		if (prio <= engine->execlists.queue_priority)
-			continue;
-
-		/*
-		 * If we are already the currently executing context, don't
-		 * bother evaluating if we should preempt ourselves.
-		 */
-		if (sched_to_request(node)->global_seqno &&
-		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
-				      sched_to_request(node)->global_seqno))
-			continue;
-
-		/* Defer (tasklet) submission until after all of our updates. */
-		__update_queue(engine, prio);
-		tasklet_hi_schedule(&engine->execlists.tasklet);
-	}
-
-	spin_unlock_irq(&engine->timeline.lock);
-}
-
 static void execlists_context_destroy(struct intel_context *ce)
 {
 	GEM_BUG_ON(ce->pin_count);
@@ -2360,7 +2102,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = execlists_submit_request;
 	engine->cancel_requests = execlists_cancel_requests;
-	engine->schedule = execlists_schedule;
+	engine->schedule = i915_schedule;
 	engine->execlists.tasklet.func = execlists_submission_tasklet;
 
 	engine->reset.prepare = execlists_reset_prepare;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 1534de5bb852..f6ec48a75a69 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -498,11 +498,10 @@ struct intel_engine_cs {
 	 */
 	void		(*submit_request)(struct i915_request *rq);
 
-	/* Call when the priority on a request has changed and it and its
+	/*
+	 * Call when the priority on a request has changed and it and its
 	 * dependencies may need rescheduling. Note the request itself may
 	 * not be ready to run!
-	 *
-	 * Called under the struct_mutex.
 	 */
 	void		(*schedule)(struct i915_request *request,
 				    const struct i915_sched_attr *attr);
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 17/40] drm/i915: Priority boost for waiting clients
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (14 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 16/40] drm/i915: Pull scheduling under standalone lock Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-24 11:29   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 18/40] drm/i915: Report the number of closed vma held by each context in debugfs Chris Wilson
                   ` (26 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Latency is in the eye of the beholder. In the case where a client stops
and waits for the gpu, give that request chain a small priority boost
(not so large that it overtakes higher priority clients, preserving the
external ordering) so that ideally the wait completes earlier.
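
As a rough illustration (not part of the patch), the low bits freed up by
widening I915_USER_PRIORITY_SHIFT act as tie-breakers below the user
priority: the wait boost can reorder requests of equal user priority, but
a fully boosted request still sorts below the next user level. A
standalone userspace model of the packing:

#include <stdio.h>

#define USER_PRIORITY_SHIFT	2		/* mirrors I915_USER_PRIORITY_SHIFT */
#define USER_PRIORITY(x)	((x) << USER_PRIORITY_SHIFT)
#define PRIORITY_WAIT		(1 << 1)	/* mirrors I915_PRIORITY_WAIT */
#define PRIORITY_NEWCLIENT	(1 << 0)	/* mirrors I915_PRIORITY_NEWCLIENT */

int main(void)
{
	int boosted = USER_PRIORITY(0) | PRIORITY_WAIT | PRIORITY_NEWCLIENT;
	int higher = USER_PRIORITY(1);

	/* 3 < 4: boosts never overtake a higher user priority */
	printf("boosted=%d higher=%d\n", boosted, higher);
	return 0;
}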

Testcase: igt/gem_sync/switch-default
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c       |  5 +++-
 drivers/gpu/drm/i915/i915_request.c   |  2 ++
 drivers/gpu/drm/i915/i915_request.h   |  5 ++--
 drivers/gpu/drm/i915/i915_scheduler.c | 34 ++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_scheduler.h |  5 +++-
 5 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6b347ffb996b..2fa75f2a1980 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1748,6 +1748,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	 */
 	err = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
 				   (write_domain ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT,
 				   to_rps_client(file));
@@ -3749,7 +3750,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	start = ktime_get();
 
 	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   I915_WAIT_ALL,
 				   to_wait_timeout(args->timeout_ns),
 				   to_rps_client(file));
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d73ad490a261..abd4dacbab8e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1237,6 +1237,8 @@ long i915_request_wait(struct i915_request *rq,
 		add_wait_queue(errq, &reset);
 
 	intel_wait_init(&wait);
+	if (flags & I915_WAIT_PRIORITY)
+		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
 
 restart:
 	do {
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 5f7361e0fca6..90e9d170a0cd 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -277,8 +277,9 @@ long i915_request_wait(struct i915_request *rq,
 	__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
 #define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
+#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
+#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
+#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
 
 static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
 					    u32 seqno);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 910ac7089596..1423088dceff 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -239,7 +239,8 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 	return engine;
 }
 
-void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+static void __i915_schedule(struct i915_request *rq,
+			    const struct i915_sched_attr *attr)
 {
 	struct list_head *uninitialized_var(pl);
 	struct intel_engine_cs *engine, *last;
@@ -248,6 +249,8 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	const int prio = attr->priority;
 	LIST_HEAD(dfs);
 
+	/* Needed in order to use the temporary link inside i915_dependency */
+	lockdep_assert_held(&schedule_lock);
 	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
 
 	if (i915_request_completed(rq))
@@ -256,9 +259,6 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	if (prio <= READ_ONCE(rq->sched.attr.priority))
 		return;
 
-	/* Needed in order to use the temporary link inside i915_dependency */
-	spin_lock(&schedule_lock);
-
 	stack.signaler = &rq->sched;
 	list_add(&stack.dfs_link, &dfs);
 
@@ -312,7 +312,7 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 		rq->sched.attr = *attr;
 
 		if (stack.dfs_link.next == stack.dfs_link.prev)
-			goto out_unlock;
+			return;
 
 		__list_del_entry(&stack.dfs_link);
 	}
@@ -371,7 +371,29 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	}
 
 	spin_unlock_irq(&engine->timeline.lock);
+}
 
-out_unlock:
+void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+{
+	spin_lock(&schedule_lock);
+	__i915_schedule(rq, attr);
 	spin_unlock(&schedule_lock);
 }
+
+void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
+{
+	struct i915_sched_attr attr;
+
+	GEM_BUG_ON(bump & I915_PRIORITY_MASK);
+
+	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
+		return;
+
+	spin_lock_bh(&schedule_lock);
+
+	attr = rq->sched.attr;
+	attr.priority |= bump;
+	__i915_schedule(rq, &attr);
+
+	spin_unlock_bh(&schedule_lock);
+}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 8058c17ae96a..cbfb64288c61 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -24,12 +24,13 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
-#define I915_USER_PRIORITY_SHIFT 1
+#define I915_USER_PRIORITY_SHIFT 2
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
 
 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
 
+#define I915_PRIORITY_WAIT	((u8)BIT(1))
 #define I915_PRIORITY_NEWCLIENT	((u8)BIT(0))
 
 struct i915_sched_attr {
@@ -99,6 +100,8 @@ void i915_sched_node_fini(struct drm_i915_private *i915,
 void i915_schedule(struct i915_request *request,
 		   const struct i915_sched_attr *attr);
 
+void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump);
+
 struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
 
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 18/40] drm/i915: Report the number of closed vma held by each context in debugfs
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (15 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 17/40] drm/i915: Priority boost for waiting clients Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-24 11:57   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 19/40] drm/i915: Remove debugfs/i915_ppgtt_info Chris Wilson
                   ` (25 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Include the total size of closed vma when reporting the per_ctx_stats of
debugfs/i915_gem_objects.

Whilst adjusting the context tracking, note that we can simply use our
list of contexts in i915->contexts rather than circumlocute via
dev->filelist and the per-file context idr.
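
For illustration only, a simplified userspace model rather than the driver
code: the reworked print_context_stats() walks one global list of contexts
and folds each contribution into a single set of counters, instead of
visiting every open file and its private context idr.

#include <stdio.h>

struct ctx {
	unsigned long long closed;	/* stands in for the per-vma sizes */
	struct ctx *next;
};

static void print_context_stats(const struct ctx *head)
{
	unsigned long long closed = 0;

	for (const struct ctx *c = head; c; c = c->next)
		closed += c->closed;	/* one pass over the global list */

	printf("[k]contexts: %llu bytes closed\n", closed);
}

int main(void)
{
	struct ctx b = { 4096, NULL };
	struct ctx a = { 8192, &b };

	print_context_stats(&a);
	return 0;
}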

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 113 +++++++++++-----------------
 1 file changed, 42 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2ac75bc10afa..6b5cc30f3e09 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -302,6 +302,7 @@ struct file_stats {
 	u64 total, unbound;
 	u64 global, shared;
 	u64 active, inactive;
+	u64 closed;
 };
 
 static int per_file_stats(int id, void *ptr, void *data)
@@ -336,6 +337,9 @@ static int per_file_stats(int id, void *ptr, void *data)
 			stats->active += vma->node.size;
 		else
 			stats->inactive += vma->node.size;
+
+		if (i915_vma_is_closed(vma))
+			stats->closed += vma->node.size;
 	}
 
 	return 0;
@@ -343,7 +347,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 
 #define print_file_stats(m, name, stats) do { \
 	if (stats.count) \
-		seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound)\n", \
+		seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound, %llu closed)\n", \
 			   name, \
 			   stats.count, \
 			   stats.total, \
@@ -351,7 +355,8 @@ static int per_file_stats(int id, void *ptr, void *data)
 			   stats.inactive, \
 			   stats.global, \
 			   stats.shared, \
-			   stats.unbound); \
+			   stats.unbound, \
+			   stats.closed); \
 } while (0)
 
 static void print_batch_pool_stats(struct seq_file *m,
@@ -377,44 +382,44 @@ static void print_batch_pool_stats(struct seq_file *m,
 	print_file_stats(m, "[k]batch pool", stats);
 }
 
-static int per_file_ctx_stats(int idx, void *ptr, void *data)
+static void print_context_stats(struct seq_file *m,
+				struct drm_i915_private *i915)
 {
-	struct i915_gem_context *ctx = ptr;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	for_each_engine(engine, ctx->i915, id) {
-		struct intel_context *ce = to_intel_context(ctx, engine);
+	struct file_stats kstats = {};
+	struct i915_gem_context *ctx;
 
-		if (ce->state)
-			per_file_stats(0, ce->state->obj, data);
-		if (ce->ring)
-			per_file_stats(0, ce->ring->vma->obj, data);
-	}
+	list_for_each_entry(ctx, &i915->contexts.list, link) {
+		struct file_stats stats = { .file_priv = ctx->file_priv };
+		struct intel_engine_cs *engine;
+		enum intel_engine_id id;
 
-	return 0;
-}
+		for_each_engine(engine, i915, id) {
+			struct intel_context *ce = to_intel_context(ctx, engine);
 
-static void print_context_stats(struct seq_file *m,
-				struct drm_i915_private *dev_priv)
-{
-	struct drm_device *dev = &dev_priv->drm;
-	struct file_stats stats;
-	struct drm_file *file;
+			if (ce->state)
+				per_file_stats(0, ce->state->obj, &kstats);
+			if (ce->ring)
+				per_file_stats(0, ce->ring->vma->obj, &kstats);
+		}
 
-	memset(&stats, 0, sizeof(stats));
+		if (!IS_ERR_OR_NULL(stats.file_priv)) {
+			struct drm_file *file = stats.file_priv->file;
+			struct task_struct *task;
 
-	mutex_lock(&dev->struct_mutex);
-	if (dev_priv->kernel_context)
-		per_file_ctx_stats(0, dev_priv->kernel_context, &stats);
+			spin_lock(&file->table_lock);
+			idr_for_each(&file->object_idr, per_file_stats, &stats);
+			spin_unlock(&file->table_lock);
 
-	list_for_each_entry(file, &dev->filelist, lhead) {
-		struct drm_i915_file_private *fpriv = file->driver_priv;
-		idr_for_each(&fpriv->context_idr, per_file_ctx_stats, &stats);
+			rcu_read_lock();
+			task = pid_task(ctx->pid ?: file->pid, PIDTYPE_PID);
+			print_file_stats(m,
+					 task ? task->comm : "<unknown>",
+					 stats);
+			rcu_read_unlock();
+		}
 	}
-	mutex_unlock(&dev->struct_mutex);
 
-	print_file_stats(m, "[k]contexts", stats);
+	print_file_stats(m, "[k]contexts", kstats);
 }
 
 static int i915_gem_object_info(struct seq_file *m, void *data)
@@ -426,14 +431,9 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 	u64 size, mapped_size, purgeable_size, dpy_size, huge_size;
 	struct drm_i915_gem_object *obj;
 	unsigned int page_sizes = 0;
-	struct drm_file *file;
 	char buf[80];
 	int ret;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
-
 	seq_printf(m, "%u objects, %llu bytes\n",
 		   dev_priv->mm.object_count,
 		   dev_priv->mm.object_memory);
@@ -514,43 +514,14 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 					buf, sizeof(buf)));
 
 	seq_putc(m, '\n');
-	print_batch_pool_stats(m, dev_priv);
-	mutex_unlock(&dev->struct_mutex);
-
-	mutex_lock(&dev->filelist_mutex);
-	print_context_stats(m, dev_priv);
-	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
-		struct file_stats stats;
-		struct drm_i915_file_private *file_priv = file->driver_priv;
-		struct i915_request *request;
-		struct task_struct *task;
-
-		mutex_lock(&dev->struct_mutex);
 
-		memset(&stats, 0, sizeof(stats));
-		stats.file_priv = file->driver_priv;
-		spin_lock(&file->table_lock);
-		idr_for_each(&file->object_idr, per_file_stats, &stats);
-		spin_unlock(&file->table_lock);
-		/*
-		 * Although we have a valid reference on file->pid, that does
-		 * not guarantee that the task_struct who called get_pid() is
-		 * still alive (e.g. get_pid(current) => fork() => exit()).
-		 * Therefore, we need to protect this ->comm access using RCU.
-		 */
-		request = list_first_entry_or_null(&file_priv->mm.request_list,
-						   struct i915_request,
-						   client_link);
-		rcu_read_lock();
-		task = pid_task(request && request->gem_context->pid ?
-				request->gem_context->pid : file->pid,
-				PIDTYPE_PID);
-		print_file_stats(m, task ? task->comm : "<unknown>", stats);
-		rcu_read_unlock();
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
 
-		mutex_unlock(&dev->struct_mutex);
-	}
-	mutex_unlock(&dev->filelist_mutex);
+	print_batch_pool_stats(m, dev_priv);
+	print_context_stats(m, dev_priv);
+	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
 }
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 19/40] drm/i915: Remove debugfs/i915_ppgtt_info
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (16 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 18/40] drm/i915: Report the number of closed vma held by each context in debugfs Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-24 12:03   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 20/40] drm/i915: Track all held rpm wakerefs Chris Wilson
                   ` (24 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

The information presented here is not relevant to current development.
We can use the context information instead, but more often we want to
inspect the active gpu state.

The ulterior motive is to eradicate dev->filelist.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 119 ----------------------------
 1 file changed, 119 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 6b5cc30f3e09..39f319c49def 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2035,124 +2035,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
 	return 0;
 }
 
-static int per_file_ctx(int id, void *ptr, void *data)
-{
-	struct i915_gem_context *ctx = ptr;
-	struct seq_file *m = data;
-	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
-
-	if (!ppgtt) {
-		seq_printf(m, "  no ppgtt for context %d\n",
-			   ctx->user_handle);
-		return 0;
-	}
-
-	if (i915_gem_context_is_default(ctx))
-		seq_puts(m, "  default context:\n");
-	else
-		seq_printf(m, "  context %d:\n", ctx->user_handle);
-	ppgtt->debug_dump(ppgtt, m);
-
-	return 0;
-}
-
-static void gen8_ppgtt_info(struct seq_file *m,
-			    struct drm_i915_private *dev_priv)
-{
-	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int i;
-
-	if (!ppgtt)
-		return;
-
-	for_each_engine(engine, dev_priv, id) {
-		seq_printf(m, "%s\n", engine->name);
-		for (i = 0; i < 4; i++) {
-			u64 pdp = I915_READ(GEN8_RING_PDP_UDW(engine, i));
-			pdp <<= 32;
-			pdp |= I915_READ(GEN8_RING_PDP_LDW(engine, i));
-			seq_printf(m, "\tPDP%d 0x%016llx\n", i, pdp);
-		}
-	}
-}
-
-static void gen6_ppgtt_info(struct seq_file *m,
-			    struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	if (IS_GEN6(dev_priv))
-		seq_printf(m, "GFX_MODE: 0x%08x\n", I915_READ(GFX_MODE));
-
-	for_each_engine(engine, dev_priv, id) {
-		seq_printf(m, "%s\n", engine->name);
-		if (IS_GEN7(dev_priv))
-			seq_printf(m, "GFX_MODE: 0x%08x\n",
-				   I915_READ(RING_MODE_GEN7(engine)));
-		seq_printf(m, "PP_DIR_BASE: 0x%08x\n",
-			   I915_READ(RING_PP_DIR_BASE(engine)));
-		seq_printf(m, "PP_DIR_BASE_READ: 0x%08x\n",
-			   I915_READ(RING_PP_DIR_BASE_READ(engine)));
-		seq_printf(m, "PP_DIR_DCLV: 0x%08x\n",
-			   I915_READ(RING_PP_DIR_DCLV(engine)));
-	}
-	if (dev_priv->mm.aliasing_ppgtt) {
-		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-
-		seq_puts(m, "aliasing PPGTT:\n");
-		seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd.base.ggtt_offset);
-
-		ppgtt->debug_dump(ppgtt, m);
-	}
-
-	seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK));
-}
-
-static int i915_ppgtt_info(struct seq_file *m, void *data)
-{
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	struct drm_file *file;
-	int ret;
-
-	mutex_lock(&dev->filelist_mutex);
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		goto out_unlock;
-
-	intel_runtime_pm_get(dev_priv);
-
-	if (INTEL_GEN(dev_priv) >= 8)
-		gen8_ppgtt_info(m, dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_ppgtt_info(m, dev_priv);
-
-	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
-		struct drm_i915_file_private *file_priv = file->driver_priv;
-		struct task_struct *task;
-
-		task = get_pid_task(file->pid, PIDTYPE_PID);
-		if (!task) {
-			ret = -ESRCH;
-			goto out_rpm;
-		}
-		seq_printf(m, "\nproc: %s\n", task->comm);
-		put_task_struct(task);
-		idr_for_each(&file_priv->context_idr, per_file_ctx,
-			     (void *)(unsigned long)m);
-	}
-
-out_rpm:
-	intel_runtime_pm_put(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
-out_unlock:
-	mutex_unlock(&dev->filelist_mutex);
-	return ret;
-}
-
 static int count_irq_waiters(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
@@ -4741,7 +4623,6 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_context_status", i915_context_status, 0},
 	{"i915_forcewake_domains", i915_forcewake_domains, 0},
 	{"i915_swizzle_info", i915_swizzle_info, 0},
-	{"i915_ppgtt_info", i915_ppgtt_info, 0},
 	{"i915_llc", i915_llc, 0},
 	{"i915_edp_psr_status", i915_edp_psr_status, 0},
 	{"i915_energy_uJ", i915_energy_uJ, 0},
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 20/40] drm/i915: Track all held rpm wakerefs
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (17 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 19/40] drm/i915: Remove debugfs/i915_ppgtt_info Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-19 19:55 ` [PATCH 21/40] drm/i915: Markup paired operations on wakerefs Chris Wilson
                   ` (23 subsequent siblings)
  42 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Every time we take a wakeref, record the stack trace of where it was
taken, clearing the set if we ever drop back to no owners. For debugging
an rpm leak, we can then look at all the current wakerefs and check if
they have a matching rpm_put.
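
As a loose userspace model of the idea (the driver stores stackdepot
handles under a spinlock rather than caller names, see the track/untrack/
show helpers below): every get records its owner, the set is cleared once
the count drops back to zero, and whatever is still recorded at cleanup
points at the leaking caller.

#include <stdio.h>
#include <string.h>

static const char *owners[64];
static int count;

static void rpm_get(const char *caller)
{
	if (count < 64)
		owners[count] = caller;	/* record who took the wakeref */
	count++;
}

static void rpm_put(void)
{
	if (--count == 0)
		memset(owners, 0, sizeof(owners));	/* back to no owners */
}

static void rpm_cleanup(void)
{
	for (int i = 0; i < count; i++)
		fprintf(stderr, "leaked wakeref taken by %s\n", owners[i]);
}

int main(void)
{
	rpm_get("first");
	rpm_put();		/* balanced: the set is cleared */

	rpm_get(__func__);	/* no matching rpm_put() */
	rpm_cleanup();		/* reports the leak and its owner */
	return 0;
}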

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Kconfig.debug            |   2 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |   6 +
 drivers/gpu/drm/i915/i915_drv.c               |   8 +-
 drivers/gpu/drm/i915/i915_drv.h               |   7 +
 drivers/gpu/drm/i915/intel_drv.h              |  44 ++-
 drivers/gpu/drm/i915/intel_runtime_pm.c       | 267 ++++++++++++++++--
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   8 +-
 7 files changed, 292 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 9e36ffb5eb7c..a97929c47466 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -21,11 +21,11 @@ config DRM_I915_DEBUG
         select DEBUG_FS
         select PREEMPT_COUNT
         select I2C_CHARDEV
+        select STACKDEPOT
         select DRM_DP_AUX_CHARDEV
         select X86_MSR # used by igt/pm_rpm
         select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
         select DRM_DEBUG_MM if DRM=y
-        select STACKDEPOT if DRM=y # for DRM_DEBUG_MM
 	select DRM_DEBUG_SELFTEST
 	select SW_SYNC # signaling validation framework (igt/syncobj*)
 	select DRM_I915_SW_FENCE_DEBUG_OBJECTS
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 39f319c49def..6d2e1cc61906 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2699,6 +2699,12 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
 		   pci_power_name(pdev->current_state),
 		   pdev->current_state);
 
+	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)) {
+		struct drm_printer p = drm_seq_file_printer(m);
+
+		print_intel_runtime_pm_wakeref(dev_priv, &p);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 44e2c0f5ec50..9884902a4a9c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -895,6 +895,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv)
 	mutex_init(&dev_priv->pps_mutex);
 
 	i915_memcpy_init_early(dev_priv);
+	intel_runtime_pm_init_early(dev_priv);
 
 	ret = i915_workqueues_init(dev_priv);
 	if (ret < 0)
@@ -1789,8 +1790,7 @@ void i915_driver_unload(struct drm_device *dev)
 	i915_driver_cleanup_mmio(dev_priv);
 
 	enable_rpm_wakeref_asserts(dev_priv);
-
-	WARN_ON(atomic_read(&dev_priv->runtime_pm.wakeref_count));
+	intel_runtime_pm_cleanup(dev_priv);
 }
 
 static void i915_driver_release(struct drm_device *dev)
@@ -1994,6 +1994,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
 
 out:
 	enable_rpm_wakeref_asserts(dev_priv);
+	if (!dev_priv->uncore.user_forcewake.count)
+		intel_runtime_pm_cleanup(dev_priv);
 
 	return ret;
 }
@@ -2948,7 +2950,7 @@ static int intel_runtime_suspend(struct device *kdev)
 	}
 
 	enable_rpm_wakeref_asserts(dev_priv);
-	WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count));
+	intel_runtime_pm_cleanup(dev_priv);
 
 	if (intel_uncore_arm_unclaimed_mmio_detection(dev_priv))
 		DRM_ERROR("Unclaimed access detected prior to suspending\n");
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b902bb96e0be..f124827cee67 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -45,6 +45,7 @@
 #include <linux/pm_qos.h>
 #include <linux/reservation.h>
 #include <linux/shmem_fs.h>
+#include <linux/stackdepot.h>
 
 #include <drm/drmP.h>
 #include <drm/intel-gtt.h>
@@ -1288,6 +1289,12 @@ struct i915_runtime_pm {
 	atomic_t wakeref_count;
 	bool suspended;
 	bool irqs_enabled;
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+	spinlock_t debug_lock;
+	depot_stack_handle_t *debug_owners;
+	unsigned long debug_count;
+#endif
 };
 
 enum intel_pipe_crc_source {
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 5a07fae3578b..bc6396366016 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -40,6 +40,8 @@
 #include <drm/drm_atomic.h>
 #include <media/cec-notifier.h>
 
+struct drm_printer;
+
 /**
  * __wait_for - magic wait macro
  *
@@ -1961,6 +1963,7 @@ int intel_psr_wait_for_idle(const struct intel_crtc_state *new_crtc_state,
 			    u32 *out_value);
 
 /* intel_runtime_pm.c */
+void intel_runtime_pm_init_early(struct drm_i915_private *dev_priv);
 int intel_power_domains_init(struct drm_i915_private *);
 void intel_power_domains_cleanup(struct drm_i915_private *dev_priv);
 void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume);
@@ -1981,6 +1984,7 @@ void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume);
 void bxt_display_core_uninit(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_enable(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_disable(struct drm_i915_private *dev_priv);
+void intel_runtime_pm_cleanup(struct drm_i915_private *dev_priv);
 const char *
 intel_display_power_domain_str(enum intel_display_power_domain domain);
 
@@ -1998,23 +2002,23 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
 			    u8 req_slices);
 
 static inline void
-assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv)
+assert_rpm_device_not_suspended(struct drm_i915_private *i915)
 {
-	WARN_ONCE(dev_priv->runtime_pm.suspended,
+	WARN_ONCE(i915->runtime_pm.suspended,
 		  "Device suspended during HW access\n");
 }
 
 static inline void
-assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
+assert_rpm_wakelock_held(struct drm_i915_private *i915)
 {
-	assert_rpm_device_not_suspended(dev_priv);
-	WARN_ONCE(!atomic_read(&dev_priv->runtime_pm.wakeref_count),
+	assert_rpm_device_not_suspended(i915);
+	WARN_ONCE(!atomic_read(&i915->runtime_pm.wakeref_count),
 		  "RPM wakelock ref not held during HW access");
 }
 
 /**
  * disable_rpm_wakeref_asserts - disable the RPM assert checks
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function disable asserts that check if we hold an RPM wakelock
  * reference, while keeping the device-not-suspended checks still enabled.
@@ -2031,14 +2035,14 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
  * enable_rpm_wakeref_asserts().
  */
 static inline void
-disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
+disable_rpm_wakeref_asserts(struct drm_i915_private *i915)
 {
-	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
+	atomic_inc(&i915->runtime_pm.wakeref_count);
 }
 
 /**
  * enable_rpm_wakeref_asserts - re-enable the RPM assert checks
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function re-enables the RPM assert checks after disabling them with
  * disable_rpm_wakeref_asserts. It's meant to be used only in special
@@ -2048,15 +2052,25 @@ disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
  * disable_rpm_wakeref_asserts().
  */
 static inline void
-enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
+enable_rpm_wakeref_asserts(struct drm_i915_private *i915)
 {
-	atomic_dec(&dev_priv->runtime_pm.wakeref_count);
+	atomic_dec(&i915->runtime_pm.wakeref_count);
 }
 
-void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
-bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv);
-void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
-void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
+void intel_runtime_pm_get(struct drm_i915_private *i915);
+bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
+void intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
+void intel_runtime_pm_put(struct drm_i915_private *i915);
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
+				    struct drm_printer *p);
+#else
+static inline void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
+						  struct drm_printer *p)
+{
+}
+#endif
 
 void chv_phy_powergate_lanes(struct intel_encoder *encoder,
 			     bool override, unsigned int mask);
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 0fdabce647ab..ed8e0bcea078 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -29,6 +29,8 @@
 #include <linux/pm_runtime.h>
 #include <linux/vgaarb.h>
 
+#include <drm/drm_print.h>
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 
@@ -49,6 +51,189 @@
  * present for a given platform.
  */
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+
+#include <linux/sort.h>
+
+#define STACKDEPTH 8
+
+static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+	spin_lock_init(&i915->runtime_pm.debug_lock);
+}
+
+static noinline void
+track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+	unsigned long entries[STACKDEPTH];
+	struct stack_trace trace = {
+		.entries = entries,
+		.max_entries = ARRAY_SIZE(entries),
+		.skip = 1
+	};
+	unsigned long flags;
+	depot_stack_handle_t stack, *stacks;
+
+	if (!HAS_RUNTIME_PM(i915))
+		return;
+
+	save_stack_trace(&trace);
+	if (trace.nr_entries &&
+	    trace.entries[trace.nr_entries - 1] == ULONG_MAX)
+		trace.nr_entries--;
+
+	stack = depot_save_stack(&trace, GFP_NOWAIT | __GFP_NOWARN);
+	if (!stack)
+		return;
+
+	spin_lock_irqsave(&rpm->debug_lock, flags);
+	stacks = krealloc(rpm->debug_owners,
+			  (rpm->debug_count + 1) * sizeof(*stacks),
+			  GFP_NOWAIT | __GFP_NOWARN);
+	if (stacks) {
+		stacks[rpm->debug_count++] = stack;
+		rpm->debug_owners = stacks;
+	}
+	spin_unlock_irqrestore(&rpm->debug_lock, flags);
+}
+
+static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+	depot_stack_handle_t *stacks;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rpm->debug_lock, flags);
+	stacks = fetch_and_zero(&rpm->debug_owners);
+	rpm->debug_count = 0;
+	spin_unlock_irqrestore(&rpm->debug_lock, flags);
+
+	kfree(stacks);
+}
+
+static int cmphandle(const void *_a, const void *_b)
+{
+	const depot_stack_handle_t * const a = _a, * const b = _b;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void __print_intel_runtime_pm_wakeref(struct drm_printer *p,
+					     depot_stack_handle_t *stacks,
+					     unsigned long count)
+{
+	unsigned long entries[STACKDEPTH];
+	unsigned long i;
+	char *buf;
+
+	drm_printf(p, "Wakeref count: %lu\n", count);
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		return;
+
+	sort(stacks, count, sizeof(*stacks), cmphandle, NULL);
+
+	for (i = 0; i < count; i++) {
+		struct stack_trace trace = {
+			.entries = entries,
+			.max_entries = ARRAY_SIZE(entries),
+		};
+		depot_stack_handle_t stack = stacks[i];
+		unsigned long rep;
+
+		rep = 1;
+		while (i + 1 < count && stacks[i + 1] == stack)
+			rep++, i++;
+		depot_fetch_stack(stack, &trace);
+		snprint_stack_trace(buf, PAGE_SIZE, &trace, 2);
+		drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
+	}
+
+	kfree(buf);
+}
+
+static void show_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+	depot_stack_handle_t *stacks;
+	unsigned long flags, count;
+	struct drm_printer p;
+
+	spin_lock_irqsave(&rpm->debug_lock, flags);
+	stacks = fetch_and_zero(&rpm->debug_owners);
+	count = fetch_and_zero(&rpm->debug_count);
+	spin_unlock_irqrestore(&rpm->debug_lock, flags);
+	if (!count)
+		return;
+
+	p = drm_debug_printer("i915");
+	__print_intel_runtime_pm_wakeref(&p, stacks, count);
+
+	kfree(stacks);
+}
+
+void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
+				    struct drm_printer *p)
+{
+	depot_stack_handle_t *stacks = NULL;
+	unsigned long count = 0;
+
+	do {
+		struct i915_runtime_pm *rpm = &i915->runtime_pm;
+		unsigned long alloc = count;
+		depot_stack_handle_t *s;
+
+		spin_lock_irq(&rpm->debug_lock);
+		count = rpm->debug_count;
+		if (count == alloc) {
+			memcpy(stacks,
+			       rpm->debug_owners,
+			       count * sizeof(*stacks));
+		}
+		spin_unlock_irq(&rpm->debug_lock);
+		if (count == alloc)
+			break;
+
+		s = krealloc(stacks, count * sizeof(*stacks), GFP_KERNEL);
+		if (!s)
+			goto out;
+
+		stacks = s;
+	} while (1);
+
+	__print_intel_runtime_pm_wakeref(p, stacks, count);
+
+out:
+	kfree(stacks);
+}
+
+#else
+
+static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+}
+
+static void track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+}
+
+static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+}
+
+static void show_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+{
+}
+
+#endif
+
 bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
 					 enum i915_power_well_id power_well_id);
 
@@ -4004,7 +4189,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
 
 /**
  * intel_runtime_pm_get - grab a runtime pm reference
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function grabs a device-level runtime pm reference (mostly used for GEM
  * code to ensure the GTT or GT is on) and ensures that it is powered up.
@@ -4012,22 +4197,24 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
  * Any runtime pm reference obtained by this function must have a symmetric
  * call to intel_runtime_pm_put() to release the reference again.
  */
-void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_get(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 	int ret;
 
 	ret = pm_runtime_get_sync(kdev);
 	WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret);
 
-	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
-	assert_rpm_wakelock_held(dev_priv);
+	atomic_inc(&i915->runtime_pm.wakeref_count);
+	assert_rpm_wakelock_held(i915);
+
+	track_intel_runtime_pm_wakeref(i915);
 }
 
 /**
  * intel_runtime_pm_get_if_in_use - grab a runtime pm reference if device in use
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function grabs a device-level runtime pm reference if the device is
  * already in use and ensures that it is powered up. It is illegal to try
@@ -4038,10 +4225,10 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
  *
  * Returns: True if the wakeref was acquired, or False otherwise.
  */
-bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
+bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
 {
 	if (IS_ENABLED(CONFIG_PM)) {
-		struct pci_dev *pdev = dev_priv->drm.pdev;
+		struct pci_dev *pdev = i915->drm.pdev;
 		struct device *kdev = &pdev->dev;
 
 		/*
@@ -4054,15 +4241,17 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
 			return false;
 	}
 
-	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
-	assert_rpm_wakelock_held(dev_priv);
+	atomic_inc(&i915->runtime_pm.wakeref_count);
+	assert_rpm_wakelock_held(i915);
+
+	track_intel_runtime_pm_wakeref(i915);
 
 	return true;
 }
 
 /**
  * intel_runtime_pm_get_noresume - grab a runtime pm reference
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function grabs a device-level runtime pm reference (mostly used for GEM
  * code to ensure the GTT or GT is on).
@@ -4077,32 +4266,35 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
  * Any runtime pm reference obtained by this function must have a symmetric
  * call to intel_runtime_pm_put() to release the reference again.
  */
-void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
-	assert_rpm_wakelock_held(dev_priv);
+	assert_rpm_wakelock_held(i915);
 	pm_runtime_get_noresume(kdev);
 
-	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
+	atomic_inc(&i915->runtime_pm.wakeref_count);
+
+	track_intel_runtime_pm_wakeref(i915);
 }
 
 /**
  * intel_runtime_pm_put - release a runtime pm reference
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function drops the device-level runtime pm reference obtained by
  * intel_runtime_pm_get() and might power down the corresponding
  * hardware block right away if this is the last reference.
  */
-void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_put(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
-	assert_rpm_wakelock_held(dev_priv);
-	atomic_dec(&dev_priv->runtime_pm.wakeref_count);
+	assert_rpm_wakelock_held(i915);
+	if (atomic_dec_and_test(&i915->runtime_pm.wakeref_count))
+		untrack_intel_runtime_pm_wakeref(i915);
 
 	pm_runtime_mark_last_busy(kdev);
 	pm_runtime_put_autosuspend(kdev);
@@ -4110,7 +4302,7 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
 
 /**
  * intel_runtime_pm_enable - enable runtime pm
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function enables runtime pm at the end of the driver load sequence.
  *
@@ -4118,9 +4310,9 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
  * subordinate display power domains. That is done by
  * intel_power_domains_enable().
  */
-void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_enable(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
 	/*
@@ -4142,7 +4334,7 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
 	 * so the driver's own RPM reference tracking asserts also work on
 	 * platforms without RPM support.
 	 */
-	if (!HAS_RUNTIME_PM(dev_priv)) {
+	if (!HAS_RUNTIME_PM(i915)) {
 		int ret;
 
 		pm_runtime_dont_use_autosuspend(kdev);
@@ -4160,17 +4352,36 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
 	pm_runtime_put_autosuspend(kdev);
 }
 
-void intel_runtime_pm_disable(struct drm_i915_private *dev_priv)
+void intel_runtime_pm_disable(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
 	/* Transfer rpm ownership back to core */
-	WARN(pm_runtime_get_sync(&dev_priv->drm.pdev->dev) < 0,
+	WARN(pm_runtime_get_sync(kdev) < 0,
 	     "Failed to pass rpm ownership back to core\n");
 
 	pm_runtime_dont_use_autosuspend(kdev);
 
-	if (!HAS_RUNTIME_PM(dev_priv))
+	if (!HAS_RUNTIME_PM(i915))
 		pm_runtime_put(kdev);
 }
+
+void intel_runtime_pm_cleanup(struct drm_i915_private *i915)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+
+	if (WARN(atomic_read(&rpm->wakeref_count),
+		 "i915->runtime_pm.wakeref_count=%d on cleanup\n",
+		 atomic_read(&rpm->wakeref_count))) {
+		show_intel_runtime_pm_wakeref(i915);
+		atomic_set(&rpm->wakeref_count, 0);
+	}
+
+	untrack_intel_runtime_pm_wakeref(i915);
+}
+
+void intel_runtime_pm_init_early(struct drm_i915_private *i915)
+{
+	init_intel_runtime_pm_wakeref(i915);
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 43ed8b28aeaa..0eb283e7fc96 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -154,15 +154,17 @@ struct drm_i915_private *mock_gem_device(void)
 	pdev->dev.archdata.iommu = (void *)-1;
 #endif
 
+	i915 = (struct drm_i915_private *)(pdev + 1);
+	pci_set_drvdata(pdev, i915);
+
+	intel_runtime_pm_init_early(i915);
+
 	dev_pm_domain_set(&pdev->dev, &pm_domain);
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_dont_use_autosuspend(&pdev->dev);
 	if (pm_runtime_enabled(&pdev->dev))
 		WARN_ON(pm_runtime_get_sync(&pdev->dev));
 
-	i915 = (struct drm_i915_private *)(pdev + 1);
-	pci_set_drvdata(pdev, i915);
-
 	err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev);
 	if (err) {
 		pr_err("Failed to initialise mock GEM device: err=%d\n", err);
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 21/40] drm/i915: Markup paired operations on wakerefs
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (18 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 20/40] drm/i915: Track all held rpm wakerefs Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-19 19:55 ` [PATCH 22/40] drm/i915: Syntatic sugar for using intel_runtime_pm Chris Wilson
                   ` (22 subsequent siblings)
  42 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

The majority of runtime-pm operations are bounded and scoped within a
function, which makes it easy to verify that the wakerefs are handled
correctly. We can employ the compiler to help us, and reduce the number
of wakerefs tracked when debugging, by passing around cookies provided
by the various rpm_get functions to their rpm_put counterpart. This
makes the pairing explicit, and given the required wakeref cookie the
compiler can verify that we pass an initialised value to the rpm_put
(quite handy for double checking error paths).

For regular builds, the compiler should be able to eliminate the unused
local variables and the program growth should be minimal. Fwiw, it came
out as a net improvement as gcc was able to refactor rpm_get and
rpm_get_if_in_use together,

add/remove: 1/1 grow/shrink: 20/9 up/down: 191/-268 (-77)
Function                                     old     new   delta
intel_runtime_pm_put_unchecked                 -     136    +136
i915_gem_unpark                              396     406     +10
intel_runtime_pm_get                         135     141      +6
intel_runtime_pm_get_noresume                136     141      +5
i915_perf_open_ioctl                        4375    4379      +4
i915_gpu_busy                                 72      76      +4
i915_gem_idle_work_handler                   954     958      +4
capture                                     6814    6818      +4
mock_gem_device                             1433    1436      +3
__execlists_submission_tasklet              2573    2576      +3
i915_sample                                  756     758      +2
intel_guc_submission_disable                 364     365      +1
igt_mmap_offset_exhaustion                  1035    1036      +1
i915_runtime_pm_status                       257     258      +1
i915_rps_boost_info                         1358    1359      +1
i915_hangcheck_info                         1229    1230      +1
i915_gem_switch_to_kernel_context            682     683      +1
i915_gem_suspend                             410     411      +1
i915_gem_resume                              254     255      +1
i915_gem_park                                190     191      +1
i915_engine_info                             279     280      +1
intel_rps_mark_interactive                   194     193      -1
i915_hangcheck_elapsed                      1526    1525      -1
i915_gem_wait_for_idle                       298     297      -1
i915_drop_caches_set                         555     554      -1
execlists_submission_tasklet                 126     125      -1
aliasing_gtt_bind_vma                        235     234      -1
i915_gem_retire_work_handler                 144     142      -2
igt_evict_contexts.part                      916     910      -6
intel_runtime_pm_get_if_in_use               141      23    -118
intel_runtime_pm_put                         136       -    -136
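
A minimal sketch of the resulting calling convention (userspace stand-ins;
in the driver the cookie is an intel_wakeref_t returned by
intel_runtime_pm_get() and consumed by intel_runtime_pm_put()):

#include <stdio.h>

typedef unsigned long wakeref_t;	/* stand-in for intel_wakeref_t */

static wakeref_t rpm_get(void)
{
	/* returns a cookie identifying this acquisition */
	return 0xdeadbeef;
}

static void rpm_put(wakeref_t wakeref)
{
	(void)wakeref;	/* releasing requires the matching cookie */
}

static int do_hw_access(void)
{
	wakeref_t wakeref = rpm_get();

	/* ... hardware access goes here ... */

	rpm_put(wakeref);	/* every exit path must hand the cookie back */
	return 0;
}

int main(void)
{
	return do_hw_access();
}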

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gvt/aperture_gm.c        |   8 +-
 drivers/gpu/drm/i915/gvt/gvt.h                |   2 +-
 drivers/gpu/drm/i915/gvt/sched_policy.c       |   2 +-
 drivers/gpu/drm/i915/gvt/scheduler.c          |   4 +-
 drivers/gpu/drm/i915/i915_debugfs.c           | 130 +++++++++++-------
 drivers/gpu/drm/i915/i915_drv.h               |   6 +-
 drivers/gpu/drm/i915/i915_gem.c               |  57 +++++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    |   5 +-
 drivers/gpu/drm/i915/i915_gem_fence_reg.c     |   6 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  22 +--
 drivers/gpu/drm/i915/i915_gem_shrinker.c      |  27 ++--
 drivers/gpu/drm/i915/i915_irq.c               |   5 +-
 drivers/gpu/drm/i915/i915_perf.c              |  10 +-
 drivers/gpu/drm/i915/i915_pmu.c               |  26 ++--
 drivers/gpu/drm/i915/i915_sysfs.c             |  24 ++--
 drivers/gpu/drm/i915/intel_display.c          |   5 +-
 drivers/gpu/drm/i915/intel_drv.h              |  15 +-
 drivers/gpu/drm/i915/intel_engine_cs.c        |  12 +-
 drivers/gpu/drm/i915/intel_fbdev.c            |   9 +-
 drivers/gpu/drm/i915/intel_guc_log.c          |  15 +-
 drivers/gpu/drm/i915/intel_hotplug.c          |   5 +-
 drivers/gpu/drm/i915/intel_huc.c              |   5 +-
 drivers/gpu/drm/i915/intel_panel.c            |   5 +-
 drivers/gpu/drm/i915/intel_runtime_pm.c       |  89 +++++++++---
 drivers/gpu/drm/i915/intel_uncore.c           |   5 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c   |   5 +-
 drivers/gpu/drm/i915/selftests/i915_gem.c     |  29 ++--
 .../drm/i915/selftests/i915_gem_coherency.c   |   5 +-
 .../gpu/drm/i915/selftests/i915_gem_context.c |  17 ++-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  11 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  10 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  |  18 ++-
 drivers/gpu/drm/i915/selftests/i915_request.c |  22 +--
 drivers/gpu/drm/i915/selftests/intel_guc.c    |  10 +-
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |   5 +-
 drivers/gpu/drm/i915/selftests/intel_lrc.c    |  20 +--
 .../drm/i915/selftests/intel_workarounds.c    |  12 +-
 37 files changed, 427 insertions(+), 236 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index fe754022e356..6372ece10880 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -178,7 +178,7 @@ static void free_vgpu_fence(struct intel_vgpu *vgpu)
 	}
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 }
 
 static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
@@ -204,7 +204,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
 	_clear_vgpu_fence(vgpu);
 
 	mutex_unlock(&dev_priv->drm.struct_mutex);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	return 0;
 out_free_fence:
 	gvt_vgpu_err("Failed to alloc fences\n");
@@ -217,7 +217,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
 		vgpu->fence.regs[i] = NULL;
 	}
 	mutex_unlock(&dev_priv->drm.struct_mutex);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	return -ENOSPC;
 }
 
@@ -315,7 +315,7 @@ void intel_vgpu_reset_resource(struct intel_vgpu *vgpu)
 
 	intel_runtime_pm_get(dev_priv);
 	_clear_vgpu_fence(vgpu);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 31f6cdbe5c42..57438fb60b77 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -593,7 +593,7 @@ static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv)
 
 static inline void mmio_hw_access_post(struct drm_i915_private *dev_priv)
 {
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
index c32e7d5e8629..f04b3b965bfc 100644
--- a/drivers/gpu/drm/i915/gvt/sched_policy.c
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@@ -474,6 +474,6 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
 		}
 	}
 	spin_unlock_bh(&scheduler->mmio_context_lock);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	mutex_unlock(&vgpu->gvt->sched_lock);
 }
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index ea34003d6dd2..09e2c425bcf3 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -969,7 +969,7 @@ static int workload_thread(void *priv)
 			intel_uncore_forcewake_put(gvt->dev_priv,
 					FORCEWAKE_ALL);
 
-		intel_runtime_pm_put(gvt->dev_priv);
+		intel_runtime_pm_put_unchecked(gvt->dev_priv);
 		if (ret && (vgpu_is_vm_unhealthy(ret)))
 			enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
 	}
@@ -1390,7 +1390,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
 		mutex_lock(&dev_priv->drm.struct_mutex);
 		ret = intel_gvt_scan_and_shadow_workload(workload);
 		mutex_unlock(&dev_priv->drm.struct_mutex);
-		intel_runtime_pm_put(dev_priv);
+		intel_runtime_pm_put_unchecked(dev_priv);
 	}
 
 	if (ret && (vgpu_is_vm_unhealthy(ret))) {
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 6d2e1cc61906..dbfe4e456d97 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -675,9 +675,10 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int i, pipe;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	if (IS_CHERRYVIEW(dev_priv)) {
 		seq_printf(m, "Master Interrupt Control:\t%08x\n",
@@ -878,7 +879,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 		}
 	}
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -949,10 +950,11 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
 	struct i915_gpu_state *gpu;
+	intel_wakeref_t wakeref;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	gpu = i915_capture_gpu_state(i915);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	if (!gpu)
 		return -ENOMEM;
 
@@ -1006,15 +1008,16 @@ i915_next_seqno_set(void *data, u64 val)
 {
 	struct drm_i915_private *dev_priv = data;
 	struct drm_device *dev = &dev_priv->drm;
+	intel_wakeref_t wakeref;
 	int ret;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
 		return ret;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	ret = i915_gem_set_global_seqno(dev, val);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	mutex_unlock(&dev->struct_mutex);
 
@@ -1029,9 +1032,10 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	intel_wakeref_t wakeref;
 	int ret = 0;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	if (IS_GEN5(dev_priv)) {
 		u16 rgvswctl = I915_READ16(MEMSWCTL);
@@ -1243,7 +1247,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 	seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq);
 	seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	return ret;
 }
 
@@ -1282,6 +1286,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	u64 acthd[I915_NUM_ENGINES];
 	u32 seqno[I915_NUM_ENGINES];
 	struct intel_instdone instdone;
+	intel_wakeref_t wakeref;
 	enum intel_engine_id id;
 
 	if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
@@ -1300,7 +1305,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 		return 0;
 	}
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	for_each_engine(engine, dev_priv, id) {
 		acthd[id] = intel_engine_get_active_head(engine);
@@ -1309,7 +1314,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 
 	intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer))
 		seq_printf(m, "Hangcheck active, timer fires in %dms\n",
@@ -1585,9 +1590,10 @@ static int gen6_drpc_info(struct seq_file *m)
 static int i915_drpc_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 	int err;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		err = vlv_drpc_info(m);
@@ -1596,7 +1602,7 @@ static int i915_drpc_info(struct seq_file *m, void *unused)
 	else
 		err = ironlake_drpc_info(m);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return err;
 }
@@ -1618,11 +1624,12 @@ static int i915_fbc_status(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct intel_fbc *fbc = &dev_priv->fbc;
+	intel_wakeref_t wakeref;
 
 	if (!HAS_FBC(dev_priv))
 		return -ENODEV;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	mutex_lock(&fbc->lock);
 
 	if (intel_fbc_is_active(dev_priv))
@@ -1649,7 +1656,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused)
 	}
 
 	mutex_unlock(&fbc->lock);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -1694,11 +1701,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_false_color_fops,
 static int i915_ips_status(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 
 	if (!HAS_IPS(dev_priv))
 		return -ENODEV;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	seq_printf(m, "Enabled by kernel parameter: %s\n",
 		   yesno(i915_modparams.enable_ips));
@@ -1712,7 +1720,7 @@ static int i915_ips_status(struct seq_file *m, void *unused)
 			seq_puts(m, "Currently: disabled\n");
 	}
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -1720,9 +1728,10 @@ static int i915_ips_status(struct seq_file *m, void *unused)
 static int i915_sr_status(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 	bool sr_enabled = false;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
 
 	if (INTEL_GEN(dev_priv) >= 9)
@@ -1740,7 +1749,7 @@ static int i915_sr_status(struct seq_file *m, void *unused)
 		sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
 
 	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	seq_printf(m, "self-refresh: %s\n", enableddisabled(sr_enabled));
 
@@ -1779,13 +1788,14 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	unsigned int max_gpu_freq, min_gpu_freq;
+	intel_wakeref_t wakeref;
 	int gpu_freq, ia_freq;
 	int ret;
 
 	if (!HAS_LLC(dev_priv))
 		return -ENODEV;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
 	if (ret)
@@ -1818,7 +1828,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 	mutex_unlock(&dev_priv->pcu_lock);
 
 out:
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	return ret;
 }
 
@@ -1991,8 +2001,9 @@ static const char *swizzle_string(unsigned swizzle)
 static int i915_swizzle_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
 		   swizzle_string(dev_priv->mm.bit_6_swizzle_x));
@@ -2030,7 +2041,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
 	if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
 		seq_puts(m, "L-shaped memory detected\n");
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -2147,6 +2158,7 @@ static int i915_llc(struct seq_file *m, void *data)
 static int i915_huc_load_status_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 	struct drm_printer p;
 
 	if (!HAS_HUC(dev_priv))
@@ -2155,9 +2167,9 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data)
 	p = drm_seq_file_printer(m);
 	intel_uc_fw_dump(&dev_priv->huc.fw, &p);
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -2165,6 +2177,7 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data)
 static int i915_guc_load_status_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 	struct drm_printer p;
 	u32 tmp, i;
 
@@ -2174,7 +2187,7 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
 	p = drm_seq_file_printer(m);
 	intel_uc_fw_dump(&dev_priv->guc.fw, &p);
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	tmp = I915_READ(GUC_STATUS);
 
@@ -2189,7 +2202,7 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
 	for (i = 0; i < 16; i++)
 		seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i)));
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -2547,6 +2560,7 @@ psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m)
 static int i915_edp_psr_status(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 	u32 psrperf = 0;
 	bool enabled = false;
 	bool sink_support;
@@ -2559,7 +2573,7 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
 	if (!sink_support)
 		return 0;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	mutex_lock(&dev_priv->psr.lock);
 	seq_printf(m, "PSR mode: %s\n",
@@ -2598,7 +2612,7 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
 			   dev_priv->psr.last_exit);
 	}
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	return 0;
 }
 
@@ -2607,6 +2621,7 @@ i915_edp_psr_debug_set(void *data, u64 val)
 {
 	struct drm_i915_private *dev_priv = data;
 	struct drm_modeset_acquire_ctx ctx;
+	intel_wakeref_t wakeref;
 	int ret;
 
 	if (!CAN_PSR(dev_priv))
@@ -2614,7 +2629,7 @@ i915_edp_psr_debug_set(void *data, u64 val)
 
 	DRM_DEBUG_KMS("Setting PSR debug to %llx\n", val);
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE);
 
@@ -2629,7 +2644,7 @@ i915_edp_psr_debug_set(void *data, u64 val)
 	drm_modeset_drop_locks(&ctx);
 	drm_modeset_acquire_fini(&ctx);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return ret;
 }
@@ -2654,15 +2669,16 @@ static int i915_energy_uJ(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	unsigned long long power;
+	intel_wakeref_t wakeref;
 	u32 units;
 
 	if (INTEL_GEN(dev_priv) < 6)
 		return -ENODEV;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) {
-		intel_runtime_pm_put(dev_priv);
+		intel_runtime_pm_put(dev_priv, wakeref);
 		return -ENODEV;
 	}
 
@@ -2670,7 +2686,7 @@ static int i915_energy_uJ(struct seq_file *m, void *data)
 	power = I915_READ(MCH_SECP_NRG_STTS);
 	power = (1000000 * power) >> units; /* convert to uJ */
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	seq_printf(m, "%llu", power);
 
@@ -2739,6 +2755,7 @@ static int i915_power_domain_info(struct seq_file *m, void *unused)
 static int i915_dmc_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 	struct intel_csr *csr;
 
 	if (!HAS_CSR(dev_priv))
@@ -2746,7 +2763,7 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
 
 	csr = &dev_priv->csr;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	seq_printf(m, "fw loaded: %s\n", yesno(csr->dmc_payload != NULL));
 	seq_printf(m, "path: %s\n", csr->fw_path);
@@ -2773,7 +2790,7 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
 	seq_printf(m, "ssp base: 0x%08x\n", I915_READ(CSR_SSP_BASE));
 	seq_printf(m, "htp: 0x%08x\n", I915_READ(CSR_HTP_SKL));
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -3062,8 +3079,10 @@ static int i915_display_info(struct seq_file *m, void *unused)
 	struct intel_crtc *crtc;
 	struct drm_connector *connector;
 	struct drm_connector_list_iter conn_iter;
+	intel_wakeref_t wakeref;
+
+	wakeref = intel_runtime_pm_get(dev_priv);
 
-	intel_runtime_pm_get(dev_priv);
 	seq_printf(m, "CRTC info\n");
 	seq_printf(m, "---------\n");
 	for_each_intel_crtc(dev, crtc) {
@@ -3111,7 +3130,7 @@ static int i915_display_info(struct seq_file *m, void *unused)
 	drm_connector_list_iter_end(&conn_iter);
 	mutex_unlock(&dev->mode_config.mutex);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -3120,10 +3139,11 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct intel_engine_cs *engine;
+	intel_wakeref_t wakeref;
 	enum intel_engine_id id;
 	struct drm_printer p;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	seq_printf(m, "GT awake? %s (epoch %u)\n",
 		   yesno(dev_priv->gt.awake), dev_priv->gt.epoch);
@@ -3136,7 +3156,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 	for_each_engine(engine, dev_priv, id)
 		intel_engine_dump(engine, &p, "%s\n", engine->name);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return 0;
 }
@@ -3247,6 +3267,7 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf,
 {
 	struct seq_file *m = file->private_data;
 	struct drm_i915_private *dev_priv = m->private;
+	intel_wakeref_t wakeref;
 	int ret;
 	bool enable;
 
@@ -3254,13 +3275,15 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf,
 	if (ret < 0)
 		return ret;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
+
 	if (!dev_priv->ipc_enabled && enable)
 		DRM_INFO("Enabling IPC: WM will be proper only after next commit\n");
 	dev_priv->wm.distrust_bios_wm = true;
 	dev_priv->ipc_enabled = enable;
 	intel_enable_ipc(dev_priv);
-	intel_runtime_pm_put(dev_priv);
+
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return len;
 }
@@ -4047,9 +4070,13 @@ i915_drop_caches_set(void *data, u64 val)
 						     MAX_SCHEDULE_TIMEOUT);
 
 		if (val & DROP_RESET_SEQNO) {
-			intel_runtime_pm_get(i915);
+			intel_wakeref_t wakeref;
+
+			wakeref = intel_runtime_pm_get(i915);
+
 			ret = i915_gem_set_global_seqno(&i915->drm, 1);
-			intel_runtime_pm_put(i915);
+
+			intel_runtime_pm_put(i915, wakeref);
 		}
 
 		if (val & DROP_RETIRE)
@@ -4099,16 +4126,17 @@ static int
 i915_cache_sharing_get(void *data, u64 *val)
 {
 	struct drm_i915_private *dev_priv = data;
+	intel_wakeref_t wakeref;
 	u32 snpcr;
 
 	if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv)))
 		return -ENODEV;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
 
@@ -4119,6 +4147,7 @@ static int
 i915_cache_sharing_set(void *data, u64 val)
 {
 	struct drm_i915_private *dev_priv = data;
+	intel_wakeref_t wakeref;
 	u32 snpcr;
 
 	if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv)))
@@ -4127,7 +4156,7 @@ i915_cache_sharing_set(void *data, u64 val)
 	if (val > 3)
 		return -EINVAL;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	DRM_DEBUG_DRIVER("Manually setting uncore sharing to %llu\n", val);
 
 	/* Update the cache sharing policy here as well */
@@ -4136,7 +4165,7 @@ i915_cache_sharing_set(void *data, u64 val)
 	snpcr |= (val << GEN6_MBC_SNPCR_SHIFT);
 	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	return 0;
 }
 
@@ -4358,6 +4387,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct sseu_dev_info sseu;
+	intel_wakeref_t wakeref;
 
 	if (INTEL_GEN(dev_priv) < 8)
 		return -ENODEV;
@@ -4372,7 +4402,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 	sseu.max_eus_per_subslice =
 		INTEL_INFO(dev_priv)->sseu.max_eus_per_subslice;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	if (IS_CHERRYVIEW(dev_priv)) {
 		cherryview_sseu_device_status(dev_priv, &sseu);
@@ -4384,7 +4414,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 		gen10_sseu_device_status(dev_priv, &sseu);
 	}
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	i915_print_sseu_info(m, false, &sseu);
 
@@ -4412,7 +4442,7 @@ static int i915_forcewake_release(struct inode *inode, struct file *file)
 		return 0;
 
 	intel_uncore_forcewake_user_put(i915);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put_unchecked(i915);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f124827cee67..a4cd27a7108f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -128,6 +128,8 @@ bool i915_error_injected(void);
 	__i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \
 		      fmt, ##__VA_ARGS__)
 
+typedef depot_stack_handle_t intel_wakeref_t;
+
 typedef struct {
 	uint32_t val;
 } uint_fixed_16_16_t;
@@ -1464,6 +1466,8 @@ struct i915_perf_stream {
 	 */
 	struct list_head link;
 
+	intel_wakeref_t wakeref;
+
 	/**
 	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
 	 * properties given when opening a stream, representing the contents
@@ -2129,7 +2133,7 @@ struct drm_i915_private {
 		 * In order to reduce the effect on performance, there
 		 * is a slight delay before we do so.
 		 */
-		bool awake;
+		intel_wakeref_t awake;
 
 		/**
 		 * The number of times we have woken up.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2fa75f2a1980..b6da68846164 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -139,6 +139,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev)
 
 static u32 __i915_gem_park(struct drm_i915_private *i915)
 {
+	intel_wakeref_t wakeref;
+
 	GEM_TRACE("\n");
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -169,14 +171,15 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
 	i915_pmu_gt_parked(i915);
 	i915_vma_parked(i915);
 
-	i915->gt.awake = false;
+	wakeref = fetch_and_zero(&i915->gt.awake);
+	GEM_BUG_ON(!wakeref);
 
 	if (INTEL_GEN(i915) >= 6)
 		gen6_rps_idle(i915);
 
 	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ);
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	return i915->gt.epoch;
 }
@@ -205,7 +208,8 @@ void i915_gem_unpark(struct drm_i915_private *i915)
 	if (i915->gt.awake)
 		return;
 
-	intel_runtime_pm_get_noresume(i915);
+	i915->gt.awake = intel_runtime_pm_get_noresume(i915);
+	GEM_BUG_ON(!i915->gt.awake);
 
 	/*
 	 * It seems that the DMC likes to transition between the DC states a lot
@@ -220,7 +224,6 @@ void i915_gem_unpark(struct drm_i915_private *i915)
 	 */
 	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
 
-	i915->gt.awake = true;
 	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
 		i915->gt.epoch = 1;
 
@@ -783,6 +786,8 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
 
 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 {
+	intel_wakeref_t wakeref;
+
 	/*
 	 * No actual flushing is required for the GTT write domain for reads
 	 * from the GTT domain. Writes to it "immediately" go to main memory
@@ -809,13 +814,13 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 
 	i915_gem_chipset_flush(dev_priv);
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	spin_lock_irq(&dev_priv->uncore.lock);
 
 	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
 
 	spin_unlock_irq(&dev_priv->uncore.lock);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 }
 
 static void
@@ -1178,6 +1183,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct i915_ggtt *ggtt = &i915->ggtt;
+	intel_wakeref_t wakeref;
 	struct drm_mm_node node;
 	struct i915_vma *vma;
 	void __user *user_data;
@@ -1188,7 +1194,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 				       PIN_MAPPABLE |
 				       PIN_NONFAULT |
@@ -1261,7 +1267,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		i915_vma_unpin(vma);
 	}
 out_unlock:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	return ret;
@@ -1363,6 +1369,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct i915_ggtt *ggtt = &i915->ggtt;
+	intel_wakeref_t wakeref;
 	struct drm_mm_node node;
 	struct i915_vma *vma;
 	u64 remain, offset;
@@ -1381,13 +1388,14 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		 * This easily dwarfs any performance advantage from
 		 * using the cache bypass of indirect GGTT access.
 		 */
-		if (!intel_runtime_pm_get_if_in_use(i915)) {
+		wakeref = intel_runtime_pm_get_if_in_use(i915);
+		if (!wakeref) {
 			ret = -EFAULT;
 			goto out_unlock;
 		}
 	} else {
 		/* No backing pages, no fallback, we must force GGTT access */
-		intel_runtime_pm_get(i915);
+		wakeref = intel_runtime_pm_get(i915);
 	}
 
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
@@ -1469,7 +1477,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		i915_vma_unpin(vma);
 	}
 out_rpm:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 out_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
 	return ret;
@@ -2020,6 +2028,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	bool write = area->vm_flags & VM_WRITE;
+	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	pgoff_t page_offset;
 	int ret;
@@ -2049,7 +2058,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	if (ret)
 		goto err;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
@@ -2127,7 +2136,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 err_unlock:
 	mutex_unlock(&dev->struct_mutex);
 err_rpm:
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	i915_gem_object_unpin_pages(obj);
 err:
 	switch (ret) {
@@ -2200,6 +2209,7 @@ void
 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	intel_wakeref_t wakeref;
 
 	/* Serialisation between user GTT access and our code depends upon
 	 * revoking the CPU's PTE whilst the mutex is held. The next user
@@ -2210,7 +2220,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	 * wakeref.
 	 */
 	lockdep_assert_held(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	if (!obj->userfault_count)
 		goto out;
@@ -2227,7 +2237,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	wmb();
 
 out:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 }
 
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
@@ -4868,8 +4878,9 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 				    struct llist_node *freed)
 {
 	struct drm_i915_gem_object *obj, *on;
+	intel_wakeref_t wakeref;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	llist_for_each_entry_safe(obj, on, freed, freed) {
 		struct i915_vma *vma, *vn;
 
@@ -4930,7 +4941,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 		if (on)
 			cond_resched();
 	}
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 }
 
 static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
@@ -5032,13 +5043,14 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
 
 void i915_gem_sanitize(struct drm_i915_private *i915)
 {
+	intel_wakeref_t wakeref;
 	int err;
 
 	GEM_TRACE("\n");
 
 	mutex_lock(&i915->drm.struct_mutex);
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
 
 	/*
@@ -5065,7 +5077,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 		intel_engines_sanitize(i915);
 
 	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -5073,11 +5085,12 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 
 int i915_gem_suspend(struct drm_i915_private *i915)
 {
+	intel_wakeref_t wakeref;
 	int ret;
 
 	GEM_TRACE("\n");
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	intel_suspend_gt_powersave(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
@@ -5129,12 +5142,12 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	if (WARN_ON(!intel_engines_are_idle(i915)))
 		i915_gem_set_wedged(i915); /* no hope, discard everything */
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	return 0;
 
 err_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 09187286d346..42b32e8449a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2200,6 +2200,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	struct i915_execbuffer eb;
 	struct dma_fence *in_fence = NULL;
 	struct sync_file *out_fence = NULL;
+	intel_wakeref_t wakeref;
 	int out_fence_fd = -1;
 	int err;
 
@@ -2270,7 +2271,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	 * wakeref that we hold until the GPU has been idle for at least
 	 * 100ms.
 	 */
-	intel_runtime_pm_get(eb.i915);
+	wakeref = intel_runtime_pm_get(eb.i915);
 
 	err = i915_mutex_lock_interruptible(dev);
 	if (err)
@@ -2422,7 +2423,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		eb_release_vmas(&eb);
 	mutex_unlock(&dev->struct_mutex);
 err_rpm:
-	intel_runtime_pm_put(eb.i915);
+	intel_runtime_pm_put(eb.i915, wakeref);
 	i915_gem_context_put(eb.ctx);
 err_destroy:
 	eb_destroy(&eb);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index d548ac05ccd7..33148d12dfd5 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -210,6 +210,7 @@ static void fence_write(struct drm_i915_fence_reg *fence,
 static int fence_update(struct drm_i915_fence_reg *fence,
 			struct i915_vma *vma)
 {
+	intel_wakeref_t wakeref;
 	int ret;
 
 	if (vma) {
@@ -257,9 +258,10 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 	 * If the device is currently powered down, we will defer the write
 	 * to the runtime resume, see i915_gem_restore_fences().
 	 */
-	if (intel_runtime_pm_get_if_in_use(fence->i915)) {
+	wakeref = intel_runtime_pm_get_if_in_use(fence->i915);
+	if (wakeref) {
 		fence_write(fence, vma);
-		intel_runtime_pm_put(fence->i915);
+		intel_runtime_pm_put(fence->i915, wakeref);
 	}
 
 	if (vma) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4877d4d582c2..04710d0ecf8c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2712,6 +2712,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
 	struct drm_i915_gem_object *obj = vma->obj;
+	intel_wakeref_t wakeref;
 	u32 pte_flags;
 
 	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
@@ -2719,9 +2720,9 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	if (i915_gem_object_is_readonly(obj))
 		pte_flags |= PTE_READ_ONLY;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
 
@@ -2738,10 +2739,11 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 static void ggtt_unbind_vma(struct i915_vma *vma)
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
+	intel_wakeref_t wakeref;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 }
 
 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
@@ -2773,9 +2775,11 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	}
 
 	if (flags & I915_VMA_GLOBAL_BIND) {
-		intel_runtime_pm_get(i915);
+		intel_wakeref_t wakeref;
+
+		wakeref = intel_runtime_pm_get(i915);
 		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
-		intel_runtime_pm_put(i915);
+		intel_runtime_pm_put(i915, wakeref);
 	}
 
 	return 0;
@@ -2786,9 +2790,11 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
 	struct drm_i915_private *i915 = vma->vm->i915;
 
 	if (vma->flags & I915_VMA_GLOBAL_BIND) {
-		intel_runtime_pm_get(i915);
+		intel_wakeref_t wakeref;
+
+		wakeref = intel_runtime_pm_get(i915);
 		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
-		intel_runtime_pm_put(i915);
+		intel_runtime_pm_put(i915, wakeref);
 	}
 
 	if (vma->flags & I915_VMA_LOCAL_BIND) {
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index ea90d3a0d511..f25e4c7c71b1 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -156,6 +156,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
 		{ &i915->mm.bound_list, I915_SHRINK_BOUND },
 		{ NULL, 0 },
 	}, *phase;
+	intel_wakeref_t wakeref = 0;
 	unsigned long count = 0;
 	unsigned long scanned = 0;
 	bool unlock;
@@ -185,9 +186,11 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	 * device just to recover a little memory. If absolutely necessary,
 	 * we will force the wake during oom-notifier.
 	 */
-	if ((flags & I915_SHRINK_BOUND) &&
-	    !intel_runtime_pm_get_if_in_use(i915))
-		flags &= ~I915_SHRINK_BOUND;
+	if (flags & I915_SHRINK_BOUND) {
+		wakeref = intel_runtime_pm_get_if_in_use(i915);
+		if (!wakeref)
+			flags &= ~I915_SHRINK_BOUND;
+	}
 
 	/*
 	 * As we may completely rewrite the (un)bound list whilst unbinding
@@ -268,7 +271,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	}
 
 	if (flags & I915_SHRINK_BOUND)
-		intel_runtime_pm_put(i915);
+		intel_runtime_pm_put(i915, wakeref);
 
 	i915_retire_requests(i915);
 
@@ -295,14 +298,15 @@ i915_gem_shrink(struct drm_i915_private *i915,
  */
 unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
 {
+	intel_wakeref_t wakeref;
 	unsigned long freed;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	freed = i915_gem_shrink(i915, -1UL, NULL,
 				I915_SHRINK_BOUND |
 				I915_SHRINK_UNBOUND |
 				I915_SHRINK_ACTIVE);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	return freed;
 }
@@ -373,14 +377,16 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 					 I915_SHRINK_BOUND |
 					 I915_SHRINK_UNBOUND);
 	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
-		intel_runtime_pm_get(i915);
+		intel_wakeref_t wakeref;
+
+		wakeref = intel_runtime_pm_get(i915);
 		freed += i915_gem_shrink(i915,
 					 sc->nr_to_scan - sc->nr_scanned,
 					 &sc->nr_scanned,
 					 I915_SHRINK_ACTIVE |
 					 I915_SHRINK_BOUND |
 					 I915_SHRINK_UNBOUND);
-		intel_runtime_pm_put(i915);
+		intel_runtime_pm_put(i915, wakeref);
 	}
 
 	shrinker_unlock(i915, unlock);
@@ -463,6 +469,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
 	struct i915_vma *vma, *next;
 	unsigned long freed_pages = 0;
+	intel_wakeref_t wakeref;
 	bool unlock;
 	int ret;
 
@@ -476,13 +483,13 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 	if (ret)
 		goto out;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	freed_pages += i915_gem_shrink(i915, -1UL, NULL,
 				       I915_SHRINK_BOUND |
 				       I915_SHRINK_UNBOUND |
 				       I915_SHRINK_ACTIVE |
 				       I915_SHRINK_VMAPS);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	/* We also want to clear any cached iomaps as they wrap vmap */
 	list_for_each_entry_safe(vma, next,
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 10f28a2ee2e6..83e15288ae96 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3275,6 +3275,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 		       const char *fmt, ...)
 {
 	struct intel_engine_cs *engine;
+	intel_wakeref_t wakeref;
 	unsigned int tmp;
 	char error_msg[80];
 	char *msg = NULL;
@@ -3296,7 +3297,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 	 * isn't the case at least when we get here by doing a
 	 * simulated reset via debugfs, so get an RPM reference.
 	 */
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	engine_mask &= INTEL_INFO(dev_priv)->ring_mask;
 
@@ -3358,7 +3359,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 	wake_up_all(&dev_priv->gpu_error.reset_queue);
 
 out:
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 }
 
 /* Called from drm generic code, passed 'crtc' which
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 664b96bb65a3..8f8aadefd33f 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1364,14 +1364,14 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 
 	free_oa_buffer(dev_priv);
 
+	put_oa_config(dev_priv, stream->oa_config);
+
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, stream->wakeref);
 
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 
-	put_oa_config(dev_priv, stream->oa_config);
-
 	if (dev_priv->perf.oa.spurious_report_rs.missed) {
 		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
 			 dev_priv->perf.oa.spurious_report_rs.missed);
@@ -2081,7 +2081,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	 *   In our case we are expecting that taking pm + FORCEWAKE
 	 *   references will effectively disable RC6.
 	 */
-	intel_runtime_pm_get(dev_priv);
+	stream->wakeref = intel_runtime_pm_get(dev_priv);
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
 	ret = alloc_oa_buffer(dev_priv);
@@ -2118,7 +2118,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	put_oa_config(dev_priv, stream->oa_config);
 
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, stream->wakeref);
 
 err_config:
 	if (stream->ctx)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index d6c8f8fdfda5..3d43fc9dd25d 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -167,6 +167,7 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	bool fw = false;
 
 	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
@@ -175,7 +176,8 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 	if (!dev_priv->gt.awake)
 		return;
 
-	if (!intel_runtime_pm_get_if_in_use(dev_priv))
+	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
+	if (!wakeref)
 		return;
 
 	for_each_engine(engine, dev_priv, id) {
@@ -210,7 +212,7 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 	if (fw)
 		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 }
 
 static void
@@ -227,11 +229,15 @@ frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 		u32 val;
 
 		val = dev_priv->gt_pm.rps.cur_freq;
-		if (dev_priv->gt.awake &&
-		    intel_runtime_pm_get_if_in_use(dev_priv)) {
-			val = intel_get_cagf(dev_priv,
-					     I915_READ_NOTRACE(GEN6_RPSTAT1));
-			intel_runtime_pm_put(dev_priv);
+		if (dev_priv->gt.awake) {
+			intel_wakeref_t wakeref =
+				intel_runtime_pm_get_if_in_use(dev_priv);
+
+			if (wakeref) {
+				val = intel_get_cagf(dev_priv,
+						     I915_READ_NOTRACE(GEN6_RPSTAT1));
+				intel_runtime_pm_put(dev_priv, wakeref);
+			}
 		}
 
 		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
@@ -443,12 +449,14 @@ static u64 __get_rc6(struct drm_i915_private *i915)
 static u64 get_rc6(struct drm_i915_private *i915)
 {
 #if IS_ENABLED(CONFIG_PM)
+	intel_wakeref_t wakeref;
 	unsigned long flags;
 	u64 val;
 
-	if (intel_runtime_pm_get_if_in_use(i915)) {
+	wakeref = intel_runtime_pm_get_if_in_use(i915);
+	if (wakeref) {
 		val = __get_rc6(i915);
-		intel_runtime_pm_put(i915);
+		intel_runtime_pm_put(i915, wakeref);
 
 		/*
 		 * If we are coming back from being runtime suspended we must
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index ae63a7d0f51d..a959aee208de 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -42,11 +42,12 @@ static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
 static u32 calc_residency(struct drm_i915_private *dev_priv,
 			  i915_reg_t reg)
 {
+	intel_wakeref_t wakeref;
 	u64 res;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	res = intel_rc6_residency_us(dev_priv, reg);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return DIV_ROUND_CLOSEST_ULL(res, 1000);
 }
@@ -258,9 +259,10 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
 				    struct device_attribute *attr, char *buf)
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+	intel_wakeref_t wakeref;
 	int ret;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	mutex_lock(&dev_priv->pcu_lock);
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
@@ -274,7 +276,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
 	}
 	mutex_unlock(&dev_priv->pcu_lock);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n", ret);
 }
@@ -354,6 +356,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	intel_wakeref_t wakeref;
 	u32 val;
 	ssize_t ret;
 
@@ -361,7 +364,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 	if (ret)
 		return ret;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	mutex_lock(&dev_priv->pcu_lock);
 
@@ -371,7 +374,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 	    val > rps->max_freq ||
 	    val < rps->min_freq_softlimit) {
 		mutex_unlock(&dev_priv->pcu_lock);
-		intel_runtime_pm_put(dev_priv);
+		intel_runtime_pm_put(dev_priv, wakeref);
 		return -EINVAL;
 	}
 
@@ -392,7 +395,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 
 	mutex_unlock(&dev_priv->pcu_lock);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return ret ?: count;
 }
@@ -412,6 +415,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	intel_wakeref_t wakeref;
 	u32 val;
 	ssize_t ret;
 
@@ -419,7 +423,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 	if (ret)
 		return ret;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	mutex_lock(&dev_priv->pcu_lock);
 
@@ -429,7 +433,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 	    val > rps->max_freq ||
 	    val > rps->max_freq_softlimit) {
 		mutex_unlock(&dev_priv->pcu_lock);
-		intel_runtime_pm_put(dev_priv);
+		intel_runtime_pm_put(dev_priv, wakeref);
 		return -EINVAL;
 	}
 
@@ -446,7 +450,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 
 	mutex_unlock(&dev_priv->pcu_lock);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return ret ?: count;
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 31a3fdf8f051..0a50a04be8a0 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2087,6 +2087,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 	struct drm_device *dev = fb->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	unsigned int pinctl;
 	u32 alignment;
@@ -2110,7 +2111,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 	 * intel_runtime_pm_put(), so it is correct to wrap only the
 	 * pin/unpin/fence and not more.
 	 */
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
 
@@ -2165,7 +2166,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 err:
 	atomic_dec(&dev_priv->gpu_error.pending_fb_pin);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	return vma;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index bc6396366016..5d88b63a3d6b 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -28,6 +28,7 @@
 #include <linux/i2c.h>
 #include <linux/hdmi.h>
 #include <linux/sched/clock.h>
+#include <linux/stackdepot.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include <drm/drm_crtc.h>
@@ -2057,10 +2058,16 @@ enable_rpm_wakeref_asserts(struct drm_i915_private *i915)
 	atomic_dec(&i915->runtime_pm.wakeref_count);
 }
 
-void intel_runtime_pm_get(struct drm_i915_private *i915);
-bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
-void intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
-void intel_runtime_pm_put(struct drm_i915_private *i915);
+intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915);
+intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
+intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
+
+void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref);
+#else
+#define intel_runtime_pm_put(i915, wref) intel_runtime_pm_put_unchecked(i915)
+#endif
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
 void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 83f2f7774c1f..b122d82465d0 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -935,10 +935,12 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
 static bool ring_is_idle(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
+	intel_wakeref_t wakeref;
 	bool idle = true;
 
 	/* If the whole device is asleep, the engine must be idle */
-	if (!intel_runtime_pm_get_if_in_use(dev_priv))
+	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
+	if (!wakeref)
 		return true;
 
 	/* First check that no commands are left in the ring */
@@ -950,7 +952,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
 	if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
 		idle = false;
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return idle;
 }
@@ -1445,6 +1447,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	const struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
 	struct i915_request *rq, *last;
+	intel_wakeref_t wakeref;
 	unsigned long flags;
 	struct rb_node *rb;
 	int count;
@@ -1503,9 +1506,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 
 	rcu_read_unlock();
 
-	if (intel_runtime_pm_get_if_in_use(engine->i915)) {
+	wakeref = intel_runtime_pm_get_if_in_use(engine->i915);
+	if (wakeref) {
 		intel_engine_print_registers(engine, m);
-		intel_runtime_pm_put(engine->i915);
+		intel_runtime_pm_put(engine->i915, wakeref);
 	} else {
 		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
 	}
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 5e17efae7efd..19f7138b13e1 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -177,8 +177,9 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	const struct i915_ggtt_view view = {
 		.type = I915_GGTT_VIEW_NORMAL,
 	};
-	struct fb_info *info;
 	struct drm_framebuffer *fb;
+	intel_wakeref_t wakeref;
+	struct fb_info *info;
 	struct i915_vma *vma;
 	unsigned long flags = 0;
 	bool prealloc = false;
@@ -209,7 +210,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	}
 
 	mutex_lock(&dev->struct_mutex);
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	/* Pin the GGTT vma for our access via info->screen_base.
 	 * This also validates that any existing fb inherited from the
@@ -276,7 +277,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	ifbdev->vma = vma;
 	ifbdev->vma_flags = flags;
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	mutex_unlock(&dev->struct_mutex);
 	vga_switcheroo_client_fb_set(pdev, info);
 	return 0;
@@ -284,7 +285,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 out_unpin:
 	intel_unpin_fb_vma(vma, flags);
 out_unlock:
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index d3ebdbc0182e..20c0b36d748e 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -436,6 +436,7 @@ static void guc_log_capture_logs(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	intel_wakeref_t wakeref;
 
 	guc_read_update_log_buffer(log);
 
@@ -443,9 +444,9 @@ static void guc_log_capture_logs(struct intel_guc_log *log)
 	 * Generally device is expected to be active only at this
 	 * time, so get/put should be really quick.
 	 */
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	guc_action_flush_log_complete(guc);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 }
 
 int intel_guc_log_create(struct intel_guc_log *log)
@@ -505,6 +506,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
 {
 	struct intel_guc *guc = log_to_guc(log);
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	intel_wakeref_t wakeref;
 	int ret;
 
 	BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN != 0);
@@ -524,11 +526,11 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
 		goto out_unlock;
 	}
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	ret = guc_action_control_log(guc, GUC_LOG_LEVEL_IS_VERBOSE(level),
 				     GUC_LOG_LEVEL_IS_ENABLED(level),
 				     GUC_LOG_LEVEL_TO_VERBOSITY(level));
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	if (ret) {
 		DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret);
 		goto out_unlock;
@@ -601,6 +603,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
 	struct drm_i915_private *i915 = guc_to_i915(guc);
+	intel_wakeref_t wakeref;
 
 	/*
 	 * Before initiating the forceful flush, wait for any pending/ongoing
@@ -608,9 +611,9 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
 	 */
 	flush_work(&log->relay.flush_work);
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	guc_action_flush_log(guc);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	/* GuC would have updated log buffer by now, so capture it */
 	guc_log_capture_logs(log);
diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c
index 648a13c6043c..f762e79bde5d 100644
--- a/drivers/gpu/drm/i915/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/intel_hotplug.c
@@ -210,9 +210,10 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
 		container_of(work, typeof(*dev_priv),
 			     hotplug.reenable_work.work);
 	struct drm_device *dev = &dev_priv->drm;
+	intel_wakeref_t wakeref;
 	enum hpd_pin pin;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	spin_lock_irq(&dev_priv->irq_lock);
 	for_each_hpd_pin(pin) {
@@ -243,7 +244,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
 		dev_priv->display.hpd_irq_setup(dev_priv);
 	spin_unlock_irq(&dev_priv->irq_lock);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 }
 
 bool intel_encoder_hotplug(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c
index 37ef540dd280..a61c944c3036 100644
--- a/drivers/gpu/drm/i915/intel_huc.c
+++ b/drivers/gpu/drm/i915/intel_huc.c
@@ -114,14 +114,15 @@ int intel_huc_auth(struct intel_huc *huc)
 int intel_huc_check_status(struct intel_huc *huc)
 {
 	struct drm_i915_private *dev_priv = huc_to_i915(huc);
+	intel_wakeref_t wakeref;
 	u32 status;
 
 	if (!HAS_HUC(dev_priv))
 		return -ENODEV;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return status;
 }
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 4a9f139e7b73..e57c42323b17 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -1203,17 +1203,18 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd)
 	struct intel_connector *connector = bl_get_data(bd);
 	struct drm_device *dev = connector->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
+	intel_wakeref_t wakeref;
 	u32 hw_level;
 	int ret;
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 
 	hw_level = intel_panel_get_backlight(connector);
 	ret = scale_hw_to_user(connector, hw_level, bd->props.max_brightness);
 
 	drm_modeset_unlock(&dev->mode_config.connection_mutex);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index ed8e0bcea078..b939d104b2bf 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -62,7 +62,7 @@ static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
 	spin_lock_init(&i915->runtime_pm.debug_lock);
 }
 
-static noinline void
+static noinline depot_stack_handle_t
 track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
 {
 	struct i915_runtime_pm *rpm = &i915->runtime_pm;
@@ -76,7 +76,7 @@ track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
 	depot_stack_handle_t stack, *stacks;
 
 	if (!HAS_RUNTIME_PM(i915))
-		return;
+		return -1;
 
 	save_stack_trace(&trace);
 	if (trace.nr_entries &&
@@ -85,7 +85,7 @@ track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
 
 	stack = depot_save_stack(&trace, GFP_NOWAIT | __GFP_NOWARN);
 	if (!stack)
-		return;
+		return -1;
 
 	spin_lock_irqsave(&rpm->debug_lock, flags);
 	stacks = krealloc(rpm->debug_owners,
@@ -94,8 +94,55 @@ track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
 	if (stacks) {
 		stacks[rpm->debug_count++] = stack;
 		rpm->debug_owners = stacks;
+	} else {
+		stack = -1;
 	}
 	spin_unlock_irqrestore(&rpm->debug_lock, flags);
+
+	return stack;
+}
+
+static void cancel_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
+					    depot_stack_handle_t stack)
+{
+	struct i915_runtime_pm *rpm = &i915->runtime_pm;
+	unsigned long flags, n;
+	bool found = false;
+
+	if (unlikely(stack == -1))
+		return;
+
+	spin_lock_irqsave(&rpm->debug_lock, flags);
+	for (n = rpm->debug_count; n--; ) {
+		if (rpm->debug_owners[n] == stack) {
+			memmove(rpm->debug_owners + n,
+				rpm->debug_owners + n + 1,
+				(--rpm->debug_count - n) * sizeof(stack));
+			found = true;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&rpm->debug_lock, flags);
+
+	if (WARN(!found,
+		 "Unmatched wakeref (tracking %lu)\n",
+		 rpm->debug_count)) {
+		unsigned long entries[STACKDEPTH];
+		struct stack_trace trace = {
+			.entries = entries,
+			.max_entries = ARRAY_SIZE(entries),
+		};
+		char *buf;
+
+		buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		if (!buf)
+			return;
+
+		depot_fetch_stack(stack, &trace);
+		snprint_stack_trace(buf, PAGE_SIZE, &trace, 0);
+		DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf);
+		kfree(buf);
+	}
 }
 
 static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
@@ -220,8 +267,10 @@ static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
 {
 }
 
-static void track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+static depot_stack_handle_t
+track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
 {
+	return -1;
 }
 
 static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
@@ -1764,7 +1813,7 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
 	mutex_unlock(&power_domains->lock);
 
 	if (!is_enabled)
-		intel_runtime_pm_put(dev_priv);
+		intel_runtime_pm_put_unchecked(dev_priv);
 
 	return is_enabled;
 }
@@ -1798,7 +1847,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
 
 	mutex_unlock(&power_domains->lock);
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 }
 
 #define I830_PIPES_POWER_DOMAINS (		\
@@ -3975,7 +4024,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume)
 void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv)
 {
 	/* Keep the power well enabled, but cancel its rpm wakeref. */
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 
 	/* Remove the refcount we took to keep power well support disabled. */
 	if (!i915_modparams.disable_power_well)
@@ -4197,7 +4246,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
  * Any runtime pm reference obtained by this function must have a symmetric
  * call to intel_runtime_pm_put() to release the reference again.
  */
-void intel_runtime_pm_get(struct drm_i915_private *i915)
+intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915)
 {
 	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
@@ -4209,7 +4258,7 @@ void intel_runtime_pm_get(struct drm_i915_private *i915)
 	atomic_inc(&i915->runtime_pm.wakeref_count);
 	assert_rpm_wakelock_held(i915);
 
-	track_intel_runtime_pm_wakeref(i915);
+	return track_intel_runtime_pm_wakeref(i915);
 }
 
 /**
@@ -4225,7 +4274,7 @@ void intel_runtime_pm_get(struct drm_i915_private *i915)
  *
  * Returns: True if the wakeref was acquired, or False otherwise.
  */
-bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
+intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
 {
 	if (IS_ENABLED(CONFIG_PM)) {
 		struct pci_dev *pdev = i915->drm.pdev;
@@ -4238,15 +4287,13 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
 		 * atm to the late/early system suspend/resume handlers.
 		 */
 		if (pm_runtime_get_if_in_use(kdev) <= 0)
-			return false;
+			return 0;
 	}
 
 	atomic_inc(&i915->runtime_pm.wakeref_count);
 	assert_rpm_wakelock_held(i915);
 
-	track_intel_runtime_pm_wakeref(i915);
-
-	return true;
+	return track_intel_runtime_pm_wakeref(i915);
 }
 
 /**
@@ -4266,7 +4313,7 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
  * Any runtime pm reference obtained by this function must have a symmetric
  * call to intel_runtime_pm_put() to release the reference again.
  */
-void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
+intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
 {
 	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
@@ -4276,7 +4323,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
 
 	atomic_inc(&i915->runtime_pm.wakeref_count);
 
-	track_intel_runtime_pm_wakeref(i915);
+	return track_intel_runtime_pm_wakeref(i915);
 }
 
 /**
@@ -4287,7 +4334,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
  * intel_runtime_pm_get() and might power down the corresponding
  * hardware block right away if this is the last reference.
  */
-void intel_runtime_pm_put(struct drm_i915_private *i915)
+void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915)
 {
 	struct pci_dev *pdev = i915->drm.pdev;
 	struct device *kdev = &pdev->dev;
@@ -4300,6 +4347,14 @@ void intel_runtime_pm_put(struct drm_i915_private *i915)
 	pm_runtime_put_autosuspend(kdev);
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref)
+{
+	cancel_intel_runtime_pm_wakeref(i915, wref);
+	intel_runtime_pm_put_unchecked(i915);
+}
+#endif
+
 /**
  * intel_runtime_pm_enable - enable runtime pm
  * @i915: i915 device instance
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 3ad302c66254..5b842318edbc 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1670,6 +1670,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_reg_read *reg = data;
 	struct reg_whitelist const *entry;
+	intel_wakeref_t wakeref;
 	unsigned int flags;
 	int remain;
 	int ret = 0;
@@ -1695,7 +1696,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
 
 	flags = reg->offset & (entry->size - 1);
 
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 	if (entry->size == 8 && flags == I915_REG_READ_8B_WA)
 		reg->val = I915_READ64_2x32(entry->offset_ldw,
 					    entry->offset_udw);
@@ -1709,7 +1710,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
 		reg->val = I915_READ8(entry->offset_ldw);
 	else
 		ret = -EINVAL;
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index e272127783fe..4ab98db946c8 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1749,6 +1749,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
 	};
 	struct drm_file *file;
 	struct i915_gem_context *ctx;
+	intel_wakeref_t wakeref;
 	int err;
 
 	if (!USES_PPGTT(dev_priv)) {
@@ -1764,7 +1765,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
 		return PTR_ERR(file);
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	ctx = live_context(dev_priv, file);
 	if (IS_ERR(ctx)) {
@@ -1778,7 +1779,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
 	err = i915_subtests(tests, ctx);
 
 out_unlock:
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	mock_file_free(dev_priv, file);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index d0aa19d17653..896f0b37b34c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -16,9 +16,10 @@ static int switch_to_context(struct drm_i915_private *i915,
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
@@ -32,7 +33,7 @@ static int switch_to_context(struct drm_i915_private *i915,
 		i915_request_add(rq);
 	}
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	return err;
 }
@@ -65,7 +66,9 @@ static void trash_stolen(struct drm_i915_private *i915)
 
 static void simulate_hibernate(struct drm_i915_private *i915)
 {
-	intel_runtime_pm_get(i915);
+	intel_wakeref_t wakeref;
+
+	wakeref = intel_runtime_pm_get(i915);
 
 	/*
 	 * As a final sting in the tail, invalidate stolen. Under a real S4,
@@ -76,7 +79,7 @@ static void simulate_hibernate(struct drm_i915_private *i915)
 	 */
 	trash_stolen(i915);
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 }
 
 static int pm_prepare(struct drm_i915_private *i915)
@@ -93,39 +96,45 @@ static int pm_prepare(struct drm_i915_private *i915)
 
 static void pm_suspend(struct drm_i915_private *i915)
 {
-	intel_runtime_pm_get(i915);
+	intel_wakeref_t wakeref;
+
+	wakeref = intel_runtime_pm_get(i915);
 
 	i915_gem_suspend_gtt_mappings(i915);
 	i915_gem_suspend_late(i915);
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 }
 
 static void pm_hibernate(struct drm_i915_private *i915)
 {
-	intel_runtime_pm_get(i915);
+	intel_wakeref_t wakeref;
+
+	wakeref = intel_runtime_pm_get(i915);
 
 	i915_gem_suspend_gtt_mappings(i915);
 
 	i915_gem_freeze(i915);
 	i915_gem_freeze_late(i915);
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 }
 
 static void pm_resume(struct drm_i915_private *i915)
 {
+	intel_wakeref_t wakeref;
+
 	/*
 	 * Both suspend and hibernate follow the same wakeup path and assume
 	 * that runtime-pm just works.
 	 */
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	intel_engines_sanitize(i915);
 	i915_gem_sanitize(i915);
 	i915_gem_resume(i915);
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 }
 
 static int igt_gem_suspend(void *arg)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index f7392c1ffe75..fd89a5a33c1a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -279,6 +279,7 @@ static int igt_gem_coherency(void *arg)
 	struct drm_i915_private *i915 = arg;
 	const struct igt_coherency_mode *read, *write, *over;
 	struct drm_i915_gem_object *obj;
+	intel_wakeref_t wakeref;
 	unsigned long count, n;
 	u32 *offsets, *values;
 	int err = 0;
@@ -298,7 +299,7 @@ static int igt_gem_coherency(void *arg)
 	values = offsets + ncachelines;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	for (over = igt_coherency_mode; over->name; over++) {
 		if (!over->set)
 			continue;
@@ -376,7 +377,7 @@ static int igt_gem_coherency(void *arg)
 		}
 	}
 unlock:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	kfree(offsets);
 	return err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index f1a57e3c3820..e5db71e3991d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -564,6 +564,8 @@ static int igt_ctx_exec(void *arg)
 		}
 
 		for_each_engine(engine, i915, id) {
+			intel_wakeref_t wakeref;
+
 			if (!engine->context_size)
 				continue; /* No logical context support in HW */
 
@@ -578,9 +580,9 @@ static int igt_ctx_exec(void *arg)
 				}
 			}
 
-			intel_runtime_pm_get(i915);
+			wakeref = intel_runtime_pm_get(i915);
 			err = gpu_fill(obj, ctx, engine, dw);
-			intel_runtime_pm_put(i915);
+			intel_runtime_pm_put(i915, wakeref);
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
@@ -665,6 +667,8 @@ static int igt_ctx_readonly(void *arg)
 		unsigned int id;
 
 		for_each_engine(engine, i915, id) {
+			intel_wakeref_t wakeref;
+
 			if (!intel_engine_can_store_dword(engine))
 				continue;
 
@@ -679,9 +683,9 @@ static int igt_ctx_readonly(void *arg)
 					i915_gem_object_set_readonly(obj);
 			}
 
-			intel_runtime_pm_get(i915);
+			wakeref = intel_runtime_pm_get(i915);
 			err = gpu_fill(obj, ctx, engine, dw);
-			intel_runtime_pm_put(i915);
+			intel_runtime_pm_put(i915, wakeref);
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
@@ -814,6 +818,7 @@ static int igt_switch_to_kernel_context(void *arg)
 	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err;
 
 	/*
@@ -824,7 +829,7 @@ static int igt_switch_to_kernel_context(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	ctx = kernel_context(i915);
 	if (IS_ERR(ctx)) {
@@ -849,7 +854,7 @@ static int igt_switch_to_kernel_context(void *arg)
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
 
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	kernel_context_close(ctx);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 128ad1cf0647..ac59c336d570 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -336,6 +336,7 @@ static int igt_evict_contexts(void *arg)
 		struct drm_mm_node node;
 		struct reserved *next;
 	} *reserved = NULL;
+	intel_wakeref_t wakeref;
 	struct drm_mm_node hole;
 	unsigned long count;
 	int err;
@@ -355,7 +356,7 @@ static int igt_evict_contexts(void *arg)
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	/* Reserve a block so that we know we have enough to fit a few rq */
 	memset(&hole, 0, sizeof(hole));
@@ -400,8 +401,10 @@ static int igt_evict_contexts(void *arg)
 		struct drm_file *file;
 
 		file = mock_file(i915);
-		if (IS_ERR(file))
-			return PTR_ERR(file);
+		if (IS_ERR(file)) {
+			err = PTR_ERR(file);
+			break;
+		}
 
 		count = 0;
 		mutex_lock(&i915->drm.struct_mutex);
@@ -464,7 +467,7 @@ static int igt_evict_contexts(void *arg)
 	}
 	if (drm_mm_node_allocated(&hole))
 		drm_mm_remove_node(&hole);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 8e2e269db97e..38414558e18b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -275,6 +275,7 @@ static int lowlevel_hole(struct drm_i915_private *i915,
 
 		for (n = 0; n < count; n++) {
 			u64 addr = hole_start + order[n] * BIT_ULL(size);
+			intel_wakeref_t wakeref;
 
 			GEM_BUG_ON(addr + BIT_ULL(size) > vm->total);
 
@@ -293,9 +294,9 @@ static int lowlevel_hole(struct drm_i915_private *i915,
 			mock_vma.node.size = BIT_ULL(size);
 			mock_vma.node.start = addr;
 
-			intel_runtime_pm_get(i915);
+			wakeref = intel_runtime_pm_get(i915);
 			vm->insert_entries(vm, &mock_vma, I915_CACHE_NONE, 0);
-			intel_runtime_pm_put(i915);
+			intel_runtime_pm_put(i915, wakeref);
 		}
 		count = n;
 
@@ -1144,6 +1145,7 @@ static int igt_ggtt_page(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct drm_i915_gem_object *obj;
+	intel_wakeref_t wakeref;
 	struct drm_mm_node tmp;
 	unsigned int *order, n;
 	int err;
@@ -1169,7 +1171,7 @@ static int igt_ggtt_page(void *arg)
 	if (err)
 		goto out_unpin;
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	for (n = 0; n < count; n++) {
 		u64 offset = tmp.start + n * PAGE_SIZE;
@@ -1216,7 +1218,7 @@ static int igt_ggtt_page(void *arg)
 	kfree(order);
 out_remove:
 	ggtt->vm.clear_range(&ggtt->vm, tmp.start, tmp.size);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	drm_mm_remove_node(&tmp);
 out_unpin:
 	i915_gem_object_unpin_pages(obj);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 6d3516d5bff9..13d4dc5e8dfb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -307,6 +307,7 @@ static int igt_partial_tiling(void *arg)
 	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
 	struct drm_i915_private *i915 = arg;
 	struct drm_i915_gem_object *obj;
+	intel_wakeref_t wakeref;
 	int tiling;
 	int err;
 
@@ -332,7 +333,7 @@ static int igt_partial_tiling(void *arg)
 	}
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	if (1) {
 		IGT_TIMEOUT(end);
@@ -443,7 +444,7 @@ next_tiling: ;
 	}
 
 out_unlock:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 out:
@@ -503,11 +504,14 @@ static void disable_retire_worker(struct drm_i915_private *i915)
 {
 	mutex_lock(&i915->drm.struct_mutex);
 	if (!i915->gt.active_requests++) {
-		intel_runtime_pm_get(i915);
+		intel_wakeref_t wakeref;
+
+		wakeref = intel_runtime_pm_get(i915);
 		i915_gem_unpark(i915);
-		intel_runtime_pm_put(i915);
+		intel_runtime_pm_put(i915, wakeref);
 	}
 	mutex_unlock(&i915->drm.struct_mutex);
+
 	cancel_delayed_work_sync(&i915->gt.retire_work);
 	cancel_delayed_work_sync(&i915->gt.idle_work);
 }
@@ -575,6 +579,8 @@ static int igt_mmap_offset_exhaustion(void *arg)
 
 	/* Now fill with busy dead objects that we expect to reap */
 	for (loop = 0; loop < 3; loop++) {
+		intel_wakeref_t wakeref;
+
 		if (i915_terminally_wedged(&i915->gpu_error))
 			break;
 
@@ -585,9 +591,9 @@ static int igt_mmap_offset_exhaustion(void *arg)
 		}
 
 		mutex_lock(&i915->drm.struct_mutex);
-		intel_runtime_pm_get(i915);
+		wakeref = intel_runtime_pm_get(i915);
 		err = make_obj_busy(obj);
-		intel_runtime_pm_put(i915);
+		intel_runtime_pm_put(i915, wakeref);
 		mutex_unlock(&i915->drm.struct_mutex);
 		if (err) {
 			pr_err("[loop %d] Failed to busy the object\n", loop);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 07e557815308..8b73a8c21377 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -332,6 +332,7 @@ static int live_nop_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
+	intel_wakeref_t wakeref;
 	struct live_test t;
 	unsigned int id;
 	int err = -ENODEV;
@@ -342,7 +343,7 @@ static int live_nop_request(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	for_each_engine(engine, i915, id) {
 		struct i915_request *request = NULL;
@@ -403,7 +404,7 @@ static int live_nop_request(void *arg)
 	}
 
 out_unlock:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -478,8 +479,9 @@ static int live_empty_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
-	struct live_test t;
+	intel_wakeref_t wakeref;
 	struct i915_vma *batch;
+	struct live_test t;
 	unsigned int id;
 	int err = 0;
 
@@ -489,7 +491,7 @@ static int live_empty_request(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	batch = empty_batch(i915);
 	if (IS_ERR(batch)) {
@@ -553,7 +555,7 @@ static int live_empty_request(void *arg)
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
 out_unlock:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -637,6 +639,7 @@ static int live_all_engines(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
 	struct i915_request *request[I915_NUM_ENGINES];
+	intel_wakeref_t wakeref;
 	struct i915_vma *batch;
 	struct live_test t;
 	unsigned int id;
@@ -648,7 +651,7 @@ static int live_all_engines(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	err = begin_live_test(&t, i915, __func__, "");
 	if (err)
@@ -731,7 +734,7 @@ static int live_all_engines(void *arg)
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
 out_unlock:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -742,6 +745,7 @@ static int live_sequential_engines(void *arg)
 	struct i915_request *request[I915_NUM_ENGINES] = {};
 	struct i915_request *prev = NULL;
 	struct intel_engine_cs *engine;
+	intel_wakeref_t wakeref;
 	struct live_test t;
 	unsigned int id;
 	int err;
@@ -753,7 +757,7 @@ static int live_sequential_engines(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	err = begin_live_test(&t, i915, __func__, "");
 	if (err)
@@ -860,7 +864,7 @@ static int live_sequential_engines(void *arg)
 		i915_request_put(request[id]);
 	}
 out_unlock:
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c
index 0c0ab82b6228..cd8db7eedb7d 100644
--- a/drivers/gpu/drm/i915/selftests/intel_guc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_guc.c
@@ -137,12 +137,13 @@ static bool client_doorbell_in_sync(struct intel_guc_client *client)
 static int igt_guc_clients(void *args)
 {
 	struct drm_i915_private *dev_priv = args;
+	intel_wakeref_t wakeref;
 	struct intel_guc *guc;
 	int err = 0;
 
 	GEM_BUG_ON(!HAS_GUC(dev_priv));
 	mutex_lock(&dev_priv->drm.struct_mutex);
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	guc = &dev_priv->guc;
 	if (!guc) {
@@ -270,7 +271,7 @@ static int igt_guc_clients(void *args)
 	guc_clients_create(guc);
 	guc_clients_doorbell_init(guc);
 unlock:
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
 }
@@ -283,13 +284,14 @@ static int igt_guc_clients(void *args)
 static int igt_guc_doorbells(void *arg)
 {
 	struct drm_i915_private *dev_priv = arg;
+	intel_wakeref_t wakeref;
 	struct intel_guc *guc;
 	int i, err = 0;
 	u16 db_id;
 
 	GEM_BUG_ON(!HAS_GUC(dev_priv));
 	mutex_lock(&dev_priv->drm.struct_mutex);
-	intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(dev_priv);
 
 	guc = &dev_priv->guc;
 	if (!guc) {
@@ -382,7 +384,7 @@ static int igt_guc_doorbells(void *arg)
 			guc_client_free(clients[i]);
 		}
 unlock:
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv, wakeref);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index db378226ac10..7c10fdacd498 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -1488,6 +1488,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(igt_reset_evict_fence),
 		SUBTEST(igt_handle_error),
 	};
+	intel_wakeref_t wakeref;
 	bool saved_hangcheck;
 	int err;
 
@@ -1497,7 +1498,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 	if (i915_terminally_wedged(&i915->gpu_error))
 		return -EIO; /* we're long past hope of a successful reset */
 
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 	saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
 
 	err = i915_subtests(tests, i915);
@@ -1507,7 +1508,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	i915_modparams.enable_hangcheck = saved_hangcheck;
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 94ceb5f6c507..95f7e9364947 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -214,6 +214,7 @@ static int live_sanitycheck(void *arg)
 	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	struct spinner spin;
 	int err = -ENOMEM;
 
@@ -221,7 +222,7 @@ static int live_sanitycheck(void *arg)
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	if (spinner_init(&spin, i915))
 		goto err_unlock;
@@ -262,7 +263,7 @@ static int live_sanitycheck(void *arg)
 	spinner_fini(&spin);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -274,13 +275,14 @@ static int live_preempt(void *arg)
 	struct spinner spin_hi, spin_lo;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	if (spinner_init(&spin_hi, i915))
 		goto err_unlock;
@@ -355,7 +357,7 @@ static int live_preempt(void *arg)
 	spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -368,13 +370,14 @@ static int live_late_preempt(void *arg)
 	struct intel_engine_cs *engine;
 	struct i915_sched_attr attr = {};
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	if (spinner_init(&spin_hi, i915))
 		goto err_unlock;
@@ -447,7 +450,7 @@ static int live_late_preempt(void *arg)
 	spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
@@ -466,6 +469,7 @@ static int live_preempt_hang(void *arg)
 	struct spinner spin_hi, spin_lo;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
@@ -475,7 +479,7 @@ static int live_preempt_hang(void *arg)
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(i915);
 
 	if (spinner_init(&spin_hi, i915))
 		goto err_unlock;
@@ -570,7 +574,7 @@ static int live_preempt_hang(void *arg)
 	spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(i915);
+	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index d1a0923d2f38..0db63eb24dfc 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -12,10 +12,11 @@
 static struct drm_i915_gem_object *
 read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 {
+	const u32 base = engine->mmio_base;
 	struct drm_i915_gem_object *result;
+	intel_wakeref_t wakeref;
 	struct i915_request *rq;
 	struct i915_vma *vma;
-	const u32 base = engine->mmio_base;
 	u32 srm, *cs;
 	int err;
 	int i;
@@ -44,9 +45,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	if (err)
 		goto err_obj;
 
-	intel_runtime_pm_get(engine->i915);
+	wakeref = intel_runtime_pm_get(engine->i915);
 	rq = i915_request_alloc(engine, ctx);
-	intel_runtime_pm_put(engine->i915);
+	intel_runtime_pm_put(engine->i915, wakeref);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_pin;
@@ -172,14 +173,15 @@ static int switch_to_scratch_context(struct intel_engine_cs *engine)
 {
 	struct i915_gem_context *ctx;
 	struct i915_request *rq;
+	intel_wakeref_t wakeref;
 
 	ctx = kernel_context(engine->i915);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	intel_runtime_pm_get(engine->i915);
+	wakeref = intel_runtime_pm_get(engine->i915);
 	rq = i915_request_alloc(engine, ctx);
-	intel_runtime_pm_put(engine->i915);
+	intel_runtime_pm_put(engine->i915, wakeref);
 
 	kernel_context_close(ctx);
 	if (IS_ERR(rq))
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 22/40] drm/i915: Syntactic sugar for using intel_runtime_pm
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (19 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 21/40] drm/i915: Markup paired operations on wakerefs Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-24 12:08   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 23/40] drm/i915: Markup paired operations on display power domains Chris Wilson
                   ` (21 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Frequently, we use intel_runtime_pm_get/_put around a small block.
Formalise that usage by providing a macro to define such a block with an
automatic closure to scope the intel_runtime_pm wakeref to that block,
i.e. macro abuse smelling of python.
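
For example, with this macro a bounded hardware access (here the HuC
status read converted later in this patch) roughly reduces to:

	intel_wakeref_t wakeref;
	u32 status = 0;

	with_intel_runtime_pm(dev_priv, wakeref)
		status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;

	/* the wakeref is released, and the cookie cleared, on leaving the block */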

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c           | 163 ++++++++----------
 drivers/gpu/drm/i915/i915_gem.c               |  10 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  23 ++-
 drivers/gpu/drm/i915/i915_gem_shrinker.c      |  44 ++---
 drivers/gpu/drm/i915/i915_pmu.c               |   7 +-
 drivers/gpu/drm/i915/i915_sysfs.c             |   7 +-
 drivers/gpu/drm/i915/intel_drv.h              |   8 +
 drivers/gpu/drm/i915/intel_guc_log.c          |  26 ++-
 drivers/gpu/drm/i915/intel_huc.c              |   7 +-
 drivers/gpu/drm/i915/intel_panel.c            |  18 +-
 drivers/gpu/drm/i915/intel_uncore.c           |  30 ++--
 drivers/gpu/drm/i915/selftests/i915_gem.c     |  34 ++--
 .../gpu/drm/i915/selftests/i915_gem_context.c |  12 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  |  11 +-
 .../drm/i915/selftests/intel_workarounds.c    |  12 +-
 15 files changed, 193 insertions(+), 219 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index dbfe4e456d97..98fa216d19bb 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -952,9 +952,9 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
 	struct i915_gpu_state *gpu;
 	intel_wakeref_t wakeref;
 
-	wakeref = intel_runtime_pm_get(i915);
-	gpu = i915_capture_gpu_state(i915);
-	intel_runtime_pm_put(i915, wakeref);
+	gpu = NULL;
+	with_intel_runtime_pm(i915, wakeref)
+		gpu = i915_capture_gpu_state(i915);
 	if (!gpu)
 		return -ENOMEM;
 
@@ -1015,9 +1015,8 @@ i915_next_seqno_set(void *data, u64 val)
 	if (ret)
 		return ret;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	ret = i915_gem_set_global_seqno(dev, val);
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref)
+		ret = i915_gem_set_global_seqno(dev, val);
 
 	mutex_unlock(&dev->struct_mutex);
 
@@ -1305,17 +1304,15 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 		return 0;
 	}
 
-	wakeref = intel_runtime_pm_get(dev_priv);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		for_each_engine(engine, dev_priv, id) {
+			acthd[id] = intel_engine_get_active_head(engine);
+			seqno[id] = intel_engine_get_seqno(engine);
+		}
 
-	for_each_engine(engine, dev_priv, id) {
-		acthd[id] = intel_engine_get_active_head(engine);
-		seqno[id] = intel_engine_get_seqno(engine);
+		intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
 	}
 
-	intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
-
-	intel_runtime_pm_put(dev_priv, wakeref);
-
 	if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer))
 		seq_printf(m, "Hangcheck active, timer fires in %dms\n",
 			   jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
@@ -1591,18 +1588,16 @@ static int i915_drpc_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	intel_wakeref_t wakeref;
-	int err;
-
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		err = vlv_drpc_info(m);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		err = gen6_drpc_info(m);
-	else
-		err = ironlake_drpc_info(m);
+	int err = -ENODEV;
 
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+			err = vlv_drpc_info(m);
+		else if (INTEL_GEN(dev_priv) >= 6)
+			err = gen6_drpc_info(m);
+		else
+			err = ironlake_drpc_info(m);
+	}
 
 	return err;
 }
@@ -2167,9 +2162,8 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data)
 	p = drm_seq_file_printer(m);
 	intel_uc_fw_dump(&dev_priv->huc.fw, &p);
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref)
+		seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
 
 	return 0;
 }
@@ -2179,7 +2173,6 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	intel_wakeref_t wakeref;
 	struct drm_printer p;
-	u32 tmp, i;
 
 	if (!HAS_GUC(dev_priv))
 		return -ENODEV;
@@ -2187,22 +2180,23 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
 	p = drm_seq_file_printer(m);
 	intel_uc_fw_dump(&dev_priv->guc.fw, &p);
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	tmp = I915_READ(GUC_STATUS);
-
-	seq_printf(m, "\nGuC status 0x%08x:\n", tmp);
-	seq_printf(m, "\tBootrom status = 0x%x\n",
-		(tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
-	seq_printf(m, "\tuKernel status = 0x%x\n",
-		(tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
-	seq_printf(m, "\tMIA Core status = 0x%x\n",
-		(tmp & GS_MIA_MASK) >> GS_MIA_SHIFT);
-	seq_puts(m, "\nScratch registers:\n");
-	for (i = 0; i < 16; i++)
-		seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i)));
-
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		u32 tmp = I915_READ(GUC_STATUS);
+		u32 i;
+
+		seq_printf(m, "\nGuC status 0x%08x:\n", tmp);
+		seq_printf(m, "\tBootrom status = 0x%x\n",
+			   (tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
+		seq_printf(m, "\tuKernel status = 0x%x\n",
+			   (tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
+		seq_printf(m, "\tMIA Core status = 0x%x\n",
+			   (tmp & GS_MIA_MASK) >> GS_MIA_SHIFT);
+		seq_puts(m, "\nScratch registers:\n");
+		for (i = 0; i < 16; i++) {
+			seq_printf(m, "\t%2d: \t0x%x\n",
+				   i, I915_READ(SOFT_SCRATCH(i)));
+		}
+	}
 
 	return 0;
 }
@@ -2675,19 +2669,14 @@ static int i915_energy_uJ(struct seq_file *m, void *data)
 	if (INTEL_GEN(dev_priv) < 6)
 		return -ENODEV;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) {
-		intel_runtime_pm_put(dev_priv, wakeref);
+	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power))
 		return -ENODEV;
-	}
 
 	units = (power & 0x1f00) >> 8;
-	power = I915_READ(MCH_SECP_NRG_STTS);
-	power = (1000000 * power) >> units; /* convert to uJ */
-
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref)
+		power = I915_READ(MCH_SECP_NRG_STTS);
 
+	power = (1000000 * power) >> units; /* convert to uJ */
 	seq_printf(m, "%llu", power);
 
 	return 0;
@@ -3268,22 +3257,20 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf,
 	struct seq_file *m = file->private_data;
 	struct drm_i915_private *dev_priv = m->private;
 	intel_wakeref_t wakeref;
-	int ret;
 	bool enable;
+	int ret;
 
 	ret = kstrtobool_from_user(ubuf, len, &enable);
 	if (ret < 0)
 		return ret;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	if (!dev_priv->ipc_enabled && enable)
-		DRM_INFO("Enabling IPC: WM will be proper only after next commit\n");
-	dev_priv->wm.distrust_bios_wm = true;
-	dev_priv->ipc_enabled = enable;
-	intel_enable_ipc(dev_priv);
-
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		if (!dev_priv->ipc_enabled && enable)
+			DRM_INFO("Enabling IPC: WM will be proper only after next commit\n");
+		dev_priv->wm.distrust_bios_wm = true;
+		dev_priv->ipc_enabled = enable;
+		intel_enable_ipc(dev_priv);
+	}
 
 	return len;
 }
@@ -4127,16 +4114,13 @@ i915_cache_sharing_get(void *data, u64 *val)
 {
 	struct drm_i915_private *dev_priv = data;
 	intel_wakeref_t wakeref;
-	u32 snpcr;
+	u32 snpcr = 0;
 
 	if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv)))
 		return -ENODEV;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
-
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref)
+		snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
 
 	*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
 
@@ -4148,7 +4132,6 @@ i915_cache_sharing_set(void *data, u64 val)
 {
 	struct drm_i915_private *dev_priv = data;
 	intel_wakeref_t wakeref;
-	u32 snpcr;
 
 	if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv)))
 		return -ENODEV;
@@ -4156,16 +4139,17 @@ i915_cache_sharing_set(void *data, u64 val)
 	if (val > 3)
 		return -EINVAL;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
 	DRM_DEBUG_DRIVER("Manually setting uncore sharing to %llu\n", val);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		u32 snpcr;
+
+		/* Update the cache sharing policy here as well */
+		snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
+		snpcr &= ~GEN6_MBC_SNPCR_MASK;
+		snpcr |= val << GEN6_MBC_SNPCR_SHIFT;
+		I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
+	}
 
-	/* Update the cache sharing policy here as well */
-	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
-	snpcr &= ~GEN6_MBC_SNPCR_MASK;
-	snpcr |= (val << GEN6_MBC_SNPCR_SHIFT);
-	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
-
-	intel_runtime_pm_put(dev_priv, wakeref);
 	return 0;
 }
 
@@ -4402,20 +4386,17 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 	sseu.max_eus_per_subslice =
 		INTEL_INFO(dev_priv)->sseu.max_eus_per_subslice;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	if (IS_CHERRYVIEW(dev_priv)) {
-		cherryview_sseu_device_status(dev_priv, &sseu);
-	} else if (IS_BROADWELL(dev_priv)) {
-		broadwell_sseu_device_status(dev_priv, &sseu);
-	} else if (IS_GEN9(dev_priv)) {
-		gen9_sseu_device_status(dev_priv, &sseu);
-	} else if (INTEL_GEN(dev_priv) >= 10) {
-		gen10_sseu_device_status(dev_priv, &sseu);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		if (IS_CHERRYVIEW(dev_priv))
+			cherryview_sseu_device_status(dev_priv, &sseu);
+		else if (IS_BROADWELL(dev_priv))
+			broadwell_sseu_device_status(dev_priv, &sseu);
+		else if (IS_GEN9(dev_priv))
+			gen9_sseu_device_status(dev_priv, &sseu);
+		else if (INTEL_GEN(dev_priv) >= 10)
+			gen10_sseu_device_status(dev_priv, &sseu);
 	}
 
-	intel_runtime_pm_put(dev_priv, wakeref);
-
 	i915_print_sseu_info(m, false, &sseu);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b6da68846164..233a65487c3f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -814,13 +814,13 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 
 	i915_gem_chipset_flush(dev_priv);
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	spin_lock_irq(&dev_priv->uncore.lock);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		spin_lock_irq(&dev_priv->uncore.lock);
 
-	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
+		POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
 
-	spin_unlock_irq(&dev_priv->uncore.lock);
-	intel_runtime_pm_put(dev_priv, wakeref);
+		spin_unlock_irq(&dev_priv->uncore.lock);
+	}
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 04710d0ecf8c..ef38d09b6ce0 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2720,9 +2720,8 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	if (i915_gem_object_is_readonly(obj))
 		pte_flags |= PTE_READ_ONLY;
 
-	wakeref = intel_runtime_pm_get(i915);
-	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
-	intel_runtime_pm_put(i915, wakeref);
+	with_intel_runtime_pm(i915, wakeref)
+		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
 
 	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
 
@@ -2741,9 +2740,8 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
 	struct drm_i915_private *i915 = vma->vm->i915;
 	intel_wakeref_t wakeref;
 
-	wakeref = intel_runtime_pm_get(i915);
-	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
-	intel_runtime_pm_put(i915, wakeref);
+	with_intel_runtime_pm(i915, wakeref)
+		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
 }
 
 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
@@ -2777,9 +2775,10 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	if (flags & I915_VMA_GLOBAL_BIND) {
 		intel_wakeref_t wakeref;
 
-		wakeref = intel_runtime_pm_get(i915);
-		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
-		intel_runtime_pm_put(i915, wakeref);
+		with_intel_runtime_pm(i915, wakeref) {
+			vma->vm->insert_entries(vma->vm, vma,
+						cache_level, pte_flags);
+		}
 	}
 
 	return 0;
@@ -2790,11 +2789,11 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
 	struct drm_i915_private *i915 = vma->vm->i915;
 
 	if (vma->flags & I915_VMA_GLOBAL_BIND) {
+		struct i915_address_space *vm = vma->vm;
 		intel_wakeref_t wakeref;
 
-		wakeref = intel_runtime_pm_get(i915);
-		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
-		intel_runtime_pm_put(i915, wakeref);
+		with_intel_runtime_pm(i915, wakeref)
+			vm->clear_range(vm, vma->node.start, vma->size);
 	}
 
 	if (vma->flags & I915_VMA_LOCAL_BIND) {
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index f25e4c7c71b1..fe198824dba1 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -299,14 +299,14 @@ i915_gem_shrink(struct drm_i915_private *i915,
 unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
-	unsigned long freed;
+	unsigned long freed = 0;
 
-	wakeref = intel_runtime_pm_get(i915);
-	freed = i915_gem_shrink(i915, -1UL, NULL,
-				I915_SHRINK_BOUND |
-				I915_SHRINK_UNBOUND |
-				I915_SHRINK_ACTIVE);
-	intel_runtime_pm_put(i915, wakeref);
+	with_intel_runtime_pm(i915, wakeref) {
+		freed = i915_gem_shrink(i915, -1UL, NULL,
+					I915_SHRINK_BOUND |
+					I915_SHRINK_UNBOUND |
+					I915_SHRINK_ACTIVE);
+	}
 
 	return freed;
 }
@@ -379,14 +379,14 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
 		intel_wakeref_t wakeref;
 
-		wakeref = intel_runtime_pm_get(i915);
-		freed += i915_gem_shrink(i915,
-					 sc->nr_to_scan - sc->nr_scanned,
-					 &sc->nr_scanned,
-					 I915_SHRINK_ACTIVE |
-					 I915_SHRINK_BOUND |
-					 I915_SHRINK_UNBOUND);
-		intel_runtime_pm_put(i915, wakeref);
+		with_intel_runtime_pm(i915, wakeref) {
+			freed += i915_gem_shrink(i915,
+						 sc->nr_to_scan - sc->nr_scanned,
+						 &sc->nr_scanned,
+						 I915_SHRINK_ACTIVE |
+						 I915_SHRINK_BOUND |
+						 I915_SHRINK_UNBOUND);
+		}
 	}
 
 	shrinker_unlock(i915, unlock);
@@ -483,13 +483,13 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 	if (ret)
 		goto out;
 
-	wakeref = intel_runtime_pm_get(i915);
-	freed_pages += i915_gem_shrink(i915, -1UL, NULL,
-				       I915_SHRINK_BOUND |
-				       I915_SHRINK_UNBOUND |
-				       I915_SHRINK_ACTIVE |
-				       I915_SHRINK_VMAPS);
-	intel_runtime_pm_put(i915, wakeref);
+	with_intel_runtime_pm(i915, wakeref) {
+		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
+					       I915_SHRINK_BOUND |
+					       I915_SHRINK_UNBOUND |
+					       I915_SHRINK_ACTIVE |
+					       I915_SHRINK_VMAPS);
+	}
 
 	/* We also want to clear any cached iomaps as they wrap vmap */
 	list_for_each_entry_safe(vma, next,
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 3d43fc9dd25d..b1cb2d3cae16 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -230,14 +230,11 @@ frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 
 		val = dev_priv->gt_pm.rps.cur_freq;
 		if (dev_priv->gt.awake) {
-			intel_wakeref_t wakeref =
-				intel_runtime_pm_get_if_in_use(dev_priv);
+			intel_wakeref_t wakeref;
 
-			if (wakeref) {
+			with_intel_runtime_pm_if_in_use(dev_priv, wakeref)
 				val = intel_get_cagf(dev_priv,
 						     I915_READ_NOTRACE(GEN6_RPSTAT1));
-				intel_runtime_pm_put(dev_priv, wakeref);
-			}
 		}
 
 		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index a959aee208de..e126196413a4 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -43,11 +43,10 @@ static u32 calc_residency(struct drm_i915_private *dev_priv,
 			  i915_reg_t reg)
 {
 	intel_wakeref_t wakeref;
-	u64 res;
+	u64 res = 0;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	res = intel_rc6_residency_us(dev_priv, reg);
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref)
+		res = intel_rc6_residency_us(dev_priv, reg);
 
 	return DIV_ROUND_CLOSEST_ULL(res, 1000);
 }
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 5d88b63a3d6b..fd85a937b237 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -2062,6 +2062,14 @@ intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915);
 intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
 intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
 
+#define with_intel_runtime_pm(i915, wf) \
+	for (wf = intel_runtime_pm_get(i915); wf; \
+	     intel_runtime_pm_put(i915, wf), wf = 0)
+
+#define with_intel_runtime_pm_if_in_use(i915, wf) \
+	for (wf = intel_runtime_pm_get_if_in_use(i915); wf; \
+	     intel_runtime_pm_put(i915, wf), wf = 0)
+
 void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915);
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
 void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref);
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 20c0b36d748e..b53582c0c6c1 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -444,9 +444,8 @@ static void guc_log_capture_logs(struct intel_guc_log *log)
 	 * Generally device is expected to be active only at this
 	 * time, so get/put should be really quick.
 	 */
-	wakeref = intel_runtime_pm_get(dev_priv);
-	guc_action_flush_log_complete(guc);
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref)
+		guc_action_flush_log_complete(guc);
 }
 
 int intel_guc_log_create(struct intel_guc_log *log)
@@ -507,7 +506,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
 	struct intel_guc *guc = log_to_guc(log);
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	intel_wakeref_t wakeref;
-	int ret;
+	int ret = 0;
 
 	BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN != 0);
 	GEM_BUG_ON(!log->vma);
@@ -521,16 +520,14 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
 
-	if (log->level == level) {
-		ret = 0;
+	if (log->level == level)
 		goto out_unlock;
-	}
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	ret = guc_action_control_log(guc, GUC_LOG_LEVEL_IS_VERBOSE(level),
-				     GUC_LOG_LEVEL_IS_ENABLED(level),
-				     GUC_LOG_LEVEL_TO_VERBOSITY(level));
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref)
+		ret = guc_action_control_log(guc,
+					     GUC_LOG_LEVEL_IS_VERBOSE(level),
+					     GUC_LOG_LEVEL_IS_ENABLED(level),
+					     GUC_LOG_LEVEL_TO_VERBOSITY(level));
 	if (ret) {
 		DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret);
 		goto out_unlock;
@@ -611,9 +608,8 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
 	 */
 	flush_work(&log->relay.flush_work);
 
-	wakeref = intel_runtime_pm_get(i915);
-	guc_action_flush_log(guc);
-	intel_runtime_pm_put(i915, wakeref);
+	with_intel_runtime_pm(i915, wakeref)
+		guc_action_flush_log(guc);
 
 	/* GuC would have updated log buffer by now, so capture it */
 	guc_log_capture_logs(log);
diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c
index a61c944c3036..fb69cf195253 100644
--- a/drivers/gpu/drm/i915/intel_huc.c
+++ b/drivers/gpu/drm/i915/intel_huc.c
@@ -115,14 +115,13 @@ int intel_huc_check_status(struct intel_huc *huc)
 {
 	struct drm_i915_private *dev_priv = huc_to_i915(huc);
 	intel_wakeref_t wakeref;
-	u32 status;
+	u32 status = 0;
 
 	if (!HAS_HUC(dev_priv))
 		return -ENODEV;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref)
+		status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
 
 	return status;
 }
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index e57c42323b17..d36fccaccefa 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -1204,17 +1204,19 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd)
 	struct drm_device *dev = connector->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	intel_wakeref_t wakeref;
-	u32 hw_level;
-	int ret;
+	int ret = 0;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		u32 hw_level;
 
-	hw_level = intel_panel_get_backlight(connector);
-	ret = scale_hw_to_user(connector, hw_level, bd->props.max_brightness);
+		drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 
-	drm_modeset_unlock(&dev->mode_config.connection_mutex);
-	intel_runtime_pm_put(dev_priv, wakeref);
+		hw_level = intel_panel_get_backlight(connector);
+		ret = scale_hw_to_user(connector,
+				       hw_level, bd->props.max_brightness);
+
+		drm_modeset_unlock(&dev->mode_config.connection_mutex);
+	}
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 5b842318edbc..e611dfb2543e 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1696,21 +1696,21 @@ int i915_reg_read_ioctl(struct drm_device *dev,
 
 	flags = reg->offset & (entry->size - 1);
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	if (entry->size == 8 && flags == I915_REG_READ_8B_WA)
-		reg->val = I915_READ64_2x32(entry->offset_ldw,
-					    entry->offset_udw);
-	else if (entry->size == 8 && flags == 0)
-		reg->val = I915_READ64(entry->offset_ldw);
-	else if (entry->size == 4 && flags == 0)
-		reg->val = I915_READ(entry->offset_ldw);
-	else if (entry->size == 2 && flags == 0)
-		reg->val = I915_READ16(entry->offset_ldw);
-	else if (entry->size == 1 && flags == 0)
-		reg->val = I915_READ8(entry->offset_ldw);
-	else
-		ret = -EINVAL;
-	intel_runtime_pm_put(dev_priv, wakeref);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		if (entry->size == 8 && flags == I915_REG_READ_8B_WA)
+			reg->val = I915_READ64_2x32(entry->offset_ldw,
+						    entry->offset_udw);
+		else if (entry->size == 8 && flags == 0)
+			reg->val = I915_READ64(entry->offset_ldw);
+		else if (entry->size == 4 && flags == 0)
+			reg->val = I915_READ(entry->offset_ldw);
+		else if (entry->size == 2 && flags == 0)
+			reg->val = I915_READ16(entry->offset_ldw);
+		else if (entry->size == 1 && flags == 0)
+			reg->val = I915_READ8(entry->offset_ldw);
+		else
+			ret = -EINVAL;
+	}
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 896f0b37b34c..8db2720b8028 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -98,26 +98,22 @@ static void pm_suspend(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
 
-	wakeref = intel_runtime_pm_get(i915);
-
-	i915_gem_suspend_gtt_mappings(i915);
-	i915_gem_suspend_late(i915);
-
-	intel_runtime_pm_put(i915, wakeref);
+	with_intel_runtime_pm(i915, wakeref) {
+		i915_gem_suspend_gtt_mappings(i915);
+		i915_gem_suspend_late(i915);
+	}
 }
 
 static void pm_hibernate(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
 
-	wakeref = intel_runtime_pm_get(i915);
-
-	i915_gem_suspend_gtt_mappings(i915);
-
-	i915_gem_freeze(i915);
-	i915_gem_freeze_late(i915);
+	with_intel_runtime_pm(i915, wakeref) {
+		i915_gem_suspend_gtt_mappings(i915);
 
-	intel_runtime_pm_put(i915, wakeref);
+		i915_gem_freeze(i915);
+		i915_gem_freeze_late(i915);
+	}
 }
 
 static void pm_resume(struct drm_i915_private *i915)
@@ -128,13 +124,11 @@ static void pm_resume(struct drm_i915_private *i915)
 	 * Both suspend and hibernate follow the same wakeup path and assume
 	 * that runtime-pm just works.
 	 */
-	wakeref = intel_runtime_pm_get(i915);
-
-	intel_engines_sanitize(i915);
-	i915_gem_sanitize(i915);
-	i915_gem_resume(i915);
-
-	intel_runtime_pm_put(i915, wakeref);
+	with_intel_runtime_pm(i915, wakeref) {
+		intel_engines_sanitize(i915);
+		i915_gem_sanitize(i915);
+		i915_gem_resume(i915);
+	}
 }
 
 static int igt_gem_suspend(void *arg)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index e5db71e3991d..c7c891a287f5 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -580,9 +580,9 @@ static int igt_ctx_exec(void *arg)
 				}
 			}
 
-			wakeref = intel_runtime_pm_get(i915);
-			err = gpu_fill(obj, ctx, engine, dw);
-			intel_runtime_pm_put(i915, wakeref);
+			err = 0;
+			with_intel_runtime_pm(i915, wakeref)
+				err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
@@ -683,9 +683,9 @@ static int igt_ctx_readonly(void *arg)
 					i915_gem_object_set_readonly(obj);
 			}
 
-			wakeref = intel_runtime_pm_get(i915);
-			err = gpu_fill(obj, ctx, engine, dw);
-			intel_runtime_pm_put(i915, wakeref);
+			err = 0;
+			with_intel_runtime_pm(i915, wakeref)
+				err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 13d4dc5e8dfb..572016a45a59 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -506,9 +506,8 @@ static void disable_retire_worker(struct drm_i915_private *i915)
 	if (!i915->gt.active_requests++) {
 		intel_wakeref_t wakeref;
 
-		wakeref = intel_runtime_pm_get(i915);
-		i915_gem_unpark(i915);
-		intel_runtime_pm_put(i915, wakeref);
+		with_intel_runtime_pm(i915, wakeref)
+			i915_gem_unpark(i915);
 	}
 	mutex_unlock(&i915->drm.struct_mutex);
 
@@ -590,10 +589,10 @@ static int igt_mmap_offset_exhaustion(void *arg)
 			goto out;
 		}
 
+		err = 0;
 		mutex_lock(&i915->drm.struct_mutex);
-		wakeref = intel_runtime_pm_get(i915);
-		err = make_obj_busy(obj);
-		intel_runtime_pm_put(i915, wakeref);
+		with_intel_runtime_pm(i915, wakeref)
+			err = make_obj_busy(obj);
 		mutex_unlock(&i915->drm.struct_mutex);
 		if (err) {
 			pr_err("[loop %d] Failed to busy the object\n", loop);
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index 0db63eb24dfc..e2b542c6bfb1 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -45,9 +45,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	if (err)
 		goto err_obj;
 
-	wakeref = intel_runtime_pm_get(engine->i915);
-	rq = i915_request_alloc(engine, ctx);
-	intel_runtime_pm_put(engine->i915, wakeref);
+	rq = ERR_PTR(-ENODEV);
+	with_intel_runtime_pm(engine->i915, wakeref)
+		rq = i915_request_alloc(engine, ctx);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_pin;
@@ -179,9 +179,9 @@ static int switch_to_scratch_context(struct intel_engine_cs *engine)
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	wakeref = intel_runtime_pm_get(engine->i915);
-	rq = i915_request_alloc(engine, ctx);
-	intel_runtime_pm_put(engine->i915, wakeref);
+	rq = ERR_PTR(-ENODEV);
+	with_intel_runtime_pm(engine->i915, wakeref)
+		rq = i915_request_alloc(engine, ctx);
 
 	kernel_context_close(ctx);
 	if (IS_ERR(rq))
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 23/40] drm/i915: Markup paired operations on display power domains
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (20 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 22/40] drm/i915: Syntactic sugar for using intel_runtime_pm Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-19 19:55 ` [PATCH 24/40] drm/i915: Track the wakeref used to initialise " Chris Wilson
                   ` (20 subsequent siblings)
  42 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

The majority of runtime-pm operations are bounded and scoped within a
function; for these it is easy to verify that the wakerefs are handled
correctly. We can employ the compiler to help us, and reduce the number
of wakerefs tracked when debugging, by passing around cookies provided
by the various rpm_get functions to their rpm_put counterparts. This
makes the pairing explicit, and given the required wakeref cookie the
compiler can verify that we pass an initialised value to the rpm_put
(quite handy for double-checking error paths).
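
(As an aside, and not part of the patch itself: the cookie-pairing idea
boils down to something like the following standalone sketch. The demo_*
names are invented purely for illustration and are not the i915 API.)

#include <assert.h>
#include <stdio.h>

typedef unsigned long demo_wakeref_t;   /* opaque cookie, 0 == invalid */

static int pm_refcount;                 /* stand-in for the device refcount */

static demo_wakeref_t demo_pm_get(void)
{
        pm_refcount++;
        /* A real implementation would hand out a unique tracking id. */
        return (demo_wakeref_t)pm_refcount;
}

static void demo_pm_put(demo_wakeref_t wakeref)
{
        /* Passing 0 (an uninitialised/invalid cookie) is a caller bug. */
        assert(wakeref != 0);
        pm_refcount--;
}

int main(void)
{
        demo_wakeref_t wakeref;

        wakeref = demo_pm_get();        /* acquire: caller must keep the cookie */
        printf("refcount while awake: %d\n", pm_refcount);
        demo_pm_put(wakeref);           /* release: cookie handed back */
        printf("refcount after put: %d\n", pm_refcount);
        return 0;
}

Because _put() now demands the cookie that _get() returned, an error path
that never initialised its wakeref shows up as an uninitialised-variable
warning at compile time instead of a silently leaked reference.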

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     | 35 ++++++------
 drivers/gpu/drm/i915/i915_drv.h         |  2 +
 drivers/gpu/drm/i915/i915_gem.c         |  4 +-
 drivers/gpu/drm/i915/intel_audio.c      |  3 +-
 drivers/gpu/drm/i915/intel_cdclk.c      | 10 ++--
 drivers/gpu/drm/i915/intel_crt.c        | 25 +++++----
 drivers/gpu/drm/i915/intel_csr.c        | 25 +++++++--
 drivers/gpu/drm/i915/intel_ddi.c        | 36 ++++++++-----
 drivers/gpu/drm/i915/intel_display.c    | 70 +++++++++++++++---------
 drivers/gpu/drm/i915/intel_dp.c         | 29 +++++-----
 drivers/gpu/drm/i915/intel_dpll_mgr.c   | 66 +++++++++++++++--------
 drivers/gpu/drm/i915/intel_drv.h        | 17 ++++--
 drivers/gpu/drm/i915/intel_hdmi.c       | 18 ++++---
 drivers/gpu/drm/i915/intel_i2c.c        | 20 +++----
 drivers/gpu/drm/i915/intel_lvds.c       |  8 +--
 drivers/gpu/drm/i915/intel_pipe_crc.c   |  6 ++-
 drivers/gpu/drm/i915/intel_pm.c         |  7 ++-
 drivers/gpu/drm/i915/intel_runtime_pm.c | 71 ++++++++++++++++---------
 drivers/gpu/drm/i915/intel_sprite.c     | 24 ++++++---
 drivers/gpu/drm/i915/vlv_dsi.c          |  8 +--
 20 files changed, 313 insertions(+), 171 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 98fa216d19bb..844dceae62de 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -627,10 +627,12 @@ static void gen8_display_interrupt_info(struct seq_file *m)
 
 	for_each_pipe(dev_priv, pipe) {
 		enum intel_display_power_domain power_domain;
+		intel_wakeref_t wakeref;
 
 		power_domain = POWER_DOMAIN_PIPE(pipe);
-		if (!intel_display_power_get_if_enabled(dev_priv,
-							power_domain)) {
+		wakeref = intel_display_power_get_if_enabled(dev_priv,
+							     power_domain);
+		if (!wakeref) {
 			seq_printf(m, "Pipe %c power disabled\n",
 				   pipe_name(pipe));
 			continue;
@@ -645,7 +647,7 @@ static void gen8_display_interrupt_info(struct seq_file *m)
 			   pipe_name(pipe),
 			   I915_READ(GEN8_DE_PIPE_IER(pipe)));
 
-		intel_display_power_put(dev_priv, power_domain);
+		intel_display_power_put(dev_priv, power_domain, wakeref);
 	}
 
 	seq_printf(m, "Display Engine port interrupt mask:\t%08x\n",
@@ -681,6 +683,8 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 	wakeref = intel_runtime_pm_get(dev_priv);
 
 	if (IS_CHERRYVIEW(dev_priv)) {
+		intel_wakeref_t pref;
+
 		seq_printf(m, "Master Interrupt Control:\t%08x\n",
 			   I915_READ(GEN8_MASTER_IRQ));
 
@@ -696,8 +700,9 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 			enum intel_display_power_domain power_domain;
 
 			power_domain = POWER_DOMAIN_PIPE(pipe);
-			if (!intel_display_power_get_if_enabled(dev_priv,
-								power_domain)) {
+			pref = intel_display_power_get_if_enabled(dev_priv,
+								  power_domain);
+			if (!pref) {
 				seq_printf(m, "Pipe %c power disabled\n",
 					   pipe_name(pipe));
 				continue;
@@ -707,17 +712,17 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 				   pipe_name(pipe),
 				   I915_READ(PIPESTAT(pipe)));
 
-			intel_display_power_put(dev_priv, power_domain);
+			intel_display_power_put(dev_priv, power_domain, pref);
 		}
 
-		intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+		pref = intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
 		seq_printf(m, "Port hotplug:\t%08x\n",
 			   I915_READ(PORT_HOTPLUG_EN));
 		seq_printf(m, "DPFLIPSTAT:\t%08x\n",
 			   I915_READ(VLV_DPFLIPSTAT));
 		seq_printf(m, "DPINVGTT:\t%08x\n",
 			   I915_READ(DPINVGTT));
-		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, pref);
 
 		for (i = 0; i < 4; i++) {
 			seq_printf(m, "GT Interrupt IMR %d:\t%08x\n",
@@ -780,10 +785,12 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 			   I915_READ(VLV_IMR));
 		for_each_pipe(dev_priv, pipe) {
 			enum intel_display_power_domain power_domain;
+			intel_wakeref_t pref;
 
 			power_domain = POWER_DOMAIN_PIPE(pipe);
-			if (!intel_display_power_get_if_enabled(dev_priv,
-								power_domain)) {
+			pref = intel_display_power_get_if_enabled(dev_priv,
+								  power_domain);
+			if (!pref) {
 				seq_printf(m, "Pipe %c power disabled\n",
 					   pipe_name(pipe));
 				continue;
@@ -792,7 +799,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 			seq_printf(m, "Pipe %c stat:\t%08x\n",
 				   pipe_name(pipe),
 				   I915_READ(PIPESTAT(pipe)));
-			intel_display_power_put(dev_priv, power_domain);
+			intel_display_power_put(dev_priv, power_domain, pref);
 		}
 
 		seq_printf(m, "Master IER:\t%08x\n",
@@ -1726,8 +1733,7 @@ static int i915_sr_status(struct seq_file *m, void *unused)
 	intel_wakeref_t wakeref;
 	bool sr_enabled = false;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
 
 	if (INTEL_GEN(dev_priv) >= 9)
 		/* no global SR status; inspect per-plane WM */;
@@ -1743,8 +1749,7 @@ static int i915_sr_status(struct seq_file *m, void *unused)
 	else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, wakeref);
 
 	seq_printf(m, "self-refresh: %s\n", enableddisabled(sr_enabled));
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a4cd27a7108f..881ecf416d7d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -476,6 +476,7 @@ struct intel_csr {
 	uint32_t mmiodata[8];
 	uint32_t dc_state;
 	uint32_t allowed_dc_mask;
+	intel_wakeref_t wakeref;
 };
 
 enum i915_cache_level {
@@ -2134,6 +2135,7 @@ struct drm_i915_private {
 		 * is a slight delay before we do so.
 		 */
 		intel_wakeref_t awake;
+		intel_wakeref_t power;
 
 		/**
 		 * The number of times we have woken up.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 233a65487c3f..a8cdaeaea7a1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -177,7 +177,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
 	if (INTEL_GEN(i915) >= 6)
 		gen6_rps_idle(i915);
 
-	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ);
+	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, i915->gt.power);
 
 	intel_runtime_pm_put(i915, wakeref);
 
@@ -222,7 +222,7 @@ void i915_gem_unpark(struct drm_i915_private *i915)
 	 * Work around it by grabbing a GT IRQ power domain whilst there is any
 	 * GT activity, preventing any DC state transitions.
 	 */
-	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
+	i915->gt.power = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
 
 	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
 		i915->gt.epoch = 1;
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 769f3f586661..290b7f456260 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -732,7 +732,8 @@ static void i915_audio_component_get_power(struct device *kdev)
 
 static void i915_audio_component_put_power(struct device *kdev)
 {
-	intel_display_power_put(kdev_to_i915(kdev), POWER_DOMAIN_AUDIO);
+	intel_display_power_put_unchecked(kdev_to_i915(kdev),
+					  POWER_DOMAIN_AUDIO);
 }
 
 static void i915_audio_component_codec_wake_override(struct device *kdev,
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index 29075c763428..01f0f7913f86 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -520,6 +520,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 {
 	int cdclk = cdclk_state->cdclk;
 	u32 val, cmd = cdclk_state->voltage_level;
+	intel_wakeref_t wakeref;
 
 	switch (cdclk) {
 	case 400000:
@@ -539,7 +540,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 	 * a system suspend.  So grab the PIPE-A domain, which covers
 	 * the HW blocks needed for the following programming.
 	 */
-	intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
+	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
 
 	mutex_lock(&dev_priv->pcu_lock);
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
@@ -593,7 +594,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 
 	vlv_program_pfi_credits(dev_priv);
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A, wakeref);
 }
 
 static void chv_set_cdclk(struct drm_i915_private *dev_priv,
@@ -601,6 +602,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 {
 	int cdclk = cdclk_state->cdclk;
 	u32 val, cmd = cdclk_state->voltage_level;
+	intel_wakeref_t wakeref;
 
 	switch (cdclk) {
 	case 333333:
@@ -619,7 +621,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 	 * a system suspend.  So grab the PIPE-A domain, which covers
 	 * the HW blocks needed for the following programming.
 	 */
-	intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
+	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
 
 	mutex_lock(&dev_priv->pcu_lock);
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
@@ -637,7 +639,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 
 	vlv_program_pfi_credits(dev_priv);
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A, wakeref);
 }
 
 static int bdw_calc_cdclk(int min_cdclk)
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 0c6bf82bb059..e9e21555bd4c 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -84,15 +84,17 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crt *crt = intel_encoder_to_crt(encoder);
+	intel_wakeref_t wakeref;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv,
-						encoder->power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     encoder->power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = intel_crt_port_enabled(dev_priv, crt->adpa_reg, pipe);
 
-	intel_display_power_put(dev_priv, encoder->power_domain);
+	intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
 
 	return ret;
 }
@@ -774,6 +776,7 @@ intel_crt_detect(struct drm_connector *connector,
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
 	struct intel_crt *crt = intel_attached_crt(connector);
 	struct intel_encoder *intel_encoder = &crt->base;
+	intel_wakeref_t wakeref;
 	int status, ret;
 	struct intel_load_detect_pipe tmp;
 
@@ -782,7 +785,8 @@ intel_crt_detect(struct drm_connector *connector,
 		      force);
 
 	if (i915_modparams.load_detect_test) {
-		intel_display_power_get(dev_priv, intel_encoder->power_domain);
+		wakeref = intel_display_power_get(dev_priv,
+						  intel_encoder->power_domain);
 		goto load_detect;
 	}
 
@@ -790,7 +794,8 @@ intel_crt_detect(struct drm_connector *connector,
 	if (dmi_check_system(intel_spurious_crt_detect))
 		return connector_status_disconnected;
 
-	intel_display_power_get(dev_priv, intel_encoder->power_domain);
+	wakeref = intel_display_power_get(dev_priv,
+					  intel_encoder->power_domain);
 
 	if (I915_HAS_HOTPLUG(dev_priv)) {
 		/* We can not rely on the HPD pin always being correctly wired
@@ -845,7 +850,7 @@ intel_crt_detect(struct drm_connector *connector,
 	}
 
 out:
-	intel_display_power_put(dev_priv, intel_encoder->power_domain);
+	intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref);
 	return status;
 }
 
@@ -861,10 +866,12 @@ static int intel_crt_get_modes(struct drm_connector *connector)
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_crt *crt = intel_attached_crt(connector);
 	struct intel_encoder *intel_encoder = &crt->base;
-	int ret;
+	intel_wakeref_t wakeref;
 	struct i2c_adapter *i2c;
+	int ret;
 
-	intel_display_power_get(dev_priv, intel_encoder->power_domain);
+	wakeref = intel_display_power_get(dev_priv,
+					  intel_encoder->power_domain);
 
 	i2c = intel_gmbus_get_adapter(dev_priv, dev_priv->vbt.crt_ddc_pin);
 	ret = intel_crt_ddc_get_modes(connector, i2c);
@@ -876,7 +883,7 @@ static int intel_crt_get_modes(struct drm_connector *connector)
 	ret = intel_crt_ddc_get_modes(connector, i2c);
 
 out:
-	intel_display_power_put(dev_priv, intel_encoder->power_domain);
+	intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 4aa8f3d6b64c..e4f210ccd8aa 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -424,6 +424,21 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv,
 	return memcpy(dmc_payload, &fw->data[readcount], nbytes);
 }
 
+static void intel_csr_runtime_pm_get(struct drm_i915_private *dev_priv)
+{
+	WARN_ON(dev_priv->csr.wakeref);
+	dev_priv->csr.wakeref =
+		intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+}
+
+static void intel_csr_runtime_pm_put(struct drm_i915_private *dev_priv)
+{
+	intel_wakeref_t wakeref __maybe_unused =
+		fetch_and_zero(&dev_priv->csr.wakeref);
+
+	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, wakeref);
+}
+
 static void csr_load_work_fn(struct work_struct *work)
 {
 	struct drm_i915_private *dev_priv;
@@ -439,8 +454,7 @@ static void csr_load_work_fn(struct work_struct *work)
 
 	if (dev_priv->csr.dmc_payload) {
 		intel_csr_load_program(dev_priv);
-
-		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+		intel_csr_runtime_pm_put(dev_priv);
 
 		DRM_INFO("Finished loading DMC firmware %s (v%u.%u)\n",
 			 dev_priv->csr.fw_path,
@@ -493,7 +507,7 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv)
 	 * Obtain a runtime pm reference, until CSR is loaded,
 	 * to avoid entering runtime-suspend.
 	 */
-	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+	intel_csr_runtime_pm_get(dev_priv);
 
 	if (csr->fw_path == NULL) {
 		DRM_DEBUG_KMS("No known CSR firmware for platform, disabling runtime PM\n");
@@ -523,7 +537,7 @@ void intel_csr_ucode_suspend(struct drm_i915_private *dev_priv)
 
 	/* Drop the reference held in case DMC isn't loaded. */
 	if (!dev_priv->csr.dmc_payload)
-		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+		intel_csr_runtime_pm_put(dev_priv);
 }
 
 /**
@@ -543,7 +557,7 @@ void intel_csr_ucode_resume(struct drm_i915_private *dev_priv)
 	 * loaded.
 	 */
 	if (!dev_priv->csr.dmc_payload)
-		intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+		intel_csr_runtime_pm_get(dev_priv);
 }
 
 /**
@@ -559,6 +573,7 @@ void intel_csr_ucode_fini(struct drm_i915_private *dev_priv)
 		return;
 
 	intel_csr_ucode_suspend(dev_priv);
+	WARN_ON(dev_priv->csr.wakeref);
 
 	kfree(dev_priv->csr.dmc_payload);
 }
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index b6910c8b4e08..91e6456a6744 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -1914,12 +1914,14 @@ int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder,
 {
 	struct drm_device *dev = intel_encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
+	intel_wakeref_t wakeref;
 	enum pipe pipe = 0;
 	int ret = 0;
 	uint32_t tmp;
 
-	if (WARN_ON(!intel_display_power_get_if_enabled(dev_priv,
-						intel_encoder->power_domain)))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     intel_encoder->power_domain);
+	if (WARN_ON(!wakeref))
 		return -ENXIO;
 
 	if (WARN_ON(!intel_encoder->get_hw_state(intel_encoder, &pipe))) {
@@ -1934,7 +1936,7 @@ int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder,
 		tmp &= ~TRANS_DDI_HDCP_SIGNALLING;
 	I915_WRITE(TRANS_DDI_FUNC_CTL(pipe), tmp);
 out:
-	intel_display_power_put(dev_priv, intel_encoder->power_domain);
+	intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref);
 	return ret;
 }
 
@@ -1945,13 +1947,15 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector)
 	struct intel_encoder *encoder = intel_connector->encoder;
 	int type = intel_connector->base.connector_type;
 	enum port port = encoder->port;
-	enum pipe pipe = 0;
 	enum transcoder cpu_transcoder;
+	intel_wakeref_t wakeref;
+	enum pipe pipe = 0;
 	uint32_t tmp;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv,
-						encoder->power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     encoder->power_domain);
+	if (!wakeref)
 		return false;
 
 	if (!encoder->get_hw_state(encoder, &pipe)) {
@@ -1993,7 +1997,7 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector)
 	}
 
 out:
-	intel_display_power_put(dev_priv, encoder->power_domain);
+	intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
 
 	return ret;
 }
@@ -2004,12 +2008,14 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	enum port port = encoder->port;
+	intel_wakeref_t wakeref;
 	enum pipe p;
 	u32 tmp;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv,
-						encoder->power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     encoder->power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = false;
@@ -2069,7 +2075,7 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
 				  "(PHY_CTL %08x)\n", port_name(port), tmp);
 	}
 
-	intel_display_power_put(dev_priv, encoder->power_domain);
+	intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
 
 	return ret;
 }
@@ -3030,12 +3036,13 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder,
 	intel_edp_panel_vdd_on(intel_dp);
 	intel_edp_panel_off(intel_dp);
 
-	intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain);
+	intel_display_power_put_unchecked(dev_priv,
+					  dig_port->ddi_io_power_domain);
 
 	intel_ddi_clk_disable(encoder);
 
-	intel_display_power_put(dev_priv,
-				intel_ddi_main_link_aux_domain(intel_dp));
+	intel_display_power_put_unchecked(dev_priv,
+					  intel_ddi_main_link_aux_domain(intel_dp));
 }
 
 static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder,
@@ -3053,7 +3060,8 @@ static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder,
 
 	intel_disable_ddi_buf(encoder);
 
-	intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain);
+	intel_display_power_put_unchecked(dev_priv,
+					  dig_port->ddi_io_power_domain);
 
 	intel_ddi_clk_disable(encoder);
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 0a50a04be8a0..806aa6dcbf8a 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1263,17 +1263,19 @@ void assert_pipe(struct drm_i915_private *dev_priv,
 	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
 								      pipe);
 	enum intel_display_power_domain power_domain;
+	intel_wakeref_t wakeref;
 
 	/* we keep both pipes enabled on 830 */
 	if (IS_I830(dev_priv))
 		state = true;
 
 	power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder);
-	if (intel_display_power_get_if_enabled(dev_priv, power_domain)) {
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (wakeref) {
 		u32 val = I915_READ(PIPECONF(cpu_transcoder));
 		cur_state = !!(val & PIPECONF_ENABLE);
 
-		intel_display_power_put(dev_priv, power_domain);
+		intel_display_power_put(dev_priv, power_domain, wakeref);
 	} else {
 		cur_state = false;
 	}
@@ -3441,6 +3443,7 @@ static bool i9xx_plane_get_hw_state(struct intel_plane *plane,
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum intel_display_power_domain power_domain;
 	enum i9xx_plane_id i9xx_plane = plane->i9xx_plane;
+	intel_wakeref_t wakeref;
 	bool ret;
 	u32 val;
 
@@ -3450,7 +3453,8 @@ static bool i9xx_plane_get_hw_state(struct intel_plane *plane,
 	 * display power wells.
 	 */
 	power_domain = POWER_DOMAIN_PIPE(plane->pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	val = I915_READ(DSPCNTR(i9xx_plane));
@@ -3463,7 +3467,7 @@ static bool i9xx_plane_get_hw_state(struct intel_plane *plane,
 		*pipe = (val & DISPPLANE_SEL_PIPE_MASK) >>
 			DISPPLANE_SEL_PIPE_SHIFT;
 
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
@@ -6023,7 +6027,7 @@ static void modeset_put_power_domains(struct drm_i915_private *dev_priv,
 	enum intel_display_power_domain domain;
 
 	for_each_power_domain(domain, domains)
-		intel_display_power_put(dev_priv, domain);
+		intel_display_power_put_unchecked(dev_priv, domain);
 }
 
 static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config,
@@ -6272,7 +6276,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
 
 	domains = intel_crtc->enabled_power_domains;
 	for_each_power_domain(domain, domains)
-		intel_display_power_put(dev_priv, domain);
+		intel_display_power_put_unchecked(dev_priv, domain);
 	intel_crtc->enabled_power_domains = 0;
 
 	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
@@ -7873,11 +7877,13 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum intel_display_power_domain power_domain;
+	intel_wakeref_t wakeref;
 	uint32_t tmp;
 	bool ret;
 
 	power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
@@ -7977,7 +7983,7 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
 	ret = true;
 
 out:
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
@@ -8923,11 +8929,13 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	enum intel_display_power_domain power_domain;
+	intel_wakeref_t wakeref;
 	uint32_t tmp;
 	bool ret;
 
 	power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
@@ -9009,7 +9017,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 	ret = true;
 
 out:
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
@@ -9617,7 +9625,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
 
 out:
 	for_each_power_domain(power_domain, power_domain_mask)
-		intel_display_power_put(dev_priv, power_domain);
+		intel_display_power_put_unchecked(dev_priv, power_domain);
 
 	return active;
 }
@@ -9864,17 +9872,19 @@ static bool i845_cursor_get_hw_state(struct intel_plane *plane,
 {
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum intel_display_power_domain power_domain;
+	intel_wakeref_t wakeref;
 	bool ret;
 
 	power_domain = POWER_DOMAIN_PIPE(PIPE_A);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE;
 
 	*pipe = PIPE_A;
 
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
@@ -10094,6 +10104,7 @@ static bool i9xx_cursor_get_hw_state(struct intel_plane *plane,
 {
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum intel_display_power_domain power_domain;
+	intel_wakeref_t wakeref;
 	bool ret;
 	u32 val;
 
@@ -10103,7 +10114,8 @@ static bool i9xx_cursor_get_hw_state(struct intel_plane *plane,
 	 * display power wells.
 	 */
 	power_domain = POWER_DOMAIN_PIPE(plane->pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	val = I915_READ(CURCNTR(plane->pipe));
@@ -10116,7 +10128,7 @@ static bool i9xx_cursor_get_hw_state(struct intel_plane *plane,
 		*pipe = (val & MCURSOR_PIPE_SELECT_MASK) >>
 			MCURSOR_PIPE_SELECT_SHIFT;
 
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
@@ -12667,6 +12679,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 	struct drm_crtc *crtc;
 	struct intel_crtc_state *intel_cstate;
 	u64 put_domains[I915_MAX_PIPES] = {};
+	intel_wakeref_t wakeref = 0;
 	int i;
 
 	intel_atomic_commit_fence_wait(intel_state);
@@ -12674,7 +12687,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 	drm_atomic_helper_wait_for_dependencies(state);
 
 	if (intel_state->modeset)
-		intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
+		wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
 
 	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
 		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -12815,7 +12828,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 		 * the culprit.
 		 */
 		intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
-		intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET);
+		intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET, wakeref);
 	}
 
 	/*
@@ -15620,19 +15633,25 @@ void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv)
 
 void i915_redisable_vga(struct drm_i915_private *dev_priv)
 {
-	/* This function can be called both from intel_modeset_setup_hw_state or
+	intel_wakeref_t wakeref;
+
+	/*
+	 * This function can be called both from intel_modeset_setup_hw_state or
 	 * at a very early point in our resume sequence, where the power well
 	 * structures are not yet restored. Since this function is at a very
 	 * paranoid "someone might have enabled VGA while we were not looking"
 	 * level, just check if the power well is enabled instead of trying to
 	 * follow the "don't touch the power well if we don't need it" policy
-	 * the rest of the driver uses. */
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_VGA))
+	 * the rest of the driver uses.
+	 */
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_VGA);
+	if (!wakeref)
 		return;
 
 	i915_redisable_vga_power_on(dev_priv);
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_VGA);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_VGA, wakeref);
 }
 
 /* FIXME read out full plane state for all planes */
@@ -15860,12 +15879,13 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
 			     struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	enum pipe pipe;
-	struct intel_crtc *crtc;
 	struct intel_encoder *encoder;
+	struct intel_crtc *crtc;
+	intel_wakeref_t wakeref;
+	enum pipe pipe;
 	int i;
 
-	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
 
 	intel_early_display_was(dev_priv);
 	intel_modeset_readout_hw_state(dev);
@@ -15922,7 +15942,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
 			modeset_put_power_domains(dev_priv, put_domains);
 	}
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, wakeref);
 
 	intel_fbc_init_pipe_state(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 6b4c19123f2a..01a07711d395 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -671,7 +671,7 @@ static void pps_unlock(struct intel_dp *intel_dp)
 
 	mutex_unlock(&dev_priv->pps_mutex);
 
-	intel_display_power_put(dev_priv, intel_dp->aux_power_domain);
+	intel_display_power_put_unchecked(dev_priv, intel_dp->aux_power_domain);
 }
 
 static void
@@ -2394,7 +2394,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp)
 	if ((pp & PANEL_POWER_ON) == 0)
 		intel_dp->panel_power_off_time = ktime_get_boottime();
 
-	intel_display_power_put(dev_priv, intel_dp->aux_power_domain);
+	intel_display_power_put_unchecked(dev_priv, intel_dp->aux_power_domain);
 }
 
 static void edp_panel_vdd_work(struct work_struct *__work)
@@ -2538,7 +2538,7 @@ static void edp_panel_off(struct intel_dp *intel_dp)
 	intel_dp->panel_power_off_time = ktime_get_boottime();
 
 	/* We got a reference when we enabled the VDD. */
-	intel_display_power_put(dev_priv, intel_dp->aux_power_domain);
+	intel_display_power_put_unchecked(dev_priv, intel_dp->aux_power_domain);
 }
 
 void intel_edp_panel_off(struct intel_dp *intel_dp)
@@ -2848,16 +2848,18 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	intel_wakeref_t wakeref;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv,
-						encoder->power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     encoder->power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = intel_dp_port_enabled(dev_priv, intel_dp->output_reg,
 				    encoder->port, pipe);
 
-	intel_display_power_put(dev_priv, encoder->power_domain);
+	intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
 
 	return ret;
 }
@@ -5018,11 +5020,12 @@ intel_dp_long_pulse(struct intel_connector *connector,
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	struct intel_dp *intel_dp = intel_attached_dp(&connector->base);
 	enum drm_connector_status status;
+	intel_wakeref_t wakeref;
 	u8 sink_irq_vector = 0;
 
 	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
 
-	intel_display_power_get(dev_priv, intel_dp->aux_power_domain);
+	wakeref = intel_display_power_get(dev_priv, intel_dp->aux_power_domain);
 
 	/* Can't disconnect eDP */
 	if (intel_dp_is_edp(intel_dp))
@@ -5122,7 +5125,7 @@ intel_dp_long_pulse(struct intel_connector *connector,
 	if (status != connector_status_connected && !intel_dp->is_mst)
 		intel_dp_unset_edid(intel_dp);
 
-	intel_display_power_put(dev_priv, intel_dp->aux_power_domain);
+	intel_display_power_put(dev_priv, intel_dp->aux_power_domain, wakeref);
 	return status;
 }
 
@@ -5163,6 +5166,7 @@ intel_dp_force(struct drm_connector *connector)
 	struct intel_dp *intel_dp = intel_attached_dp(connector);
 	struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base;
 	struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev);
+	intel_wakeref_t wakeref;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 		      connector->base.id, connector->name);
@@ -5171,11 +5175,11 @@ intel_dp_force(struct drm_connector *connector)
 	if (connector->status != connector_status_connected)
 		return;
 
-	intel_display_power_get(dev_priv, intel_dp->aux_power_domain);
+	wakeref = intel_display_power_get(dev_priv, intel_dp->aux_power_domain);
 
 	intel_dp_set_edid(intel_dp);
 
-	intel_display_power_put(dev_priv, intel_dp->aux_power_domain);
+	intel_display_power_put(dev_priv, intel_dp->aux_power_domain, wakeref);
 }
 
 static int intel_dp_get_modes(struct drm_connector *connector)
@@ -5630,6 +5634,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	enum irqreturn ret = IRQ_NONE;
+	intel_wakeref_t wakeref;
 
 	if (long_hpd && intel_dig_port->base.type == INTEL_OUTPUT_EDP) {
 		/*
@@ -5653,7 +5658,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 		return IRQ_NONE;
 	}
 
-	intel_display_power_get(dev_priv, intel_dp->aux_power_domain);
+	wakeref = intel_display_power_get(dev_priv, intel_dp->aux_power_domain);
 
 	if (intel_dp->is_mst) {
 		if (intel_dp_check_mst_status(intel_dp) == -EINVAL) {
@@ -5688,7 +5693,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 	ret = IRQ_HANDLED;
 
 put_power:
-	intel_display_power_put(dev_priv, intel_dp->aux_power_domain);
+	intel_display_power_put(dev_priv, intel_dp->aux_power_domain, wakeref);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index e6cac9225536..43879bd91c7a 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -344,9 +344,12 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv,
 				      struct intel_dpll_hw_state *hw_state)
 {
 	const enum intel_dpll_id id = pll->info->id;
+	intel_wakeref_t wakeref;
 	uint32_t val;
 
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_PLLS);
+	if (!wakeref)
 		return false;
 
 	val = I915_READ(PCH_DPLL(id));
@@ -354,7 +357,7 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv,
 	hw_state->fp0 = I915_READ(PCH_FP0(id));
 	hw_state->fp1 = I915_READ(PCH_FP1(id));
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
 
 	return val & DPLL_VCO_ENABLE;
 }
@@ -516,15 +519,18 @@ static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv,
 				       struct intel_dpll_hw_state *hw_state)
 {
 	const enum intel_dpll_id id = pll->info->id;
+	intel_wakeref_t wakeref;
 	uint32_t val;
 
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_PLLS);
+	if (!wakeref)
 		return false;
 
 	val = I915_READ(WRPLL_CTL(id));
 	hw_state->wrpll = val;
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
 
 	return val & WRPLL_PLL_ENABLE;
 }
@@ -533,15 +539,18 @@ static bool hsw_ddi_spll_get_hw_state(struct drm_i915_private *dev_priv,
 				      struct intel_shared_dpll *pll,
 				      struct intel_dpll_hw_state *hw_state)
 {
+	intel_wakeref_t wakeref;
 	uint32_t val;
 
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_PLLS);
+	if (!wakeref)
 		return false;
 
 	val = I915_READ(SPLL_CTL);
 	hw_state->spll = val;
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
 
 	return val & SPLL_PLL_ENABLE;
 }
@@ -996,9 +1005,12 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	uint32_t val;
 	const struct skl_dpll_regs *regs = skl_dpll_regs;
 	const enum intel_dpll_id id = pll->info->id;
+	intel_wakeref_t wakeref;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_PLLS);
+	if (!wakeref)
 		return false;
 
 	ret = false;
@@ -1018,7 +1030,7 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	ret = true;
 
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
 
 	return ret;
 }
@@ -1027,12 +1039,15 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv,
 				       struct intel_shared_dpll *pll,
 				       struct intel_dpll_hw_state *hw_state)
 {
-	uint32_t val;
 	const struct skl_dpll_regs *regs = skl_dpll_regs;
 	const enum intel_dpll_id id = pll->info->id;
+	intel_wakeref_t wakeref;
+	uint32_t val;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_PLLS);
+	if (!wakeref)
 		return false;
 
 	ret = false;
@@ -1048,7 +1063,7 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv,
 	ret = true;
 
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
 
 	return ret;
 }
@@ -1586,14 +1601,17 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 					struct intel_dpll_hw_state *hw_state)
 {
 	enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */
-	uint32_t val;
-	bool ret;
+	intel_wakeref_t wakeref;
 	enum dpio_phy phy;
 	enum dpio_channel ch;
+	uint32_t val;
+	bool ret;
 
 	bxt_port_to_phy_channel(dev_priv, port, &phy, &ch);
 
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_PLLS);
+	if (!wakeref)
 		return false;
 
 	ret = false;
@@ -1650,7 +1668,7 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	ret = true;
 
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
 
 	return ret;
 }
@@ -2098,10 +2116,13 @@ static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 				     struct intel_dpll_hw_state *hw_state)
 {
 	const enum intel_dpll_id id = pll->info->id;
+	intel_wakeref_t wakeref;
 	uint32_t val;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_PLLS);
+	if (!wakeref)
 		return false;
 
 	ret = false;
@@ -2120,7 +2141,7 @@ static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	ret = true;
 
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
 
 	return ret;
 }
@@ -2954,11 +2975,14 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv,
 				 struct intel_dpll_hw_state *hw_state)
 {
 	const enum intel_dpll_id id = pll->info->id;
-	uint32_t val;
-	enum port port;
+	intel_wakeref_t wakeref;
 	bool ret = false;
+	enum port port;
+	uint32_t val;
 
-	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_PLLS);
+	if (!wakeref)
 		return false;
 
 	val = I915_READ(icl_pll_id_to_enable_reg(id));
@@ -3020,7 +3044,7 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv,
 
 	ret = true;
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index fd85a937b237..d42f36c1bbf8 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1993,12 +1993,21 @@ bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
 				    enum intel_display_power_domain domain);
 bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
 				      enum intel_display_power_domain domain);
-void intel_display_power_get(struct drm_i915_private *dev_priv,
-			     enum intel_display_power_domain domain);
-bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
+intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv,
 					enum intel_display_power_domain domain);
+intel_wakeref_t
+intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
+				   enum intel_display_power_domain domain);
+void intel_display_power_put_unchecked(struct drm_i915_private *dev_priv,
+				       enum intel_display_power_domain domain);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
 void intel_display_power_put(struct drm_i915_private *dev_priv,
-			     enum intel_display_power_domain domain);
+			     enum intel_display_power_domain domain,
+			     intel_wakeref_t wakeref);
+#else
+#define intel_display_power_put(i915, domain, wakeref) \
+	intel_display_power_put_unchecked(i915, domain)
+#endif
 void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
 			    u8 req_slices);
 
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index a2dab0b6bde6..c495f6ebe9b2 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1177,15 +1177,17 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	intel_wakeref_t wakeref;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv,
-						encoder->power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     encoder->power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = intel_sdvo_port_enabled(dev_priv, intel_hdmi->hdmi_reg, pipe);
 
-	intel_display_power_put(dev_priv, encoder->power_domain);
+	intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
 
 	return ret;
 }
@@ -1871,11 +1873,12 @@ intel_hdmi_set_edid(struct drm_connector *connector)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
 	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
+	intel_wakeref_t wakeref;
 	struct edid *edid;
 	bool connected = false;
 	struct i2c_adapter *i2c;
 
-	intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
+	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
 
 	i2c = intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus);
 
@@ -1890,7 +1893,7 @@ intel_hdmi_set_edid(struct drm_connector *connector)
 
 	intel_hdmi_dp_dual_mode_detect(connector, edid != NULL);
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS, wakeref);
 
 	to_intel_connector(connector)->detect_edid = edid;
 	if (edid && edid->input & DRM_EDID_INPUT_DIGITAL) {
@@ -1915,11 +1918,12 @@ intel_hdmi_detect(struct drm_connector *connector, bool force)
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
 	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
 	struct intel_encoder *encoder = &hdmi_to_dig_port(intel_hdmi)->base;
+	intel_wakeref_t wakeref;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 		      connector->base.id, connector->name);
 
-	intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
+	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
 
 	if (IS_ICELAKE(dev_priv) &&
 	    !intel_digital_port_connected(encoder))
@@ -1931,7 +1935,7 @@ intel_hdmi_detect(struct drm_connector *connector, bool force)
 		status = connector_status_connected;
 
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS, wakeref);
 
 	if (status != connector_status_connected)
 		cec_notifier_phys_addr_invalidate(intel_hdmi->cec_notifier);
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index 33d87ab93fdd..5cb833f7854d 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -698,12 +698,13 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num,
 static int
 gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num)
 {
-	struct intel_gmbus *bus = container_of(adapter, struct intel_gmbus,
-					       adapter);
+	struct intel_gmbus *bus =
+		container_of(adapter, struct intel_gmbus, adapter);
 	struct drm_i915_private *dev_priv = bus->dev_priv;
+	intel_wakeref_t wakeref;
 	int ret;
 
-	intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
+	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
 
 	if (bus->force_bit) {
 		ret = i2c_bit_algo.master_xfer(adapter, msgs, num);
@@ -715,17 +716,16 @@ gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num)
 			bus->force_bit |= GMBUS_FORCE_BIT_RETRY;
 	}
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS, wakeref);
 
 	return ret;
 }
 
 int intel_gmbus_output_aksv(struct i2c_adapter *adapter)
 {
-	struct intel_gmbus *bus = container_of(adapter, struct intel_gmbus,
-					       adapter);
+	struct intel_gmbus *bus =
+		container_of(adapter, struct intel_gmbus, adapter);
 	struct drm_i915_private *dev_priv = bus->dev_priv;
-	int ret;
 	u8 cmd = DRM_HDCP_DDC_AKSV;
 	u8 buf[DRM_HDCP_KSV_LEN] = { 0 };
 	struct i2c_msg msgs[] = {
@@ -742,8 +742,10 @@ int intel_gmbus_output_aksv(struct i2c_adapter *adapter)
 			.buf = buf,
 		}
 	};
+	intel_wakeref_t wakeref;
+	int ret;
 
-	intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
+	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
 	mutex_lock(&dev_priv->gmbus_mutex);
 
 	/*
@@ -754,7 +756,7 @@ int intel_gmbus_output_aksv(struct i2c_adapter *adapter)
 	ret = do_gmbus_xfer(adapter, msgs, ARRAY_SIZE(msgs), GMBUS_AKSV_SELECT);
 
 	mutex_unlock(&dev_priv->gmbus_mutex);
-	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS, wakeref);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index f9f3b0885ba5..e22537769b53 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -104,15 +104,17 @@ static bool intel_lvds_get_hw_state(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
+	intel_wakeref_t wakeref;
 	bool ret;
 
-	if (!intel_display_power_get_if_enabled(dev_priv,
-						encoder->power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     encoder->power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = intel_lvds_port_enabled(dev_priv, lvds_encoder->reg, pipe);
 
-	intel_display_power_put(dev_priv, encoder->power_domain);
+	intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c
index f3c9010e332a..f14e27ef8690 100644
--- a/drivers/gpu/drm/i915/intel_pipe_crc.c
+++ b/drivers/gpu/drm/i915/intel_pipe_crc.c
@@ -589,6 +589,7 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name)
 	struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[crtc->index];
 	enum intel_display_power_domain power_domain;
 	enum intel_pipe_crc_source source;
+	intel_wakeref_t wakeref;
 	u32 val = 0; /* shut up gcc */
 	int ret = 0;
 
@@ -598,7 +599,8 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name)
 	}
 
 	power_domain = POWER_DOMAIN_PIPE(crtc->index);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) {
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref) {
 		DRM_DEBUG_KMS("Trying to capture CRC while pipe is off\n");
 		return -EIO;
 	}
@@ -624,7 +626,7 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name)
 	pipe_crc->skipped = 0;
 
 out:
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 1db9b8328275..a000b4894962 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3976,16 +3976,19 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
 		enum intel_display_power_domain power_domain;
 		enum plane_id plane_id;
 		enum pipe pipe = crtc->pipe;
+		intel_wakeref_t wakeref;
 
 		power_domain = POWER_DOMAIN_PIPE(pipe);
-		if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		wakeref = intel_display_power_get_if_enabled(dev_priv,
+							     power_domain);
+		if (!wakeref)
 			continue;
 
 		for_each_plane_id_on_crtc(crtc, plane_id)
 			skl_ddb_get_hw_plane_state(dev_priv, pipe,
 						   plane_id, ddb);
 
-		intel_display_power_put(dev_priv, power_domain);
+		intel_display_power_put(dev_priv, power_domain, wakeref);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index b939d104b2bf..a3fa65dcc750 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -1766,18 +1766,19 @@ __intel_display_power_get_domain(struct drm_i915_private *dev_priv,
  * Any power domain reference obtained by this function must have a symmetric
  * call to intel_display_power_put() to release the reference again.
  */
-void intel_display_power_get(struct drm_i915_private *dev_priv,
-			     enum intel_display_power_domain domain)
+intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv,
+					enum intel_display_power_domain domain)
 {
 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-
-	intel_runtime_pm_get(dev_priv);
+	intel_wakeref_t wakeref = intel_runtime_pm_get(dev_priv);
 
 	mutex_lock(&power_domains->lock);
 
 	__intel_display_power_get_domain(dev_priv, domain);
 
 	mutex_unlock(&power_domains->lock);
+
+	return wakeref;
 }
 
 /**
@@ -1792,13 +1793,16 @@ void intel_display_power_get(struct drm_i915_private *dev_priv,
  * Any power domain reference obtained by this function must have a symmetric
  * call to intel_display_power_put() to release the reference again.
  */
-bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
-					enum intel_display_power_domain domain)
+intel_wakeref_t
+intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
+				   enum intel_display_power_domain domain)
 {
 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	intel_wakeref_t wakeref;
 	bool is_enabled;
 
-	if (!intel_runtime_pm_get_if_in_use(dev_priv))
+	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
+	if (!wakeref)
 		return false;
 
 	mutex_lock(&power_domains->lock);
@@ -1812,23 +1816,16 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
 
 	mutex_unlock(&power_domains->lock);
 
-	if (!is_enabled)
-		intel_runtime_pm_put_unchecked(dev_priv);
+	if (!is_enabled) {
+		intel_runtime_pm_put(dev_priv, wakeref);
+		wakeref = 0;
+	}
 
-	return is_enabled;
+	return wakeref;
 }
 
-/**
- * intel_display_power_put - release a power domain reference
- * @dev_priv: i915 device instance
- * @domain: power domain to reference
- *
- * This function drops the power domain reference obtained by
- * intel_display_power_get() and might power down the corresponding hardware
- * block right away if this is the last reference.
- */
-void intel_display_power_put(struct drm_i915_private *dev_priv,
-			     enum intel_display_power_domain domain)
+static void __intel_display_power_put(struct drm_i915_private *dev_priv,
+				      enum intel_display_power_domain domain)
 {
 	struct i915_power_domains *power_domains;
 	struct i915_power_well *power_well;
@@ -1846,10 +1843,34 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
 		intel_power_well_put(dev_priv, power_well);
 
 	mutex_unlock(&power_domains->lock);
+}
 
+/**
+ * intel_display_power_put - release a power domain reference
+ * @dev_priv: i915 device instance
+ * @domain: power domain to reference
+ *
+ * This function drops the power domain reference obtained by
+ * intel_display_power_get() and might power down the corresponding hardware
+ * block right away if this is the last reference.
+ */
+void intel_display_power_put_unchecked(struct drm_i915_private *dev_priv,
+				       enum intel_display_power_domain domain)
+{
+	__intel_display_power_put(dev_priv, domain);
 	intel_runtime_pm_put_unchecked(dev_priv);
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+void intel_display_power_put(struct drm_i915_private *dev_priv,
+			     enum intel_display_power_domain domain,
+			     intel_wakeref_t wakeref)
+{
+	__intel_display_power_put(dev_priv, domain);
+	intel_runtime_pm_put(dev_priv, wakeref);
+}
+#endif
+
 #define I830_PIPES_POWER_DOMAINS (		\
 	BIT_ULL(POWER_DOMAIN_PIPE_A) |		\
 	BIT_ULL(POWER_DOMAIN_PIPE_B) |		\
@@ -4028,7 +4049,7 @@ void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv)
 
 	/* Remove the refcount we took to keep power well support disabled. */
 	if (!i915_modparams.disable_power_well)
-		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+		intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT);
 
 	intel_power_domains_verify_state(dev_priv);
 }
@@ -4047,7 +4068,7 @@ void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv)
  */
 void intel_power_domains_enable(struct drm_i915_private *dev_priv)
 {
-	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+	intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT);
 
 	intel_power_domains_verify_state(dev_priv);
 }
@@ -4082,7 +4103,7 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv,
 {
 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+	intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT);
 
 	/*
 	 * In case of suspend-to-idle (aka S0ix) on a DMC platform without DC9
@@ -4103,7 +4124,7 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv,
 	 * power wells if power domains must be deinitialized for suspend.
 	 */
 	if (!i915_modparams.disable_power_well) {
-		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+		intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT);
 		intel_power_domains_verify_state(dev_priv);
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index d4c8e10fc90b..73c3f4289762 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -405,17 +405,19 @@ skl_plane_get_hw_state(struct intel_plane *plane,
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum intel_display_power_domain power_domain;
 	enum plane_id plane_id = plane->id;
+	intel_wakeref_t wakeref;
 	bool ret;
 
 	power_domain = POWER_DOMAIN_PIPE(plane->pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = I915_READ(PLANE_CTL(plane->pipe, plane_id)) & PLANE_CTL_ENABLE;
 
 	*pipe = plane->pipe;
 
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
@@ -669,17 +671,19 @@ vlv_plane_get_hw_state(struct intel_plane *plane,
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum intel_display_power_domain power_domain;
 	enum plane_id plane_id = plane->id;
+	intel_wakeref_t wakeref;
 	bool ret;
 
 	power_domain = POWER_DOMAIN_PIPE(plane->pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = I915_READ(SPCNTR(plane->pipe, plane_id)) & SP_ENABLE;
 
 	*pipe = plane->pipe;
 
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
@@ -836,17 +840,19 @@ ivb_plane_get_hw_state(struct intel_plane *plane,
 {
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum intel_display_power_domain power_domain;
+	intel_wakeref_t wakeref;
 	bool ret;
 
 	power_domain = POWER_DOMAIN_PIPE(plane->pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	ret =  I915_READ(SPRCTL(plane->pipe)) & SPRITE_ENABLE;
 
 	*pipe = plane->pipe;
 
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
@@ -1002,17 +1008,19 @@ g4x_plane_get_hw_state(struct intel_plane *plane,
 {
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum intel_display_power_domain power_domain;
+	intel_wakeref_t wakeref;
 	bool ret;
 
 	power_domain = POWER_DOMAIN_PIPE(plane->pipe);
-	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
 		return false;
 
 	ret = I915_READ(DVSCNTR(plane->pipe)) & DVS_ENABLE;
 
 	*pipe = plane->pipe;
 
-	intel_display_power_put(dev_priv, power_domain);
+	intel_display_power_put(dev_priv, power_domain, wakeref);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/vlv_dsi.c b/drivers/gpu/drm/i915/vlv_dsi.c
index 435a2c35ee8c..6ece550f4c1c 100644
--- a/drivers/gpu/drm/i915/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/vlv_dsi.c
@@ -1020,13 +1020,15 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	intel_wakeref_t wakeref;
 	enum port port;
 	bool active = false;
 
 	DRM_DEBUG_KMS("\n");
 
-	if (!intel_display_power_get_if_enabled(dev_priv,
-						encoder->power_domain))
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     encoder->power_domain);
+	if (!wakeref)
 		return false;
 
 	/*
@@ -1082,7 +1084,7 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder,
 	}
 
 out_put_power:
-	intel_display_power_put(dev_priv, encoder->power_domain);
+	intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
 
 	return active;
 }
-- 
2.19.0


* [PATCH 24/40] drm/i915: Track the wakeref used to initialise display power domains
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (21 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 23/40] drm/i915: Markup paired operations on display power domains Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-19 19:55 ` [PATCH 25/40] drm/i915/dp: Markup pps lock power well Chris Wilson
                   ` (19 subsequent siblings)
  42 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

On module load and unload, we grab the POWER_DOMAIN_INIT power wells and
transfer them to the runtime-pm code. We can use our wakeref tracking to
verify that the wakeref is indeed passed from init to enable, from
disable to fini, and across suspend.
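
As a rough sketch of the handoff being verified (using the wakeref
helpers introduced earlier in this series; illustrative only, condensed
from the hunks below):

	/* init/disable: stash the wakeref that keeps POWER_DOMAIN_INIT held */
	power_domains->wakeref =
		intel_display_power_get(i915, POWER_DOMAIN_INIT);

	/* enable/fini: hand back that exact wakeref when dropping the domain */
	wakeref = fetch_and_zero(&power_domains->wakeref);
	intel_display_power_put(i915, POWER_DOMAIN_INIT, wakeref);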

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   3 +
 drivers/gpu/drm/i915/i915_drv.h         |   2 +
 drivers/gpu/drm/i915/intel_runtime_pm.c | 146 +++++++++++++-----------
 3 files changed, 85 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 844dceae62de..b34fb0c6bd0e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2695,6 +2695,9 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
 	if (!HAS_RUNTIME_PM(dev_priv))
 		seq_puts(m, "Runtime power management not supported\n");
 
+	seq_printf(m, "Runtime power management: %s\n",
+		   enableddisabled(!dev_priv->power_domains.wakeref));
+
 	seq_printf(m, "GPU idle: %s (epoch %u)\n",
 		   yesno(!dev_priv->gt.awake), dev_priv->gt.epoch);
 	seq_printf(m, "IRQs disabled: %s\n",
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 881ecf416d7d..30934abdbd14 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -944,6 +944,8 @@ struct i915_power_domains {
 	bool display_core_suspended;
 	int power_well_count;
 
+	intel_wakeref_t wakeref;
+
 	struct mutex lock;
 	int domain_use_count[POWER_DOMAIN_NUM];
 	struct i915_power_well *power_wells;
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index a3fa65dcc750..ab897443219b 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -3978,7 +3978,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv);
 
 /**
  * intel_power_domains_init_hw - initialize hardware power domain state
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  * @resume: Called from resume code paths or not
  *
  * This function initializes the hardware power domain state and enables all
@@ -3992,27 +3992,27 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv);
  * intel_power_domains_enable()) and must be paired with
  * intel_power_domains_fini_hw().
  */
-void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume)
+void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume)
 {
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_domains *power_domains = &i915->power_domains;
 
 	power_domains->initializing = true;
 
-	if (IS_ICELAKE(dev_priv)) {
-		icl_display_core_init(dev_priv, resume);
-	} else if (IS_CANNONLAKE(dev_priv)) {
-		cnl_display_core_init(dev_priv, resume);
-	} else if (IS_GEN9_BC(dev_priv)) {
-		skl_display_core_init(dev_priv, resume);
-	} else if (IS_GEN9_LP(dev_priv)) {
-		bxt_display_core_init(dev_priv, resume);
-	} else if (IS_CHERRYVIEW(dev_priv)) {
+	if (IS_ICELAKE(i915)) {
+		icl_display_core_init(i915, resume);
+	} else if (IS_CANNONLAKE(i915)) {
+		cnl_display_core_init(i915, resume);
+	} else if (IS_GEN9_BC(i915)) {
+		skl_display_core_init(i915, resume);
+	} else if (IS_GEN9_LP(i915)) {
+		bxt_display_core_init(i915, resume);
+	} else if (IS_CHERRYVIEW(i915)) {
 		mutex_lock(&power_domains->lock);
-		chv_phy_control_init(dev_priv);
+		chv_phy_control_init(i915);
 		mutex_unlock(&power_domains->lock);
-	} else if (IS_VALLEYVIEW(dev_priv)) {
+	} else if (IS_VALLEYVIEW(i915)) {
 		mutex_lock(&power_domains->lock);
-		vlv_cmnlane_wa(dev_priv);
+		vlv_cmnlane_wa(i915);
 		mutex_unlock(&power_domains->lock);
 	}
 
@@ -4022,18 +4022,20 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume)
 	 * resources powered until display HW readout is complete. We drop
 	 * this reference in intel_power_domains_enable().
 	 */
-	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+	power_domains->wakeref =
+		intel_display_power_get(i915, POWER_DOMAIN_INIT);
+
 	/* Disable power support if the user asked so. */
 	if (!i915_modparams.disable_power_well)
-		intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
-	intel_power_domains_sync_hw(dev_priv);
+		intel_display_power_get(i915, POWER_DOMAIN_INIT);
+	intel_power_domains_sync_hw(i915);
 
 	power_domains->initializing = false;
 }
 
 /**
  * intel_power_domains_fini_hw - deinitialize hw power domain state
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * De-initializes the display power domain HW state. It also ensures that the
  * device stays powered up so that the driver can be reloaded.
@@ -4042,21 +4044,24 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume)
  * intel_power_domains_disable()) and must be paired with
  * intel_power_domains_init_hw().
  */
-void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv)
+void intel_power_domains_fini_hw(struct drm_i915_private *i915)
 {
-	/* Keep the power well enabled, but cancel its rpm wakeref. */
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_wakeref_t wakeref __maybe_unused =
+		fetch_and_zero(&i915->power_domains.wakeref);
 
 	/* Remove the refcount we took to keep power well support disabled. */
 	if (!i915_modparams.disable_power_well)
-		intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT);
+		intel_display_power_put_unchecked(i915, POWER_DOMAIN_INIT);
+
+	intel_power_domains_verify_state(i915);
 
-	intel_power_domains_verify_state(dev_priv);
+	/* Keep the power well enabled, but cancel its rpm wakeref. */
+	intel_runtime_pm_put(i915, wakeref);
 }
 
 /**
  * intel_power_domains_enable - enable toggling of display power wells
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * Enable the ondemand enabling/disabling of the display power wells. Note that
  * power wells not belonging to POWER_DOMAIN_INIT are allowed to be toggled
@@ -4066,30 +4071,36 @@ void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv)
  * of display HW readout (which will acquire the power references reflecting
  * the current HW state).
  */
-void intel_power_domains_enable(struct drm_i915_private *dev_priv)
+void intel_power_domains_enable(struct drm_i915_private *i915)
 {
-	intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT);
+	intel_wakeref_t wakeref __maybe_unused =
+		fetch_and_zero(&i915->power_domains.wakeref);
 
-	intel_power_domains_verify_state(dev_priv);
+	intel_display_power_put(i915, POWER_DOMAIN_INIT, wakeref);
+	intel_power_domains_verify_state(i915);
 }
 
 /**
  * intel_power_domains_disable - disable toggling of display power wells
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * Disable the ondemand enabling/disabling of the display power wells. See
  * intel_power_domains_enable() for which power wells this call controls.
  */
-void intel_power_domains_disable(struct drm_i915_private *dev_priv)
+void intel_power_domains_disable(struct drm_i915_private *i915)
 {
-	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+	struct i915_power_domains *power_domains = &i915->power_domains;
+
+	WARN_ON(power_domains->wakeref);
+	power_domains->wakeref =
+		intel_display_power_get(i915, POWER_DOMAIN_INIT);
 
-	intel_power_domains_verify_state(dev_priv);
+	intel_power_domains_verify_state(i915);
 }
 
 /**
  * intel_power_domains_suspend - suspend power domain state
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  * @suspend_mode: specifies the target suspend state (idle, mem, hibernation)
  *
  * This function prepares the hardware power domain state before entering
@@ -4098,12 +4109,14 @@ void intel_power_domains_disable(struct drm_i915_private *dev_priv)
  * It must be called with power domains already disabled (after a call to
  * intel_power_domains_disable()) and paired with intel_power_domains_resume().
  */
-void intel_power_domains_suspend(struct drm_i915_private *dev_priv,
+void intel_power_domains_suspend(struct drm_i915_private *i915,
 				 enum i915_drm_suspend_mode suspend_mode)
 {
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_domains *power_domains = &i915->power_domains;
+	intel_wakeref_t wakeref __maybe_unused =
+		fetch_and_zero(&power_domains->wakeref);
 
-	intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT);
+	intel_display_power_put(i915, POWER_DOMAIN_INIT, wakeref);
 
 	/*
 	 * In case of suspend-to-idle (aka S0ix) on a DMC platform without DC9
@@ -4112,10 +4125,10 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv,
 	 * resources as required and also enable deeper system power states
 	 * that would be blocked if the firmware was inactive.
 	 */
-	if (!(dev_priv->csr.allowed_dc_mask & DC_STATE_EN_DC9) &&
+	if (!(i915->csr.allowed_dc_mask & DC_STATE_EN_DC9) &&
 	    suspend_mode == I915_DRM_SUSPEND_IDLE &&
-	    dev_priv->csr.dmc_payload != NULL) {
-		intel_power_domains_verify_state(dev_priv);
+	    i915->csr.dmc_payload) {
+		intel_power_domains_verify_state(i915);
 		return;
 	}
 
@@ -4124,25 +4137,25 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv,
 	 * power wells if power domains must be deinitialized for suspend.
 	 */
 	if (!i915_modparams.disable_power_well) {
-		intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT);
-		intel_power_domains_verify_state(dev_priv);
+		intel_display_power_put_unchecked(i915, POWER_DOMAIN_INIT);
+		intel_power_domains_verify_state(i915);
 	}
 
-	if (IS_ICELAKE(dev_priv))
-		icl_display_core_uninit(dev_priv);
-	else if (IS_CANNONLAKE(dev_priv))
-		cnl_display_core_uninit(dev_priv);
-	else if (IS_GEN9_BC(dev_priv))
-		skl_display_core_uninit(dev_priv);
-	else if (IS_GEN9_LP(dev_priv))
-		bxt_display_core_uninit(dev_priv);
+	if (IS_ICELAKE(i915))
+		icl_display_core_uninit(i915);
+	else if (IS_CANNONLAKE(i915))
+		cnl_display_core_uninit(i915);
+	else if (IS_GEN9_BC(i915))
+		skl_display_core_uninit(i915);
+	else if (IS_GEN9_LP(i915))
+		bxt_display_core_uninit(i915);
 
 	power_domains->display_core_suspended = true;
 }
 
 /**
  * intel_power_domains_resume - resume power domain state
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * This function resume the hardware power domain state during system resume.
  *
@@ -4150,28 +4163,30 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv,
  * intel_power_domains_enable()) and must be paired with
  * intel_power_domains_suspend().
  */
-void intel_power_domains_resume(struct drm_i915_private *dev_priv)
+void intel_power_domains_resume(struct drm_i915_private *i915)
 {
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_domains *power_domains = &i915->power_domains;
 
 	if (power_domains->display_core_suspended) {
-		intel_power_domains_init_hw(dev_priv, true);
+		intel_power_domains_init_hw(i915, true);
 		power_domains->display_core_suspended = false;
 	} else {
-		intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+		WARN_ON(power_domains->wakeref);
+		power_domains->wakeref =
+			intel_display_power_get(i915, POWER_DOMAIN_INIT);
 	}
 
-	intel_power_domains_verify_state(dev_priv);
+	intel_power_domains_verify_state(i915);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
 
-static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv)
+static void intel_power_domains_dump_info(struct drm_i915_private *i915)
 {
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_domains *power_domains = &i915->power_domains;
 	struct i915_power_well *power_well;
 
-	for_each_power_well(dev_priv, power_well) {
+	for_each_power_well(i915, power_well) {
 		enum intel_display_power_domain domain;
 
 		DRM_DEBUG_DRIVER("%-25s %d\n",
@@ -4186,7 +4201,7 @@ static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv)
 
 /**
  * intel_power_domains_verify_state - verify the HW/SW state for all power wells
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * Verify if the reference count of each power well matches its HW enabled
  * state and the total refcount of the domains it belongs to. This must be
@@ -4194,16 +4209,16 @@ static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv)
  * acquiring reference counts for any power wells in use and disabling the
  * ones left on by BIOS but not required by any active output.
  */
-static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
+static void intel_power_domains_verify_state(struct drm_i915_private *i915)
 {
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_domains *power_domains = &i915->power_domains;
 	struct i915_power_well *power_well;
 	bool dump_domain_info;
 
 	mutex_lock(&power_domains->lock);
 
 	dump_domain_info = false;
-	for_each_power_well(dev_priv, power_well) {
+	for_each_power_well(i915, power_well) {
 		enum intel_display_power_domain domain;
 		int domains_count;
 		bool enabled;
@@ -4216,8 +4231,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
 		if (!power_well->desc->domains)
 			continue;
 
-		enabled = power_well->desc->ops->is_enabled(dev_priv,
-							    power_well);
+		enabled = power_well->desc->ops->is_enabled(i915, power_well);
 		if ((power_well->count || power_well->desc->always_on) !=
 		    enabled)
 			DRM_ERROR("power well %s state mismatch (refcount %d/enabled %d)",
@@ -4241,7 +4255,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
 		static bool dumped;
 
 		if (!dumped) {
-			intel_power_domains_dump_info(dev_priv);
+			intel_power_domains_dump_info(i915);
 			dumped = true;
 		}
 	}
@@ -4251,7 +4265,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
 
 #else
 
-static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv)
+static void intel_power_domains_verify_state(struct drm_i915_private *i915)
 {
 }
 
-- 
2.19.0


* [PATCH 25/40] drm/i915/dp: Markup pps lock power well
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (22 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 24/40] drm/i915: Track the wakeref used to initialise " Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-19 19:55 ` [PATCH 26/40] drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice Chris Wilson
                   ` (18 subsequent siblings)
  42 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Track where and when we acquire and release the power well for pps
access along the dp aux link, with a view to detecting if we leak any
wakerefs.
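
For illustration, the open-coded lock/unlock pairs become a scoped
helper that carries the wakeref (with_pps_lock() is defined in the hunk
below; the caller shown is just one example taken from this patch):

	intel_wakeref_t wakeref;

	with_pps_lock(intel_dp, wakeref)
		edp_panel_vdd_off_sync(intel_dp);
	/* pps_unlock(intel_dp, wakeref) runs as the scope is left */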

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_dp.c | 226 +++++++++++++++++---------------
 1 file changed, 118 insertions(+), 108 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 01a07711d395..82481c8993fe 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -652,28 +652,36 @@ intel_dp_init_panel_power_sequencer_registers(struct intel_dp *intel_dp,
 static void
 intel_dp_pps_init(struct intel_dp *intel_dp);
 
-static void pps_lock(struct intel_dp *intel_dp)
+static intel_wakeref_t
+pps_lock(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+	intel_wakeref_t wakeref;
 
 	/*
 	 * See intel_power_sequencer_reset() why we need
 	 * a power domain reference here.
 	 */
-	intel_display_power_get(dev_priv, intel_dp->aux_power_domain);
-
+	wakeref = intel_display_power_get(dev_priv, intel_dp->aux_power_domain);
 	mutex_lock(&dev_priv->pps_mutex);
+
+	return wakeref;
 }
 
-static void pps_unlock(struct intel_dp *intel_dp)
+static intel_wakeref_t
+pps_unlock(struct intel_dp *intel_dp, intel_wakeref_t wakeref)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 
 	mutex_unlock(&dev_priv->pps_mutex);
+	intel_display_power_put(dev_priv, intel_dp->aux_power_domain, wakeref);
 
-	intel_display_power_put_unchecked(dev_priv, intel_dp->aux_power_domain);
+	return 0;
 }
 
+#define with_pps_lock(dp, wf) \
+	for (wf = pps_lock(dp); wf; wf = pps_unlock(dp, wf))
+
 static void
 vlv_power_sequencer_kick(struct intel_dp *intel_dp)
 {
@@ -1022,30 +1030,30 @@ static int edp_notify_handler(struct notifier_block *this, unsigned long code,
 	struct intel_dp *intel_dp = container_of(this, typeof(* intel_dp),
 						 edp_notifier);
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+	intel_wakeref_t wakeref;
 
 	if (!intel_dp_is_edp(intel_dp) || code != SYS_RESTART)
 		return 0;
 
-	pps_lock(intel_dp);
-
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-		enum pipe pipe = vlv_power_sequencer_pipe(intel_dp);
-		i915_reg_t pp_ctrl_reg, pp_div_reg;
-		u32 pp_div;
-
-		pp_ctrl_reg = PP_CONTROL(pipe);
-		pp_div_reg  = PP_DIVISOR(pipe);
-		pp_div = I915_READ(pp_div_reg);
-		pp_div &= PP_REFERENCE_DIVIDER_MASK;
-
-		/* 0x1F write to PP_DIV_REG sets max cycle delay */
-		I915_WRITE(pp_div_reg, pp_div | 0x1F);
-		I915_WRITE(pp_ctrl_reg, PANEL_UNLOCK_REGS | PANEL_POWER_OFF);
-		msleep(intel_dp->panel_power_cycle_delay);
+	with_pps_lock(intel_dp, wakeref) {
+		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
+			enum pipe pipe = vlv_power_sequencer_pipe(intel_dp);
+			i915_reg_t pp_ctrl_reg, pp_div_reg;
+			u32 pp_div;
+
+			pp_ctrl_reg = PP_CONTROL(pipe);
+			pp_div_reg  = PP_DIVISOR(pipe);
+			pp_div = I915_READ(pp_div_reg);
+			pp_div &= PP_REFERENCE_DIVIDER_MASK;
+
+			/* 0x1F write to PP_DIV_REG sets max cycle delay */
+			I915_WRITE(pp_div_reg, pp_div | 0x1F);
+			I915_WRITE(pp_ctrl_reg,
+				   PANEL_UNLOCK_REGS | PANEL_POWER_OFF);
+			msleep(intel_dp->panel_power_cycle_delay);
+		}
 	}
 
-	pps_unlock(intel_dp);
-
 	return 0;
 }
 
@@ -1231,16 +1239,17 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp,
 			to_i915(intel_dig_port->base.base.dev);
 	i915_reg_t ch_ctl, ch_data[5];
 	uint32_t aux_clock_divider;
+	intel_wakeref_t wakeref;
 	int i, ret, recv_bytes;
-	uint32_t status;
 	int try, clock = 0;
+	uint32_t status;
 	bool vdd;
 
 	ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp);
 	for (i = 0; i < ARRAY_SIZE(ch_data); i++)
 		ch_data[i] = intel_dp->aux_ch_data_reg(intel_dp, i);
 
-	pps_lock(intel_dp);
+	wakeref = pps_lock(intel_dp);
 
 	/*
 	 * We will be called with VDD already enabled for dpcd/edid/oui reads.
@@ -1384,7 +1393,7 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp,
 	if (vdd)
 		edp_panel_vdd_off(intel_dp, false);
 
-	pps_unlock(intel_dp);
+	pps_unlock(intel_dp, wakeref);
 
 	return ret;
 }
@@ -2347,15 +2356,15 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp)
  */
 void intel_edp_panel_vdd_on(struct intel_dp *intel_dp)
 {
+	intel_wakeref_t wakeref;
 	bool vdd;
 
 	if (!intel_dp_is_edp(intel_dp))
 		return;
 
-	pps_lock(intel_dp);
-	vdd = edp_panel_vdd_on(intel_dp);
-	pps_unlock(intel_dp);
-
+	vdd = false;
+	with_pps_lock(intel_dp, wakeref)
+		vdd = edp_panel_vdd_on(intel_dp);
 	I915_STATE_WARN(!vdd, "eDP port %c VDD already requested on\n",
 	     port_name(dp_to_dig_port(intel_dp)->base.port));
 }
@@ -2399,13 +2408,15 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp)
 
 static void edp_panel_vdd_work(struct work_struct *__work)
 {
-	struct intel_dp *intel_dp = container_of(to_delayed_work(__work),
-						 struct intel_dp, panel_vdd_work);
+	struct intel_dp *intel_dp =
+		container_of(to_delayed_work(__work),
+			     struct intel_dp, panel_vdd_work);
+	intel_wakeref_t wakeref;
 
-	pps_lock(intel_dp);
-	if (!intel_dp->want_panel_vdd)
-		edp_panel_vdd_off_sync(intel_dp);
-	pps_unlock(intel_dp);
+	with_pps_lock(intel_dp, wakeref) {
+		if (!intel_dp->want_panel_vdd)
+			edp_panel_vdd_off_sync(intel_dp);
+	}
 }
 
 static void edp_panel_vdd_schedule_off(struct intel_dp *intel_dp)
@@ -2495,12 +2506,13 @@ static void edp_panel_on(struct intel_dp *intel_dp)
 
 void intel_edp_panel_on(struct intel_dp *intel_dp)
 {
+	intel_wakeref_t wakeref;
+
 	if (!intel_dp_is_edp(intel_dp))
 		return;
 
-	pps_lock(intel_dp);
-	edp_panel_on(intel_dp);
-	pps_unlock(intel_dp);
+	with_pps_lock(intel_dp, wakeref)
+		edp_panel_on(intel_dp);
 }
 
 
@@ -2543,20 +2555,20 @@ static void edp_panel_off(struct intel_dp *intel_dp)
 
 void intel_edp_panel_off(struct intel_dp *intel_dp)
 {
+	intel_wakeref_t wakeref;
+
 	if (!intel_dp_is_edp(intel_dp))
 		return;
 
-	pps_lock(intel_dp);
-	edp_panel_off(intel_dp);
-	pps_unlock(intel_dp);
+	with_pps_lock(intel_dp, wakeref)
+		edp_panel_off(intel_dp);
 }
 
 /* Enable backlight in the panel power control. */
 static void _intel_edp_backlight_on(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
-	u32 pp;
-	i915_reg_t pp_ctrl_reg;
+	intel_wakeref_t wakeref;
 
 	/*
 	 * If we enable the backlight right away following a panel power
@@ -2566,17 +2578,16 @@ static void _intel_edp_backlight_on(struct intel_dp *intel_dp)
 	 */
 	wait_backlight_on(intel_dp);
 
-	pps_lock(intel_dp);
-
-	pp = ironlake_get_pp_control(intel_dp);
-	pp |= EDP_BLC_ENABLE;
-
-	pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
+	with_pps_lock(intel_dp, wakeref) {
+		i915_reg_t pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
+		u32 pp;
 
-	I915_WRITE(pp_ctrl_reg, pp);
-	POSTING_READ(pp_ctrl_reg);
+		pp = ironlake_get_pp_control(intel_dp);
+		pp |= EDP_BLC_ENABLE;
 
-	pps_unlock(intel_dp);
+		I915_WRITE(pp_ctrl_reg, pp);
+		POSTING_READ(pp_ctrl_reg);
+	}
 }
 
 /* Enable backlight PWM and backlight PP control. */
@@ -2598,23 +2609,21 @@ void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state,
 static void _intel_edp_backlight_off(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
-	u32 pp;
-	i915_reg_t pp_ctrl_reg;
+	intel_wakeref_t wakeref;
 
 	if (!intel_dp_is_edp(intel_dp))
 		return;
 
-	pps_lock(intel_dp);
+	with_pps_lock(intel_dp, wakeref) {
+		i915_reg_t pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
+		u32 pp;
 
-	pp = ironlake_get_pp_control(intel_dp);
-	pp &= ~EDP_BLC_ENABLE;
-
-	pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
-
-	I915_WRITE(pp_ctrl_reg, pp);
-	POSTING_READ(pp_ctrl_reg);
+		pp = ironlake_get_pp_control(intel_dp);
+		pp &= ~EDP_BLC_ENABLE;
 
-	pps_unlock(intel_dp);
+		I915_WRITE(pp_ctrl_reg, pp);
+		POSTING_READ(pp_ctrl_reg);
+	}
 
 	intel_dp->last_backlight_off = jiffies;
 	edp_wait_backlight_off(intel_dp);
@@ -2642,12 +2651,12 @@ static void intel_edp_backlight_power(struct intel_connector *connector,
 				      bool enable)
 {
 	struct intel_dp *intel_dp = intel_attached_dp(&connector->base);
+	intel_wakeref_t wakeref;
 	bool is_enabled;
 
-	pps_lock(intel_dp);
-	is_enabled = ironlake_get_pp_control(intel_dp) & EDP_BLC_ENABLE;
-	pps_unlock(intel_dp);
-
+	is_enabled = false;
+	with_pps_lock(intel_dp, wakeref)
+		is_enabled = ironlake_get_pp_control(intel_dp) & EDP_BLC_ENABLE;
 	if (is_enabled == enable)
 		return;
 
@@ -3141,22 +3150,21 @@ static void intel_enable_dp(struct intel_encoder *encoder,
 	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
 	uint32_t dp_reg = I915_READ(intel_dp->output_reg);
 	enum pipe pipe = crtc->pipe;
+	intel_wakeref_t wakeref;
 
 	if (WARN_ON(dp_reg & DP_PORT_EN))
 		return;
 
-	pps_lock(intel_dp);
-
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		vlv_init_panel_power_sequencer(encoder, pipe_config);
-
-	intel_dp_enable_port(intel_dp, pipe_config);
+	with_pps_lock(intel_dp, wakeref) {
+		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+			vlv_init_panel_power_sequencer(encoder, pipe_config);
 
-	edp_panel_vdd_on(intel_dp);
-	edp_panel_on(intel_dp);
-	edp_panel_vdd_off(intel_dp, true);
+		intel_dp_enable_port(intel_dp, pipe_config);
 
-	pps_unlock(intel_dp);
+		edp_panel_vdd_on(intel_dp);
+		edp_panel_on(intel_dp);
+		edp_panel_vdd_off(intel_dp, true);
+	}
 
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
 		unsigned int lane_mask = 0x0;
@@ -3854,9 +3862,10 @@ intel_dp_link_down(struct intel_encoder *encoder,
 	intel_dp->DP = DP;
 
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-		pps_lock(intel_dp);
-		intel_dp->active_pipe = INVALID_PIPE;
-		pps_unlock(intel_dp);
+		intel_wakeref_t wakeref;
+
+		with_pps_lock(intel_dp, wakeref)
+			intel_dp->active_pipe = INVALID_PIPE;
 	}
 }
 
@@ -5272,14 +5281,15 @@ void intel_dp_encoder_destroy(struct drm_encoder *encoder)
 
 	intel_dp_mst_encoder_cleanup(intel_dig_port);
 	if (intel_dp_is_edp(intel_dp)) {
+		intel_wakeref_t wakeref;
+
 		cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
 		/*
 		 * vdd might still be enabled do to the delayed vdd off.
 		 * Make sure vdd is actually turned off here.
 		 */
-		pps_lock(intel_dp);
-		edp_panel_vdd_off_sync(intel_dp);
-		pps_unlock(intel_dp);
+		with_pps_lock(intel_dp, wakeref)
+			edp_panel_vdd_off_sync(intel_dp);
 
 		if (intel_dp->edp_notifier.notifier_call) {
 			unregister_reboot_notifier(&intel_dp->edp_notifier);
@@ -5296,6 +5306,7 @@ void intel_dp_encoder_destroy(struct drm_encoder *encoder)
 void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder)
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base);
+	intel_wakeref_t wakeref;
 
 	if (!intel_dp_is_edp(intel_dp))
 		return;
@@ -5305,9 +5316,8 @@ void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder)
 	 * Make sure vdd is actually turned off here.
 	 */
 	cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
-	pps_lock(intel_dp);
-	edp_panel_vdd_off_sync(intel_dp);
-	pps_unlock(intel_dp);
+	with_pps_lock(intel_dp, wakeref)
+		edp_panel_vdd_off_sync(intel_dp);
 }
 
 static
@@ -5581,6 +5591,7 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder)
 	struct drm_i915_private *dev_priv = to_i915(encoder->dev);
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp);
+	intel_wakeref_t wakeref;
 
 	if (!HAS_DDI(dev_priv))
 		intel_dp->DP = I915_READ(intel_dp->output_reg);
@@ -5590,18 +5601,19 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder)
 
 	intel_dp->reset_link_params = true;
 
-	pps_lock(intel_dp);
-
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		intel_dp->active_pipe = vlv_active_pipe(intel_dp);
+	with_pps_lock(intel_dp, wakeref) {
+		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+			intel_dp->active_pipe = vlv_active_pipe(intel_dp);
 
-	if (intel_dp_is_edp(intel_dp)) {
-		/* Reinit the power sequencer, in case BIOS did something with it. */
-		intel_dp_pps_init(intel_dp);
-		intel_edp_panel_vdd_sanitize(intel_dp);
+		if (intel_dp_is_edp(intel_dp)) {
+			/*
+			 * Reinit the power sequencer, in case BIOS did
+			 * something nasty with it.
+			 */
+			intel_dp_pps_init(intel_dp);
+			intel_edp_panel_vdd_sanitize(intel_dp);
+		}
 	}
-
-	pps_unlock(intel_dp);
 }
 
 static const struct drm_connector_funcs intel_dp_connector_funcs = {
@@ -6397,8 +6409,9 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
 	struct drm_display_mode *downclock_mode = NULL;
 	bool has_dpcd;
 	struct drm_display_mode *scan;
-	struct edid *edid;
 	enum pipe pipe = INVALID_PIPE;
+	intel_wakeref_t wakeref;
+	struct edid *edid;
 
 	if (!intel_dp_is_edp(intel_dp))
 		return true;
@@ -6416,13 +6429,11 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
 		return false;
 	}
 
-	pps_lock(intel_dp);
-
-	intel_dp_init_panel_power_timestamps(intel_dp);
-	intel_dp_pps_init(intel_dp);
-	intel_edp_panel_vdd_sanitize(intel_dp);
-
-	pps_unlock(intel_dp);
+	with_pps_lock(intel_dp, wakeref) {
+		intel_dp_init_panel_power_timestamps(intel_dp);
+		intel_dp_pps_init(intel_dp);
+		intel_edp_panel_vdd_sanitize(intel_dp);
+	}
 
 	/* Cache DPCD and EDID for edp. */
 	has_dpcd = intel_edp_init_dpcd(intel_dp);
@@ -6503,9 +6514,8 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
 	 * vdd might still be enabled do to the delayed vdd off.
 	 * Make sure vdd is actually turned off here.
 	 */
-	pps_lock(intel_dp);
-	edp_panel_vdd_off_sync(intel_dp);
-	pps_unlock(intel_dp);
+	with_pps_lock(intel_dp, wakeref)
+		edp_panel_vdd_off_sync(intel_dp);
 
 	return false;
 }
-- 
2.19.0


* [PATCH 26/40] drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (23 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 25/40] drm/i915/dp: Markup pps lock power well Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-19 19:55 ` [PATCH 27/40] drm/i915: Mark up Ironlake ips with rpm wakerefs Chris Wilson
                   ` (17 subsequent siblings)
  42 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

As we only release each power well once, we assume that each transcoder
maps to a different domain. Complain if this is not so.
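
In code terms the check is simply (sketch of the hunks below):

	if (intel_display_power_get_if_enabled(dev_priv, power_domain)) {
		/* each transcoder is expected to map to a distinct domain */
		WARN_ON(*power_domain_mask & BIT_ULL(power_domain));
		*power_domain_mask |= BIT_ULL(power_domain);
	}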

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_display.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 806aa6dcbf8a..35d43b34b6e1 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9437,6 +9437,8 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc,
 	power_domain = POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder);
 	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
+
+	WARN_ON(*power_domain_mask & BIT_ULL(power_domain));
 	*power_domain_mask |= BIT_ULL(power_domain);
 
 	tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder));
@@ -9464,6 +9466,8 @@ static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc,
 		power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder);
 		if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 			continue;
+
+		WARN_ON(*power_domain_mask & BIT_ULL(power_domain));
 		*power_domain_mask |= BIT_ULL(power_domain);
 
 		/*
@@ -9595,7 +9599,9 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
 
 	power_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe);
 	if (intel_display_power_get_if_enabled(dev_priv, power_domain)) {
+		WARN_ON(power_domain_mask & BIT_ULL(power_domain));
 		power_domain_mask |= BIT_ULL(power_domain);
+
 		if (INTEL_GEN(dev_priv) >= 9)
 			skylake_get_pfit_config(crtc, pipe_config);
 		else
-- 
2.19.0


* [PATCH 27/40] drm/i915: Mark up Ironlake ips with rpm wakerefs
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (24 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 26/40] drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-19 19:55 ` [PATCH 28/40] drm/i915: Serialise concurrent calls to i915_gem_set_wedged() Chris Wilson
                   ` (16 subsequent siblings)
  42 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Currently Ironlake operates under the assumption that rpm is always
awake (and so its error checking is disabled). As such, we have missed a
few places where we access registers without taking the rpm wakeref and
thus trigger warnings, intel_ips being one culprit.

As this involved adding a potentially sleeping rpm_get, we have to
rearrange the spinlocks slightly, and so switch to acquiring a
device-ref under the spinlock rather than holding the spinlock for the
whole operation. To be consistent, we make the same change in pattern
throughout the intel_ips interface, even though this adds a few more
atomic operations than necessary in a few cases.
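
Sketch of the resulting pattern for the exported intel_ips hooks
(condensed from the hunks below; error handling elided):

	struct drm_i915_private *i915;

	i915 = mchdev_get();	/* RCU lookup + kref instead of mchdev_lock */
	if (!i915)
		return false;

	spin_lock_irq(&mchdev_lock);
	/* ... adjust i915->ips state under the spinlock ... */
	spin_unlock_irq(&mchdev_lock);

	drm_dev_put(&i915->drm);
	return true;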

v2: Sagar noted the mb around setting mch_dev were overkill as we only
need ordering there, and that i915_emon_status was still using
struct_mutex for no reason, but lacked rpm.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c |  29 +++--
 drivers/gpu/drm/i915/i915_drv.c     |   3 +
 drivers/gpu/drm/i915/intel_pm.c     | 172 ++++++++++++++--------------
 3 files changed, 103 insertions(+), 101 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index b34fb0c6bd0e..64cb9b56618b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1758,27 +1758,24 @@ static int i915_sr_status(struct seq_file *m, void *unused)
 
 static int i915_emon_status(struct seq_file *m, void *unused)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	unsigned long temp, chipset, gfx;
-	int ret;
+	struct drm_i915_private *i915 = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 
-	if (!IS_GEN5(dev_priv))
+	if (!IS_GEN5(i915))
 		return -ENODEV;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
+	with_intel_runtime_pm(i915, wakeref) {
+		unsigned long temp, chipset, gfx;
 
-	temp = i915_mch_val(dev_priv);
-	chipset = i915_chipset_val(dev_priv);
-	gfx = i915_gfx_val(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
+		temp = i915_mch_val(i915);
+		chipset = i915_chipset_val(i915);
+		gfx = i915_gfx_val(i915);
 
-	seq_printf(m, "GMCH temp: %ld\n", temp);
-	seq_printf(m, "Chipset power: %ld\n", chipset);
-	seq_printf(m, "GFX power: %ld\n", gfx);
-	seq_printf(m, "Total power: %ld\n", chipset + gfx);
+		seq_printf(m, "GMCH temp: %ld\n", temp);
+		seq_printf(m, "Chipset power: %ld\n", chipset);
+		seq_printf(m, "GFX power: %ld\n", gfx);
+		seq_printf(m, "Total power: %ld\n", chipset + gfx);
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 9884902a4a9c..e0ab875b4193 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1761,6 +1761,9 @@ void i915_driver_unload(struct drm_device *dev)
 
 	i915_driver_unregister(dev_priv);
 
+	/* Flush any external code that still may be under the RCU lock */
+	synchronize_rcu();
+
 	if (i915_gem_suspend(dev_priv))
 		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index a000b4894962..c690b642c0a3 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6154,10 +6154,6 @@ void intel_init_ipc(struct drm_i915_private *dev_priv)
  */
 DEFINE_SPINLOCK(mchdev_lock);
 
-/* Global for IPS driver to get at the current i915 device. Protected by
- * mchdev_lock. */
-static struct drm_i915_private *i915_mch_dev;
-
 bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
 {
 	u16 rgvswctl;
@@ -7800,16 +7796,17 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
 
 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
 {
-	unsigned long val;
+	intel_wakeref_t wakeref;
+	unsigned long val = 0;
 
 	if (!IS_GEN5(dev_priv))
 		return 0;
 
-	spin_lock_irq(&mchdev_lock);
-
-	val = __i915_chipset_val(dev_priv);
-
-	spin_unlock_irq(&mchdev_lock);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		spin_lock_irq(&mchdev_lock);
+		val = __i915_chipset_val(dev_priv);
+		spin_unlock_irq(&mchdev_lock);
+	}
 
 	return val;
 }
@@ -7886,14 +7883,16 @@ static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
 
 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
 {
+	intel_wakeref_t wakeref;
+
 	if (!IS_GEN5(dev_priv))
 		return;
 
-	spin_lock_irq(&mchdev_lock);
-
-	__i915_update_gfx_val(dev_priv);
-
-	spin_unlock_irq(&mchdev_lock);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		spin_lock_irq(&mchdev_lock);
+		__i915_update_gfx_val(dev_priv);
+		spin_unlock_irq(&mchdev_lock);
+	}
 }
 
 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
@@ -7935,18 +7934,34 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
 
 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
 {
-	unsigned long val;
+	intel_wakeref_t wakeref;
+	unsigned long val = 0;
 
 	if (!IS_GEN5(dev_priv))
 		return 0;
 
-	spin_lock_irq(&mchdev_lock);
+	with_intel_runtime_pm(dev_priv, wakeref) {
+		spin_lock_irq(&mchdev_lock);
+		val = __i915_gfx_val(dev_priv);
+		spin_unlock_irq(&mchdev_lock);
+	}
 
-	val = __i915_gfx_val(dev_priv);
+	return val;
+}
 
-	spin_unlock_irq(&mchdev_lock);
+static struct drm_i915_private *i915_mch_dev;
 
-	return val;
+static struct drm_i915_private *mchdev_get(void)
+{
+	struct drm_i915_private *i915;
+
+	rcu_read_lock();
+	i915 = i915_mch_dev;
+	if (!kref_get_unless_zero(&i915->drm.ref))
+		i915 = NULL;
+	rcu_read_unlock();
+
+	return i915;
 }
 
 /**
@@ -7957,23 +7972,24 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
  */
 unsigned long i915_read_mch_val(void)
 {
-	struct drm_i915_private *dev_priv;
-	unsigned long chipset_val, graphics_val, ret = 0;
+	struct drm_i915_private *i915;
+	unsigned long chipset_val, graphics_val;
+	intel_wakeref_t wakeref;
 
-	spin_lock_irq(&mchdev_lock);
-	if (!i915_mch_dev)
-		goto out_unlock;
-	dev_priv = i915_mch_dev;
-
-	chipset_val = __i915_chipset_val(dev_priv);
-	graphics_val = __i915_gfx_val(dev_priv);
-
-	ret = chipset_val + graphics_val;
+	i915 = mchdev_get();
+	if (!i915)
+		return 0;
 
-out_unlock:
-	spin_unlock_irq(&mchdev_lock);
+	chipset_val = graphics_val = 0;
+	with_intel_runtime_pm(i915, wakeref) {
+		spin_lock_irq(&mchdev_lock);
+		chipset_val = __i915_chipset_val(i915);
+		graphics_val = __i915_gfx_val(i915);
+		spin_unlock_irq(&mchdev_lock);
+	}
 
-	return ret;
+	drm_dev_put(&i915->drm);
+	return chipset_val + graphics_val;
 }
 EXPORT_SYMBOL_GPL(i915_read_mch_val);
 
@@ -7984,23 +8000,19 @@ EXPORT_SYMBOL_GPL(i915_read_mch_val);
  */
 bool i915_gpu_raise(void)
 {
-	struct drm_i915_private *dev_priv;
-	bool ret = true;
+	struct drm_i915_private *i915;
 
-	spin_lock_irq(&mchdev_lock);
-	if (!i915_mch_dev) {
-		ret = false;
-		goto out_unlock;
-	}
-	dev_priv = i915_mch_dev;
-
-	if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
-		dev_priv->ips.max_delay--;
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
 
-out_unlock:
+	spin_lock_irq(&mchdev_lock);
+	if (i915->ips.max_delay > i915->ips.fmax)
+		i915->ips.max_delay--;
 	spin_unlock_irq(&mchdev_lock);
 
-	return ret;
+	drm_dev_put(&i915->drm);
+	return true;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_raise);
 
@@ -8012,23 +8024,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_raise);
  */
 bool i915_gpu_lower(void)
 {
-	struct drm_i915_private *dev_priv;
-	bool ret = true;
+	struct drm_i915_private *i915;
 
-	spin_lock_irq(&mchdev_lock);
-	if (!i915_mch_dev) {
-		ret = false;
-		goto out_unlock;
-	}
-	dev_priv = i915_mch_dev;
-
-	if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
-		dev_priv->ips.max_delay++;
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
 
-out_unlock:
+	spin_lock_irq(&mchdev_lock);
+	if (i915->ips.max_delay < i915->ips.min_delay)
+		i915->ips.max_delay++;
 	spin_unlock_irq(&mchdev_lock);
 
-	return ret;
+	drm_dev_put(&i915->drm);
+	return true;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_lower);
 
@@ -8039,13 +8047,16 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower);
  */
 bool i915_gpu_busy(void)
 {
-	bool ret = false;
+	struct drm_i915_private *i915;
+	bool ret;
 
-	spin_lock_irq(&mchdev_lock);
-	if (i915_mch_dev)
-		ret = i915_mch_dev->gt.awake;
-	spin_unlock_irq(&mchdev_lock);
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	ret = i915->gt.awake;
 
+	drm_dev_put(&i915->drm);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_busy);
@@ -8058,24 +8069,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_busy);
  */
 bool i915_gpu_turbo_disable(void)
 {
-	struct drm_i915_private *dev_priv;
-	bool ret = true;
-
-	spin_lock_irq(&mchdev_lock);
-	if (!i915_mch_dev) {
-		ret = false;
-		goto out_unlock;
-	}
-	dev_priv = i915_mch_dev;
-
-	dev_priv->ips.max_delay = dev_priv->ips.fstart;
+	struct drm_i915_private *i915;
+	bool ret;
 
-	if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart))
-		ret = false;
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
 
-out_unlock:
+	spin_lock_irq(&mchdev_lock);
+	i915->ips.max_delay = i915->ips.fstart;
+	ret = ironlake_set_drps(i915, i915->ips.fstart);
 	spin_unlock_irq(&mchdev_lock);
 
+	drm_dev_put(&i915->drm);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
@@ -8104,18 +8110,14 @@ void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
 {
 	/* We only register the i915 ips part with intel-ips once everything is
 	 * set up, to avoid intel-ips sneaking in and reading bogus values. */
-	spin_lock_irq(&mchdev_lock);
-	i915_mch_dev = dev_priv;
-	spin_unlock_irq(&mchdev_lock);
+	rcu_assign_pointer(i915_mch_dev, dev_priv);
 
 	ips_ping_for_i915_load();
 }
 
 void intel_gpu_ips_teardown(void)
 {
-	spin_lock_irq(&mchdev_lock);
-	i915_mch_dev = NULL;
-	spin_unlock_irq(&mchdev_lock);
+	rcu_assign_pointer(i915_mch_dev, NULL);
 }
 
 static void intel_init_emon(struct drm_i915_private *dev_priv)
-- 
2.19.0


* [PATCH 28/40] drm/i915: Serialise concurrent calls to i915_gem_set_wedged()
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (25 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 27/40] drm/i915: Mark up Ironlake ips with rpm wakerefs Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-19 19:55 ` [PATCH 29/40] drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex Chris Wilson
                   ` (15 subsequent siblings)
  42 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Make i915_gem_set_wedged() and i915_gem_unset_wedged() behave more
consistently if called concurrently.
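
The intent, roughly (simplified from the hunks below), is to guard both
wedging paths with a single mutex:

	mutex_lock(&error->wedge_mutex);
	if (!test_bit(I915_WEDGED, &error->flags)) {
		/* ... stop submission, complete all in-flight requests ... */
		smp_mb__before_atomic();
		set_bit(I915_WEDGED, &error->flags);
	}
	mutex_unlock(&error->wedge_mutex);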

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c               | 32 ++++++++++++++-----
 drivers/gpu/drm/i915/i915_gpu_error.h         |  4 ++-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a8cdaeaea7a1..a2e7f7487588 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3324,10 +3324,15 @@ static void nop_complete_submit_request(struct i915_request *request)
 
 void i915_gem_set_wedged(struct drm_i915_private *i915)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	GEM_TRACE("start\n");
+	mutex_lock(&error->wedge_mutex);
+	if (test_bit(I915_WEDGED, &error->flags)) {
+		mutex_unlock(&error->wedge_mutex);
+		return;
+	}
 
 	if (GEM_SHOW_DEBUG()) {
 		struct drm_printer p = drm_debug_printer(__func__);
@@ -3336,8 +3341,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 			intel_engine_dump(engine, &p, "%s\n", engine->name);
 	}
 
-	if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags))
-		goto out;
+	GEM_TRACE("start\n");
 
 	/*
 	 * First, stop submission to hw, but do not yet complete requests by
@@ -3397,20 +3401,28 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 		i915_gem_reset_finish_engine(engine);
 	}
 
-out:
+	smp_mb__before_atomic();
+	set_bit(I915_WEDGED, &error->flags);
+
 	GEM_TRACE("end\n");
+	mutex_unlock(&error->wedge_mutex);
 
-	wake_up_all(&i915->gpu_error.reset_queue);
+	wake_up_all(&error->reset_queue);
 }
 
 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct i915_timeline *tl;
+	bool ret = false;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
-	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+
+	if (!test_bit(I915_WEDGED, &error->flags))
 		return true;
 
+	mutex_lock(&error->wedge_mutex);
+
 	GEM_TRACE("start\n");
 
 	/*
@@ -3444,7 +3456,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 		 */
 		if (dma_fence_default_wait(&rq->fence, true,
 					   MAX_SCHEDULE_TIMEOUT) < 0)
-			return false;
+			goto unlock;
 	}
 	i915_retire_requests(i915);
 	GEM_BUG_ON(i915->gt.active_requests);
@@ -3468,8 +3480,11 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 
 	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
 	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+	ret = true;
+unlock:
+	mutex_unlock(&i915->gpu_error.wedge_mutex);
 
-	return true;
+	return ret;
 }
 
 static void
@@ -5817,6 +5832,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 			  i915_gem_idle_work_handler);
 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
+	mutex_init(&dev_priv->gpu_error.wedge_mutex);
 
 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 1c1bc0c23468..4f5c5be56002 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -269,8 +269,8 @@ struct i915_gpu_error {
 #define I915_RESET_BACKOFF	0
 #define I915_RESET_HANDOFF	1
 #define I915_RESET_MODESET	2
+#define I915_RESET_ENGINE	3
 #define I915_WEDGED		(BITS_PER_LONG - 1)
-#define I915_RESET_ENGINE	(I915_WEDGED - I915_NUM_ENGINES)
 
 	/** Number of times an engine has been reset */
 	u32 reset_engine_count[I915_NUM_ENGINES];
@@ -281,6 +281,8 @@ struct i915_gpu_error {
 	/** Reason for the current *global* reset */
 	const char *reason;
 
+	struct mutex wedge_mutex; /* serialises wedging/unwedging */
+
 	/**
 	 * Waitqueue to signal when a hang is detected. Used to for waiters
 	 * to release the struct_mutex for the reset to procede.
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 0eb283e7fc96..a4cd459c320d 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -188,6 +188,7 @@ struct drm_i915_private *mock_gem_device(void)
 
 	init_waitqueue_head(&i915->gpu_error.wait_queue);
 	init_waitqueue_head(&i915->gpu_error.reset_queue);
+	mutex_init(&i915->gpu_error.wedge_mutex);
 
 	i915->wq = alloc_ordered_workqueue("mock", 0);
 	if (!i915->wq)
-- 
2.19.0


* [PATCH 29/40] drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (26 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 28/40] drm/i915: Serialise concurrent calls to i915_gem_set_wedged() Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-24 13:04   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 30/40] drm/i915: Pull all the reset functionality together into i915_reset.c Chris Wilson
                   ` (14 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

We have two classes of VM, global GTT and per-process GTT. In order to
allow ourselves the freedom to mix both along call chains, distinguish
the two classes with regard to their mutex and lockdep maps.
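
The distinction is made with a lockdep subclass on vm->mutex (sketch;
VM_CLASS_GGTT/VM_CLASS_PPGTT are the values added by this patch):

	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass); /* VM_CLASS_GGTT or VM_CLASS_PPGTT */

so that taking a ggtt->vm.mutex while already holding a ppgtt->vm.mutex
(or vice versa) is no longer flagged by lockdep as recursive locking.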

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c       | 10 +++++-----
 drivers/gpu/drm/i915/i915_gem_gtt.h       |  2 ++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  6 +++---
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ef38d09b6ce0..719e1ac212c1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -523,8 +523,7 @@ static void vm_free_page(struct i915_address_space *vm, struct page *page)
 	spin_unlock(&vm->free_pages.lock);
 }
 
-static void i915_address_space_init(struct i915_address_space *vm,
-				    struct drm_i915_private *dev_priv)
+static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 {
 	/*
 	 * The vm->mutex must be reclaim safe (for use in the shrinker).
@@ -532,6 +531,7 @@ static void i915_address_space_init(struct i915_address_space *vm,
 	 * attempt holding the lock is immediately reported by lockdep.
 	 */
 	mutex_init(&vm->mutex);
+	lockdep_set_subclass(&vm->mutex, subclass);
 	i915_gem_shrinker_taints_mutex(&vm->mutex);
 
 	GEM_BUG_ON(!vm->total);
@@ -1658,7 +1658,7 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 	 */
 	ppgtt->vm.has_read_only = !intel_vgpu_active(i915);
 
-	i915_address_space_init(&ppgtt->vm, i915);
+	i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
 
 	/* There are only few exceptions for gen >=6. chv and bxt.
 	 * And we are not sure about the latter so play safe for now.
@@ -2165,7 +2165,7 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 
 	ppgtt->base.vm.total = I915_PDES * GEN6_PTES * I915_GTT_PAGE_SIZE;
 
-	i915_address_space_init(&ppgtt->base.vm, i915);
+	i915_address_space_init(&ppgtt->base.vm, VM_CLASS_PPGTT);
 
 	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
 	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
@@ -3607,7 +3607,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
 	 * and beyond the end of the GTT if we do not provide a guard.
 	 */
 	mutex_lock(&dev_priv->drm.struct_mutex);
-	i915_address_space_init(&ggtt->vm, dev_priv);
+	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
 
 	ggtt->vm.is_ggtt = true;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 7e2af5f4f39b..849a1f67b037 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -286,6 +286,8 @@ struct i915_address_space {
 	bool closed;
 
 	struct mutex mutex; /* protects vma and our lists */
+#define VM_CLASS_GGTT 0
+#define VM_CLASS_PPGTT 1
 
 	struct i915_page_dma scratch_page;
 	struct i915_page_table *scratch_pt;
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index 6ae418c76015..976c862b3842 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -70,7 +70,7 @@ mock_ppgtt(struct drm_i915_private *i915,
 	ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE);
 	ppgtt->vm.file = ERR_PTR(-ENODEV);
 
-	i915_address_space_init(&ppgtt->vm, i915);
+	i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
 
 	ppgtt->vm.clear_range = nop_clear_range;
 	ppgtt->vm.insert_page = mock_insert_page;
@@ -102,6 +102,7 @@ void mock_init_ggtt(struct drm_i915_private *i915)
 	struct i915_ggtt *ggtt = &i915->ggtt;
 
 	ggtt->vm.i915 = i915;
+	ggtt->vm.is_ggtt = true;
 
 	ggtt->gmadr = (struct resource) DEFINE_RES_MEM(0, 2048 * PAGE_SIZE);
 	ggtt->mappable_end = resource_size(&ggtt->gmadr);
@@ -117,9 +118,8 @@ void mock_init_ggtt(struct drm_i915_private *i915)
 	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
 	ggtt->vm.vma_ops.clear_pages = clear_pages;
 
-	i915_address_space_init(&ggtt->vm, i915);
 
-	ggtt->vm.is_ggtt = true;
+	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
 }
 
 void mock_fini_ggtt(struct drm_i915_private *i915)
-- 
2.19.0


* [PATCH 30/40] drm/i915: Pull all the reset functionality together into i915_reset.c
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (27 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 29/40] drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-19 19:55 ` [PATCH 31/40] drm/i915: Make all GPU resets atomic Chris Wilson
                   ` (13 subsequent siblings)
  42 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Currently the code to reset the GPU, and to recover our state afterwards,
is scattered across several files. Pull that logic together into a single
common file, i915_reset.c.
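
The consolidated interface ends up looking roughly as below; this is a
sketch pieced together from the declarations this patch drops from
i915_drv.h, not a verbatim copy of the new header:

  /* i915_reset.h, approximate contents reconstructed from the hunks below */
  struct drm_i915_private;
  struct intel_engine_cs;

  void i915_reset(struct drm_i915_private *i915,
                  unsigned int stalled_mask,
                  const char *reason);
  int i915_reset_engine(struct intel_engine_cs *engine, const char *msg);

  int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask);
  bool intel_has_gpu_reset(struct drm_i915_private *i915);
  bool intel_has_reset_engine(struct drm_i915_private *i915);
  int intel_reset_guc(struct drm_i915_private *i915);

  __printf(4, 5)
  void i915_handle_error(struct drm_i915_private *i915,
                         u32 engine_mask,
                         unsigned long flags,
                         const char *fmt, ...);
  #define I915_ERROR_CAPTURE BIT(0)

  void i915_clear_error_registers(struct drm_i915_private *i915);

Callers that used to pick these declarations up via i915_drv.h now
include "i915_reset.h" explicitly, as the hunks below show.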

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |    3 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |    2 +
 drivers/gpu/drm/i915/i915_drv.c               |  205 +--
 drivers/gpu/drm/i915/i915_drv.h               |   33 +-
 drivers/gpu/drm/i915/i915_gem.c               |  485 +-----
 drivers/gpu/drm/i915/i915_gem_gtt.c           |    1 +
 drivers/gpu/drm/i915/i915_irq.c               |  238 ---
 drivers/gpu/drm/i915/i915_request.c           |    1 +
 drivers/gpu/drm/i915/i915_reset.c             | 1332 +++++++++++++++++
 drivers/gpu/drm/i915/i915_reset.h             |   39 +
 drivers/gpu/drm/i915/intel_display.c          |   15 +-
 drivers/gpu/drm/i915/intel_guc.h              |    3 +
 drivers/gpu/drm/i915/intel_hangcheck.c        |    1 +
 drivers/gpu/drm/i915/intel_uc.c               |    1 +
 drivers/gpu/drm/i915/intel_uncore.c           |  451 ------
 drivers/gpu/drm/i915/selftests/intel_lrc.c    |    2 +
 .../drm/i915/selftests/intel_workarounds.c    |    1 +
 17 files changed, 1408 insertions(+), 1405 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_reset.c
 create mode 100644 drivers/gpu/drm/i915/i915_reset.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index ef1480c14e4e..4654ef481b45 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -40,7 +40,8 @@ i915-y := i915_drv.o \
 	  i915_mm.o \
 	  i915_params.o \
 	  i915_pci.o \
-          i915_suspend.o \
+	  i915_reset.o \
+	  i915_suspend.o \
 	  i915_syncmap.o \
 	  i915_sw_fence.o \
 	  i915_sysfs.o \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 64cb9b56618b..6f38f00e3765 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -32,6 +32,8 @@
 #include "intel_drv.h"
 #include "intel_guc_submission.h"
 
+#include "i915_reset.h"
+
 static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
 {
 	return to_i915(node->minor->dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index e0ab875b4193..7caa6f17110c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -49,6 +49,7 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "i915_pmu.h"
+#include "i915_reset.h"
 #include "i915_query.h"
 #include "i915_vgpu.h"
 #include "intel_drv.h"
@@ -2192,210 +2193,6 @@ static int i915_resume_switcheroo(struct drm_device *dev)
 	return i915_drm_resume(dev);
 }
 
-/**
- * i915_reset - reset chip after a hang
- * @i915: #drm_i915_private to reset
- * @stalled_mask: mask of the stalled engines with the guilty requests
- * @reason: user error message for why we are resetting
- *
- * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
- * on failure.
- *
- * Caller must hold the struct_mutex.
- *
- * Procedure is fairly simple:
- *   - reset the chip using the reset reg
- *   - re-init context state
- *   - re-init hardware status page
- *   - re-init ring buffer
- *   - re-init interrupt state
- *   - re-init display
- */
-void i915_reset(struct drm_i915_private *i915,
-		unsigned int stalled_mask,
-		const char *reason)
-{
-	struct i915_gpu_error *error = &i915->gpu_error;
-	int ret;
-	int i;
-
-	GEM_TRACE("flags=%lx\n", error->flags);
-
-	might_sleep();
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
-
-	if (!test_bit(I915_RESET_HANDOFF, &error->flags))
-		return;
-
-	/* Clear any previous failed attempts at recovery. Time to try again. */
-	if (!i915_gem_unset_wedged(i915))
-		goto wakeup;
-
-	if (reason)
-		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
-	error->reset_count++;
-
-	ret = i915_gem_reset_prepare(i915);
-	if (ret) {
-		dev_err(i915->drm.dev, "GPU recovery failed\n");
-		goto taint;
-	}
-
-	if (!intel_has_gpu_reset(i915)) {
-		if (i915_modparams.reset)
-			dev_err(i915->drm.dev, "GPU reset not supported\n");
-		else
-			DRM_DEBUG_DRIVER("GPU reset disabled\n");
-		goto error;
-	}
-
-	for (i = 0; i < 3; i++) {
-		ret = intel_gpu_reset(i915, ALL_ENGINES);
-		if (ret == 0)
-			break;
-
-		msleep(100);
-	}
-	if (ret) {
-		dev_err(i915->drm.dev, "Failed to reset chip\n");
-		goto taint;
-	}
-
-	/* Ok, now get things going again... */
-
-	/*
-	 * Everything depends on having the GTT running, so we need to start
-	 * there.
-	 */
-	ret = i915_ggtt_enable_hw(i915);
-	if (ret) {
-		DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n",
-			  ret);
-		goto error;
-	}
-
-	i915_gem_reset(i915, stalled_mask);
-	intel_overlay_reset(i915);
-
-	/*
-	 * Next we need to restore the context, but we don't use those
-	 * yet either...
-	 *
-	 * Ring buffer needs to be re-initialized in the KMS case, or if X
-	 * was running at the time of the reset (i.e. we weren't VT
-	 * switched away).
-	 */
-	ret = i915_gem_init_hw(i915);
-	if (ret) {
-		DRM_ERROR("Failed to initialise HW following reset (%d)\n",
-			  ret);
-		goto error;
-	}
-
-	i915_queue_hangcheck(i915);
-
-finish:
-	i915_gem_reset_finish(i915);
-wakeup:
-	clear_bit(I915_RESET_HANDOFF, &error->flags);
-	wake_up_bit(&error->flags, I915_RESET_HANDOFF);
-	return;
-
-taint:
-	/*
-	 * History tells us that if we cannot reset the GPU now, we
-	 * never will. This then impacts everything that is run
-	 * subsequently. On failing the reset, we mark the driver
-	 * as wedged, preventing further execution on the GPU.
-	 * We also want to go one step further and add a taint to the
-	 * kernel so that any subsequent faults can be traced back to
-	 * this failure. This is important for CI, where if the
-	 * GPU/driver fails we would like to reboot and restart testing
-	 * rather than continue on into oblivion. For everyone else,
-	 * the system should still plod along, but they have been warned!
-	 */
-	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
-error:
-	i915_gem_set_wedged(i915);
-	i915_retire_requests(i915);
-	goto finish;
-}
-
-static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv,
-					struct intel_engine_cs *engine)
-{
-	return intel_gpu_reset(dev_priv, intel_engine_flag(engine));
-}
-
-/**
- * i915_reset_engine - reset GPU engine to recover from a hang
- * @engine: engine to reset
- * @msg: reason for GPU reset; or NULL for no dev_notice()
- *
- * Reset a specific GPU engine. Useful if a hang is detected.
- * Returns zero on successful reset or otherwise an error code.
- *
- * Procedure is:
- *  - identifies the request that caused the hang and it is dropped
- *  - reset engine (which will force the engine to idle)
- *  - re-init/configure engine
- */
-int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
-{
-	struct i915_gpu_error *error = &engine->i915->gpu_error;
-	struct i915_request *active_request;
-	int ret;
-
-	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
-	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
-
-	active_request = i915_gem_reset_prepare_engine(engine);
-	if (IS_ERR_OR_NULL(active_request)) {
-		/* Either the previous reset failed, or we pardon the reset. */
-		ret = PTR_ERR(active_request);
-		goto out;
-	}
-
-	if (msg)
-		dev_notice(engine->i915->drm.dev,
-			   "Resetting %s for %s\n", engine->name, msg);
-	error->reset_engine_count[engine->id]++;
-
-	if (!engine->i915->guc.execbuf_client)
-		ret = intel_gt_reset_engine(engine->i915, engine);
-	else
-		ret = intel_guc_reset_engine(&engine->i915->guc, engine);
-	if (ret) {
-		/* If we fail here, we expect to fallback to a global reset */
-		DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
-				 engine->i915->guc.execbuf_client ? "GuC " : "",
-				 engine->name, ret);
-		goto out;
-	}
-
-	/*
-	 * The request that caused the hang is stuck on elsp, we know the
-	 * active request and can drop it, adjust head to skip the offending
-	 * request to resume executing remaining requests in the queue.
-	 */
-	i915_gem_reset_engine(engine, active_request, true);
-
-	/*
-	 * The engine and its registers (and workarounds in case of render)
-	 * have been reset to their default values. Follow the init_ring
-	 * process to program RING_MODE, HWSP and re-enable submission.
-	 */
-	ret = engine->init_hw(engine);
-	if (ret)
-		goto out;
-
-out:
-	intel_engine_cancel_stop_cs(engine);
-	i915_gem_reset_finish_engine(engine);
-	return ret;
-}
-
 static int i915_pm_prepare(struct device *kdev)
 {
 	struct pci_dev *pdev = to_pci_dev(kdev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 30934abdbd14..ad4e792f3373 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2780,19 +2780,7 @@ extern const struct dev_pm_ops i915_pm_ops;
 extern int i915_driver_load(struct pci_dev *pdev,
 			    const struct pci_device_id *ent);
 extern void i915_driver_unload(struct drm_device *dev);
-extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
-extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
-
-extern void i915_reset(struct drm_i915_private *i915,
-		       unsigned int stalled_mask,
-		       const char *reason);
-extern int i915_reset_engine(struct intel_engine_cs *engine,
-			     const char *reason);
-
-extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv);
-extern int intel_reset_guc(struct drm_i915_private *dev_priv);
-extern int intel_guc_reset_engine(struct intel_guc *guc,
-				  struct intel_engine_cs *engine);
+
 extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
 extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
 extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
@@ -2835,20 +2823,11 @@ static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv)
 			   &dev_priv->gpu_error.hangcheck_work, delay);
 }
 
-__printf(4, 5)
-void i915_handle_error(struct drm_i915_private *dev_priv,
-		       u32 engine_mask,
-		       unsigned long flags,
-		       const char *fmt, ...);
-#define I915_ERROR_CAPTURE BIT(0)
-
 extern void intel_irq_init(struct drm_i915_private *dev_priv);
 extern void intel_irq_fini(struct drm_i915_private *dev_priv);
 int intel_irq_install(struct drm_i915_private *dev_priv);
 void intel_irq_uninstall(struct drm_i915_private *dev_priv);
 
-void i915_clear_error_registers(struct drm_i915_private *dev_priv);
-
 static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
 {
 	return dev_priv->gvt;
@@ -3213,18 +3192,8 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
 	return READ_ONCE(error->reset_engine_count[engine->id]);
 }
 
-struct i915_request *
-i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
-int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
-void i915_gem_reset(struct drm_i915_private *dev_priv,
-		    unsigned int stalled_mask);
-void i915_gem_reset_finish_engine(struct intel_engine_cs *engine);
-void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
-void i915_gem_reset_engine(struct intel_engine_cs *engine,
-			   struct i915_request *request,
-			   bool stalled);
 
 void i915_gem_init_mmio(struct drm_i915_private *i915);
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a2e7f7487588..c03e413f1db8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -28,15 +28,6 @@
 #include <drm/drmP.h>
 #include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "i915_vgpu.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
-#include "intel_mocs.h"
-#include "intel_workarounds.h"
-#include "i915_gemfs.h"
 #include <linux/dma-fence-array.h>
 #include <linux/kthread.h>
 #include <linux/reservation.h>
@@ -47,6 +38,18 @@
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
 
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+#include "i915_gemfs.h"
+#include "i915_reset.h"
+#include "i915_trace.h"
+#include "i915_vgpu.h"
+
+#include "intel_drv.h"
+#include "intel_frontbuffer.h"
+#include "intel_mocs.h"
+#include "intel_workarounds.h"
+
 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 
 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
@@ -2993,61 +2996,6 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
-static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv,
-					const struct i915_gem_context *ctx)
-{
-	unsigned int score;
-	unsigned long prev_hang;
-
-	if (i915_gem_context_is_banned(ctx))
-		score = I915_CLIENT_SCORE_CONTEXT_BAN;
-	else
-		score = 0;
-
-	prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
-	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
-		score += I915_CLIENT_SCORE_HANG_FAST;
-
-	if (score) {
-		atomic_add(score, &file_priv->ban_score);
-
-		DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
-				 ctx->name, score,
-				 atomic_read(&file_priv->ban_score));
-	}
-}
-
-static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
-{
-	unsigned int score;
-	bool banned, bannable;
-
-	atomic_inc(&ctx->guilty_count);
-
-	bannable = i915_gem_context_is_bannable(ctx);
-	score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
-	banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
-
-	/* Cool contexts don't accumulate client ban score */
-	if (!bannable)
-		return;
-
-	if (banned) {
-		DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
-				 ctx->name, atomic_read(&ctx->guilty_count),
-				 score);
-		i915_gem_context_set_banned(ctx);
-	}
-
-	if (!IS_ERR_OR_NULL(ctx->file_priv))
-		i915_gem_client_mark_guilty(ctx->file_priv, ctx);
-}
-
-static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
-{
-	atomic_inc(&ctx->active_count);
-}
-
 struct i915_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine)
 {
@@ -3078,415 +3026,6 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 	return active;
 }
 
-/*
- * Ensure irq handler finishes, and not run again.
- * Also return the active request so that we only search for it once.
- */
-struct i915_request *
-i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
-{
-	struct i915_request *request;
-
-	/*
-	 * During the reset sequence, we must prevent the engine from
-	 * entering RC6. As the context state is undefined until we restart
-	 * the engine, if it does enter RC6 during the reset, the state
-	 * written to the powercontext is undefined and so we may lose
-	 * GPU state upon resume, i.e. fail to restart after a reset.
-	 */
-	intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
-
-	request = engine->reset.prepare(engine);
-	if (request && request->fence.error == -EIO)
-		request = ERR_PTR(-EIO); /* Previous reset failed! */
-
-	return request;
-}
-
-int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	struct i915_request *request;
-	enum intel_engine_id id;
-	int err = 0;
-
-	for_each_engine(engine, dev_priv, id) {
-		request = i915_gem_reset_prepare_engine(engine);
-		if (IS_ERR(request)) {
-			err = PTR_ERR(request);
-			continue;
-		}
-
-		engine->hangcheck.active_request = request;
-	}
-
-	i915_gem_revoke_fences(dev_priv);
-	intel_uc_sanitize(dev_priv);
-
-	return err;
-}
-
-static void engine_skip_context(struct i915_request *request)
-{
-	struct intel_engine_cs *engine = request->engine;
-	struct i915_gem_context *hung_ctx = request->gem_context;
-	struct i915_timeline *timeline = request->timeline;
-	unsigned long flags;
-
-	GEM_BUG_ON(timeline == &engine->timeline);
-
-	spin_lock_irqsave(&engine->timeline.lock, flags);
-	spin_lock(&timeline->lock);
-
-	list_for_each_entry_continue(request, &engine->timeline.requests, link)
-		if (request->gem_context == hung_ctx)
-			i915_request_skip(request, -EIO);
-
-	list_for_each_entry(request, &timeline->requests, link)
-		i915_request_skip(request, -EIO);
-
-	spin_unlock(&timeline->lock);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
-}
-
-/* Returns the request if it was guilty of the hang */
-static struct i915_request *
-i915_gem_reset_request(struct intel_engine_cs *engine,
-		       struct i915_request *request,
-		       bool stalled)
-{
-	/* The guilty request will get skipped on a hung engine.
-	 *
-	 * Users of client default contexts do not rely on logical
-	 * state preserved between batches so it is safe to execute
-	 * queued requests following the hang. Non default contexts
-	 * rely on preserved state, so skipping a batch loses the
-	 * evolution of the state and it needs to be considered corrupted.
-	 * Executing more queued batches on top of corrupted state is
-	 * risky. But we take the risk by trying to advance through
-	 * the queued requests in order to make the client behaviour
-	 * more predictable around resets, by not throwing away random
-	 * amount of batches it has prepared for execution. Sophisticated
-	 * clients can use gem_reset_stats_ioctl and dma fence status
-	 * (exported via sync_file info ioctl on explicit fences) to observe
-	 * when it loses the context state and should rebuild accordingly.
-	 *
-	 * The context ban, and ultimately the client ban, mechanism are safety
-	 * valves if client submission ends up resulting in nothing more than
-	 * subsequent hangs.
-	 */
-
-	if (i915_request_completed(request)) {
-		GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n",
-			  engine->name, request->global_seqno,
-			  request->fence.context, request->fence.seqno,
-			  intel_engine_get_seqno(engine));
-		stalled = false;
-	}
-
-	if (stalled) {
-		i915_gem_context_mark_guilty(request->gem_context);
-		i915_request_skip(request, -EIO);
-
-		/* If this context is now banned, skip all pending requests. */
-		if (i915_gem_context_is_banned(request->gem_context))
-			engine_skip_context(request);
-	} else {
-		/*
-		 * Since this is not the hung engine, it may have advanced
-		 * since the hang declaration. Double check by refinding
-		 * the active request at the time of the reset.
-		 */
-		request = i915_gem_find_active_request(engine);
-		if (request) {
-			unsigned long flags;
-
-			i915_gem_context_mark_innocent(request->gem_context);
-			dma_fence_set_error(&request->fence, -EAGAIN);
-
-			/* Rewind the engine to replay the incomplete rq */
-			spin_lock_irqsave(&engine->timeline.lock, flags);
-			request = list_prev_entry(request, link);
-			if (&request->link == &engine->timeline.requests)
-				request = NULL;
-			spin_unlock_irqrestore(&engine->timeline.lock, flags);
-		}
-	}
-
-	return request;
-}
-
-void i915_gem_reset_engine(struct intel_engine_cs *engine,
-			   struct i915_request *request,
-			   bool stalled)
-{
-	/*
-	 * Make sure this write is visible before we re-enable the interrupt
-	 * handlers on another CPU, as tasklet_enable() resolves to just
-	 * a compiler barrier which is insufficient for our purpose here.
-	 */
-	smp_store_mb(engine->irq_posted, 0);
-
-	if (request)
-		request = i915_gem_reset_request(engine, request, stalled);
-
-	/* Setup the CS to resume from the breadcrumb of the hung request */
-	engine->reset.reset(engine, request);
-}
-
-void i915_gem_reset(struct drm_i915_private *dev_priv,
-		    unsigned int stalled_mask)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	i915_retire_requests(dev_priv);
-
-	for_each_engine(engine, dev_priv, id) {
-		struct intel_context *ce;
-
-		i915_gem_reset_engine(engine,
-				      engine->hangcheck.active_request,
-				      stalled_mask & ENGINE_MASK(id));
-		ce = fetch_and_zero(&engine->last_retired_context);
-		if (ce)
-			intel_context_unpin(ce);
-
-		/*
-		 * Ostensibily, we always want a context loaded for powersaving,
-		 * so if the engine is idle after the reset, send a request
-		 * to load our scratch kernel_context.
-		 *
-		 * More mysteriously, if we leave the engine idle after a reset,
-		 * the next userspace batch may hang, with what appears to be
-		 * an incoherent read by the CS (presumably stale TLB). An
-		 * empty request appears sufficient to paper over the glitch.
-		 */
-		if (intel_engine_is_idle(engine)) {
-			struct i915_request *rq;
-
-			rq = i915_request_alloc(engine,
-						dev_priv->kernel_context);
-			if (!IS_ERR(rq))
-				i915_request_add(rq);
-		}
-	}
-
-	i915_gem_restore_fences(dev_priv);
-}
-
-void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
-{
-	engine->reset.finish(engine);
-
-	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
-}
-
-void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	for_each_engine(engine, dev_priv, id) {
-		engine->hangcheck.active_request = NULL;
-		i915_gem_reset_finish_engine(engine);
-	}
-}
-
-static void nop_submit_request(struct i915_request *request)
-{
-	GEM_TRACE("%s fence %llx:%d -> -EIO\n",
-		  request->engine->name,
-		  request->fence.context, request->fence.seqno);
-	dma_fence_set_error(&request->fence, -EIO);
-
-	i915_request_submit(request);
-}
-
-static void nop_complete_submit_request(struct i915_request *request)
-{
-	unsigned long flags;
-
-	GEM_TRACE("%s fence %llx:%d -> -EIO\n",
-		  request->engine->name,
-		  request->fence.context, request->fence.seqno);
-	dma_fence_set_error(&request->fence, -EIO);
-
-	spin_lock_irqsave(&request->engine->timeline.lock, flags);
-	__i915_request_submit(request);
-	intel_engine_init_global_seqno(request->engine, request->global_seqno);
-	spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
-}
-
-void i915_gem_set_wedged(struct drm_i915_private *i915)
-{
-	struct i915_gpu_error *error = &i915->gpu_error;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	mutex_lock(&error->wedge_mutex);
-	if (test_bit(I915_WEDGED, &error->flags)) {
-		mutex_unlock(&error->wedge_mutex);
-		return;
-	}
-
-	if (GEM_SHOW_DEBUG()) {
-		struct drm_printer p = drm_debug_printer(__func__);
-
-		for_each_engine(engine, i915, id)
-			intel_engine_dump(engine, &p, "%s\n", engine->name);
-	}
-
-	GEM_TRACE("start\n");
-
-	/*
-	 * First, stop submission to hw, but do not yet complete requests by
-	 * rolling the global seqno forward (since this would complete requests
-	 * for which we haven't set the fence error to EIO yet).
-	 */
-	for_each_engine(engine, i915, id) {
-		i915_gem_reset_prepare_engine(engine);
-
-		engine->submit_request = nop_submit_request;
-		engine->schedule = NULL;
-	}
-	i915->caps.scheduler = 0;
-
-	/* Even if the GPU reset fails, it should still stop the engines */
-	if (INTEL_GEN(i915) >= 5)
-		intel_gpu_reset(i915, ALL_ENGINES);
-
-	/*
-	 * Make sure no one is running the old callback before we proceed with
-	 * cancelling requests and resetting the completion tracking. Otherwise
-	 * we might submit a request to the hardware which never completes.
-	 */
-	synchronize_rcu();
-
-	for_each_engine(engine, i915, id) {
-		/* Mark all executing requests as skipped */
-		engine->cancel_requests(engine);
-
-		/*
-		 * Only once we've force-cancelled all in-flight requests can we
-		 * start to complete all requests.
-		 */
-		engine->submit_request = nop_complete_submit_request;
-	}
-
-	/*
-	 * Make sure no request can slip through without getting completed by
-	 * either this call here to intel_engine_init_global_seqno, or the one
-	 * in nop_complete_submit_request.
-	 */
-	synchronize_rcu();
-
-	for_each_engine(engine, i915, id) {
-		unsigned long flags;
-
-		/*
-		 * Mark all pending requests as complete so that any concurrent
-		 * (lockless) lookup doesn't try and wait upon the request as we
-		 * reset it.
-		 */
-		spin_lock_irqsave(&engine->timeline.lock, flags);
-		intel_engine_init_global_seqno(engine,
-					       intel_engine_last_submit(engine));
-		spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
-		i915_gem_reset_finish_engine(engine);
-	}
-
-	smp_mb__before_atomic();
-	set_bit(I915_WEDGED, &error->flags);
-
-	GEM_TRACE("end\n");
-	mutex_unlock(&error->wedge_mutex);
-
-	wake_up_all(&error->reset_queue);
-}
-
-bool i915_gem_unset_wedged(struct drm_i915_private *i915)
-{
-	struct i915_gpu_error *error = &i915->gpu_error;
-	struct i915_timeline *tl;
-	bool ret = false;
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	if (!test_bit(I915_WEDGED, &error->flags))
-		return true;
-
-	mutex_lock(&error->wedge_mutex);
-
-	GEM_TRACE("start\n");
-
-	/*
-	 * Before unwedging, make sure that all pending operations
-	 * are flushed and errored out - we may have requests waiting upon
-	 * third party fences. We marked all inflight requests as EIO, and
-	 * every execbuf since returned EIO, for consistency we want all
-	 * the currently pending requests to also be marked as EIO, which
-	 * is done inside our nop_submit_request - and so we must wait.
-	 *
-	 * No more can be submitted until we reset the wedged bit.
-	 */
-	list_for_each_entry(tl, &i915->gt.timelines, link) {
-		struct i915_request *rq;
-
-		rq = i915_gem_active_peek(&tl->last_request,
-					  &i915->drm.struct_mutex);
-		if (!rq)
-			continue;
-
-		/*
-		 * We can't use our normal waiter as we want to
-		 * avoid recursively trying to handle the current
-		 * reset. The basic dma_fence_default_wait() installs
-		 * a callback for dma_fence_signal(), which is
-		 * triggered by our nop handler (indirectly, the
-		 * callback enables the signaler thread which is
-		 * woken by the nop_submit_request() advancing the seqno
-		 * and when the seqno passes the fence, the signaler
-		 * then signals the fence waking us up).
-		 */
-		if (dma_fence_default_wait(&rq->fence, true,
-					   MAX_SCHEDULE_TIMEOUT) < 0)
-			goto unlock;
-	}
-	i915_retire_requests(i915);
-	GEM_BUG_ON(i915->gt.active_requests);
-
-	if (!intel_gpu_reset(i915, ALL_ENGINES))
-		intel_engines_sanitize(i915);
-
-	/*
-	 * Undo nop_submit_request. We prevent all new i915 requests from
-	 * being queued (by disallowing execbuf whilst wedged) so having
-	 * waited for all active requests above, we know the system is idle
-	 * and do not have to worry about a thread being inside
-	 * engine->submit_request() as we swap over. So unlike installing
-	 * the nop_submit_request on reset, we can do this from normal
-	 * context and do not require stop_machine().
-	 */
-	intel_engines_reset_default_submission(i915);
-	i915_gem_contexts_lost(i915);
-
-	GEM_TRACE("end\n");
-
-	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
-	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
-	ret = true;
-unlock:
-	mutex_unlock(&i915->gpu_error.wedge_mutex);
-
-	return ret;
-}
-
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 719e1ac212c1..d1ca67459bc2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -38,6 +38,7 @@
 
 #include "i915_drv.h"
 #include "i915_vgpu.h"
+#include "i915_reset.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 83e15288ae96..05035fcad266 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2920,46 +2920,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
-struct wedge_me {
-	struct delayed_work work;
-	struct drm_i915_private *i915;
-	const char *name;
-};
-
-static void wedge_me(struct work_struct *work)
-{
-	struct wedge_me *w = container_of(work, typeof(*w), work.work);
-
-	dev_err(w->i915->drm.dev,
-		"%s timed out, cancelling all in-flight rendering.\n",
-		w->name);
-	i915_gem_set_wedged(w->i915);
-}
-
-static void __init_wedge(struct wedge_me *w,
-			 struct drm_i915_private *i915,
-			 long timeout,
-			 const char *name)
-{
-	w->i915 = i915;
-	w->name = name;
-
-	INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me);
-	schedule_delayed_work(&w->work, timeout);
-}
-
-static void __fini_wedge(struct wedge_me *w)
-{
-	cancel_delayed_work_sync(&w->work);
-	destroy_delayed_work_on_stack(&w->work);
-	w->i915 = NULL;
-}
-
-#define i915_wedge_on_timeout(W, DEV, TIMEOUT)				\
-	for (__init_wedge((W), (DEV), (TIMEOUT), __func__);		\
-	     (W)->i915;							\
-	     __fini_wedge((W)))
-
 static u32
 gen11_gt_engine_identity(struct drm_i915_private * const i915,
 			 const unsigned int bank, const unsigned int bit)
@@ -3164,204 +3124,6 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
-static void i915_reset_device(struct drm_i915_private *dev_priv,
-			      u32 engine_mask,
-			      const char *reason)
-{
-	struct i915_gpu_error *error = &dev_priv->gpu_error;
-	struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj;
-	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
-	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
-	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
-	struct wedge_me w;
-
-	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
-
-	DRM_DEBUG_DRIVER("resetting chip\n");
-	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
-
-	/* Use a watchdog to ensure that our reset completes */
-	i915_wedge_on_timeout(&w, dev_priv, 5*HZ) {
-		intel_prepare_reset(dev_priv);
-
-		error->reason = reason;
-		error->stalled_mask = engine_mask;
-
-		/* Signal that locked waiters should reset the GPU */
-		smp_mb__before_atomic();
-		set_bit(I915_RESET_HANDOFF, &error->flags);
-		wake_up_all(&error->wait_queue);
-
-		/* Wait for anyone holding the lock to wakeup, without
-		 * blocking indefinitely on struct_mutex.
-		 */
-		do {
-			if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
-				i915_reset(dev_priv, engine_mask, reason);
-				mutex_unlock(&dev_priv->drm.struct_mutex);
-			}
-		} while (wait_on_bit_timeout(&error->flags,
-					     I915_RESET_HANDOFF,
-					     TASK_UNINTERRUPTIBLE,
-					     1));
-
-		error->stalled_mask = 0;
-		error->reason = NULL;
-
-		intel_finish_reset(dev_priv);
-	}
-
-	if (!test_bit(I915_WEDGED, &error->flags))
-		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
-}
-
-void i915_clear_error_registers(struct drm_i915_private *dev_priv)
-{
-	u32 eir;
-
-	if (!IS_GEN2(dev_priv))
-		I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER));
-
-	if (INTEL_GEN(dev_priv) < 4)
-		I915_WRITE(IPEIR, I915_READ(IPEIR));
-	else
-		I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965));
-
-	I915_WRITE(EIR, I915_READ(EIR));
-	eir = I915_READ(EIR);
-	if (eir) {
-		/*
-		 * some errors might have become stuck,
-		 * mask them.
-		 */
-		DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
-		I915_WRITE(EMR, I915_READ(EMR) | eir);
-		I915_WRITE(IIR, I915_MASTER_ERROR_INTERRUPT);
-	}
-
-	if (INTEL_GEN(dev_priv) >= 8) {
-		I915_WRITE(GEN8_RING_FAULT_REG,
-			   I915_READ(GEN8_RING_FAULT_REG) & ~RING_FAULT_VALID);
-		POSTING_READ(GEN8_RING_FAULT_REG);
-	} else if (INTEL_GEN(dev_priv) >= 6) {
-		struct intel_engine_cs *engine;
-		enum intel_engine_id id;
-
-		for_each_engine(engine, dev_priv, id) {
-			I915_WRITE(RING_FAULT_REG(engine),
-				   I915_READ(RING_FAULT_REG(engine)) &
-				   ~RING_FAULT_VALID);
-		}
-		POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS]));
-	}
-}
-
-/**
- * i915_handle_error - handle a gpu error
- * @dev_priv: i915 device private
- * @engine_mask: mask representing engines that are hung
- * @flags: control flags
- * @fmt: Error message format string
- *
- * Do some basic checking of register state at error time and
- * dump it to the syslog.  Also call i915_capture_error_state() to make
- * sure we get a record and make it available in debugfs.  Fire a uevent
- * so userspace knows something bad happened (should trigger collection
- * of a ring dump etc.).
- */
-void i915_handle_error(struct drm_i915_private *dev_priv,
-		       u32 engine_mask,
-		       unsigned long flags,
-		       const char *fmt, ...)
-{
-	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
-	unsigned int tmp;
-	char error_msg[80];
-	char *msg = NULL;
-
-	if (fmt) {
-		va_list args;
-
-		va_start(args, fmt);
-		vscnprintf(error_msg, sizeof(error_msg), fmt, args);
-		va_end(args);
-
-		msg = error_msg;
-	}
-
-	/*
-	 * In most cases it's guaranteed that we get here with an RPM
-	 * reference held, for example because there is a pending GPU
-	 * request that won't finish until the reset is done. This
-	 * isn't the case at least when we get here by doing a
-	 * simulated reset via debugfs, so get an RPM reference.
-	 */
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	engine_mask &= INTEL_INFO(dev_priv)->ring_mask;
-
-	if (flags & I915_ERROR_CAPTURE) {
-		i915_capture_error_state(dev_priv, engine_mask, msg);
-		i915_clear_error_registers(dev_priv);
-	}
-
-	/*
-	 * Try engine reset when available. We fall back to full reset if
-	 * single reset fails.
-	 */
-	if (intel_has_reset_engine(dev_priv) &&
-	    !i915_terminally_wedged(&dev_priv->gpu_error)) {
-		for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
-			BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
-			if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
-					     &dev_priv->gpu_error.flags))
-				continue;
-
-			if (i915_reset_engine(engine, msg) == 0)
-				engine_mask &= ~intel_engine_flag(engine);
-
-			clear_bit(I915_RESET_ENGINE + engine->id,
-				  &dev_priv->gpu_error.flags);
-			wake_up_bit(&dev_priv->gpu_error.flags,
-				    I915_RESET_ENGINE + engine->id);
-		}
-	}
-
-	if (!engine_mask)
-		goto out;
-
-	/* Full reset needs the mutex, stop any other user trying to do so. */
-	if (test_and_set_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) {
-		wait_event(dev_priv->gpu_error.reset_queue,
-			   !test_bit(I915_RESET_BACKOFF,
-				     &dev_priv->gpu_error.flags));
-		goto out;
-	}
-
-	/* Prevent any other reset-engine attempt. */
-	for_each_engine(engine, dev_priv, tmp) {
-		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
-					&dev_priv->gpu_error.flags))
-			wait_on_bit(&dev_priv->gpu_error.flags,
-				    I915_RESET_ENGINE + engine->id,
-				    TASK_UNINTERRUPTIBLE);
-	}
-
-	i915_reset_device(dev_priv, engine_mask, msg);
-
-	for_each_engine(engine, dev_priv, tmp) {
-		clear_bit(I915_RESET_ENGINE + engine->id,
-			  &dev_priv->gpu_error.flags);
-	}
-
-	clear_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags);
-	wake_up_all(&dev_priv->gpu_error.reset_queue);
-
-out:
-	intel_runtime_pm_put(dev_priv, wakeref);
-}
-
 /* Called from drm generic code, passed 'crtc' which
  * we use as a pipe index
  */
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index abd4dacbab8e..fc7ad8dbc36e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -29,6 +29,7 @@
 #include <linux/sched/signal.h>
 
 #include "i915_drv.h"
+#include "i915_reset.h"
 
 static const char *i915_fence_get_driver_name(struct dma_fence *fence)
 {
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
new file mode 100644
index 000000000000..d52de3ec9ba0
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -0,0 +1,1332 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008-2018 Intel Corporation
+ */
+
+#include <linux/sched/mm.h>
+
+#include "i915_drv.h"
+#include "i915_gpu_error.h"
+#include "i915_reset.h"
+
+#include "intel_guc.h"
+
+static void engine_skip_context(struct i915_request *rq)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	struct i915_gem_context *hung_ctx = rq->gem_context;
+	struct i915_timeline *timeline = rq->timeline;
+	unsigned long flags;
+
+	GEM_BUG_ON(timeline == &engine->timeline);
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock(&timeline->lock);
+
+	list_for_each_entry_continue(rq, &engine->timeline.requests, link)
+		if (rq->gem_context == hung_ctx)
+			i915_request_skip(rq, -EIO);
+
+	list_for_each_entry(rq, &timeline->requests, link)
+		i915_request_skip(rq, -EIO);
+
+	spin_unlock(&timeline->lock);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+}
+
+static void client_mark_guilty(struct drm_i915_file_private *file_priv,
+			       const struct i915_gem_context *ctx)
+{
+	unsigned int score;
+	unsigned long prev_hang;
+
+	if (i915_gem_context_is_banned(ctx))
+		score = I915_CLIENT_SCORE_CONTEXT_BAN;
+	else
+		score = 0;
+
+	prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
+	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
+		score += I915_CLIENT_SCORE_HANG_FAST;
+
+	if (score) {
+		atomic_add(score, &file_priv->ban_score);
+
+		DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
+				 ctx->name, score,
+				 atomic_read(&file_priv->ban_score));
+	}
+}
+
+static void context_mark_guilty(struct i915_gem_context *ctx)
+{
+	unsigned int score;
+	bool banned, bannable;
+
+	atomic_inc(&ctx->guilty_count);
+
+	bannable = i915_gem_context_is_bannable(ctx);
+	score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
+	banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
+
+	/* Cool contexts don't accumulate client ban score */
+	if (!bannable)
+		return;
+
+	if (banned) {
+		DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
+				 ctx->name, atomic_read(&ctx->guilty_count),
+				 score);
+		i915_gem_context_set_banned(ctx);
+	}
+
+	if (!IS_ERR_OR_NULL(ctx->file_priv))
+		client_mark_guilty(ctx->file_priv, ctx);
+}
+
+static void context_mark_innocent(struct i915_gem_context *ctx)
+{
+	atomic_inc(&ctx->active_count);
+}
+
+static void gen3_stop_engine(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	const u32 base = engine->mmio_base;
+
+	if (intel_engine_stop_cs(engine))
+		DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", engine->name);
+
+	I915_WRITE_FW(RING_HEAD(base), I915_READ_FW(RING_TAIL(base)));
+	POSTING_READ_FW(RING_HEAD(base)); /* paranoia */
+
+	I915_WRITE_FW(RING_HEAD(base), 0);
+	I915_WRITE_FW(RING_TAIL(base), 0);
+	POSTING_READ_FW(RING_TAIL(base));
+
+	/* The ring must be empty before it is disabled */
+	I915_WRITE_FW(RING_CTL(base), 0);
+
+	/* Check acts as a post */
+	if (I915_READ_FW(RING_HEAD(base)) != 0)
+		DRM_DEBUG_DRIVER("%s: ring head not parked\n",
+				 engine->name);
+}
+
+static void i915_stop_engines(struct drm_i915_private *i915,
+			      unsigned int engine_mask)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	if (INTEL_GEN(i915) < 3)
+		return;
+
+	for_each_engine_masked(engine, i915, engine_mask, id)
+		gen3_stop_engine(engine);
+}
+
+static bool i915_in_reset(struct pci_dev *pdev)
+{
+	u8 gdrst;
+
+	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
+	return gdrst & GRDOM_RESET_STATUS;
+}
+
+static int i915_do_reset(struct drm_i915_private *i915,
+			 unsigned int engine_mask,
+			 unsigned int retry)
+{
+	struct pci_dev *pdev = i915->drm.pdev;
+	int err;
+
+	/* Assert reset for at least 20 usec, and wait for acknowledgement. */
+	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
+	usleep_range(50, 200);
+	err = wait_for(i915_in_reset(pdev), 500);
+
+	/* Clear the reset request. */
+	pci_write_config_byte(pdev, I915_GDRST, 0);
+	usleep_range(50, 200);
+	if (!err)
+		err = wait_for(!i915_in_reset(pdev), 500);
+
+	return err;
+}
+
+static bool g4x_reset_complete(struct pci_dev *pdev)
+{
+	u8 gdrst;
+
+	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
+	return (gdrst & GRDOM_RESET_ENABLE) == 0;
+}
+
+static int g33_do_reset(struct drm_i915_private *i915,
+			unsigned int engine_mask,
+			unsigned int retry)
+{
+	struct pci_dev *pdev = i915->drm.pdev;
+
+	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
+	return wait_for(g4x_reset_complete(pdev), 500);
+}
+
+static int g4x_do_reset(struct drm_i915_private *dev_priv,
+			unsigned int engine_mask,
+			unsigned int retry)
+{
+	struct pci_dev *pdev = dev_priv->drm.pdev;
+	int ret;
+
+	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
+	I915_WRITE(VDECCLK_GATE_D,
+		   I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
+	POSTING_READ(VDECCLK_GATE_D);
+
+	pci_write_config_byte(pdev, I915_GDRST,
+			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
+	ret =  wait_for(g4x_reset_complete(pdev), 500);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
+		goto out;
+	}
+
+	pci_write_config_byte(pdev, I915_GDRST,
+			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
+	ret =  wait_for(g4x_reset_complete(pdev), 500);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
+		goto out;
+	}
+
+out:
+	pci_write_config_byte(pdev, I915_GDRST, 0);
+
+	I915_WRITE(VDECCLK_GATE_D,
+		   I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
+	POSTING_READ(VDECCLK_GATE_D);
+
+	return ret;
+}
+
+static int ironlake_do_reset(struct drm_i915_private *dev_priv,
+			     unsigned int engine_mask,
+			     unsigned int retry)
+{
+	int ret;
+
+	I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
+	ret = intel_wait_for_register(dev_priv,
+				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
+				      500);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
+		goto out;
+	}
+
+	I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
+	ret = intel_wait_for_register(dev_priv,
+				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
+				      500);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
+		goto out;
+	}
+
+out:
+	I915_WRITE(ILK_GDSR, 0);
+	POSTING_READ(ILK_GDSR);
+	return ret;
+}
+
+/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
+static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv,
+				u32 hw_domain_mask)
+{
+	int err;
+
+	/*
+	 * GEN6_GDRST is not in the gt power well, no need to check
+	 * for fifo space for the write or forcewake the chip for
+	 * the read
+	 */
+	I915_WRITE_FW(GEN6_GDRST, hw_domain_mask);
+
+	/* Wait for the device to ack the reset requests */
+	err = __intel_wait_for_register_fw(dev_priv,
+					   GEN6_GDRST, hw_domain_mask, 0,
+					   500, 0,
+					   NULL);
+	if (err)
+		DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
+				 hw_domain_mask);
+
+	return err;
+}
+
+static int gen6_reset_engines(struct drm_i915_private *i915,
+			      unsigned int engine_mask,
+			      unsigned int retry)
+{
+	struct intel_engine_cs *engine;
+	const u32 hw_engine_mask[I915_NUM_ENGINES] = {
+		[RCS] = GEN6_GRDOM_RENDER,
+		[BCS] = GEN6_GRDOM_BLT,
+		[VCS] = GEN6_GRDOM_MEDIA,
+		[VCS2] = GEN8_GRDOM_MEDIA2,
+		[VECS] = GEN6_GRDOM_VECS,
+	};
+	u32 hw_mask;
+
+	if (engine_mask == ALL_ENGINES) {
+		hw_mask = GEN6_GRDOM_FULL;
+	} else {
+		unsigned int tmp;
+
+		hw_mask = 0;
+		for_each_engine_masked(engine, i915, engine_mask, tmp)
+			hw_mask |= hw_engine_mask[engine->id];
+	}
+
+	return gen6_hw_domain_reset(i915, hw_mask);
+}
+
+static int gen11_reset_engines(struct drm_i915_private *i915,
+			       unsigned int engine_mask,
+			       unsigned int retry)
+{
+	struct intel_engine_cs *engine;
+	const u32 hw_engine_mask[I915_NUM_ENGINES] = {
+		[RCS] = GEN11_GRDOM_RENDER,
+		[BCS] = GEN11_GRDOM_BLT,
+		[VCS] = GEN11_GRDOM_MEDIA,
+		[VCS2] = GEN11_GRDOM_MEDIA2,
+		[VCS3] = GEN11_GRDOM_MEDIA3,
+		[VCS4] = GEN11_GRDOM_MEDIA4,
+		[VECS] = GEN11_GRDOM_VECS,
+		[VECS2] = GEN11_GRDOM_VECS2,
+	};
+	u32 hw_mask;
+
+	BUILD_BUG_ON(VECS2 + 1 != I915_NUM_ENGINES);
+
+	if (engine_mask == ALL_ENGINES) {
+		hw_mask = GEN11_GRDOM_FULL;
+	} else {
+		unsigned int tmp;
+
+		hw_mask = 0;
+		for_each_engine_masked(engine, i915, engine_mask, tmp)
+			hw_mask |= hw_engine_mask[engine->id];
+	}
+
+	return gen6_hw_domain_reset(i915, hw_mask);
+}
+
+static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
+		      _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
+
+	ret = __intel_wait_for_register_fw(dev_priv,
+					   RING_RESET_CTL(engine->mmio_base),
+					   RESET_CTL_READY_TO_RESET,
+					   RESET_CTL_READY_TO_RESET,
+					   700, 0,
+					   NULL);
+	if (ret)
+		DRM_ERROR("%s: reset request timeout\n", engine->name);
+
+	return ret;
+}
+
+static void gen8_engine_reset_cancel(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
+		      _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
+}
+
+static int gen8_reset_engines(struct drm_i915_private *i915,
+			      unsigned int engine_mask,
+			      unsigned int retry)
+{
+	struct intel_engine_cs *engine;
+	const bool reset_non_ready = retry >= 1;
+	unsigned int tmp;
+	int ret;
+
+	for_each_engine_masked(engine, i915, engine_mask, tmp) {
+		ret = gen8_engine_reset_prepare(engine);
+		if (ret && !reset_non_ready)
+			goto skip_reset;
+
+		/*
+		 * If this is not the first failed attempt to prepare,
+		 * we decide to proceed anyway.
+		 *
+		 * By doing so we risk context corruption and with
+		 * some gens (kbl), possible system hang if reset
+		 * happens during active bb execution.
+		 *
+		 * We would rather risk context corruption than a
+		 * failed reset with a wedged driver/gpu. The active
+		 * bb execution case should be covered by the
+		 * i915_stop_engines() we do before the reset.
+		 */
+	}
+
+	if (INTEL_GEN(i915) >= 11)
+		ret = gen11_reset_engines(i915, engine_mask, retry);
+	else
+		ret = gen6_reset_engines(i915, engine_mask, retry);
+
+skip_reset:
+	for_each_engine_masked(engine, i915, engine_mask, tmp)
+		gen8_engine_reset_cancel(engine);
+
+	return ret;
+}
+
+typedef int (*reset_func)(struct drm_i915_private *,
+			  unsigned int engine_mask,
+			  unsigned int retry);
+
+static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)
+{
+	if (!i915_modparams.reset)
+		return NULL;
+
+	if (INTEL_GEN(i915) >= 8)
+		return gen8_reset_engines;
+	else if (INTEL_GEN(i915) >= 6)
+		return gen6_reset_engines;
+	else if (IS_GEN5(i915))
+		return ironlake_do_reset;
+	else if (IS_G4X(i915))
+		return g4x_do_reset;
+	else if (IS_G33(i915) || IS_PINEVIEW(i915))
+		return g33_do_reset;
+	else if (INTEL_GEN(i915) >= 3)
+		return i915_do_reset;
+	else
+		return NULL;
+}
+
+int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)
+{
+	reset_func reset = intel_get_gpu_reset(i915);
+	int retry;
+	int ret;
+
+	/*
+	 * We want to perform per-engine reset from atomic context (e.g.
+	 * softirq), which imposes the constraint that we cannot sleep.
+	 * However, experience suggests that spending a bit of time waiting
+	 * for a reset helps in various cases, so for a full-device reset
+	 * we apply the opposite rule and wait if we want to. As we should
+	 * always follow up a failed per-engine reset with a full device reset,
+	 * being a little faster, stricter and more error prone for the
+	 * atomic case seems an acceptable compromise.
+	 *
+	 * Unfortunately this leads to a bimodal routine, when the goal was
+	 * to have a single reset function that worked for resetting any
+	 * number of engines simultaneously.
+	 */
+	might_sleep_if(engine_mask == ALL_ENGINES);
+
+	/*
+	 * If the power well sleeps during the reset, the reset
+	 * request may be dropped and never completes (causing -EIO).
+	 */
+	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
+	for (retry = 0; retry < 3; retry++) {
+		/*
+		 * We stop engines, otherwise we might get a failed reset and a
+		 * dead gpu (on elk). Also, a gpu as modern as kbl can suffer
+		 * a system hang if a batchbuffer is progressing when
+		 * the reset is issued, regardless of the READY_TO_RESET ack.
+		 * Thus assume it is best to stop engines on all gens
+		 * where we have a gpu reset.
+		 *
+		 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
+		 *
+		 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
+		 *
+		 * FIXME: Wa for more modern gens needs to be validated
+		 */
+		i915_stop_engines(i915, engine_mask);
+
+		ret = -ENODEV;
+		if (reset) {
+			GEM_TRACE("engine_mask=%x\n", engine_mask);
+			ret = reset(i915, engine_mask, retry);
+		}
+		if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES)
+			break;
+
+		cond_resched();
+	}
+	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
+
+	return ret;
+}
+
+bool intel_has_gpu_reset(struct drm_i915_private *i915)
+{
+	return intel_get_gpu_reset(i915);
+}
+
+bool intel_has_reset_engine(struct drm_i915_private *i915)
+{
+	return i915->info.has_reset_engine && i915_modparams.reset >= 2;
+}
+
+int intel_reset_guc(struct drm_i915_private *i915)
+{
+	u32 guc_domain =
+		INTEL_GEN(i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
+	int ret;
+
+	GEM_BUG_ON(!HAS_GUC(i915));
+
+	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
+	ret = gen6_hw_domain_reset(i915, guc_domain);
+	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
+
+	return ret;
+}
+
+/*
+ * Ensure the irq handler finishes, and is not run again.
+ * Also return the active request so that we only search for it once.
+ */
+static struct i915_request *
+reset_prepare_engine(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	/*
+	 * During the reset sequence, we must prevent the engine from
+	 * entering RC6. As the context state is undefined until we restart
+	 * the engine, if it does enter RC6 during the reset, the state
+	 * written to the powercontext is undefined and so we may lose
+	 * GPU state upon resume, i.e. fail to restart after a reset.
+	 */
+	intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
+
+	rq = engine->reset.prepare(engine);
+	if (rq && rq->fence.error == -EIO)
+		rq = ERR_PTR(-EIO); /* Previous reset failed! */
+
+	return rq;
+}
+
+static int reset_prepare(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	struct i915_request *rq;
+	enum intel_engine_id id;
+	int err = 0;
+
+	for_each_engine(engine, i915, id) {
+		rq = reset_prepare_engine(engine);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			continue;
+		}
+
+		engine->hangcheck.active_request = rq;
+	}
+
+	i915_gem_revoke_fences(i915);
+	intel_uc_sanitize(i915);
+
+	return err;
+}
+
+/* Returns the request if it was guilty of the hang */
+static struct i915_request *
+reset_request(struct intel_engine_cs *engine,
+	      struct i915_request *rq,
+	      bool stalled)
+{
+	/*
+	 * The guilty request will get skipped on a hung engine.
+	 *
+	 * Users of client default contexts do not rely on logical
+	 * state preserved between batches so it is safe to execute
+	 * queued requests following the hang. Non default contexts
+	 * rely on preserved state, so skipping a batch loses the
+	 * evolution of the state and it needs to be considered corrupted.
+	 * Executing more queued batches on top of corrupted state is
+	 * risky. But we take the risk by trying to advance through
+	 * the queued requests in order to make the client behaviour
+	 * more predictable around resets, by not throwing away a random
+	 * number of batches it has prepared for execution. Sophisticated
+	 * clients can use gem_reset_stats_ioctl and dma fence status
+	 * (exported via sync_file info ioctl on explicit fences) to observe
+	 * when it loses the context state and should rebuild accordingly.
+	 *
+	 * The context ban, and ultimately the client ban, mechanism are safety
+	 * valves if client submission ends up resulting in nothing more than
+	 * subsequent hangs.
+	 */
+
+	if (i915_request_completed(rq)) {
+		GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n",
+			  engine->name, rq->global_seqno,
+			  rq->fence.context, rq->fence.seqno,
+			  intel_engine_get_seqno(engine));
+		stalled = false;
+	}
+
+	if (stalled) {
+		context_mark_guilty(rq->gem_context);
+		i915_request_skip(rq, -EIO);
+
+		/* If this context is now banned, skip all pending requests. */
+		if (i915_gem_context_is_banned(rq->gem_context))
+			engine_skip_context(rq);
+	} else {
+		/*
+		 * Since this is not the hung engine, it may have advanced
+		 * since the hang declaration. Double check by refinding
+		 * the active request at the time of the reset.
+		 */
+		rq = i915_gem_find_active_request(engine);
+		if (rq) {
+			unsigned long flags;
+
+			context_mark_innocent(rq->gem_context);
+			dma_fence_set_error(&rq->fence, -EAGAIN);
+
+			/* Rewind the engine to replay the incomplete rq */
+			spin_lock_irqsave(&engine->timeline.lock, flags);
+			rq = list_prev_entry(rq, link);
+			if (&rq->link == &engine->timeline.requests)
+				rq = NULL;
+			spin_unlock_irqrestore(&engine->timeline.lock, flags);
+		}
+	}
+
+	return rq;
+}
+
+static void reset_engine(struct intel_engine_cs *engine,
+			 struct i915_request *rq,
+			 bool stalled)
+{
+	/*
+	 * Make sure this write is visible before we re-enable the interrupt
+	 * handlers on another CPU, as tasklet_enable() resolves to just
+	 * a compiler barrier which is insufficient for our purpose here.
+	 */
+	smp_store_mb(engine->irq_posted, 0);
+
+	if (rq)
+		rq = reset_request(engine, rq, stalled);
+
+	/* Setup the CS to resume from the breadcrumb of the hung request */
+	engine->reset.reset(engine, rq);
+}
+
+static void gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	i915_retire_requests(i915);
+
+	for_each_engine(engine, i915, id) {
+		struct intel_context *ce;
+
+		reset_engine(engine,
+			     engine->hangcheck.active_request,
+			     stalled_mask & ENGINE_MASK(id));
+		ce = fetch_and_zero(&engine->last_retired_context);
+		if (ce)
+			intel_context_unpin(ce);
+
+		/*
+		 * Ostensibly, we always want a context loaded for powersaving,
+		 * so if the engine is idle after the reset, send a request
+		 * to load our scratch kernel_context.
+		 *
+		 * More mysteriously, if we leave the engine idle after a reset,
+		 * the next userspace batch may hang, with what appears to be
+		 * an incoherent read by the CS (presumably stale TLB). An
+		 * empty request appears sufficient to paper over the glitch.
+		 */
+		if (intel_engine_is_idle(engine)) {
+			struct i915_request *rq;
+
+			rq = i915_request_alloc(engine, i915->kernel_context);
+			if (!IS_ERR(rq))
+				i915_request_add(rq);
+		}
+	}
+
+	i915_gem_restore_fences(i915);
+}
+
+static void reset_finish_engine(struct intel_engine_cs *engine)
+{
+	engine->reset.finish(engine);
+
+	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
+}
+
+static void reset_finish(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	for_each_engine(engine, i915, id) {
+		engine->hangcheck.active_request = NULL;
+		reset_finish_engine(engine);
+	}
+}
+
+static void nop_submit_request(struct i915_request *rq)
+{
+	GEM_TRACE("%s fence %llx:%d -> -EIO\n",
+		  rq->engine->name, rq->fence.context, rq->fence.seqno);
+	dma_fence_set_error(&rq->fence, -EIO);
+
+	i915_request_submit(rq);
+}
+
+static void nop_complete_submit_request(struct i915_request *rq)
+{
+	unsigned long flags;
+
+	GEM_TRACE("%s fence %llx:%d -> -EIO\n",
+		  rq->engine->name,
+		  rq->fence.context, rq->fence.seqno);
+	dma_fence_set_error(&rq->fence, -EIO);
+
+	spin_lock_irqsave(&rq->engine->timeline.lock, flags);
+	__i915_request_submit(rq);
+	intel_engine_init_global_seqno(rq->engine, rq->global_seqno);
+	spin_unlock_irqrestore(&rq->engine->timeline.lock, flags);
+}
+
+void i915_gem_set_wedged(struct drm_i915_private *i915)
+{
+	struct i915_gpu_error *error = &i915->gpu_error;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	mutex_lock(&error->wedge_mutex);
+	if (test_bit(I915_WEDGED, &error->flags)) {
+		mutex_unlock(&error->wedge_mutex);
+		return;
+	}
+
+	if (GEM_SHOW_DEBUG()) {
+		struct drm_printer p = drm_debug_printer(__func__);
+
+		for_each_engine(engine, i915, id)
+			intel_engine_dump(engine, &p, "%s\n", engine->name);
+	}
+
+	GEM_TRACE("start\n");
+
+	/*
+	 * First, stop submission to hw, but do not yet complete requests by
+	 * rolling the global seqno forward (since this would complete requests
+	 * for which we haven't set the fence error to EIO yet).
+	 */
+	for_each_engine(engine, i915, id) {
+		reset_prepare_engine(engine);
+
+		engine->submit_request = nop_submit_request;
+		engine->schedule = NULL;
+	}
+	i915->caps.scheduler = 0;
+
+	/* Even if the GPU reset fails, it should still stop the engines */
+	if (INTEL_GEN(i915) >= 5)
+		intel_gpu_reset(i915, ALL_ENGINES);
+
+	/*
+	 * Make sure no one is running the old callback before we proceed with
+	 * cancelling requests and resetting the completion tracking. Otherwise
+	 * we might submit a request to the hardware which never completes.
+	 */
+	synchronize_rcu();
+
+	for_each_engine(engine, i915, id) {
+		/* Mark all executing requests as skipped */
+		engine->cancel_requests(engine);
+
+		/*
+		 * Only once we've force-cancelled all in-flight requests can we
+		 * start to complete all requests.
+		 */
+		engine->submit_request = nop_complete_submit_request;
+	}
+
+	/*
+	 * Make sure no request can slip through without getting completed by
+	 * either this call here to intel_engine_init_global_seqno, or the one
+	 * in nop_complete_submit_request.
+	 */
+	synchronize_rcu();
+
+	for_each_engine(engine, i915, id) {
+		unsigned long flags;
+
+		/*
+		 * Mark all pending requests as complete so that any concurrent
+		 * (lockless) lookup doesn't try and wait upon the request as we
+		 * reset it.
+		 */
+		spin_lock_irqsave(&engine->timeline.lock, flags);
+		intel_engine_init_global_seqno(engine,
+					       intel_engine_last_submit(engine));
+		spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
+		reset_finish_engine(engine);
+	}
+
+	smp_mb__before_atomic();
+	set_bit(I915_WEDGED, &error->flags);
+
+	GEM_TRACE("end\n");
+	mutex_unlock(&error->wedge_mutex);
+
+	wake_up_all(&error->reset_queue);
+}
+
+bool i915_gem_unset_wedged(struct drm_i915_private *i915)
+{
+	struct i915_gpu_error *error = &i915->gpu_error;
+	struct i915_timeline *tl;
+	bool ret = false;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	if (!test_bit(I915_WEDGED, &error->flags))
+		return true;
+
+	mutex_lock(&error->wedge_mutex);
+
+	GEM_TRACE("start\n");
+
+	/*
+	 * Before unwedging, make sure that all pending operations
+	 * are flushed and errored out - we may have requests waiting upon
+	 * third party fences. We marked all inflight requests as EIO, and
+	 * every execbuf since returned EIO, for consistency we want all
+	 * the currently pending requests to also be marked as EIO, which
+	 * is done inside our nop_submit_request - and so we must wait.
+	 *
+	 * No more can be submitted until we reset the wedged bit.
+	 */
+	list_for_each_entry(tl, &i915->gt.timelines, link) {
+		struct i915_request *rq;
+
+		rq = i915_gem_active_peek(&tl->last_request,
+					  &i915->drm.struct_mutex);
+		if (!rq)
+			continue;
+
+		/*
+		 * We can't use our normal waiter as we want to
+		 * avoid recursively trying to handle the current
+		 * reset. The basic dma_fence_default_wait() installs
+		 * a callback for dma_fence_signal(), which is
+		 * triggered by our nop handler (indirectly, the
+		 * callback enables the signaler thread which is
+		 * woken by the nop_submit_request() advancing the seqno
+		 * and when the seqno passes the fence, the signaler
+		 * then signals the fence waking us up).
+		 */
+		if (dma_fence_default_wait(&rq->fence, true,
+					   MAX_SCHEDULE_TIMEOUT) < 0)
+			goto unlock;
+	}
+	i915_retire_requests(i915);
+	GEM_BUG_ON(i915->gt.active_requests);
+
+	if (!intel_gpu_reset(i915, ALL_ENGINES))
+		intel_engines_sanitize(i915);
+
+	/*
+	 * Undo nop_submit_request. We prevent all new i915 requests from
+	 * being queued (by disallowing execbuf whilst wedged) so having
+	 * waited for all active requests above, we know the system is idle
+	 * and do not have to worry about a thread being inside
+	 * engine->submit_request() as we swap over. So unlike installing
+	 * the nop_submit_request on reset, we can do this from normal
+	 * context and do not require stop_machine().
+	 */
+	intel_engines_reset_default_submission(i915);
+	i915_gem_contexts_lost(i915);
+
+	GEM_TRACE("end\n");
+
+	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
+	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+	ret = true;
+unlock:
+	mutex_unlock(&i915->gpu_error.wedge_mutex);
+
+	return ret;
+}
+
+/**
+ * i915_reset - reset chip after a hang
+ * @i915: #drm_i915_private to reset
+ * @stalled_mask: mask of the stalled engines with the guilty requests
+ * @reason: user error message for why we are resetting
+ *
+ * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
+ * on failure.
+ *
+ * Caller must hold the struct_mutex.
+ *
+ * Procedure is fairly simple:
+ *   - reset the chip using the reset reg
+ *   - re-init context state
+ *   - re-init hardware status page
+ *   - re-init ring buffer
+ *   - re-init interrupt state
+ *   - re-init display
+ */
+void i915_reset(struct drm_i915_private *i915,
+		unsigned int stalled_mask,
+		const char *reason)
+{
+	struct i915_gpu_error *error = &i915->gpu_error;
+	int ret;
+	int i;
+
+	GEM_TRACE("flags=%lx\n", error->flags);
+
+	might_sleep();
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
+
+	if (!test_bit(I915_RESET_HANDOFF, &error->flags))
+		return;
+
+	/* Clear any previous failed attempts at recovery. Time to try again. */
+	if (!i915_gem_unset_wedged(i915))
+		goto wakeup;
+
+	if (reason)
+		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
+	error->reset_count++;
+
+	ret = reset_prepare(i915);
+	if (ret) {
+		dev_err(i915->drm.dev, "GPU recovery failed\n");
+		goto taint;
+	}
+
+	if (!intel_has_gpu_reset(i915)) {
+		if (i915_modparams.reset)
+			dev_err(i915->drm.dev, "GPU reset not supported\n");
+		else
+			DRM_DEBUG_DRIVER("GPU reset disabled\n");
+		goto error;
+	}
+
+	for (i = 0; i < 3; i++) {
+		ret = intel_gpu_reset(i915, ALL_ENGINES);
+		if (ret == 0)
+			break;
+
+		msleep(100);
+	}
+	if (ret) {
+		dev_err(i915->drm.dev, "Failed to reset chip\n");
+		goto taint;
+	}
+
+	/* Ok, now get things going again... */
+
+	/*
+	 * Everything depends on having the GTT running, so we need to start
+	 * there.
+	 */
+	ret = i915_ggtt_enable_hw(i915);
+	if (ret) {
+		DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n",
+			  ret);
+		goto error;
+	}
+
+	gt_reset(i915, stalled_mask);
+	intel_overlay_reset(i915);
+
+	/*
+	 * Next we need to restore the context, but we don't use those
+	 * yet either...
+	 *
+	 * Ring buffer needs to be re-initialized in the KMS case, or if X
+	 * was running at the time of the reset (i.e. we weren't VT
+	 * switched away).
+	 */
+	ret = i915_gem_init_hw(i915);
+	if (ret) {
+		DRM_ERROR("Failed to initialise HW following reset (%d)\n",
+			  ret);
+		goto error;
+	}
+
+	i915_queue_hangcheck(i915);
+
+finish:
+	reset_finish(i915);
+wakeup:
+	clear_bit(I915_RESET_HANDOFF, &error->flags);
+	wake_up_bit(&error->flags, I915_RESET_HANDOFF);
+	return;
+
+taint:
+	/*
+	 * History tells us that if we cannot reset the GPU now, we
+	 * never will. This then impacts everything that is run
+	 * subsequently. On failing the reset, we mark the driver
+	 * as wedged, preventing further execution on the GPU.
+	 * We also want to go one step further and add a taint to the
+	 * kernel so that any subsequent faults can be traced back to
+	 * this failure. This is important for CI, where if the
+	 * GPU/driver fails we would like to reboot and restart testing
+	 * rather than continue on into oblivion. For everyone else,
+	 * the system should still plod along, but they have been warned!
+	 */
+	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+error:
+	i915_gem_set_wedged(i915);
+	i915_retire_requests(i915);
+	goto finish;
+}
+
+static inline int intel_gt_reset_engine(struct drm_i915_private *i915,
+					struct intel_engine_cs *engine)
+{
+	return intel_gpu_reset(i915, intel_engine_flag(engine));
+}
+
+/**
+ * i915_reset_engine - reset GPU engine to recover from a hang
+ * @engine: engine to reset
+ * @msg: reason for GPU reset; or NULL for no dev_notice()
+ *
+ * Reset a specific GPU engine. Useful if a hang is detected.
+ * Returns zero on successful reset or otherwise an error code.
+ *
+ * Procedure is:
+ *  - identify the request that caused the hang and drop it
+ *  - reset engine (which will force the engine to idle)
+ *  - re-init/configure engine
+ */
+int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
+{
+	struct i915_gpu_error *error = &engine->i915->gpu_error;
+	struct i915_request *active_request;
+	int ret;
+
+	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
+	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
+
+	active_request = reset_prepare_engine(engine);
+	if (IS_ERR_OR_NULL(active_request)) {
+		/* Either the previous reset failed, or we pardon the reset. */
+		ret = PTR_ERR(active_request);
+		goto out;
+	}
+
+	if (msg)
+		dev_notice(engine->i915->drm.dev,
+			   "Resetting %s for %s\n", engine->name, msg);
+	error->reset_engine_count[engine->id]++;
+
+	if (!engine->i915->guc.execbuf_client)
+		ret = intel_gt_reset_engine(engine->i915, engine);
+	else
+		ret = intel_guc_reset_engine(&engine->i915->guc, engine);
+	if (ret) {
+		/* If we fail here, we expect to fallback to a global reset */
+		DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
+				 engine->i915->guc.execbuf_client ? "GuC " : "",
+				 engine->name, ret);
+		goto out;
+	}
+
+	/*
+	 * The request that caused the hang is stuck on elsp; we know the
+	 * active request and can drop it, adjusting the ring head to skip the
+	 * offending request so that we resume executing the remaining
+	 * requests in the queue.
+	 */
+	reset_engine(engine, active_request, true);
+
+	/*
+	 * The engine and its registers (and workarounds in case of render)
+	 * have been reset to their default values. Follow the init_ring
+	 * process to program RING_MODE, HWSP and re-enable submission.
+	 */
+	ret = engine->init_hw(engine);
+	if (ret)
+		goto out;
+
+out:
+	intel_engine_cancel_stop_cs(engine);
+	reset_finish_engine(engine);
+	return ret;
+}
+
+struct wedge_me {
+	struct delayed_work work;
+	struct drm_i915_private *i915;
+	const char *name;
+};
+
+static void wedge_me(struct work_struct *work)
+{
+	struct wedge_me *w = container_of(work, typeof(*w), work.work);
+
+	dev_err(w->i915->drm.dev,
+		"%s timed out, cancelling all in-flight rendering.\n",
+		w->name);
+	i915_gem_set_wedged(w->i915);
+}
+
+static void __init_wedge(struct wedge_me *w,
+			 struct drm_i915_private *i915,
+			 long timeout,
+			 const char *name)
+{
+	w->i915 = i915;
+	w->name = name;
+
+	INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me);
+	schedule_delayed_work(&w->work, timeout);
+}
+
+static void __fini_wedge(struct wedge_me *w)
+{
+	cancel_delayed_work_sync(&w->work);
+	destroy_delayed_work_on_stack(&w->work);
+	w->i915 = NULL;
+}
+
+#define i915_wedge_on_timeout(W, DEV, TIMEOUT)				\
+	for (__init_wedge((W), (DEV), (TIMEOUT), __func__);		\
+	     (W)->i915;							\
+	     __fini_wedge((W)))
+
+static void i915_reset_device(struct drm_i915_private *i915,
+			      u32 engine_mask,
+			      const char *reason)
+{
+	struct i915_gpu_error *error = &i915->gpu_error;
+	struct kobject *kobj = &i915->drm.primary->kdev->kobj;
+	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
+	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
+	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
+	struct wedge_me w;
+
+	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
+
+	DRM_DEBUG_DRIVER("resetting chip\n");
+	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
+
+	/* Use a watchdog to ensure that our reset completes */
+	i915_wedge_on_timeout(&w, i915, 5 * HZ) {
+		intel_prepare_reset(i915);
+
+		error->reason = reason;
+		error->stalled_mask = engine_mask;
+
+		/* Signal that locked waiters should reset the GPU */
+		smp_mb__before_atomic();
+		set_bit(I915_RESET_HANDOFF, &error->flags);
+		wake_up_all(&error->wait_queue);
+
+		/*
+		 * Wait for anyone holding the lock to wakeup, without
+		 * blocking indefinitely on struct_mutex.
+		 */
+		do {
+			if (mutex_trylock(&i915->drm.struct_mutex)) {
+				i915_reset(i915, engine_mask, reason);
+				mutex_unlock(&i915->drm.struct_mutex);
+			}
+		} while (wait_on_bit_timeout(&error->flags,
+					     I915_RESET_HANDOFF,
+					     TASK_UNINTERRUPTIBLE,
+					     1));
+
+		error->stalled_mask = 0;
+		error->reason = NULL;
+
+		intel_finish_reset(i915);
+	}
+
+	if (!test_bit(I915_WEDGED, &error->flags))
+		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
+}
+
+void i915_clear_error_registers(struct drm_i915_private *dev_priv)
+{
+	u32 eir;
+
+	if (!IS_GEN2(dev_priv))
+		I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER));
+
+	if (INTEL_GEN(dev_priv) < 4)
+		I915_WRITE(IPEIR, I915_READ(IPEIR));
+	else
+		I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965));
+
+	I915_WRITE(EIR, I915_READ(EIR));
+	eir = I915_READ(EIR);
+	if (eir) {
+		/*
+		 * some errors might have become stuck,
+		 * mask them.
+		 */
+		DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
+		I915_WRITE(EMR, I915_READ(EMR) | eir);
+		I915_WRITE(IIR, I915_MASTER_ERROR_INTERRUPT);
+	}
+
+	if (INTEL_GEN(dev_priv) >= 8) {
+		I915_WRITE(GEN8_RING_FAULT_REG,
+			   I915_READ(GEN8_RING_FAULT_REG) & ~RING_FAULT_VALID);
+		POSTING_READ(GEN8_RING_FAULT_REG);
+	} else if (INTEL_GEN(dev_priv) >= 6) {
+		struct intel_engine_cs *engine;
+		enum intel_engine_id id;
+
+		for_each_engine(engine, dev_priv, id) {
+			I915_WRITE(RING_FAULT_REG(engine),
+				   I915_READ(RING_FAULT_REG(engine)) &
+				   ~RING_FAULT_VALID);
+		}
+		POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS]));
+	}
+}
+
+/**
+ * i915_handle_error - handle a gpu error
+ * @i915: i915 device private
+ * @engine_mask: mask representing engines that are hung
+ * @flags: control flags
+ * @fmt: Error message format string
+ *
+ * Do some basic checking of register state at error time and
+ * dump it to the syslog.  Also call i915_capture_error_state() to make
+ * sure we get a record and make it available in debugfs.  Fire a uevent
+ * so userspace knows something bad happened (should trigger collection
+ * of a ring dump etc.).
+ */
+void i915_handle_error(struct drm_i915_private *i915,
+		       u32 engine_mask,
+		       unsigned long flags,
+		       const char *fmt, ...)
+{
+	struct intel_engine_cs *engine;
+	intel_wakeref_t wakeref;
+	unsigned int tmp;
+	char error_msg[80];
+	char *msg = NULL;
+
+	if (fmt) {
+		va_list args;
+
+		va_start(args, fmt);
+		vscnprintf(error_msg, sizeof(error_msg), fmt, args);
+		va_end(args);
+
+		msg = error_msg;
+	}
+
+	/*
+	 * In most cases it's guaranteed that we get here with an RPM
+	 * reference held, for example because there is a pending GPU
+	 * request that won't finish until the reset is done. This
+	 * isn't the case at least when we get here by doing a
+	 * simulated reset via debugfs, so get an RPM reference.
+	 */
+	wakeref = intel_runtime_pm_get(i915);
+
+	engine_mask &= INTEL_INFO(i915)->ring_mask;
+
+	if (flags & I915_ERROR_CAPTURE) {
+		i915_capture_error_state(i915, engine_mask, msg);
+		i915_clear_error_registers(i915);
+	}
+
+	/*
+	 * Try engine reset when available. We fall back to full reset if
+	 * single reset fails.
+	 */
+	if (intel_has_reset_engine(i915) &&
+	    !i915_terminally_wedged(&i915->gpu_error)) {
+		for_each_engine_masked(engine, i915, engine_mask, tmp) {
+			BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
+			if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+					     &i915->gpu_error.flags))
+				continue;
+
+			if (i915_reset_engine(engine, msg) == 0)
+				engine_mask &= ~intel_engine_flag(engine);
+
+			clear_bit(I915_RESET_ENGINE + engine->id,
+				  &i915->gpu_error.flags);
+			wake_up_bit(&i915->gpu_error.flags,
+				    I915_RESET_ENGINE + engine->id);
+		}
+	}
+
+	if (!engine_mask)
+		goto out;
+
+	/* Full reset needs the mutex, stop any other user trying to do so. */
+	if (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) {
+		wait_event(i915->gpu_error.reset_queue,
+			   !test_bit(I915_RESET_BACKOFF,
+				     &i915->gpu_error.flags));
+		goto out;
+	}
+
+	/* Prevent any other reset-engine attempt. */
+	for_each_engine(engine, i915, tmp) {
+		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+					&i915->gpu_error.flags))
+			wait_on_bit(&i915->gpu_error.flags,
+				    I915_RESET_ENGINE + engine->id,
+				    TASK_UNINTERRUPTIBLE);
+	}
+
+	i915_reset_device(i915, engine_mask, msg);
+
+	for_each_engine(engine, i915, tmp) {
+		clear_bit(I915_RESET_ENGINE + engine->id,
+			  &i915->gpu_error.flags);
+	}
+
+	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
+	wake_up_all(&i915->gpu_error.reset_queue);
+
+out:
+	intel_runtime_pm_put(i915, wakeref);
+}
diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h
new file mode 100644
index 000000000000..3ae7e25aa569
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_reset.h
@@ -0,0 +1,39 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008-2018 Intel Corporation
+ */
+
+#ifndef I915_RESET_H
+#define I915_RESET_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct intel_engine_cs;
+struct intel_guc;
+
+__printf(4, 5)
+void i915_handle_error(struct drm_i915_private *i915,
+		       u32 engine_mask,
+		       unsigned long flags,
+		       const char *fmt, ...);
+#define I915_ERROR_CAPTURE BIT(0)
+
+void i915_clear_error_registers(struct drm_i915_private *i915);
+
+void i915_reset(struct drm_i915_private *i915,
+		unsigned int stalled_mask,
+		const char *reason);
+int i915_reset_engine(struct intel_engine_cs *engine,
+		      const char *reason);
+
+bool intel_has_gpu_reset(struct drm_i915_private *i915);
+bool intel_has_reset_engine(struct drm_i915_private *i915);
+
+int intel_gpu_reset(struct drm_i915_private *i915, u32 engine_mask);
+
+int intel_reset_guc(struct drm_i915_private *i915);
+
+#endif /* I915_RESET_H */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 35d43b34b6e1..11504a5dbd23 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -33,13 +33,7 @@
 #include <linux/vgaarb.h>
 #include <drm/drm_edid.h>
 #include <drm/drmP.h>
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "intel_dsi.h"
-#include "i915_trace.h"
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_dp_helper.h>
@@ -50,6 +44,15 @@
 #include <linux/dma_remapping.h>
 #include <linux/reservation.h>
 
+#include "intel_drv.h"
+#include "intel_dsi.h"
+#include "intel_frontbuffer.h"
+
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+#include "i915_reset.h"
+#include "i915_trace.h"
+
 /* Primary plane formats for gen <= 3 */
 static const uint32_t i8xx_primary_formats[] = {
 	DRM_FORMAT_C8,
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index ad42faf48c46..fa06a3c05a20 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -187,4 +187,7 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask)
 	spin_unlock_irq(&guc->irq_lock);
 }
 
+int intel_guc_reset_engine(struct intel_guc *guc,
+			   struct intel_engine_cs *engine);
+
 #endif
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index e26d05a46451..c94a46525b0e 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -23,6 +23,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_reset.h"
 
 static bool
 ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr)
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index b1b3e81b6e24..4b6dc81486c0 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -26,6 +26,7 @@
 #include "intel_guc_submission.h"
 #include "intel_guc.h"
 #include "i915_drv.h"
+#include "i915_reset.h"
 
 static void guc_free_load_err_log(struct intel_guc *guc);
 
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index e611dfb2543e..8077a53a1863 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1715,267 +1715,6 @@ int i915_reg_read_ioctl(struct drm_device *dev,
 	return ret;
 }
 
-static void gen3_stop_engine(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	const u32 base = engine->mmio_base;
-
-	if (intel_engine_stop_cs(engine))
-		DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", engine->name);
-
-	I915_WRITE_FW(RING_HEAD(base), I915_READ_FW(RING_TAIL(base)));
-	POSTING_READ_FW(RING_HEAD(base)); /* paranoia */
-
-	I915_WRITE_FW(RING_HEAD(base), 0);
-	I915_WRITE_FW(RING_TAIL(base), 0);
-	POSTING_READ_FW(RING_TAIL(base));
-
-	/* The ring must be empty before it is disabled */
-	I915_WRITE_FW(RING_CTL(base), 0);
-
-	/* Check acts as a post */
-	if (I915_READ_FW(RING_HEAD(base)) != 0)
-		DRM_DEBUG_DRIVER("%s: ring head not parked\n",
-				 engine->name);
-}
-
-static void i915_stop_engines(struct drm_i915_private *dev_priv,
-			      unsigned int engine_mask)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	if (INTEL_GEN(dev_priv) < 3)
-		return;
-
-	for_each_engine_masked(engine, dev_priv, engine_mask, id)
-		gen3_stop_engine(engine);
-}
-
-static bool i915_in_reset(struct pci_dev *pdev)
-{
-	u8 gdrst;
-
-	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
-	return gdrst & GRDOM_RESET_STATUS;
-}
-
-static int i915_do_reset(struct drm_i915_private *dev_priv,
-			 unsigned int engine_mask,
-			 unsigned int retry)
-{
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	int err;
-
-	/* Assert reset for at least 20 usec, and wait for acknowledgement. */
-	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
-	usleep_range(50, 200);
-	err = wait_for(i915_in_reset(pdev), 500);
-
-	/* Clear the reset request. */
-	pci_write_config_byte(pdev, I915_GDRST, 0);
-	usleep_range(50, 200);
-	if (!err)
-		err = wait_for(!i915_in_reset(pdev), 500);
-
-	return err;
-}
-
-static bool g4x_reset_complete(struct pci_dev *pdev)
-{
-	u8 gdrst;
-
-	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
-	return (gdrst & GRDOM_RESET_ENABLE) == 0;
-}
-
-static int g33_do_reset(struct drm_i915_private *dev_priv,
-			unsigned int engine_mask,
-			unsigned int retry)
-{
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-
-	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
-	return wait_for(g4x_reset_complete(pdev), 500);
-}
-
-static int g4x_do_reset(struct drm_i915_private *dev_priv,
-			unsigned int engine_mask,
-			unsigned int retry)
-{
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	int ret;
-
-	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
-	I915_WRITE(VDECCLK_GATE_D,
-		   I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
-	POSTING_READ(VDECCLK_GATE_D);
-
-	pci_write_config_byte(pdev, I915_GDRST,
-			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
-	ret =  wait_for(g4x_reset_complete(pdev), 500);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
-		goto out;
-	}
-
-	pci_write_config_byte(pdev, I915_GDRST,
-			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
-	ret =  wait_for(g4x_reset_complete(pdev), 500);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
-		goto out;
-	}
-
-out:
-	pci_write_config_byte(pdev, I915_GDRST, 0);
-
-	I915_WRITE(VDECCLK_GATE_D,
-		   I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
-	POSTING_READ(VDECCLK_GATE_D);
-
-	return ret;
-}
-
-static int ironlake_do_reset(struct drm_i915_private *dev_priv,
-			     unsigned int engine_mask,
-			     unsigned int retry)
-{
-	int ret;
-
-	I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
-	ret = intel_wait_for_register(dev_priv,
-				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
-				      500);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
-		goto out;
-	}
-
-	I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
-	ret = intel_wait_for_register(dev_priv,
-				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
-				      500);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
-		goto out;
-	}
-
-out:
-	I915_WRITE(ILK_GDSR, 0);
-	POSTING_READ(ILK_GDSR);
-	return ret;
-}
-
-/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
-static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv,
-				u32 hw_domain_mask)
-{
-	int err;
-
-	/* GEN6_GDRST is not in the gt power well, no need to check
-	 * for fifo space for the write or forcewake the chip for
-	 * the read
-	 */
-	__raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask);
-
-	/* Wait for the device to ack the reset requests */
-	err = __intel_wait_for_register_fw(dev_priv,
-					   GEN6_GDRST, hw_domain_mask, 0,
-					   500, 0,
-					   NULL);
-	if (err)
-		DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
-				 hw_domain_mask);
-
-	return err;
-}
-
-/**
- * gen6_reset_engines - reset individual engines
- * @dev_priv: i915 device
- * @engine_mask: mask of intel_ring_flag() engines or ALL_ENGINES for full reset
- * @retry: the count of of previous attempts to reset.
- *
- * This function will reset the individual engines that are set in engine_mask.
- * If you provide ALL_ENGINES as mask, full global domain reset will be issued.
- *
- * Note: It is responsibility of the caller to handle the difference between
- * asking full domain reset versus reset for all available individual engines.
- *
- * Returns 0 on success, nonzero on error.
- */
-static int gen6_reset_engines(struct drm_i915_private *dev_priv,
-			      unsigned int engine_mask,
-			      unsigned int retry)
-{
-	struct intel_engine_cs *engine;
-	const u32 hw_engine_mask[I915_NUM_ENGINES] = {
-		[RCS] = GEN6_GRDOM_RENDER,
-		[BCS] = GEN6_GRDOM_BLT,
-		[VCS] = GEN6_GRDOM_MEDIA,
-		[VCS2] = GEN8_GRDOM_MEDIA2,
-		[VECS] = GEN6_GRDOM_VECS,
-	};
-	u32 hw_mask;
-
-	if (engine_mask == ALL_ENGINES) {
-		hw_mask = GEN6_GRDOM_FULL;
-	} else {
-		unsigned int tmp;
-
-		hw_mask = 0;
-		for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
-			hw_mask |= hw_engine_mask[engine->id];
-	}
-
-	return gen6_hw_domain_reset(dev_priv, hw_mask);
-}
-
-/**
- * gen11_reset_engines - reset individual engines
- * @dev_priv: i915 device
- * @engine_mask: mask of intel_ring_flag() engines or ALL_ENGINES for full reset
- *
- * This function will reset the individual engines that are set in engine_mask.
- * If you provide ALL_ENGINES as mask, full global domain reset will be issued.
- *
- * Note: It is responsibility of the caller to handle the difference between
- * asking full domain reset versus reset for all available individual engines.
- *
- * Returns 0 on success, nonzero on error.
- */
-static int gen11_reset_engines(struct drm_i915_private *dev_priv,
-			       unsigned int engine_mask)
-{
-	struct intel_engine_cs *engine;
-	const u32 hw_engine_mask[I915_NUM_ENGINES] = {
-		[RCS] = GEN11_GRDOM_RENDER,
-		[BCS] = GEN11_GRDOM_BLT,
-		[VCS] = GEN11_GRDOM_MEDIA,
-		[VCS2] = GEN11_GRDOM_MEDIA2,
-		[VCS3] = GEN11_GRDOM_MEDIA3,
-		[VCS4] = GEN11_GRDOM_MEDIA4,
-		[VECS] = GEN11_GRDOM_VECS,
-		[VECS2] = GEN11_GRDOM_VECS2,
-	};
-	u32 hw_mask;
-
-	BUILD_BUG_ON(VECS2 + 1 != I915_NUM_ENGINES);
-
-	if (engine_mask == ALL_ENGINES) {
-		hw_mask = GEN11_GRDOM_FULL;
-	} else {
-		unsigned int tmp;
-
-		hw_mask = 0;
-		for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
-			hw_mask |= hw_engine_mask[engine->id];
-	}
-
-	return gen6_hw_domain_reset(dev_priv, hw_mask);
-}
-
 /**
  * __intel_wait_for_register_fw - wait until register matches expected state
  * @dev_priv: the i915 device
@@ -2086,196 +1825,6 @@ int __intel_wait_for_register(struct drm_i915_private *dev_priv,
 	return ret;
 }
 
-static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
-		      _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
-
-	ret = __intel_wait_for_register_fw(dev_priv,
-					   RING_RESET_CTL(engine->mmio_base),
-					   RESET_CTL_READY_TO_RESET,
-					   RESET_CTL_READY_TO_RESET,
-					   700, 0,
-					   NULL);
-	if (ret)
-		DRM_ERROR("%s: reset request timeout\n", engine->name);
-
-	return ret;
-}
-
-static void gen8_engine_reset_cancel(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
-		      _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
-}
-
-static int reset_engines(struct drm_i915_private *i915,
-			 unsigned int engine_mask,
-			 unsigned int retry)
-{
-	if (INTEL_GEN(i915) >= 11)
-		return gen11_reset_engines(i915, engine_mask);
-	else
-		return gen6_reset_engines(i915, engine_mask, retry);
-}
-
-static int gen8_reset_engines(struct drm_i915_private *dev_priv,
-			      unsigned int engine_mask,
-			      unsigned int retry)
-{
-	struct intel_engine_cs *engine;
-	const bool reset_non_ready = retry >= 1;
-	unsigned int tmp;
-	int ret;
-
-	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
-		ret = gen8_engine_reset_prepare(engine);
-		if (ret && !reset_non_ready)
-			goto skip_reset;
-
-		/*
-		 * If this is not the first failed attempt to prepare,
-		 * we decide to proceed anyway.
-		 *
-		 * By doing so we risk context corruption and with
-		 * some gens (kbl), possible system hang if reset
-		 * happens during active bb execution.
-		 *
-		 * We rather take context corruption instead of
-		 * failed reset with a wedged driver/gpu. And
-		 * active bb execution case should be covered by
-		 * i915_stop_engines we have before the reset.
-		 */
-	}
-
-	ret = reset_engines(dev_priv, engine_mask, retry);
-
-skip_reset:
-	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
-		gen8_engine_reset_cancel(engine);
-
-	return ret;
-}
-
-typedef int (*reset_func)(struct drm_i915_private *,
-			  unsigned int engine_mask, unsigned int retry);
-
-static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv)
-{
-	if (!i915_modparams.reset)
-		return NULL;
-
-	if (INTEL_GEN(dev_priv) >= 8)
-		return gen8_reset_engines;
-	else if (INTEL_GEN(dev_priv) >= 6)
-		return gen6_reset_engines;
-	else if (IS_GEN5(dev_priv))
-		return ironlake_do_reset;
-	else if (IS_G4X(dev_priv))
-		return g4x_do_reset;
-	else if (IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
-		return g33_do_reset;
-	else if (INTEL_GEN(dev_priv) >= 3)
-		return i915_do_reset;
-	else
-		return NULL;
-}
-
-int intel_gpu_reset(struct drm_i915_private *dev_priv,
-		    const unsigned int engine_mask)
-{
-	reset_func reset = intel_get_gpu_reset(dev_priv);
-	unsigned int retry;
-	int ret;
-
-	GEM_BUG_ON(!engine_mask);
-
-	/*
-	 * We want to perform per-engine reset from atomic context (e.g.
-	 * softirq), which imposes the constraint that we cannot sleep.
-	 * However, experience suggests that spending a bit of time waiting
-	 * for a reset helps in various cases, so for a full-device reset
-	 * we apply the opposite rule and wait if we want to. As we should
-	 * always follow up a failed per-engine reset with a full device reset,
-	 * being a little faster, stricter and more error prone for the
-	 * atomic case seems an acceptable compromise.
-	 *
-	 * Unfortunately this leads to a bimodal routine, when the goal was
-	 * to have a single reset function that worked for resetting any
-	 * number of engines simultaneously.
-	 */
-	might_sleep_if(engine_mask == ALL_ENGINES);
-
-	/*
-	 * If the power well sleeps during the reset, the reset
-	 * request may be dropped and never completes (causing -EIO).
-	 */
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-	for (retry = 0; retry < 3; retry++) {
-
-		/*
-		 * We stop engines, otherwise we might get failed reset and a
-		 * dead gpu (on elk). Also as modern gpu as kbl can suffer
-		 * from system hang if batchbuffer is progressing when
-		 * the reset is issued, regardless of READY_TO_RESET ack.
-		 * Thus assume it is best to stop engines on all gens
-		 * where we have a gpu reset.
-		 *
-		 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
-		 *
-		 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
-		 *
-		 * FIXME: Wa for more modern gens needs to be validated
-		 */
-		i915_stop_engines(dev_priv, engine_mask);
-
-		ret = -ENODEV;
-		if (reset) {
-			ret = reset(dev_priv, engine_mask, retry);
-			GEM_TRACE("engine_mask=%x, ret=%d, retry=%d\n",
-				  engine_mask, ret, retry);
-		}
-		if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES)
-			break;
-
-		cond_resched();
-	}
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-
-	return ret;
-}
-
-bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)
-{
-	return intel_get_gpu_reset(dev_priv) != NULL;
-}
-
-bool intel_has_reset_engine(struct drm_i915_private *dev_priv)
-{
-	return (dev_priv->info.has_reset_engine &&
-		i915_modparams.reset >= 2);
-}
-
-int intel_reset_guc(struct drm_i915_private *dev_priv)
-{
-	u32 guc_domain = INTEL_GEN(dev_priv) >= 11 ? GEN11_GRDOM_GUC :
-						     GEN9_GRDOM_GUC;
-	int ret;
-
-	GEM_BUG_ON(!HAS_GUC(dev_priv));
-
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-	ret = gen6_hw_domain_reset(dev_priv, guc_domain);
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-
-	return ret;
-}
-
 bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv)
 {
 	return check_for_unclaimed_mmio(dev_priv);
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 95f7e9364947..b6e11b58b3c9 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -4,6 +4,8 @@
  * Copyright © 2018 Intel Corporation
  */
 
+#include "../i915_reset.h"
+
 #include "../i915_selftest.h"
 #include "igt_flush_test.h"
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index e2b542c6bfb1..6688bdd8b6eb 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -5,6 +5,7 @@
  */
 
 #include "../i915_selftest.h"
+#include "../i915_reset.h"
 
 #include "igt_wedge_me.h"
 #include "mock_context.h"
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 31/40] drm/i915: Make all GPU resets atomic
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (28 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 30/40] drm/i915: Pull all the reset functionality together into i915_reset.c Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-19 19:55 ` [PATCH 32/40] drm/i915: Introduce the i915_user_extension_method Chris Wilson
                   ` (12 subsequent siblings)
  42 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

In preparation for the next few commits, make resetting the GPU atomic.
We have already prepared gen6+ for atomic resetting of individual
engines, but now there is a requirement to perform the whole
device-level reset (just the register poking) from inside an atomic
context.
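
As a rough sketch of the pattern this switches to (paraphrasing the diff
below, using the existing wait_for()/wait_for_atomic() helpers): sleeping
waits are replaced by bounded busy-waits so the reset backends remain
legal with preemption disabled.

	/* sleeping variant: only legal in process context */
	err = wait_for(i915_in_reset(pdev), 500);

	/* atomic variant: busy-waits, safe under preempt_disable() */
	preempt_disable();
	err = wait_for_atomic(i915_in_reset(pdev), 50);
	preempt_enable();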

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_reset.c | 50 +++++++++++++++++--------------
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index d52de3ec9ba0..7a19fb78601b 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -144,14 +144,14 @@ static int i915_do_reset(struct drm_i915_private *i915,
 
 	/* Assert reset for at least 20 usec, and wait for acknowledgement. */
 	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
-	usleep_range(50, 200);
-	err = wait_for(i915_in_reset(pdev), 500);
+	udelay(50);
+	err = wait_for_atomic(i915_in_reset(pdev), 50);
 
 	/* Clear the reset request. */
 	pci_write_config_byte(pdev, I915_GDRST, 0);
-	usleep_range(50, 200);
+	udelay(50);
 	if (!err)
-		err = wait_for(!i915_in_reset(pdev), 500);
+		err = wait_for_atomic(!i915_in_reset(pdev), 50);
 
 	return err;
 }
@@ -171,7 +171,7 @@ static int g33_do_reset(struct drm_i915_private *i915,
 	struct pci_dev *pdev = i915->drm.pdev;
 
 	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
-	return wait_for(g4x_reset_complete(pdev), 500);
+	return wait_for_atomic(g4x_reset_complete(pdev), 50);
 }
 
 static int g4x_do_reset(struct drm_i915_private *dev_priv,
@@ -182,13 +182,13 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv,
 	int ret;
 
 	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
-	I915_WRITE(VDECCLK_GATE_D,
-		   I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
-	POSTING_READ(VDECCLK_GATE_D);
+	I915_WRITE_FW(VDECCLK_GATE_D,
+		      I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
+	POSTING_READ_FW(VDECCLK_GATE_D);
 
 	pci_write_config_byte(pdev, I915_GDRST,
 			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
-	ret =  wait_for(g4x_reset_complete(pdev), 500);
+	ret =  wait_for_atomic(g4x_reset_complete(pdev), 50);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
 		goto out;
@@ -196,7 +196,7 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv,
 
 	pci_write_config_byte(pdev, I915_GDRST,
 			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
-	ret =  wait_for(g4x_reset_complete(pdev), 500);
+	ret =  wait_for_atomic(g4x_reset_complete(pdev), 50);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
 		goto out;
@@ -205,9 +205,9 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv,
 out:
 	pci_write_config_byte(pdev, I915_GDRST, 0);
 
-	I915_WRITE(VDECCLK_GATE_D,
-		   I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
-	POSTING_READ(VDECCLK_GATE_D);
+	I915_WRITE_FW(VDECCLK_GATE_D,
+		      I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
+	POSTING_READ_FW(VDECCLK_GATE_D);
 
 	return ret;
 }
@@ -218,27 +218,29 @@ static int ironlake_do_reset(struct drm_i915_private *dev_priv,
 {
 	int ret;
 
-	I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
-	ret = intel_wait_for_register(dev_priv,
-				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
-				      500);
+	I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
+	ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR,
+					   ILK_GRDOM_RESET_ENABLE, 0,
+					   5000, 0,
+					   NULL);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
 		goto out;
 	}
 
-	I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
-	ret = intel_wait_for_register(dev_priv,
-				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
-				      500);
+	I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
+	ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR,
+					   ILK_GRDOM_RESET_ENABLE, 0,
+					   5000, 0,
+					   NULL);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
 		goto out;
 	}
 
 out:
-	I915_WRITE(ILK_GDSR, 0);
-	POSTING_READ(ILK_GDSR);
+	I915_WRITE_FW(ILK_GDSR, 0);
+	POSTING_READ_FW(ILK_GDSR);
 	return ret;
 }
 
@@ -467,7 +469,9 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)
 		ret = -ENODEV;
 		if (reset) {
 			GEM_TRACE("engine_mask=%x\n", engine_mask);
+			preempt_disable();
 			ret = reset(i915, engine_mask, retry);
+			preempt_enable();
 		}
 		if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES)
 			break;
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 32/40] drm/i915: Introduce the i915_user_extension_method
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (29 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 31/40] drm/i915: Make all GPU resets atomic Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-24 13:20   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 33/40] drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone() Chris Wilson
                   ` (11 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

An idea for extending uABI inspired by Vulkan's extension chains.
Instead of expanding the data struct for each ioctl every time we need
to add a new feature, define an extension chain instead. As we add
optional interfaces to control the ioctl, we define a new extension
struct that can be linked into the ioctl data only when required by the
user. The key advantage is being able to ignore large control structs for
optional interfaces/extensions, while still being able to process them in a
consistent manner.

In comparison to other extensible ioctls, the key difference is the
use of a linked chain of extension structs vs an array of tagged
pointers. For example,

struct drm_amdgpu_cs_chunk {
        __u32           chunk_id;
        __u32           length_dw;
        __u64           chunk_data;
};

struct drm_amdgpu_cs_in {
        __u32           ctx_id;
        __u32           bo_list_handle;
        __u32           num_chunks;
        __u32           _pad;
        __u64           chunks;
};

allows userspace to pass in an array of pointers to extension structs, but
must therefore keep constructing that array alongside the command stream.
In dynamic situations like that, a linked list is preferred and does not
suffer from extra cache line misses, as the extension structs themselves
must still be loaded separately from the chunks array.
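
To make the chaining concrete, a userspace sketch might look like the
following (purely illustrative: the extension structs and ids below are
hypothetical, only struct i915_user_extension itself is defined here):

	struct hypothetical_ext_foo {
		struct i915_user_extension base; /* name + next_extension */
		__u32 value;
	} foo = {
		.base.name = 1, /* placeholder extension id */
		.value = 42,
	};

	struct hypothetical_ext_bar {
		struct i915_user_extension base;
		__u64 flags;
	} bar = {
		.base.name = 2, /* placeholder extension id */
	};

	/* link bar after foo, then pass the head of the chain to the ioctl */
	foo.base.next_extension = (__u64)(uintptr_t)&bar;
	args.extensions = (__u64)(uintptr_t)&foo; /* hypothetical field */

The kernel then walks the chain with i915_user_extensions(), dispatching
each node by its name into a per-ioctl table of handlers.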

v2: Apply the tail call optimisation directly to nip the worry of stack
overflow in the bud.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile               |  1 +
 drivers/gpu/drm/i915/i915_user_extensions.c | 37 +++++++++++++++++++++
 drivers/gpu/drm/i915/i915_user_extensions.h | 20 +++++++++++
 include/uapi/drm/i915_drm.h                 | 20 +++++++++++
 4 files changed, 78 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.c
 create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 4654ef481b45..6ffeb60392f5 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -45,6 +45,7 @@ i915-y := i915_drv.o \
 	  i915_syncmap.o \
 	  i915_sw_fence.o \
 	  i915_sysfs.o \
+	  i915_user_extensions.o \
 	  intel_csr.o \
 	  intel_device_info.o \
 	  intel_pm.o \
diff --git a/drivers/gpu/drm/i915/i915_user_extensions.c b/drivers/gpu/drm/i915/i915_user_extensions.c
new file mode 100644
index 000000000000..00622f8ac468
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_user_extensions.c
@@ -0,0 +1,37 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include <linux/uaccess.h>
+#include <uapi/drm/i915_drm.h>
+
+#include "i915_user_extensions.h"
+
+int i915_user_extensions(struct i915_user_extension __user *ext,
+			 const i915_user_extension_fn *tbl,
+			 unsigned long count,
+			 void *data)
+{
+	while (ext) {
+		int err;
+		u64 x;
+
+		if (get_user(x, &ext->name))
+			return -EFAULT;
+
+		err = -EINVAL;
+		if (x < count && tbl[x])
+			err = tbl[x](ext, data);
+		if (err)
+			return err;
+
+		if (get_user(x, &ext->next_extension))
+			return -EFAULT;
+
+		ext = u64_to_user_ptr(x);
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_user_extensions.h b/drivers/gpu/drm/i915/i915_user_extensions.h
new file mode 100644
index 000000000000..313a510b068a
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_user_extensions.h
@@ -0,0 +1,20 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef I915_USER_EXTENSIONS_H
+#define I915_USER_EXTENSIONS_H
+
+struct i915_user_extension;
+
+typedef int (*i915_user_extension_fn)(struct i915_user_extension __user *ext,
+				      void *data);
+
+int i915_user_extensions(struct i915_user_extension __user *ext,
+			 const i915_user_extension_fn *tbl,
+			 unsigned long count,
+			 void *data);
+
+#endif /* I915_USER_EXTENSIONS_H */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a4446f452040..4fa7e54501cc 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -62,6 +62,26 @@ extern "C" {
 #define I915_ERROR_UEVENT		"ERROR"
 #define I915_RESET_UEVENT		"RESET"
 
+/*
+ * i915_user_extension: Base class for defining a chain of extensions
+ *
+ * Many interfaces need to grow over time. In most cases we can simply
+ * extend the struct and have userspace pass in more data. Another option,
+ * as demonstrated by Vulkan's approach to providing extensions for forward
+ * and backward compatibility, is to use a list of optional structs to
+ * provide those extra details.
+ *
+ * The key advantage to using an extension chain is that it allows us to
+ * redefine the interface more easily than an ever growing struct of
+ * increasing complexity, and for large parts of that interface to be
+ * entirely optional. The downside is more pointer chasing; chasing across
+ * the __user boundary with pointers encapsulated inside u64.
+ */
+struct i915_user_extension {
+	__u64 next_extension;
+	__u64 name;
+};
+
 /*
  * MOCS indexes used for GPU surfaces, defining the cacheability of the
  * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* [PATCH 33/40] drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone()
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (30 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 32/40] drm/i915: Introduce the i915_user_extension_method Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-24 17:22   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 34/40] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
                   ` (10 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

A context encompasses the driver's view of process related state, and
encapsulates the logical GPU state where available. Each context is
currently equivalent to a process in CPU terms. Like with processes,
sometimes the user wants a lighter encapsulation that shares some state
with the parent process, for example two threads have unique register
state but share the virtual memory mappings. We can support exactly the
same principle using contexts where we may share the GTT but keep the
logical GPU state distinct. This allows quicker switching between those
contexts, and for userspace to allocate a single offset in the GTT and
use it across multiple contexts. Like with clone(), in the future we may
wish to allow userspace to select more features to copy across from the
parent, but for now we only allow sharing of the GTT.

Note that if full per-process GTT is not supported on the hardware, the
GTT is already implicitly shared between contexts, and this request
to create contexts with shared GTT fails. With full ppGTT, every fd
(i.e. every process) is allocated a unique GTT, so this request cannot be
used to share a GTT between processes/fds; it can only share a GTT
belonging to this fd.
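
For illustration, the intended userspace flow is roughly as follows (a
hedged sketch: the struct and field names are taken from the kernel side
of this patch, but the exact ioctl plumbing for the v2 struct may differ):

	struct drm_i915_gem_context_create_v2 arg = {
		.flags = I915_GEM_CONTEXT_SHARE_GTT,
		.share_ctx = parent_id, /* context whose ppGTT we reuse */
	};

	/* on success, arg.ctx_id names a new context sharing parent's GTT */
	err = drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &arg);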

Testcase: igt/gem_ctx_shared
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c       |  62 ++++-
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  19 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h           |  14 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c   |   1 -
 .../gpu/drm/i915/selftests/i915_gem_context.c | 252 +++++++++++++-----
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   1 -
 drivers/gpu/drm/i915/selftests/mock_context.c |   2 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c     |   2 +
 include/uapi/drm/i915_drm.h                   |  11 +-
 9 files changed, 279 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 150d7a6b2bd3..da2ac10f8e8a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -110,6 +110,8 @@ static void lut_close(struct i915_gem_context *ctx)
 		struct i915_vma *vma = rcu_dereference_raw(*slot);
 
 		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
+
+		vma->open_count--;
 		__i915_gem_object_release_unless_active(vma->obj);
 	}
 	rcu_read_unlock();
@@ -292,7 +294,7 @@ static void context_close(struct i915_gem_context *ctx)
 	 */
 	lut_close(ctx);
 	if (ctx->ppgtt)
-		i915_ppgtt_close(&ctx->ppgtt->vm);
+		i915_ppgtt_close(ctx->ppgtt);
 
 	ctx->file_priv = ERR_PTR(-EBADF);
 	i915_gem_context_put(ctx);
@@ -399,9 +401,12 @@ static void __destroy_hw_context(struct i915_gem_context *ctx,
 	context_close(ctx);
 }
 
+#define CREATE_VM BIT(0)
+
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *dev_priv,
-			struct drm_i915_file_private *file_priv)
+			struct drm_i915_file_private *file_priv,
+			unsigned int flags)
 {
 	struct i915_gem_context *ctx;
 
@@ -414,7 +419,7 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
 	if (IS_ERR(ctx))
 		return ctx;
 
-	if (USES_FULL_PPGTT(dev_priv)) {
+	if (flags & CREATE_VM && USES_FULL_PPGTT(dev_priv)) {
 		struct i915_hw_ppgtt *ppgtt;
 
 		ppgtt = i915_ppgtt_create(dev_priv, file_priv);
@@ -493,7 +498,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	struct i915_gem_context *ctx;
 	int err;
 
-	ctx = i915_gem_create_context(i915, NULL);
+	ctx = i915_gem_create_context(i915, NULL, CREATE_VM);
 	if (IS_ERR(ctx))
 		return ctx;
 
@@ -620,7 +625,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	idr_init(&file_priv->context_idr);
 
 	mutex_lock(&i915->drm.struct_mutex);
-	ctx = i915_gem_create_context(i915, file_priv);
+	ctx = i915_gem_create_context(i915, file_priv, CREATE_VM);
 	mutex_unlock(&i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		idr_destroy(&file_priv->context_idr);
@@ -778,10 +783,12 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_i915_gem_context_create *args = data;
+	struct drm_i915_gem_context_create_v2 *args = data;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct i915_gem_context *share = NULL;
 	struct i915_gem_context *ctx;
-	int ret;
+	unsigned int flags = CREATE_VM;
+	int err;
 
 	if (!DRIVER_CAPS(dev_priv)->has_logical_contexts)
 		return -ENODEV;
@@ -789,6 +796,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	if (args->pad != 0)
 		return -EINVAL;
 
+	if (args->flags & ~I915_GEM_CONTEXT_SHARE_GTT)
+		return -EINVAL;
+
 	if (client_is_banned(file_priv)) {
 		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
 			  current->comm,
@@ -797,21 +807,45 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 		return -EIO;
 	}
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
+	if (args->flags & I915_GEM_CONTEXT_SHARE_GTT) {
+		share = i915_gem_context_lookup(file_priv, args->share_ctx);
+		if (!share)
+			return -ENOENT;
+
+		if (!share->ppgtt) {
+			err = -ENODEV;
+			goto out;
+		}
+
+		flags &= ~CREATE_VM;
+	}
 
-	ctx = i915_gem_create_context(dev_priv, file_priv);
+	err = i915_mutex_lock_interruptible(dev);
+	if (err)
+		goto out;
+
+	ctx = i915_gem_create_context(dev_priv, file_priv, flags);
 	mutex_unlock(&dev->struct_mutex);
-	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out;
+	}
+
+	if (!(flags & CREATE_VM)) {
+		ctx->desc_template = share->desc_template;
+		ctx->ppgtt = i915_ppgtt_get(share->ppgtt);
+		i915_ppgtt_open(ctx->ppgtt);
+	}
 
 	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
 
 	args->ctx_id = ctx->user_handle;
 	DRM_DEBUG("HW context %d created\n", args->ctx_id);
 
-	return 0;
+out:
+	if (share)
+		i915_gem_context_put(share);
+	return err;
 }
 
 int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d1ca67459bc2..caf48a1a1c97 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1644,6 +1644,7 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 		return ERR_PTR(-ENOMEM);
 
 	kref_init(&ppgtt->ref);
+	ppgtt->open_count = 1;
 
 	ppgtt->vm.i915 = i915;
 	ppgtt->vm.dma = &i915->drm.pdev->dev;
@@ -2160,6 +2161,7 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 		return ERR_PTR(-ENOMEM);
 
 	kref_init(&ppgtt->base.ref);
+	ppgtt->base.open_count = 1;
 
 	ppgtt->base.vm.i915 = i915;
 	ppgtt->base.vm.dma = &i915->drm.pdev->dev;
@@ -2285,10 +2287,21 @@ i915_ppgtt_create(struct drm_i915_private *i915,
 	return ppgtt;
 }
 
-void i915_ppgtt_close(struct i915_address_space *vm)
+void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt)
 {
-	GEM_BUG_ON(vm->closed);
-	vm->closed = true;
+	GEM_BUG_ON(ppgtt->vm.closed);
+
+	ppgtt->open_count++;
+}
+
+void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt)
+{
+	GEM_BUG_ON(!ppgtt->open_count);
+	if (--ppgtt->open_count)
+		return;
+
+	GEM_BUG_ON(ppgtt->vm.closed);
+	ppgtt->vm.closed = true;
 }
 
 static void ppgtt_destroy_vma(struct i915_address_space *vm)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 849a1f67b037..512a5a0ab820 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -408,6 +408,8 @@ struct i915_hw_ppgtt {
 	struct kref ref;
 
 	unsigned long pd_dirty_rings;
+	unsigned int open_count;
+
 	union {
 		struct i915_pml4 pml4;		/* GEN8+ & 48b PPGTT */
 		struct i915_page_directory_pointer pdp;	/* GEN8+ */
@@ -626,12 +628,16 @@ int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv);
 void i915_ppgtt_release(struct kref *kref);
 struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
 					struct drm_i915_file_private *fpriv);
-void i915_ppgtt_close(struct i915_address_space *vm);
-static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
+
+void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt);
+void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt);
+
+static inline struct i915_hw_ppgtt *i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
 {
-	if (ppgtt)
-		kref_get(&ppgtt->ref);
+	kref_get(&ppgtt->ref);
+	return ppgtt;
 }
+
 static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
 {
 	if (ppgtt)
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 4ab98db946c8..ad25860a3336 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1724,7 +1724,6 @@ int i915_gem_huge_page_mock_selftests(void)
 	err = i915_subtests(tests, ppgtt);
 
 out_close:
-	i915_ppgtt_close(&ppgtt->vm);
 	i915_ppgtt_put(ppgtt);
 
 out_unlock:
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index c7c891a287f5..7ef3f9a3b727 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -131,7 +131,9 @@ static int live_nop_switch(void *arg)
 	}
 
 	for (n = 0; n < nctx; n++) {
-		ctx[n] = i915_gem_create_context(i915, file->driver_priv);
+		ctx[n] = i915_gem_create_context(i915,
+						 file->driver_priv,
+						 CREATE_VM);
 		if (IS_ERR(ctx[n])) {
 			err = PTR_ERR(ctx[n]);
 			goto out_unlock;
@@ -317,10 +319,6 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	err = i915_gem_object_set_to_gtt_domain(obj, false);
-	if (err)
-		return err;
-
 	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
 	if (err)
 		return err;
@@ -415,7 +413,8 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 	return 0;
 }
 
-static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
+static noinline int cpu_check(struct drm_i915_gem_object *obj,
+			      unsigned int idx, unsigned int max)
 {
 	unsigned int n, m, needs_flush;
 	int err;
@@ -433,8 +432,8 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
 
 		for (m = 0; m < max; m++) {
 			if (map[m] != m) {
-				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
-				       n, m, map[m], m);
+				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x\n",
+				       __builtin_return_address(0), idx, n, m, map[m], m);
 				err = -EINVAL;
 				goto out_unmap;
 			}
@@ -442,8 +441,9 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
 
 		for (; m < DW_PER_PAGE; m++) {
 			if (map[m] != STACK_MAGIC) {
-				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
-				       n, m, map[m], STACK_MAGIC);
+				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x\n",
+				       __builtin_return_address(0), idx, n, m,
+				       map[m], STACK_MAGIC);
 				err = -EINVAL;
 				goto out_unmap;
 			}
@@ -506,6 +506,10 @@ create_test_object(struct i915_gem_context *ctx,
 		return ERR_PTR(err);
 	}
 
+	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	if (err)
+		return ERR_PTR(err);
+
 	list_add_tail(&obj->st_link, objects);
 	return obj;
 }
@@ -521,12 +525,8 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
 static int igt_ctx_exec(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj = NULL;
-	struct drm_file *file;
-	IGT_TIMEOUT(end_time);
-	LIST_HEAD(objects);
-	unsigned long ncontexts, ndwords, dw;
-	bool first_shared_gtt = true;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	int err = -ENODEV;
 
 	/*
@@ -538,42 +538,169 @@ static int igt_ctx_exec(void *arg)
 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
 		return 0;
 
-	file = mock_file(i915);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
+	for_each_engine(engine, i915, id) {
+		struct drm_i915_gem_object *obj = NULL;
+		struct drm_file *file;
+		IGT_TIMEOUT(end_time);
+		LIST_HEAD(objects);
+		unsigned long ncontexts, ndwords, dw;
+		bool first_shared_gtt = true;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
 
-	mutex_lock(&i915->drm.struct_mutex);
+		if (!engine->context_size)
+			continue; /* No logical context support in HW */
 
-	ncontexts = 0;
-	ndwords = 0;
-	dw = 0;
-	while (!time_after(jiffies, end_time)) {
-		struct intel_engine_cs *engine;
-		struct i915_gem_context *ctx;
-		unsigned int id;
 
-		if (first_shared_gtt) {
-			ctx = __create_hw_context(i915, file->driver_priv);
-			first_shared_gtt = false;
-		} else {
-			ctx = i915_gem_create_context(i915, file->driver_priv);
+		file = mock_file(i915);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+
+		mutex_lock(&i915->drm.struct_mutex);
+
+		ncontexts = 0;
+		ndwords = 0;
+		dw = 0;
+		while (!time_after(jiffies, end_time)) {
+			struct i915_gem_context *ctx;
+			intel_wakeref_t wakeref;
+
+			if (first_shared_gtt) {
+				ctx = __create_hw_context(i915, file->driver_priv);
+				first_shared_gtt = false;
+			} else {
+				ctx = i915_gem_create_context(i915,
+							      file->driver_priv,
+							      CREATE_VM);
+			}
+			if (IS_ERR(ctx)) {
+				err = PTR_ERR(ctx);
+				goto out_unlock;
+			}
+
+			if (!obj) {
+				obj = create_test_object(ctx, file, &objects);
+				if (IS_ERR(obj)) {
+					err = PTR_ERR(obj);
+					goto out_unlock;
+				}
+			}
+
+			wakeref = intel_runtime_pm_get(i915);
+			err = gpu_fill(obj, ctx, engine, dw);
+			intel_runtime_pm_put(i915, wakeref);
+			if (err) {
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				       ndwords, dw, max_dwords(obj),
+				       engine->name, ctx->hw_id,
+				       yesno(!!ctx->ppgtt), err);
+				goto out_unlock;
+			}
+
+			if (++dw == max_dwords(obj)) {
+				obj = NULL;
+				dw = 0;
+			}
+
+			ndwords++;
+			ncontexts++;
 		}
-		if (IS_ERR(ctx)) {
-			err = PTR_ERR(ctx);
+
+		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
+			ncontexts, engine->name, ndwords);
+
+		ncontexts = dw = 0;
+		list_for_each_entry(obj, &objects, st_link) {
+			unsigned int rem =
+				min_t(unsigned int, ndwords - dw, max_dwords(obj));
+
+			err = cpu_check(obj, ncontexts++, rem);
+			if (err)
+				break;
+
+			dw += rem;
+		}
+
+out_unlock:
+		i915_gem_wait_for_idle(i915,
+				       I915_WAIT_LOCKED,
+				       MAX_SCHEDULE_TIMEOUT);
+		mutex_unlock(&i915->drm.struct_mutex);
+
+		mock_file_free(i915, file);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int igt_shared_ctx_exec(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = -ENODEV;
+
+	/*
+	 * Create a few different contexts with the same mm and write
+	 * through each ctx using the GPU making sure those writes end
+	 * up in the expected pages of our obj.
+	 */
+
+	for_each_engine(engine, i915, id) {
+		IGT_TIMEOUT(end_time);
+		LIST_HEAD(objects);
+		unsigned long ncontexts, ndwords, dw;
+		struct drm_i915_gem_object *obj = NULL;
+		struct drm_file *file;
+		struct i915_gem_context *parent;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		file = mock_file(i915);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+
+		mutex_lock(&i915->drm.struct_mutex);
+
+		parent = i915_gem_create_context(i915, file->driver_priv,
+						 CREATE_VM);
+		if (IS_ERR(parent)) {
+			err = PTR_ERR(parent);
+			if (err == -ENODEV) /* no logical ctx support */
+				err = 0;
 			goto out_unlock;
 		}
 
-		for_each_engine(engine, i915, id) {
+		if (!parent->ppgtt) {
+			err = 0;
+			goto out_unlock;
+		}
+
+		ncontexts = 0;
+		ndwords = 0;
+		dw = 0;
+		while (!time_after(jiffies, end_time)) {
+			struct i915_gem_context *ctx;
 			intel_wakeref_t wakeref;
 
-			if (!engine->context_size)
-				continue; /* No logical context support in HW */
+			ctx = i915_gem_create_context(i915,
+						      file->driver_priv,
+						      0);
+			if (IS_ERR(ctx)) {
+				err = PTR_ERR(ctx);
+				goto out_unlock;
+			}
 
-			if (!intel_engine_can_store_dword(engine))
-				continue;
+			i915_ppgtt_open(parent->ppgtt);
+			ctx->ppgtt = i915_ppgtt_get(parent->ppgtt);
+			ctx->desc_template = parent->desc_template;
 
 			if (!obj) {
-				obj = create_test_object(ctx, file, &objects);
+				obj = create_test_object(parent, file, &objects);
 				if (IS_ERR(obj)) {
 					err = PTR_ERR(obj);
 					goto out_unlock;
@@ -595,32 +722,36 @@ static int igt_ctx_exec(void *arg)
 				obj = NULL;
 				dw = 0;
 			}
+
 			ndwords++;
+			ncontexts++;
 		}
-		ncontexts++;
-	}
-	pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n",
-		ncontexts, INTEL_INFO(i915)->num_rings, ndwords);
+		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
+			ncontexts, engine->name, ndwords);
 
-	dw = 0;
-	list_for_each_entry(obj, &objects, st_link) {
-		unsigned int rem =
-			min_t(unsigned int, ndwords - dw, max_dwords(obj));
+		ncontexts = dw = 0;
+		list_for_each_entry(obj, &objects, st_link) {
+			unsigned int rem =
+				min_t(unsigned int, ndwords - dw, max_dwords(obj));
 
-		err = cpu_check(obj, rem);
-		if (err)
-			break;
+			err = cpu_check(obj, ncontexts++, rem);
+			if (err)
+				break;
 
-		dw += rem;
-	}
+			dw += rem;
+		}
 
 out_unlock:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
-		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
+		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+			err = -EIO;
+		mutex_unlock(&i915->drm.struct_mutex);
 
-	mock_file_free(i915, file);
-	return err;
+		mock_file_free(i915, file);
+		if (err)
+			return err;
+	}
+
+	return 0;
 }
 
 static int igt_ctx_readonly(void *arg)
@@ -648,7 +779,7 @@ static int igt_ctx_readonly(void *arg)
 
 	mutex_lock(&i915->drm.struct_mutex);
 
-	ctx = i915_gem_create_context(i915, file->driver_priv);
+	ctx = i915_gem_create_context(i915, file->driver_priv, 0);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto out_unlock;
@@ -714,7 +845,7 @@ static int igt_ctx_readonly(void *arg)
 		if (i915_gem_object_is_readonly(obj))
 			num_writes = 0;
 
-		err = cpu_check(obj, num_writes);
+		err = cpu_check(obj, 0, num_writes);
 		if (err)
 			break;
 
@@ -913,6 +1044,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
 		SUBTEST(live_nop_switch),
 		SUBTEST(igt_ctx_exec),
 		SUBTEST(igt_ctx_readonly),
+		SUBTEST(igt_shared_ctx_exec),
 	};
 	bool fake_alias = false;
 	int err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 38414558e18b..0e0fc7eedba3 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1020,7 +1020,6 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
 
 	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
 
-	i915_ppgtt_close(&ppgtt->vm);
 	i915_ppgtt_put(ppgtt);
 out_unlock:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index d937bdff26f9..a7511ba98342 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -92,7 +92,7 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)
 {
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	return i915_gem_create_context(i915, file->driver_priv);
+	return i915_gem_create_context(i915, file->driver_priv, CREATE_VM);
 }
 
 struct i915_gem_context *
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index 976c862b3842..e0880d2c498a 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -66,6 +66,8 @@ mock_ppgtt(struct drm_i915_private *i915,
 		return NULL;
 
 	kref_init(&ppgtt->ref);
+	ppgtt->open_count = 1;
+
 	ppgtt->vm.i915 = i915;
 	ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE);
 	ppgtt->vm.file = ERR_PTR(-ENODEV);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 4fa7e54501cc..dc1c52f95cab 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -386,7 +386,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_SET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
 #define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
 #define DRM_IOCTL_I915_GEM_WAIT		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait)
-#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
+#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_v2)
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
@@ -1429,6 +1429,15 @@ struct drm_i915_gem_context_create {
 	__u32 pad;
 };
 
+struct drm_i915_gem_context_create_v2 {
+	/*  output: id of new context*/
+	__u32 ctx_id;
+	__u32 flags;
+#define I915_GEM_CONTEXT_SHARE_GTT 0x1
+	__u32 share_ctx;
+	__u32 pad;
+};
+
 struct drm_i915_gem_context_destroy {
 	__u32 ctx_id;
 	__u32 pad;
-- 
2.19.0


* [PATCH 34/40] drm/i915: Allow contexts to share a single timeline across all engines
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (31 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 33/40] drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone() Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-25  8:45   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 35/40] drm/i915: Fix I915_EXEC_RING_MASK Chris Wilson
                   ` (9 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Previously, our view has always been to run the engines independently
within a context. (Multiple engines happened before we had contexts and
timelines, so they always operated independently and that behaviour
persisted into contexts.) However, at the user level the context often
represents a single timeline (e.g. GL contexts) and userspace must
ensure that the individual engines are serialised to present that
ordering to the client (or forget about this detail entirely and hope no
one notices - a fair ploy if the client can only directly control one
engine themselves ;)

In the next patch, we will want to construct a set of engines that
operate as one, that have a single timeline interwoven between them, to
present a single virtual engine to the user. (They submit to the virtual
engine, then we decide which engine to execute on.)

To that end, we want to be able to create contexts which have a single
timeline (fence context) shared between all engines, rather than multiple
timelines.
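
As an aside, and purely as an illustration rather than part of this patch,
userspace could ask for such a context via the extended CONTEXT_CREATE
ioctl introduced earlier in the series; drm_fd is assumed to be an open
i915 fd and the error handling is only sketched:

#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Sketch: create a context whose engines all share one timeline. */
static int create_single_timeline_ctx(int drm_fd, __u32 *ctx_id)
{
	struct drm_i915_gem_context_create_v2 arg;

	memset(&arg, 0, sizeof(arg));
	arg.flags = I915_GEM_CONTEXT_SINGLE_TIMELINE;

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &arg))
		return -errno; /* e.g. EINVAL on hardware without execlists */

	*ctx_id = arg.ctx_id; /* all engines now share one fence context */
	return 0;
}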

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 26 ++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context.h |  3 ++
 drivers/gpu/drm/i915/i915_request.c     | 10 +++++--
 drivers/gpu/drm/i915/i915_request.h     |  5 +++-
 drivers/gpu/drm/i915/i915_sw_fence.c    | 39 +++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_sw_fence.h    | 13 +++++++--
 drivers/gpu/drm/i915/intel_lrc.c        |  5 +++-
 include/uapi/drm/i915_drm.h             |  3 +-
 8 files changed, 91 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index da2ac10f8e8a..a8570a07b3b7 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -223,6 +223,9 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 			ce->ops->destroy(ce);
 	}
 
+	if (ctx->timeline)
+		i915_timeline_put(ctx->timeline);
+
 	kfree(ctx->name);
 	put_pid(ctx->pid);
 
@@ -402,6 +405,7 @@ static void __destroy_hw_context(struct i915_gem_context *ctx,
 }
 
 #define CREATE_VM BIT(0)
+#define CREATE_TIMELINE BIT(1)
 
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *dev_priv,
@@ -412,6 +416,9 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
+	if (flags & CREATE_TIMELINE && !HAS_EXECLISTS(dev_priv))
+		return ERR_PTR(-EINVAL);
+
 	/* Reap the most stale context */
 	contexts_free_first(dev_priv);
 
@@ -434,6 +441,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
 		ctx->desc_template = default_desc_template(dev_priv, ppgtt);
 	}
 
+	if (flags & CREATE_TIMELINE) {
+		struct i915_timeline *timeline;
+
+		timeline = i915_timeline_create(dev_priv, ctx->name);
+		if (IS_ERR(timeline)) {
+			__destroy_hw_context(ctx, file_priv);
+			return ERR_CAST(timeline);
+		}
+
+		ctx->timeline = timeline;
+	}
+
 	trace_i915_context_create(ctx);
 
 	return ctx;
@@ -796,7 +815,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	if (args->pad != 0)
 		return -EINVAL;
 
-	if (args->flags & ~I915_GEM_CONTEXT_SHARE_GTT)
+	if (args->flags &
+	    ~(I915_GEM_CONTEXT_SHARE_GTT |
+	      I915_GEM_CONTEXT_SINGLE_TIMELINE))
 		return -EINVAL;
 
 	if (client_is_banned(file_priv)) {
@@ -820,6 +841,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 		flags &= ~CREATE_VM;
 	}
 
+	if (args->flags & I915_GEM_CONTEXT_SINGLE_TIMELINE)
+		flags |= CREATE_TIMELINE;
+
 	err = i915_mutex_lock_interruptible(dev);
 	if (err)
 		goto out;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 08165f6a0a84..770043449ba6 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -41,6 +41,7 @@ struct drm_i915_private;
 struct drm_i915_file_private;
 struct i915_hw_ppgtt;
 struct i915_request;
+struct i915_timeline;
 struct i915_vma;
 struct intel_ring;
 
@@ -66,6 +67,8 @@ struct i915_gem_context {
 	/** file_priv: owning file descriptor */
 	struct drm_i915_file_private *file_priv;
 
+	struct i915_timeline *timeline;
+
 	/**
 	 * @ppgtt: unique address space (GTT)
 	 *
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index fc7ad8dbc36e..34d410cfa577 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1007,8 +1007,14 @@ void i915_request_add(struct i915_request *request)
 	prev = i915_gem_active_raw(&timeline->last_request,
 				   &request->i915->drm.struct_mutex);
 	if (prev && !i915_request_completed(prev)) {
-		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
-					     &request->submitq);
+		if (prev->engine == engine)
+			i915_sw_fence_await_sw_fence(&request->submit,
+						     &prev->submit,
+						     &request->submitq);
+		else
+			__i915_sw_fence_await_dma_fence(&request->submit,
+							&prev->fence,
+							&request->dmaq);
 		if (engine->schedule)
 			__i915_sched_node_add_dependency(&request->sched,
 							 &prev->sched,
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 90e9d170a0cd..68d36eeb5edb 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -116,7 +116,10 @@ struct i915_request {
 	 * It is used by the driver to then queue the request for execution.
 	 */
 	struct i915_sw_fence submit;
-	wait_queue_entry_t submitq;
+	union {
+		wait_queue_entry_t submitq;
+		struct i915_sw_dma_fence_cb dmaq;
+	};
 	wait_queue_head_t execute;
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 6dbeed079ae5..b68883fe73c5 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -362,11 +362,6 @@ int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
 	return __i915_sw_fence_await_sw_fence(fence, signaler, NULL, gfp);
 }
 
-struct i915_sw_dma_fence_cb {
-	struct dma_fence_cb base;
-	struct i915_sw_fence *fence;
-};
-
 struct i915_sw_dma_fence_cb_timer {
 	struct i915_sw_dma_fence_cb base;
 	struct dma_fence *dma;
@@ -483,6 +478,40 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 	return ret;
 }
 
+static void __dma_i915_sw_fence_wake(struct dma_fence *dma,
+				     struct dma_fence_cb *data)
+{
+	struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+
+	i915_sw_fence_complete(cb->fence);
+}
+
+int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				    struct dma_fence *dma,
+				    struct i915_sw_dma_fence_cb *cb)
+{
+	int ret;
+
+	debug_fence_assert(fence);
+
+	if (dma_fence_is_signaled(dma))
+		return 0;
+
+	cb->fence = fence;
+	i915_sw_fence_await(fence);
+
+	ret = dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake);
+	if (ret == 0) {
+		ret = 1;
+	} else {
+		i915_sw_fence_complete(fence);
+		if (ret == -ENOENT) /* fence already signaled */
+			ret = 0;
+	}
+
+	return ret;
+}
+
 int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    struct reservation_object *resv,
 				    const struct dma_fence_ops *exclude,
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index fe2ef4dadfc6..914a734d49bc 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -10,14 +10,13 @@
 #ifndef _I915_SW_FENCE_H_
 #define _I915_SW_FENCE_H_
 
+#include <linux/dma-fence.h>
 #include <linux/gfp.h>
 #include <linux/kref.h>
 #include <linux/notifier.h> /* for NOTIFY_DONE */
 #include <linux/wait.h>
 
 struct completion;
-struct dma_fence;
-struct dma_fence_ops;
 struct reservation_object;
 
 struct i915_sw_fence {
@@ -69,10 +68,20 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
 				     struct i915_sw_fence *after,
 				     gfp_t gfp);
+
+struct i915_sw_dma_fence_cb {
+	struct dma_fence_cb base;
+	struct i915_sw_fence *fence;
+};
+
+int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				    struct dma_fence *dma,
+				    struct i915_sw_dma_fence_cb *cb);
 int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 				  struct dma_fence *dma,
 				  unsigned long timeout,
 				  gfp_t gfp);
+
 int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    struct reservation_object *resv,
 				    const struct dma_fence_ops *exclude,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 74be9a49ef9e..48a2bca7fec3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2626,7 +2626,10 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 		goto error_deref_obj;
 	}
 
-	timeline = i915_timeline_create(ctx->i915, ctx->name);
+	if (ctx->timeline)
+		timeline = i915_timeline_get(ctx->timeline);
+	else
+		timeline = i915_timeline_create(ctx->i915, ctx->name);
 	if (IS_ERR(timeline)) {
 		ret = PTR_ERR(timeline);
 		goto error_deref_obj;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index dc1c52f95cab..adb9fed86ef7 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1433,7 +1433,8 @@ struct drm_i915_gem_context_create_v2 {
 	/*  output: id of new context*/
 	__u32 ctx_id;
 	__u32 flags;
-#define I915_GEM_CONTEXT_SHARE_GTT 0x1
+#define I915_GEM_CONTEXT_SHARE_GTT		0x1
+#define I915_GEM_CONTEXT_SINGLE_TIMELINE	0x2
 	__u32 share_ctx;
 	__u32 pad;
 };
-- 
2.19.0


* [PATCH 35/40] drm/i915: Fix I915_EXEC_RING_MASK
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (32 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 34/40] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-25  8:46   ` [Intel-gfx] " Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 36/40] drm/i915: Re-arrange execbuf so context is known before engine Chris Wilson
                   ` (8 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson, Tvrtko Ursulin, stable

This was supposed to be a mask of all known rings, but it is being used
by execbuffer to filter out invalid rings, and so is instead mapping high
unused values onto valid rings. Instead of a mask of all known rings,
we need it to be the mask of all possible rings.
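
To illustrate (example values only, not taken from a bug report), consider
how execbuf's "flags & I915_EXEC_RING_MASK" treats an out-of-range ring
selector of 9 before and after this change:

	unsigned int flags = 9;             /* no such ring */
	unsigned int old_id = flags & 7;    /* == 1 == I915_EXEC_RENDER: silently accepted */
	unsigned int new_id = flags & 0x3f; /* == 9, > I915_USER_RINGS: execbuf now rejects it */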

Fixes: 549f7365820a ("drm/i915: Enable SandyBridge blitter ring")
Fixes: de1add360522 ("drm/i915: Decouple execbuf uAPI from internal implementation")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org> # v4.6+
---
 include/uapi/drm/i915_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index adb9fed86ef7..65b0b84419f3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -984,7 +984,7 @@ struct drm_i915_gem_execbuffer2 {
 	 * struct drm_i915_gem_exec_fence *fences.
 	 */
 	__u64 cliprects_ptr;
-#define I915_EXEC_RING_MASK              (7<<0)
+#define I915_EXEC_RING_MASK              (0x3f)
 #define I915_EXEC_DEFAULT                (0<<0)
 #define I915_EXEC_RENDER                 (1<<0)
 #define I915_EXEC_BSD                    (2<<0)
-- 
2.19.0


* [PATCH 36/40] drm/i915: Re-arrange execbuf so context is known before engine
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (33 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 35/40] drm/i915: Fix I915_EXEC_RING_MASK Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-19 19:55 ` [PATCH 37/40] drm/i915: Allow a context to define its set of engines Chris Wilson
                   ` (7 subsequent siblings)
  42 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Needed for a following patch.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 42b32e8449a3..109486a6ef07 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2236,10 +2236,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (args->flags & I915_EXEC_IS_PINNED)
 		eb.batch_flags |= I915_DISPATCH_PINNED;
 
-	eb.engine = eb_select_engine(eb.i915, file, args);
-	if (!eb.engine)
-		return -EINVAL;
-
 	if (args->flags & I915_EXEC_FENCE_IN) {
 		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
 		if (!in_fence)
@@ -2264,6 +2260,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
+	eb.engine = eb_select_engine(eb.i915, file, args);
+	if (!eb.engine) {
+		err = -EINVAL;
+		goto err_engine;
+	}
+
 	/*
 	 * Take a local wakeref for preparing to dispatch the execbuf as
 	 * we expect to access the hardware fairly frequently in the
@@ -2424,6 +2426,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	mutex_unlock(&dev->struct_mutex);
 err_rpm:
 	intel_runtime_pm_put(eb.i915, wakeref);
+err_engine:
 	i915_gem_context_put(eb.ctx);
 err_destroy:
 	eb_destroy(&eb);
-- 
2.19.0


* [PATCH 37/40] drm/i915: Allow a context to define its set of engines
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (34 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 36/40] drm/i915: Re-arrange execbuf so context is known before engine Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-27 11:28   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 38/40] drm/i915/execlists: Flush the CS events before unpinning Chris Wilson
                   ` (6 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Over the last few years, we have debated how to extend the user API to
support an increase in the number of engines, that may be sparse and
even be heterogeneous within a class (not all video decoders created
equal). We settled on using (class, instance) tuples to identify a
specific engine, with an API for the user to construct a map of engines
to capabilities. Into this picture, we then add a challenge of virtual
engines; one user engine that maps behind the scenes to any number of
physical engines. To keep it general, we want the user to have full
control over that mapping. To that end, we allow the user to constrain a
context to define the set of engines that it can access, order fully
controlled by the user via (class, instance). With such precise control
in context setup, we can continue to use the existing execbuf uABI of
specifying a single index; only now it doesn't automagically map onto
the engines, it uses the user defined engine map from the context.

The I915_EXEC_DEFAULT slot is left empty, and is invalid for use by
execbuf. Its use will be revealed in the next patch.
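
For illustration only (not part of the patch; drm_fd and ctx_id are just
placeholders), a userspace sketch binding a context to two video engines
with the new parameter could look like the following. The kernel leaves
slot 0 empty, so these engines become execbuf ring indices 1 and 2:

#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Wrapper around the flexible class_instance[] array in the uAPI struct. */
struct engine_map {
	struct i915_context_param_engines base;
	struct {
		__u32 class;
		__u32 instance;
	} engines[2];
};

static int bind_two_vcs(int drm_fd, __u32 ctx_id)
{
	struct engine_map map = {
		.base = { .extensions = 0 },
		.engines = {
			{ I915_ENGINE_CLASS_VIDEO, 0 }, /* execbuf index 1 */
			{ I915_ENGINE_CLASS_VIDEO, 1 }, /* execbuf index 2 */
		},
	};
	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof(map),
		.value = (uintptr_t)&map,
	};

	return ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p) ? -errno : 0;
}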

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c    | 88 ++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_context.h    |  4 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 22 ++++--
 include/uapi/drm/i915_drm.h                | 27 +++++++
 4 files changed, 135 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index a8570a07b3b7..313471253f51 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,6 +90,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include "i915_user_extensions.h"
 #include "intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -223,6 +224,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 			ce->ops->destroy(ce);
 	}
 
+	kfree(ctx->engines);
+
 	if (ctx->timeline)
 		i915_timeline_put(ctx->timeline);
 
@@ -948,6 +951,87 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+struct set_engines {
+	struct i915_gem_context *ctx;
+	struct intel_engine_cs **engines;
+	unsigned int nengine;
+};
+
+static const i915_user_extension_fn set_engines__extensions[] = {
+};
+
+static int set_engines(struct i915_gem_context *ctx,
+		       struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines __user *user;
+	struct set_engines set = {
+		.ctx = ctx,
+		.engines = NULL,
+		.nengine = -1,
+	};
+	unsigned int n;
+	u64 size, extensions;
+	int err;
+
+	user = u64_to_user_ptr(args->value);
+	size = args->size;
+	if (!size)
+		goto out;
+
+	if (size < sizeof(struct i915_context_param_engines))
+		return -EINVAL;
+
+	size -= sizeof(struct i915_context_param_engines);
+	if (size % sizeof(*user->class_instance))
+		return -EINVAL;
+
+	set.nengine = size / sizeof(*user->class_instance);
+	if (set.nengine == 0 || set.nengine >= I915_EXEC_RING_MASK)
+		return -EINVAL;
+
+	set.engines = kmalloc_array(set.nengine + 1,
+				    sizeof(*set.engines),
+				    GFP_KERNEL);
+	if (!set.engines)
+		return -ENOMEM;
+
+	set.engines[0] = NULL;
+	for (n = 0; n < set.nengine; n++) {
+		u32 class, instance;
+
+		if (get_user(class, &user->class_instance[n].class) ||
+		    get_user(instance, &user->class_instance[n].instance)) {
+			kfree(set.engines);
+			return -EFAULT;
+		}
+
+		set.engines[n + 1] =
+			intel_engine_lookup_user(ctx->i915, class, instance);
+		if (!set.engines[n + 1]) {
+			kfree(set.engines);
+			return -ENOENT;
+		}
+	}
+
+	err = -EFAULT;
+	if (!get_user(extensions, &user->extensions))
+		err = i915_user_extensions(u64_to_user_ptr(extensions),
+					   set_engines__extensions,
+					   ARRAY_SIZE(set_engines__extensions),
+					   &set);
+	if (err) {
+		kfree(set.engines);
+		return err;
+	}
+
+out:
+	kfree(ctx->engines);
+	ctx->engines = set.engines;
+	ctx->nengine = set.nengine + 1;
+
+	return 0;
+}
+
 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
@@ -1011,6 +1095,10 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		}
 		break;
 
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = set_engines(ctx, args);
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 770043449ba6..9f89119a6566 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -67,6 +67,8 @@ struct i915_gem_context {
 	/** file_priv: owning file descriptor */
 	struct drm_i915_file_private *file_priv;
 
+	struct intel_engine_cs **engines;
+
 	struct i915_timeline *timeline;
 
 	/**
@@ -135,6 +137,8 @@ struct i915_gem_context {
 #define CONTEXT_CLOSED			1
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
 
+	unsigned int nengine;
+
 	/**
 	 * @hw_id: - unique identifier for the context
 	 *
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 109486a6ef07..faedfdca875d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2014,13 +2014,23 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
 };
 
 static struct intel_engine_cs *
-eb_select_engine(struct drm_i915_private *dev_priv,
+eb_select_engine(struct i915_execbuffer *eb,
 		 struct drm_file *file,
 		 struct drm_i915_gem_execbuffer2 *args)
 {
 	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
 	struct intel_engine_cs *engine;
 
+	if (eb->ctx->engines) {
+		if (user_ring_id >= eb->ctx->nengine) {
+			DRM_DEBUG("execbuf with unknown ring: %u\n",
+				  user_ring_id);
+			return NULL;
+		}
+
+		return eb->ctx->engines[user_ring_id];
+	}
+
 	if (user_ring_id > I915_USER_RINGS) {
 		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
 		return NULL;
@@ -2033,11 +2043,11 @@ eb_select_engine(struct drm_i915_private *dev_priv,
 		return NULL;
 	}
 
-	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
+	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(eb->i915)) {
 		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
 
 		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
-			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
+			bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
 		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
 			   bsd_idx <= I915_EXEC_BSD_RING2) {
 			bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2048,9 +2058,9 @@ eb_select_engine(struct drm_i915_private *dev_priv,
 			return NULL;
 		}
 
-		engine = dev_priv->engine[_VCS(bsd_idx)];
+		engine = eb->i915->engine[_VCS(bsd_idx)];
 	} else {
-		engine = dev_priv->engine[user_ring_map[user_ring_id]];
+		engine = eb->i915->engine[user_ring_map[user_ring_id]];
 	}
 
 	if (!engine) {
@@ -2260,7 +2270,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	eb.engine = eb_select_engine(eb.i915, file, args);
+	eb.engine = eb_select_engine(&eb, file, args);
 	if (!eb.engine) {
 		err = -EINVAL;
 		goto err_engine;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 65b0b84419f3..d41b4c673af4 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1508,9 +1508,36 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+
+/*
+ * I915_CONTEXT_PARAM_ENGINES:
+ *
+ * Bind this context to operate on this subset of available engines. Henceforth,
+ * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
+ * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
+ * and upwards. The array created is offset by 1, such that by default
+ * I915_EXEC_DEFAULT is left empty, to be filled in as directed. Slots 1...N
+ * are then filled in using the specified (class, instance).
+ *
+ * Setting the number of engines bound to the context will revert back to
+ * default settings.
+ *
+ * See struct i915_context_param_engines.
+ */
+#define I915_CONTEXT_PARAM_ENGINES	0x7
+
 	__u64 value;
 };
 
+struct i915_context_param_engines {
+	__u64 extensions;
+
+	struct {
+		__u32 class; /* see enum drm_i915_gem_engine_class */
+		__u32 instance;
+	} class_instance[0];
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
-- 
2.19.0


* [PATCH 38/40] drm/i915/execlists: Flush the CS events before unpinning
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (35 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 37/40] drm/i915: Allow a context to define its set of engines Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-10-01 10:51   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 39/40] drm/i915/execlists: Refactor out can_merge_rq() Chris Wilson
                   ` (5 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Inside the execlists submission tasklet, we often make the mistake of
assuming that everything beneath the request is available for use.
However, the submission and the request live on two separate timelines,
and the request contents may be freed from an early retirement before we
have had a chance to run the submission tasklet (think ksoftirqd). To
safeguard ourselves against any mistakes, flush the tasklet before we
unpin the context if execlists still has a reference to this context.

References: 60367132a214 ("drm/i915: Avoid use-after-free of ctx in request tracepoints")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.h |  1 +
 drivers/gpu/drm/i915/intel_lrc.c        | 32 ++++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 9f89119a6566..1fd71dfdfa62 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -170,6 +170,7 @@ struct i915_gem_context {
 	/** engine: per-engine logical HW state */
 	struct intel_context {
 		struct i915_gem_context *gem_context;
+		struct intel_engine_cs *active;
 		struct i915_vma *state;
 		struct intel_ring *ring;
 		u32 *lrc_reg_state;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 48a2bca7fec3..be7dbdd7fc2c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -282,6 +282,8 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		__i915_request_unsubmit(rq);
 		unwind_wa_tail(rq);
 
+		GEM_BUG_ON(rq->hw_context->active);
+
 		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 		if (rq_prio(rq) != prio) {
 			prio = rq_prio(rq);
@@ -427,8 +429,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 		rq = port_unpack(&port[n], &count);
 		if (rq) {
 			GEM_BUG_ON(count > !n);
-			if (!count++)
+			if (!count++) {
+				GEM_BUG_ON(rq->hw_context->active);
 				execlists_context_schedule_in(rq);
+				rq->hw_context->active = engine;
+			}
 			port_set(&port[n], port_pack(rq, count));
 			desc = execlists_update_context(rq);
 			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
@@ -734,6 +739,8 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 			  intel_engine_get_seqno(rq->engine));
 
 		GEM_BUG_ON(!execlists->active);
+
+		rq->hw_context->active = NULL;
 		execlists_context_schedule_out(rq,
 					       i915_request_completed(rq) ?
 					       INTEL_CONTEXT_SCHEDULE_OUT :
@@ -971,6 +978,7 @@ static void process_csb(struct intel_engine_cs *engine)
 			 */
 			GEM_BUG_ON(!i915_request_completed(rq));
 
+			rq->hw_context->active = NULL;
 			execlists_context_schedule_out(rq,
 						       INTEL_CONTEXT_SCHEDULE_OUT);
 			i915_request_put(rq);
@@ -1080,6 +1088,28 @@ static void execlists_context_destroy(struct intel_context *ce)
 
 static void execlists_context_unpin(struct intel_context *ce)
 {
+	struct intel_engine_cs *engine;
+
+	/*
+	 * The tasklet may still be using a pointer to our state, via an
+	 * old request. However, since we know we only unpin the context
+	 * on retirement of the following request, we know that the last
+	 * request referencing us will have had a completion CS interrupt.
+	 * If we see that it is still active, it means that the tasklet hasn't
+	 * had the chance to run yet; let it run before we teardown the
+	 * reference it may use.
+	 */
+	engine = READ_ONCE(ce->active);
+	if (unlikely(engine)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&engine->timeline.lock, flags);
+		process_csb(engine);
+		spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
+		GEM_BUG_ON(READ_ONCE(ce->active));
+	}
+
 	i915_gem_context_unpin_hw_id(ce->gem_context);
 
 	intel_ring_unpin(ce->ring);
-- 
2.19.0


* [PATCH 39/40] drm/i915/execlists: Refactor out can_merge_rq()
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (36 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 38/40] drm/i915/execlists: Flush the CS events before unpinning Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-09-27 11:32   ` Tvrtko Ursulin
  2018-09-19 19:55 ` [PATCH 40/40] drm/i915: Load balancing across a virtual engine Chris Wilson
                   ` (4 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we add another user that wants to check whether
requests can be merged into a single HW execution, and in the future we
want to add more conditions under which requests from the same context
cannot be merged. In preparation, extract out can_merge_rq().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index be7dbdd7fc2c..679ce521be16 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -478,6 +478,15 @@ static bool can_merge_ctx(const struct intel_context *prev,
 	return true;
 }
 
+static bool can_merge_rq(const struct i915_request *prev,
+			 const struct i915_request *next)
+{
+	if (!can_merge_ctx(prev->hw_context, next->hw_context))
+		return false;
+
+	return true;
+}
+
 static void port_assign(struct execlist_port *port, struct i915_request *rq)
 {
 	GEM_BUG_ON(rq == port_request(port));
@@ -639,8 +648,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * second request, and so we never need to tell the
 			 * hardware about the first.
 			 */
-			if (last &&
-			    !can_merge_ctx(rq->hw_context, last->hw_context)) {
+			if (last && !can_merge_rq(rq, last)) {
 				/*
 				 * If we are on the second port and cannot
 				 * combine this request with the last, then we
-- 
2.19.0


* [PATCH 40/40] drm/i915: Load balancing across a virtual engine
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (37 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 39/40] drm/i915/execlists: Refactor out can_merge_rq() Chris Wilson
@ 2018-09-19 19:55 ` Chris Wilson
  2018-10-01 11:37   ` Tvrtko Ursulin
  2018-09-19 21:54 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/40] drm: Use default dma_fence hooks where possible for null syncobj Patchwork
                   ` (3 subsequent siblings)
  42 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-19 19:55 UTC (permalink / raw)
  To: intel-gfx

Having allowed the user to define the set of engines that they want to
limit themselves to, we go one step further and allow them to bind those
engines
into a single virtual instance. Submitting a batch to the virtual engine
will then forward it to any one of the set in a manner as best to
distribute load.  The virtual engine has a single timeline across all
engines (it operates as a single queue), so it is not able to concurrently
run batches across multiple engines by itself; that is left up to the user
to submit multiple concurrent batches to multiple queues. Multiple users
will be load balanced across the system.

The mechanism used for load balancing in this patch is a late greedy
balancer. When a request is ready for execution, it is added to each
engine's queue, and when an engine is ready for its next request it
claims it from the virtual engine. The first engine to do so wins, i.e.
the request is executed at the earliest opportunity (idle moment) in the
system.

As not all HW is created equal, the user is still able to skip the
virtual engine and execute the batch on a specific engine, all within the
same queue. It will then be executed in order on the correct engine,
with execution on other virtual engines being moved away due to the load
detection.

A couple of areas for potential improvement left!

- The virtual engine always takes priority over equal-priority tasks.
Mostly broken up by applying FQ_CODEL rules for prioritising new clients,
and hopefully the virtual and real engines are not then congested (i.e.
all work is via virtual engines, or all work is to the real engine).

- We require the breadcrumb irq around every virtual engine request. For
normal engines, we eliminate the need for the slow round trip via
interrupt by using the submit fence and queueing in order. For virtual
engines, we have to allow any job to transfer to a new ring, and cannot
coalesce the submissions, so require the completion fence instead,
forcing the persistent use of interrupts.

Other areas of improvement are more general, such as reducing lock
contention, reducing dispatch overhead, looking at direct submission
rather than bouncing around tasklets etc.
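
As a rough userspace sketch only: the uAPI half of the load-balance
extension is not quoted in this hunk, so the field names used below
(base.name, engines_mask) are assumed from set_engines__load_balance()
and from the i915_user_extension chaining added earlier in the series.
The context is assumed to have been created with
I915_GEM_CONTEXT_SINGLE_TIMELINE, which the kernel requires before it
will accept the balancer:

#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Sketch: hide two vcs engines behind one load-balanced virtual engine. */
static int make_balanced_vcs_ctx(int drm_fd, __u32 ctx_id)
{
	struct i915_context_engines_load_balance balance;
	struct {
		struct i915_context_param_engines base;
		struct { __u32 class, instance; } engines[2];
	} map;
	struct drm_i915_gem_context_param p;

	/* Field names here are assumed from the kernel side of this patch. */
	memset(&balance, 0, sizeof(balance));
	balance.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
	balance.engines_mask = ~0ull; /* balance across the whole engine map */

	memset(&map, 0, sizeof(map));
	map.base.extensions = (uintptr_t)&balance;
	map.engines[0].class = I915_ENGINE_CLASS_VIDEO;
	map.engines[0].instance = 0;
	map.engines[1].class = I915_ENGINE_CLASS_VIDEO;
	map.engines[1].instance = 1;

	memset(&p, 0, sizeof(p));
	p.ctx_id = ctx_id; /* a SINGLE_TIMELINE context */
	p.param = I915_CONTEXT_PARAM_ENGINES;
	p.size = sizeof(map);
	p.value = (uintptr_t)&map;

	/*
	 * Afterwards I915_EXEC_DEFAULT (slot 0) submits to the virtual
	 * engine, while indices 1 and 2 still pick a specific vcs.
	 */
	return ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p) ? -errno : 0;
}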

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.h            |   5 +
 drivers/gpu/drm/i915/i915_gem_context.c    |  85 +++-
 drivers/gpu/drm/i915/i915_gem_context.h    |   1 +
 drivers/gpu/drm/i915/i915_request.c        |   2 +-
 drivers/gpu/drm/i915/i915_scheduler.c      |   1 +
 drivers/gpu/drm/i915/i915_timeline.h       |   1 +
 drivers/gpu/drm/i915/intel_engine_cs.c     |  48 +-
 drivers/gpu/drm/i915/intel_lrc.c           | 500 ++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_lrc.h           |  16 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h    |   8 +
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 216 +++++++++
 include/uapi/drm/i915_drm.h                |  27 ++
 12 files changed, 851 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 599c4f6eb1ea..69d137786700 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -87,4 +87,9 @@ static inline bool __tasklet_is_enabled(const struct tasklet_struct *t)
 	return !atomic_read(&t->count);
 }
 
+static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
+{
+	return test_bit(TASKLET_STATE_SCHED, &t->state);
+}
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 313471253f51..0ee39772df03 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -91,6 +91,7 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
+#include "intel_lrc.h"
 #include "intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -224,7 +225,10 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 			ce->ops->destroy(ce);
 	}
 
-	kfree(ctx->engines);
+	if (ctx->engines) {
+		intel_virtual_engine_put(ctx->engines[0]);
+		kfree(ctx->engines);
+	}
 
 	if (ctx->timeline)
 		i915_timeline_put(ctx->timeline);
@@ -351,6 +355,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 		struct intel_context *ce = &ctx->__engine[n];
 
 		ce->gem_context = ctx;
+		ce->owner = dev_priv->engine[n];
 	}
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
@@ -679,7 +684,8 @@ last_request_on_engine(struct i915_timeline *timeline,
 
 	rq = i915_gem_active_raw(&timeline->last_request,
 				 &engine->i915->drm.struct_mutex);
-	if (rq && rq->engine == engine) {
+	if (rq &&
+	    (rq->engine == engine || intel_engine_is_virtual(rq->engine))) {
 		GEM_TRACE("last request for %s on engine %s: %llx:%d\n",
 			  timeline->name, engine->name,
 			  rq->fence.context, rq->fence.seqno);
@@ -951,13 +957,82 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+static int check_user_mbz64(u64 __user *user)
+{
+	u64 mbz;
+
+	if (get_user(mbz, user))
+		return -EFAULT;
+
+	return mbz ? -EINVAL : 0;
+}
+
 struct set_engines {
 	struct i915_gem_context *ctx;
 	struct intel_engine_cs **engines;
 	unsigned int nengine;
 };
 
+static int set_engines__load_balance(struct i915_user_extension __user *base,
+				     void *data)
+
+{
+	struct i915_context_engines_load_balance __user *ext =
+		container_of(base, typeof(*ext) __user, base);
+	const struct set_engines *set = data;
+	struct intel_engine_cs *ve;
+	unsigned int n;
+	u64 mask;
+	int err;
+
+	if (set->engines[0])
+		return -EEXIST;
+
+	if (!HAS_EXECLISTS(set->ctx->i915))
+		return -ENODEV;
+
+	if (USES_GUC_SUBMISSION(set->ctx->i915))
+		return -ENODEV;
+
+	if (!set->ctx->timeline)
+		return -EINVAL;
+
+	err = check_user_mbz64(&ext->flags);
+	if (err)
+		return err;
+
+	for (n = 0; n < ARRAY_SIZE(ext->mbz); n++) {
+		err = check_user_mbz64(&ext->mbz[n]);
+		if (err)
+			return err;
+	}
+
+	if (get_user(mask, &ext->engines_mask))
+		return -EFAULT;
+
+	if (mask == ~0ull) {
+		ve = intel_execlists_create_virtual(set->ctx,
+						    set->engines + 1,
+						    set->nengine);
+	} else {
+		struct intel_engine_cs *stack[64];
+		int bit;
+
+		n = 0;
+		for_each_set_bit(bit, (unsigned long *)&mask, set->nengine)
+			stack[n++] = set->engines[bit + 1];
+
+		ve = intel_execlists_create_virtual(set->ctx, stack, n);
+	}
+	if (IS_ERR(ve))
+		return PTR_ERR(ve);
+
+	set->engines[0] = ve;
+	return 0;
+}
+
 static const i915_user_extension_fn set_engines__extensions[] = {
+	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
 };
 
 static int set_engines(struct i915_gem_context *ctx,
@@ -1020,12 +1095,16 @@ static int set_engines(struct i915_gem_context *ctx,
 					   ARRAY_SIZE(set_engines__extensions),
 					   &set);
 	if (err) {
+		intel_virtual_engine_put(set.engines[0]);
 		kfree(set.engines);
 		return err;
 	}
 
 out:
-	kfree(ctx->engines);
+	if (ctx->engines) {
+		intel_virtual_engine_put(ctx->engines[0]);
+		kfree(ctx->engines);
+	}
 	ctx->engines = set.engines;
 	ctx->nengine = set.nengine + 1;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 1fd71dfdfa62..6bab994cecba 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -171,6 +171,7 @@ struct i915_gem_context {
 	struct intel_context {
 		struct i915_gem_context *gem_context;
 		struct intel_engine_cs *active;
+		struct intel_engine_cs *owner;
 		struct i915_vma *state;
 		struct intel_ring *ring;
 		u32 *lrc_reg_state;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 34d410cfa577..fda5a936bcc0 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1007,7 +1007,7 @@ void i915_request_add(struct i915_request *request)
 	prev = i915_gem_active_raw(&timeline->last_request,
 				   &request->i915->drm.struct_mutex);
 	if (prev && !i915_request_completed(prev)) {
-		if (prev->engine == engine)
+		if (prev->engine == engine && !intel_engine_is_virtual(engine))
 			i915_sw_fence_await_sw_fence(&request->submit,
 						     &prev->submit,
 						     &request->submitq);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 1423088dceff..cfb0a0353e15 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -335,6 +335,7 @@ static void __i915_schedule(struct i915_request *rq,
 
 		node->attr.priority = prio;
 		if (!list_empty(&node->link)) {
+			GEM_BUG_ON(intel_engine_is_virtual(engine));
 			if (last != engine) {
 				pl = i915_sched_lookup_priolist(engine, prio);
 				last = engine;
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index a2c2c3ab5fb0..b5321dc2d5a5 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -39,6 +39,7 @@ struct i915_timeline {
 	spinlock_t lock;
 #define TIMELINE_CLIENT 0 /* default subclass */
 #define TIMELINE_ENGINE 1
+#define TIMELINE_VIRTUAL 2
 
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index b122d82465d0..3659f7e68348 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1444,13 +1444,11 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 {
 	const int MAX_REQUESTS_TO_SHOW = 8;
 	struct intel_breadcrumbs * const b = &engine->breadcrumbs;
-	const struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
-	struct i915_request *rq, *last;
+	struct i915_request *rq;
 	intel_wakeref_t wakeref;
 	unsigned long flags;
 	struct rb_node *rb;
-	int count;
 
 	if (header) {
 		va_list ap;
@@ -1515,49 +1513,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	}
 
 	local_irq_save(flags);
-	spin_lock(&engine->timeline.lock);
-
-	last = NULL;
-	count = 0;
-	list_for_each_entry(rq, &engine->timeline.requests, link) {
-		if (count++ < MAX_REQUESTS_TO_SHOW - 1)
-			print_request(m, rq, "\t\tE ");
-		else
-			last = rq;
-	}
-	if (last) {
-		if (count > MAX_REQUESTS_TO_SHOW) {
-			drm_printf(m,
-				   "\t\t...skipping %d executing requests...\n",
-				   count - MAX_REQUESTS_TO_SHOW);
-		}
-		print_request(m, last, "\t\tE ");
-	}
-
-	last = NULL;
-	count = 0;
-	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
-	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
-		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
-		int i;
-
-		priolist_for_each_request(rq, p, i) {
-			if (count++ < MAX_REQUESTS_TO_SHOW - 1)
-				print_request(m, rq, "\t\tQ ");
-			else
-				last = rq;
-		}
-	}
-	if (last) {
-		if (count > MAX_REQUESTS_TO_SHOW) {
-			drm_printf(m,
-				   "\t\t...skipping %d queued requests...\n",
-				   count - MAX_REQUESTS_TO_SHOW);
-		}
-		print_request(m, last, "\t\tQ ");
-	}
 
-	spin_unlock(&engine->timeline.lock);
+	intel_execlists_show_requests(engine, m,
+				      print_request, MAX_REQUESTS_TO_SHOW);
 
 	spin_lock(&b->rb_lock);
 	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 679ce521be16..999fb13fe0e0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -164,6 +164,29 @@
 #define WA_TAIL_DWORDS 2
 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 
+struct virtual_engine {
+	struct intel_engine_cs base;
+
+	struct intel_context context;
+	struct kref kref;
+
+	struct intel_engine_cs *bound;
+
+	struct i915_request *request;
+	struct ve_node {
+		struct rb_node rb;
+		int prio;
+	} nodes[I915_NUM_ENGINES];
+
+	unsigned int count;
+	struct intel_engine_cs *siblings[0];
+};
+
+static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
+{
+	return container_of(engine, struct virtual_engine, base);
+}
+
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 					    struct intel_engine_cs *engine,
 					    struct intel_context *ce);
@@ -276,6 +299,8 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	list_for_each_entry_safe_reverse(rq, rn,
 					 &engine->timeline.requests,
 					 link) {
+		struct intel_engine_cs *owner;
+
 		if (i915_request_completed(rq))
 			break;
 
@@ -284,14 +309,20 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 
 		GEM_BUG_ON(rq->hw_context->active);
 
-		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-		if (rq_prio(rq) != prio) {
-			prio = rq_prio(rq);
-			pl = i915_sched_lookup_priolist(engine, prio);
-		}
-		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+		owner = rq->hw_context->owner;
+		if (likely(owner == engine)) {
+			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+			if (rq_prio(rq) != prio) {
+				prio = rq_prio(rq);
+				pl = i915_sched_lookup_priolist(engine, prio);
+			}
+			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
-		list_add(&rq->sched.link, pl);
+			list_add(&rq->sched.link, pl);
+		} else {
+			rq->engine = owner;
+			owner->submit_request(rq);
+		}
 
 		active = rq;
 	}
@@ -301,7 +332,8 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	 * stream, so give it the equivalent small priority bump to prevent
 	 * it being gazumped a second time by another peer.
 	 */
-	if (!(prio & I915_PRIORITY_NEWCLIENT)) {
+	if (!(prio & I915_PRIORITY_NEWCLIENT) &&
+	    active->hw_context->owner == engine) {
 		prio |= I915_PRIORITY_NEWCLIENT;
 		list_move_tail(&active->sched.link,
 			       i915_sched_lookup_priolist(engine, prio));
@@ -538,6 +570,50 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 						  execlists));
 }
 
+static void virtual_update_register_offsets(u32 *regs,
+					    struct intel_engine_cs *engine)
+{
+	u32 base = engine->mmio_base;
+
+	regs[CTX_CONTEXT_CONTROL] =
+		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(engine));
+	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
+	regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
+	regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
+	regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
+
+	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
+	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
+	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
+	regs[CTX_SECOND_BB_HEAD_U] =
+		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
+	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
+	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
+
+	regs[CTX_CTX_TIMESTAMP] =
+		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
+	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 3));
+	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 3));
+	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 2));
+	regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 2));
+	regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 1));
+	regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 1));
+	regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 0));
+	regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 0));
+
+	if (engine->class == RENDER_CLASS) {
+		regs[CTX_RCS_INDIRECT_CTX] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
+		regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
+		regs[CTX_BB_PER_CTX_PTR] =
+			i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
+
+		regs[CTX_R_PWR_CLK_STATE] =
+			i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
+	}
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -547,6 +623,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	struct i915_request *last = port_request(port);
 	struct rb_node *rb;
 	bool submit = false;
+	int prio;
 
 	/*
 	 * Hardware submission is through 2 ports. Conceptually each port
@@ -570,6 +647,31 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
+restart_virtual_engine:
+	prio = execlists->queue_priority;
+	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq = READ_ONCE(ve->request);
+		struct intel_engine_cs *active;
+
+		if (!rq) {
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+			rb = rb_first_cached(&execlists->virtual);
+			continue;
+		}
+
+		active = READ_ONCE(ve->context.active);
+		if (active && active != engine) {
+			rb = rb_next(rb);
+			continue;
+		}
+
+		prio = max(prio, rq_prio(rq));
+		break;
+	}
+
 	if (last) {
 		/*
 		 * Don't resubmit or switch until all outstanding
@@ -591,7 +693,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
 			return;
 
-		if (need_preempt(engine, last, execlists->queue_priority)) {
+		if (need_preempt(engine, last, prio)) {
 			inject_preempt_context(engine);
 			return;
 		}
@@ -631,6 +733,67 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		last->tail = last->wa_tail;
 	}
 
+	if (rb) { /* XXX virtual is always taking precedence */
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq;
+
+		spin_lock(&ve->base.timeline.lock);
+
+		rq = ve->request;
+		if (unlikely(!rq)) { /* lost the race to a sibling */
+			spin_unlock(&ve->base.timeline.lock);
+			goto restart_virtual_engine;
+		}
+
+		if (rq_prio(rq) >= prio) {
+			if (last && !can_merge_rq(rq, last)) {
+				spin_unlock(&ve->base.timeline.lock);
+				return;
+			}
+
+			GEM_BUG_ON(rq->engine != &ve->base);
+			ve->request = NULL;
+			ve->base.execlists.queue_priority = INT_MIN;
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+
+			GEM_BUG_ON(rq->hw_context != &ve->context);
+			rq->engine = engine;
+
+			if (engine != ve->bound) {
+				u32 *regs = ve->context.lrc_reg_state;
+				unsigned int n;
+
+				GEM_BUG_ON(READ_ONCE(ve->context.active));
+				virtual_update_register_offsets(regs, engine);
+				ve->bound = engine;
+
+				/*
+				 * Move the bound engine to the top of the list
+				 * for future execution. We then kick this
+				 * tasklet first before checking others, so that
+				 * we preferentially reuse this set of bound
+				 * registers.
+				 */
+				for (n = 1; n < ve->count; n++) {
+					if (ve->siblings[n] == engine) {
+						swap(ve->siblings[n],
+						     ve->siblings[0]);
+						break;
+					}
+				}
+			}
+
+			__i915_request_submit(rq);
+			trace_i915_request_in(rq, port_index(port, execlists));
+			submit = true;
+			last = rq;
+		}
+
+		spin_unlock(&ve->base.timeline.lock);
+	}
+
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
@@ -2735,6 +2898,325 @@ void intel_lr_context_resume(struct drm_i915_private *i915)
 	}
 }
 
+static void virtual_engine_free(struct kref *kref)
+{
+	struct virtual_engine *ve = container_of(kref, typeof(*ve), kref);
+	unsigned int n;
+
+	GEM_BUG_ON(ve->request);
+	GEM_BUG_ON(ve->context.active);
+
+	for (n = 0; n < ve->count; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct rb_node *node = &ve->nodes[sibling->id].rb;
+
+		if (RB_EMPTY_NODE(node))
+			continue;
+
+		spin_lock_irq(&sibling->timeline.lock);
+
+		if (!RB_EMPTY_NODE(node))
+			rb_erase_cached(node, &sibling->execlists.virtual);
+
+		spin_unlock_irq(&sibling->timeline.lock);
+	}
+	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+
+	if (ve->context.state)
+		execlists_context_destroy(&ve->context);
+
+	intel_engine_cleanup_scratch(&ve->base);
+	i915_timeline_fini(&ve->base.timeline);
+	kfree(ve);
+}
+
+static void virtual_context_unpin(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+
+	execlists_context_unpin(ce);
+
+	kref_put(&ve->kref, virtual_engine_free);
+}
+
+static const struct intel_context_ops virtual_context_ops = {
+	.unpin = virtual_context_unpin,
+};
+
+static struct intel_context *
+virtual_context_pin(struct intel_engine_cs *engine,
+		    struct i915_gem_context *ctx)
+{
+	struct virtual_engine *ve = to_virtual_engine(engine);
+	struct intel_context *ce = &ve->context;
+
+	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
+
+	if (likely(ce->pin_count++))
+		return ce;
+	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
+
+	kref_get(&ve->kref);
+	ce->ops = &virtual_context_ops;
+
+	if (!ve->bound)
+		ve->bound = ve->siblings[0];
+
+	return __execlists_context_pin(ve->bound, ctx, ce);
+}
+
+static void virtual_submission_tasklet(unsigned long data)
+{
+	struct virtual_engine * const ve = (struct virtual_engine *)data;
+	unsigned int n;
+	int prio;
+
+	prio = READ_ONCE(ve->base.execlists.queue_priority);
+	if (prio == INT_MIN)
+		return;
+
+	local_irq_disable();
+	for (n = 0; READ_ONCE(ve->request) && n < ve->count; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct ve_node * const node = &ve->nodes[sibling->id];
+		struct rb_node **parent, *rb;
+		bool first;
+
+		spin_lock(&sibling->timeline.lock);
+
+		if (!RB_EMPTY_NODE(&node->rb)) {
+			first = rb_first_cached(&sibling->execlists.virtual) == &node->rb;
+			if (prio == node->prio || (prio > node->prio && first))
+				goto submit_engine;
+
+			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
+		}
+
+		rb = NULL;
+		first = true;
+		parent = &sibling->execlists.virtual.rb_root.rb_node;
+		while (*parent) {
+			struct ve_node *other;
+
+			rb = *parent;
+			other = rb_entry(rb, typeof(*other), rb);
+			if (prio > other->prio) {
+				parent = &rb->rb_left;
+			} else {
+				parent = &rb->rb_right;
+				first = false;
+			}
+		}
+
+		rb_link_node(&node->rb, rb, parent);
+		rb_insert_color_cached(&node->rb,
+				       &sibling->execlists.virtual,
+				       first);
+
+submit_engine:
+		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
+		node->prio = prio;
+		if (first && prio > sibling->execlists.queue_priority)
+			tasklet_hi_schedule(&sibling->execlists.tasklet);
+
+		spin_unlock(&sibling->timeline.lock);
+	}
+	local_irq_enable();
+}
+
+static void virtual_submit_request(struct i915_request *request)
+{
+	struct virtual_engine *ve = to_virtual_engine(request->engine);
+
+	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
+
+	GEM_BUG_ON(ve->request);
+	ve->base.execlists.queue_priority = rq_prio(request);
+	WRITE_ONCE(ve->request, request);
+
+	tasklet_schedule(&ve->base.execlists.tasklet);
+}
+
+struct intel_engine_cs *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count)
+{
+	struct virtual_engine *ve;
+	unsigned int n;
+	int err;
+
+	if (!count)
+		return ERR_PTR(-EINVAL);
+
+	ve = kzalloc(sizeof(*ve) + count * sizeof(*ve->siblings), GFP_KERNEL);
+	if (!ve)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&ve->kref);
+	ve->base.i915 = ctx->i915;
+	ve->base.id = -1;
+	ve->base.class = OTHER_CLASS;
+	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
+	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
+
+	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
+	i915_timeline_init(ctx->i915, &ve->base.timeline, ve->base.name);
+	lockdep_set_subclass(&ve->base.timeline.lock, TIMELINE_VIRTUAL);
+
+	err = intel_engine_create_scratch(&ve->base, 4096);
+	if (err)
+		goto err_put;
+
+	ve->context.gem_context = ctx;
+	ve->context.owner = &ve->base;
+
+	ve->base.context_pin = virtual_context_pin;
+	ve->base.request_alloc = execlists_request_alloc;
+
+	ve->base.schedule = i915_schedule;
+	ve->base.submit_request = virtual_submit_request;
+
+	ve->base.execlists.queue_priority = INT_MIN;
+	tasklet_init(&ve->base.execlists.tasklet,
+		     virtual_submission_tasklet,
+		     (unsigned long)ve);
+
+	ve->count = count;
+	for (n = 0; n < count; n++) {
+		struct intel_engine_cs *sibling = siblings[n];
+
+		ve->siblings[n] = sibling;
+
+		if (sibling->execlists.tasklet.func != execlists_submission_tasklet) {
+			err = -ENODEV;
+			ve->count = n;
+			goto err_put;
+		}
+
+		if (RB_EMPTY_NODE(&ve->nodes[sibling->id].rb)) {
+			err = -EINVAL;
+			ve->count = n;
+			goto err_put;
+		}
+
+		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
+
+		if (ve->base.class != OTHER_CLASS) {
+			if (ve->base.class != sibling->class) {
+				err = -EINVAL;
+				ve->count = n;
+				goto err_put;
+			}
+			continue;
+		}
+
+		ve->base.class = sibling->class;
+		snprintf(ve->base.name, sizeof(ve->base.name),
+			 "v%dx%d", ve->base.class, count);
+		ve->base.context_size = sibling->context_size;
+
+		ve->base.emit_bb_start = sibling->emit_bb_start;
+		ve->base.emit_flush = sibling->emit_flush;
+		ve->base.emit_breadcrumb = sibling->emit_breadcrumb;
+		ve->base.emit_breadcrumb_sz = sibling->emit_breadcrumb_sz;
+	}
+
+	return &ve->base;
+
+err_put:
+	virtual_engine_free(&ve->kref);
+	return ERR_PTR(err);
+}
+
+void intel_virtual_engine_put(struct intel_engine_cs *engine)
+{
+	if (!engine)
+		return;
+
+	kref_put(&to_virtual_engine(engine)->kref, virtual_engine_free);
+}
+
+void intel_execlists_show_requests(struct intel_engine_cs *engine,
+				   struct drm_printer *m,
+				   void (*show_request)(struct drm_printer *m,
+							struct i915_request *rq,
+							const char *prefix),
+				   int max)
+{
+	const struct intel_engine_execlists *execlists = &engine->execlists;
+	struct i915_request *rq, *last;
+	struct rb_node *rb;
+	int count;
+
+	spin_lock(&engine->timeline.lock);
+
+	last = NULL;
+	count = 0;
+	list_for_each_entry(rq, &engine->timeline.requests, link) {
+		if (count++ < max - 1)
+			show_request(m, rq, "\t\tE ");
+		else
+			last = rq;
+	}
+	if (last) {
+		if (count > max) {
+			drm_printf(m,
+				   "\t\t...skipping %d executing requests...\n",
+				   count - max);
+		}
+		show_request(m, last, "\t\tE ");
+	}
+
+	last = NULL;
+	count = 0;
+	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
+	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		int i;
+
+		priolist_for_each_request(rq, p, i) {
+			if (count++ < max - 1)
+				show_request(m, rq, "\t\tQ ");
+			else
+				last = rq;
+		}
+	}
+	if (last) {
+		if (count > max) {
+			drm_printf(m,
+				   "\t\t...skipping %d queued requests...\n",
+				   count - max);
+		}
+		show_request(m, last, "\t\tQ ");
+	}
+
+	last = NULL;
+	count = 0;
+	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq = READ_ONCE(ve->request);
+
+		if (rq) {
+			if (count++ < max - 1)
+				show_request(m, rq, "\t\tV ");
+			else
+				last = rq;
+		}
+	}
+	if (last) {
+		if (count > max) {
+			drm_printf(m,
+				   "\t\t...skipping %d virtual requests...\n",
+				   count - max);
+		}
+		show_request(m, last, "\t\tV ");
+	}
+
+	spin_unlock(&engine->timeline.lock);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/intel_lrc.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index f5a5502ecf70..1d96d3bbca3c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -97,11 +97,25 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
  */
 #define LRC_HEADER_PAGES LRC_PPHWSP_PN
 
+struct drm_printer;
+
 struct drm_i915_private;
 struct i915_gem_context;
 
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
-
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
 
+struct intel_engine_cs *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count);
+void intel_virtual_engine_put(struct intel_engine_cs *engine);
+
+void intel_execlists_show_requests(struct intel_engine_cs *engine,
+				   struct drm_printer *m,
+				   void (*show_request)(struct drm_printer *m,
+							struct i915_request *rq,
+							const char *prefix),
+				   int max);
+
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f6ec48a75a69..ec772d836f49 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -311,6 +311,7 @@ struct intel_engine_execlists {
 	 * @queue: queue of requests, in priority lists
 	 */
 	struct rb_root_cached queue;
+	struct rb_root_cached virtual;
 
 	/**
 	 * @csb_read: control register for Context Switch buffer
@@ -598,6 +599,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
+#define I915_ENGINE_IS_VIRTUAL       BIT(3)
 	unsigned int flags;
 
 	/*
@@ -680,6 +682,12 @@ static inline bool __execlists_need_preempt(int prio, int last)
 	return prio > max(0, last);
 }
 
+static inline bool
+intel_engine_is_virtual(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_IS_VIRTUAL;
+}
+
 static inline void
 execlists_set_active(struct intel_engine_execlists *execlists,
 		     unsigned int bit)
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index b6e11b58b3c9..0fb7a6f9aedb 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -4,6 +4,8 @@
  * Copyright © 2018 Intel Corporation
  */
 
+#include <linux/prime_numbers.h>
+
 #include "../i915_reset.h"
 
 #include "../i915_selftest.h"
@@ -581,6 +583,219 @@ static int live_preempt_hang(void *arg)
 	return err;
 }
 
+struct live_test {
+	struct drm_i915_private *i915;
+	const char *func;
+	const char *name;
+
+	unsigned int reset_count;
+	bool wedge;
+};
+
+static int begin_live_test(struct live_test *t,
+			   struct drm_i915_private *i915,
+			   const char *func,
+			   const char *name)
+{
+	t->i915 = i915;
+	t->func = func;
+	t->name = name;
+
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		return -EIO;
+
+	i915->gpu_error.missed_irq_rings = 0;
+	t->reset_count = i915_reset_count(&i915->gpu_error);
+
+	return 0;
+}
+
+static int end_live_test(struct live_test *t)
+{
+	struct drm_i915_private *i915 = t->i915;
+
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		return -EIO;
+
+	if (t->reset_count != i915_reset_count(&i915->gpu_error)) {
+		pr_err("%s(%s): GPU was reset %d times!\n",
+		       t->func, t->name,
+		       i915_reset_count(&i915->gpu_error) - t->reset_count);
+		return -EIO;
+	}
+
+	if (i915->gpu_error.missed_irq_rings) {
+		pr_err("%s(%s): Missed interrupts on engines %lx\n",
+		       t->func, t->name, i915->gpu_error.missed_irq_rings);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int nop_virtual_engine(struct drm_i915_private *i915,
+			      struct intel_engine_cs **siblings,
+			      unsigned int nsibling,
+			      unsigned int nctx,
+			      unsigned int flags)
+#define CHAIN BIT(0)
+{
+	IGT_TIMEOUT(end_time);
+	struct i915_request *request[16];
+	struct i915_gem_context *ctx[16];
+	struct intel_engine_cs *ve[16];
+	unsigned long n, prime, nc;
+	ktime_t times[2] = {};
+	struct live_test t;
+	int err;
+
+	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
+
+	for (n = 0; n < nctx; n++) {
+		ctx[n] = kernel_context(i915);
+		if (!ctx[n])
+			return -ENOMEM;
+
+		ve[n] = intel_execlists_create_virtual(ctx[n],
+						       siblings, nsibling);
+		if (IS_ERR(ve[n]))
+			return PTR_ERR(ve[n]);
+	}
+
+	err = begin_live_test(&t, i915, __func__, ve[0]->name);
+	if (err)
+		goto out;
+
+	for_each_prime_number_from(prime, 1, 8192) {
+		times[1] = ktime_get_raw();
+
+		if (flags & CHAIN) {
+			for (nc = 0; nc < nctx; nc++) {
+				for (n = 0; n < prime; n++) {
+					request[nc] =
+						i915_request_alloc(ve[nc], ctx[nc]);
+					if (IS_ERR(request[nc])) {
+						err = PTR_ERR(request[nc]);
+						goto out;
+					}
+
+					i915_request_add(request[nc]);
+				}
+			}
+		} else {
+			for (n = 0; n < prime; n++) {
+				for (nc = 0; nc < nctx; nc++) {
+					request[nc] =
+						i915_request_alloc(ve[nc], ctx[nc]);
+					if (IS_ERR(request[nc])) {
+						err = PTR_ERR(request[nc]);
+						goto out;
+					}
+
+					i915_request_add(request[nc]);
+				}
+			}
+		}
+
+		for (nc = 0; nc < nctx; nc++) {
+			if (i915_request_wait(request[nc],
+					      I915_WAIT_LOCKED,
+					      HZ / 10) < 0) {
+				pr_err("%s(%s): wait for %llx:%d timed out\n",
+				       __func__, ve[0]->name,
+				       request[nc]->fence.context,
+				       request[nc]->fence.seqno);
+
+				GEM_TRACE("%s(%s) failed at request %llx:%d\n",
+					  __func__, ve[0]->name,
+					  request[nc]->fence.context,
+					  request[nc]->fence.seqno);
+				GEM_TRACE_DUMP();
+				i915_gem_set_wedged(i915);
+				break;
+			}
+		}
+
+		times[1] = ktime_sub(ktime_get_raw(), times[1]);
+		if (prime == 1)
+			times[0] = times[1];
+
+		if (__igt_timeout(end_time, NULL))
+			break;
+	}
+
+	err = end_live_test(&t);
+	if (err)
+		goto out;
+
+	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
+		nctx, ve[0]->name, ktime_to_ns(times[0]),
+		prime, div64_u64(ktime_to_ns(times[1]), prime));
+
+out:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	for (nc = 0; nc < nctx; nc++) {
+		intel_virtual_engine_put(ve[nc]);
+		kernel_context_close(ctx[nc]);
+	}
+	return err;
+}
+
+static int live_virtual_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned int class, inst;
+	int err = -ENODEV;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for_each_engine(engine, i915, id) {
+		err = nop_virtual_engine(i915, &engine, 1, 1, 0);
+		if (err) {
+			pr_err("Failed to wrap engine %s: err=%d\n",
+			       engine->name, err);
+			goto out_unlock;
+		}
+	}
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		int nsibling, n;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		for (n = 1; n <= nsibling + 1; n++) {
+			err = nop_virtual_engine(i915, siblings, nsibling,
+						 n, 0);
+			if (err)
+				goto out_unlock;
+		}
+
+		err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
+		if (err)
+			goto out_unlock;
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -588,6 +803,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_preempt),
 		SUBTEST(live_late_preempt),
 		SUBTEST(live_preempt_hang),
+		SUBTEST(live_virtual_engine),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index d41b4c673af4..6dd2a89410e8 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1523,14 +1523,41 @@ struct drm_i915_gem_context_param {
  * default settings.
  *
  * See struct i915_context_param_engines.
+ *
+ * Extensions:
+ *   i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
  */
 #define I915_CONTEXT_PARAM_ENGINES	0x7
 
 	__u64 value;
 };
 
+/*
+ * i915_context_engines_load_balance:
+ *
+ * Enable load balancing across this set of engines.
+ *
+ * Into the I915_EXEC_DEFAULT slot, a virtual engine is created that when
+ * used will proxy the execbuffer request onto one of the set of engines
+ * in such a way as to distribute the load evenly across the set.
+ *
+ * The set of engines must be compatible (e.g. the same HW class) as they
+ * will share the same logical GPU context and ring.
+ *
+ * The context must be defined to use a single timeline for all engines.
+ */
+struct i915_context_engines_load_balance {
+	struct i915_user_extension base;
+
+	__u64 flags; /* all undefined flags must be zero */
+	__u64 engines_mask;
+
+	__u64 mbz[4]; /* reserved for future use; must be zero */
+};
+
 struct i915_context_param_engines {
 	__u64 extensions;
+#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
 
 	struct {
 		__u32 class; /* see enum drm_i915_gem_engine_class */
-- 
2.19.0
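
For illustration, chaining the load-balance extension above onto
I915_CONTEXT_PARAM_ENGINES from userspace could look roughly like the
sketch below. This is only a sketch built from the structs in this patch:
the i915_user_extension field names, the class/instance layout of the
engines array, the engines_mask semantics, the sibling choice and the
ioctl plumbing are assumptions for the example, not something this patch
defines (and, per the comment above, the context would also need to use a
single timeline across its engines).

#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Hypothetical helper, not part of this series. */
static void setup_video_balancer(int fd, uint32_t ctx_id)
{
	/* Assumed: the mask selects the first two slots of engines[] below. */
	struct i915_context_engines_load_balance balance = {
		.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
		.engines_mask = 0x3,
	};
	struct {
		struct i915_context_param_engines base;
		struct { uint32_t class, instance; } engines[2]; /* assumed layout */
	} set = {
		.base.extensions = (uintptr_t)&balance,
		.engines = {
			{ I915_ENGINE_CLASS_VIDEO, 0 }, /* assumed siblings */
			{ I915_ENGINE_CLASS_VIDEO, 1 },
		},
	};
	struct drm_i915_gem_context_param param = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof(set),
		.value = (uintptr_t)&set,
	};

	/* Assumed plumbing: apply the engine map to an existing context. */
	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &param);
}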

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 106+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/40] drm: Use default dma_fence hooks where possible for null syncobj
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (38 preceding siblings ...)
  2018-09-19 19:55 ` [PATCH 40/40] drm/i915: Load balancing across a virtual engine Chris Wilson
@ 2018-09-19 21:54 ` Patchwork
  2018-09-19 22:08 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (2 subsequent siblings)
  42 siblings, 0 replies; 106+ messages in thread
From: Patchwork @ 2018-09-19 21:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/40] drm: Use default dma_fence hooks where possible for null syncobj
URL   : https://patchwork.freedesktop.org/series/49923/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
db53c1c9bc91 drm: Use default dma_fence hooks where possible for null syncobj
77e2baa66cf6 drm: Fix syncobj handing of schedule() returning 0
0466e1b99db2 drm/i915/selftests: Live tests emit requests and so require rpm
849b9a5e54f4 drm/i915: Park the GPU on module load
22997135d8ef drm/i915: Handle incomplete Z_FINISH for compressed error states
4eb03b08ed0a drm/i915: Clear the error PTE just once on finish
0125ac1db1a3 drm/i915: Cache the error string
dda3a9df7889 drm/i915/execlists: Avoid kicking priority on the current context
8bec72d91b02 drm/i915/selftests: Free the batch along the contexts error path
0af582c21e60 drm/i915/selftests: Basic stress test for rapid context switching
f883600aab97 drm/i915/execlists: Onion unwind for logical_ring_init() failure
54a30e5f6ba6 drm/i915/execlists: Assert the queue is non-empty on unsubmitting
-:9: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#9: 
<0>[  531.960431] drv_self-4806    7.... 527402570us : intel_gpu_reset: engine_mask=1, ret=0, retry=0

total: 0 errors, 1 warnings, 0 checks, 7 lines checked
d949a956393f drm/i915: Reserve some priority bits for internal use
d4dade02ac2f drm/i915: Combine multiple internal plists into the same i915_priolist bucket
-:163: WARNING:FUNCTION_ARGUMENTS: function definition argument 'pl' should also have an identifier name
#163: FILE: drivers/gpu/drm/i915/intel_lrc.c:367:
+	struct list_head *uninitialized_var(pl);

-:281: WARNING:FUNCTION_ARGUMENTS: function definition argument 'pl' should also have an identifier name
#281: FILE: drivers/gpu/drm/i915/intel_lrc.c:1180:
+	struct list_head *uninitialized_var(pl);

-:310: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'plist' - possible side-effects?
#310: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:199:
+#define priolist_for_each_request(it, plist, idx) \
+	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
+		list_for_each_entry(it, &(plist)->requests[idx], sched.link)

-:310: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'idx' - possible side-effects?
#310: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:199:
+#define priolist_for_each_request(it, plist, idx) \
+	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
+		list_for_each_entry(it, &(plist)->requests[idx], sched.link)

-:314: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'plist' - possible side-effects?
#314: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:203:
+#define priolist_for_each_request_consume(it, n, plist, idx) \
+	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
+		list_for_each_entry_safe(it, n, \
+					 &(plist)->requests[idx - 1], \
+					 sched.link)

-:314: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'idx' - possible side-effects?
#314: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:203:
+#define priolist_for_each_request_consume(it, n, plist, idx) \
+	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
+		list_for_each_entry_safe(it, n, \
+					 &(plist)->requests[idx - 1], \
+					 sched.link)

total: 0 errors, 2 warnings, 4 checks, 272 lines checked
ab69b94ad492 drm/i915: Priority boost for new clients
9eb955168501 drm/i915: Pull scheduling under standalone lock
-:145: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#145: 
new file mode 100644

-:150: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#150: FILE: drivers/gpu/drm/i915/i915_scheduler.c:1:
+/*

-:393: WARNING:FUNCTION_ARGUMENTS: function definition argument 'pl' should also have an identifier name
#393: FILE: drivers/gpu/drm/i915/i915_scheduler.c:244:
+	struct list_head *uninitialized_var(pl);

total: 0 errors, 3 warnings, 0 checks, 870 lines checked
1fc76a0359e0 drm/i915: Priority boost for waiting clients
8be0aefc32e3 drm/i915: Report the number of closed vma held by each context in debugfs
-:43: WARNING:LONG_LINE: line over 100 characters
#43: FILE: drivers/gpu/drm/i915/i915_debugfs.c:350:
+		seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound, %llu closed)\n", \

total: 0 errors, 1 warnings, 0 checks, 169 lines checked
05be2e2d6f16 drm/i915: Remove debugfs/i915_ppgtt_info
5985e77c4002 drm/i915: Track all held rpm wakerefs
-:105: CHECK:UNCOMMENTED_DEFINITION: spinlock_t definition without comment
#105: FILE: drivers/gpu/drm/i915/i915_drv.h:1294:
+	spinlock_t debug_lock;

total: 0 errors, 0 warnings, 1 checks, 571 lines checked
87bdf5a67717 drm/i915: Markup paired operations on wakerefs
-:707: WARNING:NEW_TYPEDEFS: do not add new typedefs
#707: FILE: drivers/gpu/drm/i915/i915_drv.h:131:
+typedef depot_stack_handle_t intel_wakeref_t;

total: 0 errors, 1 warnings, 0 checks, 2291 lines checked
78bfd7e76b95 drm/i915: Syntatic sugar for using intel_runtime_pm
-:491: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'i915' - possible side-effects?
#491: FILE: drivers/gpu/drm/i915/intel_drv.h:2066:
+#define with_intel_runtime_pm(i915, wf) \
+	for (wf = intel_runtime_pm_get(i915); wf; \
+	     intel_runtime_pm_put(i915, wf), wf = 0)

-:491: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'wf' - possible side-effects?
#491: FILE: drivers/gpu/drm/i915/intel_drv.h:2066:
+#define with_intel_runtime_pm(i915, wf) \
+	for (wf = intel_runtime_pm_get(i915); wf; \
+	     intel_runtime_pm_put(i915, wf), wf = 0)

-:495: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'i915' - possible side-effects?
#495: FILE: drivers/gpu/drm/i915/intel_drv.h:2070:
+#define with_intel_runtime_pm_if_in_use(i915, wf) \
+	for (wf = intel_runtime_pm_get_if_in_use(i915); wf; \
+	     intel_runtime_pm_put(i915, wf), wf = 0)

-:495: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'wf' - possible side-effects?
#495: FILE: drivers/gpu/drm/i915/intel_drv.h:2070:
+#define with_intel_runtime_pm_if_in_use(i915, wf) \
+	for (wf = intel_runtime_pm_get_if_in_use(i915); wf; \
+	     intel_runtime_pm_put(i915, wf), wf = 0)

total: 0 errors, 0 warnings, 4 checks, 694 lines checked
01c1f8ef9e05 drm/i915: Markup paired operations on display power domains
5df99971b669 drm/i915: Track the wakeref used to initialise display power domains
-:207: WARNING:LINE_SPACING: Missing a blank line after declarations
#207: FILE: drivers/gpu/drm/i915/intel_runtime_pm.c:4116:
+	struct i915_power_domains *power_domains = &i915->power_domains;
+	intel_wakeref_t wakeref __maybe_unused =

total: 0 errors, 1 warnings, 0 checks, 320 lines checked
0d6a6f256a4c drm/i915/dp: Markup pps lock power well
-:52: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'dp' - possible side-effects?
#52: FILE: drivers/gpu/drm/i915/intel_dp.c:682:
+#define with_pps_lock(dp, wf) \
+	for (wf = pps_lock(dp); wf; wf = pps_unlock(dp, wf))

-:52: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'wf' - possible side-effects?
#52: FILE: drivers/gpu/drm/i915/intel_dp.c:682:
+#define with_pps_lock(dp, wf) \
+	for (wf = pps_lock(dp); wf; wf = pps_unlock(dp, wf))

total: 0 errors, 0 warnings, 2 checks, 425 lines checked
a373ff7a3fec drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice
0f9a343f4e3e drm/i915: Mark up Ironlake ips with rpm wakerefs
-:210: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#210: FILE: drivers/gpu/drm/i915/intel_pm.c:7983:
+	chipset_val = graphics_val = 0;

total: 0 errors, 0 warnings, 1 checks, 318 lines checked
df26a76763aa drm/i915: Serialise concurrent calls to i915_gem_set_wedged()
-:48: WARNING:MEMORY_BARRIER: memory barrier without comment
#48: FILE: drivers/gpu/drm/i915/i915_gem.c:3404:
+	smp_mb__before_atomic();

total: 0 errors, 1 warnings, 0 checks, 107 lines checked
d02f5463e184 drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex
d3cbebb877f1 drm/i915: Pull all the reset functionality together into i915_reset.c
-:1125: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#1125: 
new file mode 100644

-:1130: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#1130: FILE: drivers/gpu/drm/i915/i915_reset.c:1:
+/*

-:1274: WARNING:TYPO_SPELLING: 'acknowledgement' may be misspelled - perhaps 'acknowledgment'?
#1274: FILE: drivers/gpu/drm/i915/i915_reset.c:145:
+	/* Assert reset for at least 20 usec, and wait for acknowledgement. */

-:1934: WARNING:MEMORY_BARRIER: memory barrier without comment
#1934: FILE: drivers/gpu/drm/i915/i915_reset.c:805:
+	smp_mb__before_atomic();

-:2259: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'W' - possible side-effects?
#2259: FILE: drivers/gpu/drm/i915/i915_reset.c:1130:
+#define i915_wedge_on_timeout(W, DEV, TIMEOUT)				\
+	for (__init_wedge((W), (DEV), (TIMEOUT), __func__);		\
+	     (W)->i915;							\
+	     __fini_wedge((W)))

-:2270: WARNING:STATIC_CONST_CHAR_ARRAY: char * array declaration might be better as static const
#2270: FILE: drivers/gpu/drm/i915/i915_reset.c:1141:
+	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };

-:2271: WARNING:STATIC_CONST_CHAR_ARRAY: char * array declaration might be better as static const
#2271: FILE: drivers/gpu/drm/i915/i915_reset.c:1142:
+	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };

-:2272: WARNING:STATIC_CONST_CHAR_ARRAY: char * array declaration might be better as static const
#2272: FILE: drivers/gpu/drm/i915/i915_reset.c:1143:
+	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };

-:2468: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#2468: FILE: drivers/gpu/drm/i915/i915_reset.h:1:
+/*

total: 0 errors, 8 warnings, 1 checks, 2963 lines checked
130d06e20fbd drm/i915: Make all GPU resets atomic
-:23: CHECK:USLEEP_RANGE: usleep_range is preferred over udelay; see Documentation/timers/timers-howto.txt
#23: FILE: drivers/gpu/drm/i915/i915_reset.c:147:
+	udelay(50);

-:29: CHECK:USLEEP_RANGE: usleep_range is preferred over udelay; see Documentation/timers/timers-howto.txt
#29: FILE: drivers/gpu/drm/i915/i915_reset.c:152:
+	udelay(50);

total: 0 errors, 0 warnings, 2 checks, 111 lines checked
a93118d89924 drm/i915: Introduce the i915_user_extension_method
-:57: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#57: 
new file mode 100644

-:62: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#62: FILE: drivers/gpu/drm/i915/i915_user_extensions.c:1:
+/*

-:105: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#105: FILE: drivers/gpu/drm/i915/i915_user_extensions.h:1:
+/*

-:139: WARNING:TYPO_SPELLING: 'compatibilty' may be misspelled - perhaps 'compatibility'?
#139: FILE: include/uapi/drm/i915_drm.h:71:
+ * and backward compatibilty, is to use a list of optional structs to

total: 0 errors, 4 warnings, 0 checks, 90 lines checked
4adc1d3b2073 drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone()
-:364: WARNING:LINE_SPACING: Missing a blank line after declarations
#364: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:544:
+		struct drm_file *file;
+		IGT_TIMEOUT(end_time);

-:448: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#448: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:613:
+		ncontexts = dw = 0;

-:567: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#567: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:732:
+		ncontexts = dw = 0;

-:676: WARNING:LONG_LINE: line over 100 characters
#676: FILE: include/uapi/drm/i915_drm.h:389:
+#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_v2)

-:676: WARNING:SPACING: space prohibited between function name and open parenthesis '('
#676: FILE: include/uapi/drm/i915_drm.h:389:
+#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_v2)

-:676: ERROR:COMPLEX_MACRO: Macros with complex values should be enclosed in parentheses
#676: FILE: include/uapi/drm/i915_drm.h:389:
+#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_v2)

total: 1 errors, 3 warnings, 2 checks, 591 lines checked
3649c8fb7536 drm/i915: Allow contexts to share a single timeline across all engines
61e7a8b16c14 drm/i915: Fix I915_EXEC_RING_MASK
6f508694daf5 drm/i915: Re-arrange execbuf so context is known before engine
20ccfc746d06 drm/i915: Allow a context to define its set of engines
6492191779dd drm/i915/execlists: Flush the CS events before unpinning
-:14: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#14: 
References: 60367132a214 ("drm/i915: Avoid use-after-free of ctx in request tracepoints")

-:14: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 60367132a214 ("drm/i915: Avoid use-after-free of ctx in request tracepoints")'
#14: 
References: 60367132a214 ("drm/i915: Avoid use-after-free of ctx in request tracepoints")

total: 1 errors, 1 warnings, 0 checks, 70 lines checked
e43ea3fafed3 drm/i915/execlists: Refactor out can_merge_rq()
6e322a6b0558 drm/i915: Load balancing across a virtual engine

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [01/40] drm: Use default dma_fence hooks where possible for null syncobj
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (39 preceding siblings ...)
  2018-09-19 21:54 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/40] drm: Use default dma_fence hooks where possible for null syncobj Patchwork
@ 2018-09-19 22:08 ` Patchwork
  2018-09-19 22:17 ` ✗ Fi.CI.BAT: failure " Patchwork
  2018-09-20 13:34 ` [PATCH 01/40] " Tvrtko Ursulin
  42 siblings, 0 replies; 106+ messages in thread
From: Patchwork @ 2018-09-19 22:08 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/40] drm: Use default dma_fence hooks where possible for null syncobj
URL   : https://patchwork.freedesktop.org/series/49923/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm: Use default dma_fence hooks where possible for null syncobj
Okay!

Commit: drm: Fix syncobj handing of schedule() returning 0
Okay!

Commit: drm/i915/selftests: Live tests emit requests and so require rpm
Okay!

Commit: drm/i915: Park the GPU on module load
Okay!

Commit: drm/i915: Handle incomplete Z_FINISH for compressed error states
Okay!

Commit: drm/i915: Clear the error PTE just once on finish
Okay!

Commit: drm/i915: Cache the error string
+drivers/gpu/drm/i915/i915_gpu_error.c:854:25: warning: Using plain integer as NULL pointer
+drivers/gpu/drm/i915/i915_gpu_error.c:918:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gpu_error.c:918:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_sysfs.c:531:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_sysfs.c:531:23: warning: expression using sizeof(void)

Commit: drm/i915/execlists: Avoid kicking priority on the current context
Okay!

Commit: drm/i915/selftests: Free the batch along the contexts error path
Okay!

Commit: drm/i915/selftests: Basic stress test for rapid context switching
+./include/linux/slab.h:631:13: error: undefined identifier '__builtin_mul_overflow'
+./include/linux/slab.h:631:13: warning: call with no type!

Commit: drm/i915/execlists: Onion unwind for logical_ring_init() failure
Okay!

Commit: drm/i915/execlists: Assert the queue is non-empty on unsubmitting
Okay!

Commit: drm/i915: Reserve some priority bits for internal use
Okay!

Commit: drm/i915: Combine multiple internal plists into the same i915_priolist bucket
Okay!

Commit: drm/i915: Priority boost for new clients
Okay!

Commit: drm/i915: Pull scheduling under standalone lock
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Priority boost for waiting clients
Okay!

Commit: drm/i915: Report the number of closed vma held by each context in debugfs
Okay!

Commit: drm/i915: Remove debugfs/i915_ppgtt_info
Okay!

Commit: drm/i915: Track all held rpm wakerefs
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3718:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3725:16: warning: expression using sizeof(void)

Commit: drm/i915: Markup paired operations on wakerefs
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3725:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3729:16: warning: expression using sizeof(void)

Commit: drm/i915: Syntatic sugar for using intel_runtime_pm
Okay!

Commit: drm/i915: Markup paired operations on display power domains
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3729:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3731:16: warning: expression using sizeof(void)

Commit: drm/i915: Track the wakeref used to initialise display power domains
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3731:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3733:16: warning: expression using sizeof(void)

Commit: drm/i915/dp: Markup pps lock power well
Okay!

Commit: drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice
Okay!

Commit: drm/i915: Mark up Ironlake ips with rpm wakerefs
Okay!

Commit: drm/i915: Serialise concurrent calls to i915_gem_set_wedged()
Okay!

Commit: drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex
Okay!

Commit: drm/i915: Pull all the reset functionality together into i915_reset.c
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3733:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3702:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Make all GPU resets atomic
Okay!

Commit: drm/i915: Introduce the i915_user_extension_method
Okay!

Commit: drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone()
-O:drivers/gpu/drm/i915/selftests/i915_gem_context.c:608:25: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/selftests/i915_gem_context.c:608:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:616:33: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:616:33: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:735:33: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:735:33: warning: expression using sizeof(void)

Commit: drm/i915: Allow contexts to share a single timeline across all engines
Okay!

Commit: drm/i915: Fix I915_EXEC_RING_MASK
Okay!

Commit: drm/i915: Re-arrange execbuf so context is known before engine
Okay!

Commit: drm/i915: Allow a context to define its set of engines
+./include/linux/slab.h:631:13: error: not a function <noident>

Commit: drm/i915/execlists: Flush the CS events before unpinning
Okay!

Commit: drm/i915/execlists: Refactor out can_merge_rq()
Okay!

Commit: drm/i915: Load balancing across a virtual engine
+drivers/gpu/drm/i915/i915_gem_context.c:981:17: warning: cast removes address space of expression
+drivers/gpu/drm/i915/intel_lrc.c:671:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/intel_lrc.c:671:24: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/intel_ringbuffer.h:680:23: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/intel_ringbuffer.h:680:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/intel_ringbuffer.h:682:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/intel_ringbuffer.h:682:23: warning: expression using sizeof(void)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [01/40] drm: Use default dma_fence hooks where possible for null syncobj
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (40 preceding siblings ...)
  2018-09-19 22:08 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-09-19 22:17 ` Patchwork
  2018-09-20 13:34 ` [PATCH 01/40] " Tvrtko Ursulin
  42 siblings, 0 replies; 106+ messages in thread
From: Patchwork @ 2018-09-19 22:17 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/40] drm: Use default dma_fence hooks where possible for null syncobj
URL   : https://patchwork.freedesktop.org/series/49923/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4844 -> Patchwork_10230 =

== Summary - FAILURE ==

  Serious unknown changes coming with Patchwork_10230 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_10230, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/49923/revisions/1/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_10230:

  === IGT changes ===

    ==== Possible regressions ====

    igt@drv_selftest@live_contexts:
      fi-icl-u:           PASS -> DMESG-FAIL

    igt@drv_selftest@live_gem:
      fi-skl-guc:         PASS -> DMESG-FAIL
      fi-kbl-guc:         PASS -> DMESG-FAIL
      fi-cfl-guc:         PASS -> DMESG-FAIL

    
    ==== Warnings ====

    igt@pm_rpm@module-reload:
      fi-hsw-4770r:       SKIP -> PASS

    
== Known issues ==

  Here are the changes found in Patchwork_10230 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@kms_pipe_crc_basic@nonblocking-crc-pipe-a-frame-sequence:
      fi-byt-clapper:     PASS -> FAIL (fdo#107362, fdo#103191)

    igt@kms_pipe_crc_basic@read-crc-pipe-b:
      fi-byt-clapper:     PASS -> FAIL (fdo#107362)

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
      fi-bdw-samus:       PASS -> INCOMPLETE (fdo#107773)

    
    ==== Possible fixes ====

    igt@drv_module_reload@basic-reload:
      fi-glk-j4005:       DMESG-WARN (fdo#106248, fdo#106725) -> PASS

    igt@drv_module_reload@basic-reload-inject:
      fi-hsw-4770r:       DMESG-WARN (fdo#107425, fdo#107924) -> PASS

    igt@drv_selftest@mock_hugepages:
      fi-bwr-2160:        DMESG-FAIL (fdo#107930) -> PASS

    igt@gem_exec_suspend@basic-s4-devices:
      fi-kbl-7500u:       DMESG-WARN (fdo#107139, fdo#105128) -> PASS

    igt@kms_flip@basic-flip-vs-modeset:
      fi-glk-j4005:       DMESG-WARN (fdo#106000) -> PASS

    igt@pm_rpm@module-reload:
      fi-glk-j4005:       DMESG-WARN (fdo#107726) -> PASS

    
  fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
  fdo#105128 https://bugs.freedesktop.org/show_bug.cgi?id=105128
  fdo#106000 https://bugs.freedesktop.org/show_bug.cgi?id=106000
  fdo#106248 https://bugs.freedesktop.org/show_bug.cgi?id=106248
  fdo#106725 https://bugs.freedesktop.org/show_bug.cgi?id=106725
  fdo#107139 https://bugs.freedesktop.org/show_bug.cgi?id=107139
  fdo#107362 https://bugs.freedesktop.org/show_bug.cgi?id=107362
  fdo#107425 https://bugs.freedesktop.org/show_bug.cgi?id=107425
  fdo#107726 https://bugs.freedesktop.org/show_bug.cgi?id=107726
  fdo#107773 https://bugs.freedesktop.org/show_bug.cgi?id=107773
  fdo#107924 https://bugs.freedesktop.org/show_bug.cgi?id=107924
  fdo#107930 https://bugs.freedesktop.org/show_bug.cgi?id=107930


== Participating hosts (52 -> 47) ==

  Missing    (5): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-hsw-4200u 


== Build changes ==

    * Linux: CI_DRM_4844 -> Patchwork_10230

  CI_DRM_4844: 73c0da2189956c18a0fff343bd84eb0493f81db1 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4646: d409cc6f234fbc0122c64be27ba85b5603658de5 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_10230: 6e322a6b05588195c9c122057990a333004e4b70 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

6e322a6b0558 drm/i915: Load balancing across a virtual engine
e43ea3fafed3 drm/i915/execlists: Refactor out can_merge_rq()
6492191779dd drm/i915/execlists: Flush the CS events before unpinning
20ccfc746d06 drm/i915: Allow a context to define its set of engines
6f508694daf5 drm/i915: Re-arrange execbuf so context is known before engine
61e7a8b16c14 drm/i915: Fix I915_EXEC_RING_MASK
3649c8fb7536 drm/i915: Allow contexts to share a single timeline across all engines
4adc1d3b2073 drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone()
a93118d89924 drm/i915: Introduce the i915_user_extension_method
130d06e20fbd drm/i915: Make all GPU resets atomic
d3cbebb877f1 drm/i915: Pull all the reset functionality together into i915_reset.c
d02f5463e184 drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex
df26a76763aa drm/i915: Serialise concurrent calls to i915_gem_set_wedged()
0f9a343f4e3e drm/i915: Mark up Ironlake ips with rpm wakerefs
a373ff7a3fec drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice
0d6a6f256a4c drm/i915/dp: Markup pps lock power well
5df99971b669 drm/i915: Track the wakeref used to initialise display power domains
01c1f8ef9e05 drm/i915: Markup paired operations on display power domains
78bfd7e76b95 drm/i915: Syntatic sugar for using intel_runtime_pm
87bdf5a67717 drm/i915: Markup paired operations on wakerefs
5985e77c4002 drm/i915: Track all held rpm wakerefs
05be2e2d6f16 drm/i915: Remove debugfs/i915_ppgtt_info
8be0aefc32e3 drm/i915: Report the number of closed vma held by each context in debugfs
1fc76a0359e0 drm/i915: Priority boost for waiting clients
9eb955168501 drm/i915: Pull scheduling under standalone lock
ab69b94ad492 drm/i915: Priority boost for new clients
d4dade02ac2f drm/i915: Combine multiple internal plists into the same i915_priolist bucket
d949a956393f drm/i915: Reserve some priority bits for internal use
54a30e5f6ba6 drm/i915/execlists: Assert the queue is non-empty on unsubmitting
f883600aab97 drm/i915/execlists: Onion unwind for logical_ring_init() failure
0af582c21e60 drm/i915/selftests: Basic stress test for rapid context switching
8bec72d91b02 drm/i915/selftests: Free the batch along the contexts error path
dda3a9df7889 drm/i915/execlists: Avoid kicking priority on the current context
0125ac1db1a3 drm/i915: Cache the error string
4eb03b08ed0a drm/i915: Clear the error PTE just once on finish
22997135d8ef drm/i915: Handle incomplete Z_FINISH for compressed error states
849b9a5e54f4 drm/i915: Park the GPU on module load
0466e1b99db2 drm/i915/selftests: Live tests emit requests and so require rpm
77e2baa66cf6 drm: Fix syncobj handing of schedule() returning 0
db53c1c9bc91 drm: Use default dma_fence hooks where possible for null syncobj

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10230/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 09/40] drm/i915/selftests: Free the batch along the contexts error path
  2018-09-19 19:55 ` [PATCH 09/40] drm/i915/selftests: Free the batch along the contexts error path Chris Wilson
@ 2018-09-20  8:30   ` Mika Kuoppala
  2018-09-20  8:36     ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Mika Kuoppala @ 2018-09-20  8:30 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Remember to release the batch bo reference if we hit an error trying to
> submit our MI_STORE_DWORD_IMM.
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=107979
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/selftests/i915_gem_context.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 79f9bcaf0c05..a89d31075c7a 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -195,6 +195,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
>  	i915_request_add(rq);
>  err_batch:
>  	i915_vma_unpin(batch);
> +	i915_vma_put(batch);

Looks good here, but don't we have a similar problem in gpu_fill_dw?
We release the object ref but don't release the vma when returning
ERR_PTR.

-Mika

>  err_vma:
>  	i915_vma_unpin(vma);
>  	return err;
> -- 
> 2.19.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 09/40] drm/i915/selftests: Free the batch along the contexts error path
  2018-09-20  8:30   ` Mika Kuoppala
@ 2018-09-20  8:36     ` Chris Wilson
  2018-09-20  9:19       ` Mika Kuoppala
  0 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-20  8:36 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2018-09-20 09:30:22)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Remember to release the batch bo reference if we hit an error trying to
> > submit our MI_STORE_DWORD_IMM.
> >
> > References: https://bugs.freedesktop.org/show_bug.cgi?id=107979
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/selftests/i915_gem_context.c | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> > index 79f9bcaf0c05..a89d31075c7a 100644
> > --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> > @@ -195,6 +195,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
> >       i915_request_add(rq);
> >  err_batch:
> >       i915_vma_unpin(batch);
> > +     i915_vma_put(batch);
> 
> Looks good here, but don't we have a similar problem in gpu_fill_dw?
> We release the object ref but don't release the vma when returning
> ERR_PTR.

The dirty secret is that vma don't have refs, they are owned by their
parent object (with a secondary lifetime constraint from the vm).
i915_vma_put(vma) == i915_gem_object_put(vma->obj);
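
A minimal sketch of that relationship, illustrative only and with a
made-up name so it is not mistaken for the driver's actual helper:

/* illustrative only: a vma carries no refcount of its own */
static inline void example_vma_put(struct i915_vma *vma)
{
	/* dropping the "vma ref" just drops the parent object's ref */
	i915_gem_object_put(vma->obj);
}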
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 09/40] drm/i915/selftests: Free the batch along the contexts error path
  2018-09-20  8:36     ` Chris Wilson
@ 2018-09-20  9:19       ` Mika Kuoppala
  0 siblings, 0 replies; 106+ messages in thread
From: Mika Kuoppala @ 2018-09-20  9:19 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2018-09-20 09:30:22)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > Remember to release the batch bo reference if we hit an error trying to
>> > submit our MI_STORE_DWORD_IMM.
>> >
>> > References: https://bugs.freedesktop.org/show_bug.cgi?id=107979
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > ---
>> >  drivers/gpu/drm/i915/selftests/i915_gem_context.c | 1 +
>> >  1 file changed, 1 insertion(+)
>> >
>> > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
>> > index 79f9bcaf0c05..a89d31075c7a 100644
>> > --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
>> > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
>> > @@ -195,6 +195,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
>> >       i915_request_add(rq);
>> >  err_batch:
>> >       i915_vma_unpin(batch);
>> > +     i915_vma_put(batch);
>> 
>> Looks good here, but don't we have a similar problem
>> in gpu_fill_dw? We release the object ref but don't
>> release the vma when returning ERR_PTR.
>
> The dirty secret is that vma don't have refs, they are owned by their
> parent object (with a secondary lifetime constraint from the vm).
> i915_vma_put(vma) == i915_gem_object_put(vma->obj);

Ah. Worry not, your secret is safe with me.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 10/40] drm/i915/selftests: Basic stress test for rapid context switching
  2018-09-19 19:55 ` [PATCH 10/40] drm/i915/selftests: Basic stress test for rapid context switching Chris Wilson
@ 2018-09-20 10:38   ` Mika Kuoppala
  2018-09-20 10:46     ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Mika Kuoppala @ 2018-09-20 10:38 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> We need to exercise the HW and submission paths for switching contexts
> rapidly to check that features such as execlists' wa_tail are adequate.
> Plus it's an interesting baseline latency metric.
>
> v2: Check the initial request for allocation errors
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  .../gpu/drm/i915/selftests/i915_gem_context.c | 188 ++++++++++++++++++
>  1 file changed, 188 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index a89d31075c7a..f1a57e3c3820 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -22,6 +22,8 @@
>   *
>   */
>  
> +#include <linux/prime_numbers.h>
> +
>  #include "../i915_selftest.h"
>  #include "i915_random.h"
>  #include "igt_flush_test.h"
> @@ -32,6 +34,191 @@
>  
>  #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
>  
> +struct live_test {
> +	struct drm_i915_private *i915;
> +	const char *func;
> +	const char *name;
> +
> +	unsigned int reset_count;
> +};
> +
> +static int begin_live_test(struct live_test *t,
> +			   struct drm_i915_private *i915,
> +			   const char *func,
> +			   const char *name)
> +{
> +	int err;
> +
> +	t->i915 = i915;
> +	t->func = func;
> +	t->name = name;
> +
> +	err = i915_gem_wait_for_idle(i915,
> +				     I915_WAIT_LOCKED,
> +				     MAX_SCHEDULE_TIMEOUT);
> +	if (err) {
> +		pr_err("%s(%s): failed to idle before, with err=%d!",
> +		       func, name, err);
> +		return err;
> +	}
> +
> +	i915->gpu_error.missed_irq_rings = 0;

Blunt, but it should work since we only check this in the tests where
it matters, and thus any leftovers should not matter.

> +	t->reset_count = i915_reset_count(&i915->gpu_error);
> +
> +	return 0;
> +}
> +
> +static int end_live_test(struct live_test *t)
> +{
> +	struct drm_i915_private *i915 = t->i915;
> +
> +	i915_retire_requests(i915);
> +
> +	if (wait_for(intel_engines_are_idle(i915), 10)) {
> +		pr_err("%s(%s): GPU not idle\n", t->func, t->name);
> +		return -EIO;
> +	}
> +
> +	if (t->reset_count != i915_reset_count(&i915->gpu_error)) {
> +		pr_err("%s(%s): GPU was reset %d times!\n",
> +		       t->func, t->name,
> +		       i915_reset_count(&i915->gpu_error) - t->reset_count);
> +		return -EIO;
> +	}
> +
> +	if (i915->gpu_error.missed_irq_rings) {
> +		pr_err("%s(%s): Missed interrupts on engines %lx\n",
> +		       t->func, t->name, i915->gpu_error.missed_irq_rings);
> +		return -EIO;
> +	}
> +
> +	return 0;
> +}
> +
> +static int live_nop_switch(void *arg)
> +{
> +	const unsigned int nctx = 1024;
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *engine;
> +	struct i915_gem_context **ctx;
> +	enum intel_engine_id id;
> +	struct drm_file *file;
> +	struct live_test t;
> +	unsigned long n;
> +	int err = -ENODEV;
> +
> +	/*
> +	 * Create as many contexts as we can feasibly get away with
> +	 * and check we can switch between them rapidly.
> +	 *
> +	 * Serves as very simple stress test for submission and HW switching
> +	 * between contexts.
> +	 */
> +
> +	if (!DRIVER_CAPS(i915)->has_logical_contexts)
> +		return 0;
> +
> +	file = mock_file(i915);
> +	if (IS_ERR(file))
> +		return PTR_ERR(file);
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +
> +	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
> +	if (!ctx) {
> +		err = -ENOMEM;
> +		goto out_unlock;
> +	}
> +
> +	for (n = 0; n < nctx; n++) {
> +		ctx[n] = i915_gem_create_context(i915, file->driver_priv);
> +		if (IS_ERR(ctx[n])) {
> +			err = PTR_ERR(ctx[n]);
> +			goto out_unlock;
> +		}
> +	}
> +
> +	for_each_engine(engine, i915, id) {
> +		struct i915_request *rq;
> +		unsigned long end_time, prime;
> +		ktime_t times[2] = {};
> +
> +		times[0] = ktime_get_raw();
> +		for (n = 0; n < nctx; n++) {
> +			rq = i915_request_alloc(engine, ctx[n]);
> +			if (IS_ERR(rq)) {
> +				err = PTR_ERR(rq);
> +				goto out_unlock;
> +			}
> +			i915_request_add(rq);
> +		}
> +		i915_request_wait(rq,
> +				  I915_WAIT_LOCKED,
> +				  MAX_SCHEDULE_TIMEOUT);
> +		times[1] = ktime_get_raw();
> +
> +		pr_info("Populated %d contexts on %s in %lluns\n",
> +			nctx, engine->name, ktime_to_ns(times[1] - times[0]));
> +
> +		err = begin_live_test(&t, i915, __func__, engine->name);
> +		if (err)
> +			goto out_unlock;
> +
> +		end_time = jiffies + i915_selftest.timeout_jiffies;
> +		for_each_prime_number_from(prime, 2, 8192) {
> +			times[1] = ktime_get_raw();
> +
> +			for (n = 0; n < prime; n++) {
> +				rq = i915_request_alloc(engine, ctx[n % nctx]);
> +				if (IS_ERR(rq)) {
> +					err = PTR_ERR(rq);
> +					goto out_unlock;
> +				}
> +
> +				/*
> +				 * This space is left intentionally blank.
> +				 *
> +				 * We do not actually want to perform any
> +				 * action with this request, we just want
> +				 * to measure the latency in allocation
> +				 * and submission of our breadcrumbs -
> +				 * ensuring that the bare request is sufficient
> +				 * for the system to work (i.e. proper HEAD
> +				 * tracking of the rings, interrupt handling,
> +				 * etc). It also gives us the lowest bounds
> +				 * for latency.
> +				 */
> +
> +				i915_request_add(rq);

We get the breadcrumbs in place, with the wa_tails and so on, so this
should work without any nops or bb ends.

> +			}
> +			i915_request_wait(rq,
> +					  I915_WAIT_LOCKED,
> +					  MAX_SCHEDULE_TIMEOUT);
> +
> +			times[1] = ktime_sub(ktime_get_raw(), times[1]);
> +			if (prime == 2)
> +				times[0] = times[1];
> +
> +			if (__igt_timeout(end_time, NULL))
> +				break;
> +		}
> +
> +		err = end_live_test(&t);
> +		if (err)
> +			goto out_unlock;
> +
> +		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
> +			engine->name,
> +			ktime_to_ns(times[0]),
> +			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
> +	}
> +
> +out_unlock:
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	mock_file_free(i915, file);
> +	return err;
> +}
> +
>  static struct i915_vma *
>  gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
>  {
> @@ -718,6 +905,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
>  {
>  	static const struct i915_subtest tests[] = {
>  		SUBTEST(igt_switch_to_kernel_context),
> +		SUBTEST(live_nop_switch),

Due to the nature of the test, the name 'nop' has a minor twist: it
suggests there is a nop in the ring, whereas this is more of a nop
request, or an empty request.

'live_bare|empty_switch' ?

Can't think of anything better and I don't insist, so regardless

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

>  		SUBTEST(igt_ctx_exec),
>  		SUBTEST(igt_ctx_readonly),
>  	};
> -- 
> 2.19.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 10/40] drm/i915/selftests: Basic stress test for rapid context switching
  2018-09-20 10:38   ` Mika Kuoppala
@ 2018-09-20 10:46     ` Chris Wilson
  0 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-20 10:46 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2018-09-20 11:38:08)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> > +             end_time = jiffies + i915_selftest.timeout_jiffies;
> > +             for_each_prime_number_from(prime, 2, 8192) {
> > +                     times[1] = ktime_get_raw();
> > +
> > +                     for (n = 0; n < prime; n++) {
> > +                             rq = i915_request_alloc(engine, ctx[n % nctx]);
> > +                             if (IS_ERR(rq)) {
> > +                                     err = PTR_ERR(rq);
> > +                                     goto out_unlock;
> > +                             }
> > +
> > +                             /*
> > +                              * This space is left intentionally blank.
> > +                              *
> > +                              * We do not actually want to perform any
> > +                              * action with this request, we just want
> > +                              * to measure the latency in allocation
> > +                              * and submission of our breadcrumbs -
> > +                              * ensuring that the bare request is sufficient
> > +                              * for the system to work (i.e. proper HEAD
> > +                              * tracking of the rings, interrupt handling,
> > +                              * etc). It also gives us the lowest bounds
> > +                              * for latency.
> > +                              */
> > +
> > +                             i915_request_add(rq);
> 
> We get the breadcrumbs in place, with the wa_tails and so on, so this
> should work without any nops or bb ends.

Right, not one batch.
 
> > +                     }
> > +                     i915_request_wait(rq,
> > +                                       I915_WAIT_LOCKED,
> > +                                       MAX_SCHEDULE_TIMEOUT);

Fwiw, a lesson learnt from buggy code is never to use
MAX_SCHEDULE_TIMEOUT in tests :)
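
A minimal sketch of what a bounded wait could look like instead, so that
a stuck request fails the selftest rather than hanging it (the HZ value
is an arbitrary example timeout, not something this patch uses):

	if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ) < 0) {
		/* e.g. -ETIME if the requests never completed in time */
		err = -EIO;
		goto out_unlock;
	}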
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj
  2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
                   ` (41 preceding siblings ...)
  2018-09-19 22:17 ` ✗ Fi.CI.BAT: failure " Patchwork
@ 2018-09-20 13:34 ` Tvrtko Ursulin
  2018-09-20 13:40   ` Chris Wilson
  42 siblings, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-20 13:34 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> Both the .enable_signaling and .release of the null syncobj fence
> can be replaced by the default callbacks for a small reduction in code
> size.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/drm_syncobj.c | 11 -----------
>   1 file changed, 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 497729202bfe..e254f97fed7d 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -66,20 +66,9 @@ static const char *drm_syncobj_stub_fence_get_name(struct dma_fence *fence)
>           return "syncobjstub";
>   }
>   
> -static bool drm_syncobj_stub_fence_enable_signaling(struct dma_fence *fence)
> -{
> -    return !dma_fence_is_signaled(fence);
> -}
> -
> -static void drm_syncobj_stub_fence_release(struct dma_fence *f)
> -{
> -	kfree(f);
> -}

With the default implementation this becomes kfree_rcu - so 
theoretically a change in behaviour after all.
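
For reference, my reading of the dma-fence core default path, sketched
from memory (so treat the details as an assumption, not a quote):

	/* drivers/dma-buf/dma-fence.c: with no .release hook the core
	 * falls back to dma_fence_free(), i.e. an RCU-deferred kfree */
	void dma_fence_release(struct kref *kref)
	{
		struct dma_fence *fence =
			container_of(kref, struct dma_fence, refcount);

		if (fence->ops->release)
			fence->ops->release(fence);
		else
			dma_fence_free(fence); /* kfree_rcu(fence, rcu) */
	}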

Since there are RCU usages in syncobj and the magical null/stub handle 
is not explained (or I did not find it), was the code fine with a plain 
kfree?

Regards,

Tvrtko

>   static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
>   	.get_driver_name = drm_syncobj_stub_fence_get_name,
>   	.get_timeline_name = drm_syncobj_stub_fence_get_name,
> -	.enable_signaling = drm_syncobj_stub_fence_enable_signaling,
> -	.release = drm_syncobj_stub_fence_release,
>   };
>   
>   
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj
  2018-09-20 13:34 ` [PATCH 01/40] " Tvrtko Ursulin
@ 2018-09-20 13:40   ` Chris Wilson
  2018-09-20 13:54     ` Tvrtko Ursulin
  0 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-20 13:40 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-20 14:34:29)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > Both the .enable_signaling and .release of the null syncobj fence
> > can be replaced by the default callbacks for a small reduction in code
> > size.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/drm_syncobj.c | 11 -----------
> >   1 file changed, 11 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> > index 497729202bfe..e254f97fed7d 100644
> > --- a/drivers/gpu/drm/drm_syncobj.c
> > +++ b/drivers/gpu/drm/drm_syncobj.c
> > @@ -66,20 +66,9 @@ static const char *drm_syncobj_stub_fence_get_name(struct dma_fence *fence)
> >           return "syncobjstub";
> >   }
> >   
> > -static bool drm_syncobj_stub_fence_enable_signaling(struct dma_fence *fence)
> > -{
> > -    return !dma_fence_is_signaled(fence);
> > -}
> > -
> > -static void drm_syncobj_stub_fence_release(struct dma_fence *f)
> > -{
> > -     kfree(f);
> > -}
> 
> With the default implementation this becomes kfree_rcu - so 
> theoretically a change in behaviour after all.

A correction, since dma_fences are required to be RCU safe.
 
> Since there are RCU usages in syncobj and the magical null/stub handle 
> is not explained (or I did not find it), was the code fine with a plain 
> kfree?

No :) It's the third parties who may be doing RCU lookups, the only
argument would be that it would never be exposed... Except it was.
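
The sort of third-party lookup I mean, as a rough sketch (the syncobj
fence pointer is RCU-protected, so a reader may still be dereferencing
the fence as the last reference is dropped; helper names from memory):

	rcu_read_lock();
	fence = dma_fence_get_rcu_safe(&syncobj->fence);
	rcu_read_unlock();

which is only safe if the final free is deferred past the grace period.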
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj
  2018-09-20 13:40   ` Chris Wilson
@ 2018-09-20 13:54     ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-20 13:54 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 20/09/2018 14:40, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-20 14:34:29)
>>
>> On 19/09/2018 20:55, Chris Wilson wrote:
>>> Both the .enable_signaling and .release of the null syncobj fence
>>> can be replaced by the default callbacks for a small reduction in code
>>> size.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    drivers/gpu/drm/drm_syncobj.c | 11 -----------
>>>    1 file changed, 11 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>>> index 497729202bfe..e254f97fed7d 100644
>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>> @@ -66,20 +66,9 @@ static const char *drm_syncobj_stub_fence_get_name(struct dma_fence *fence)
>>>            return "syncobjstub";
>>>    }
>>>    
>>> -static bool drm_syncobj_stub_fence_enable_signaling(struct dma_fence *fence)
>>> -{
>>> -    return !dma_fence_is_signaled(fence);
>>> -}
>>> -
>>> -static void drm_syncobj_stub_fence_release(struct dma_fence *f)
>>> -{
>>> -     kfree(f);
>>> -}
>>
>> With the default implementation this becomes kfree_rcu - so
>> theoretically a change in behaviour after all.
> 
> A correction, since dma_fences are required to be RCU safe.
>   
>> Since there are RCU usages in syncobj and the magical null/stub handle
>> is not explained (or I did not find it), was the code fine with a plain
>> kfree?
> 
> No :) It's the third parties who may be doing RCU lookups, the only
> argument would be that it would never be exposed... Except it was.

Update the commit message then to reflect the bug-fixing nature, and:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Add a Fixes: tag perhaps.. e28bd10? It claims to be a renaming, but it
did add the kfree release callback as well.
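
(Format illustration only; the full abbreviated sha and the exact
subject line of that commit would still need to be filled in:

	Fixes: e28bd10 ("<subject of the offending commit>")
)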

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 03/40] drm/i915/selftests: Live tests emit requests and so require rpm
  2018-09-19 19:55 ` [PATCH 03/40] drm/i915/selftests: Live tests emit requests and so require rpm Chris Wilson
@ 2018-09-20 14:01   ` Tvrtko Ursulin
  2018-09-20 14:47     ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-20 14:01 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> As we emit requests or touch HW directly for some of the live tests, the
> requirement is that we hold the rpm wakeref before doing so. We want a
> mix of granularity since we will want to test runtime suspend, so try to
> mark up only the critical sections where we need rpm for the live test.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/selftests/i915_gem_coherency.c | 2 ++
>   drivers/gpu/drm/i915/selftests/i915_gem_context.c   | 4 ++++
>   drivers/gpu/drm/i915/selftests/i915_request.c       | 8 ++++++++
>   drivers/gpu/drm/i915/selftests/intel_guc.c          | 4 ++++
>   drivers/gpu/drm/i915/selftests/intel_lrc.c          | 8 ++++++++
>   drivers/gpu/drm/i915/selftests/intel_workarounds.c  | 5 +++++
>   6 files changed, 31 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> index 4e6a221063ac..f7392c1ffe75 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> @@ -298,6 +298,7 @@ static int igt_gem_coherency(void *arg)
>   	values = offsets + ncachelines;
>   
>   	mutex_lock(&i915->drm.struct_mutex);
> +	intel_runtime_pm_get(i915);
>   	for (over = igt_coherency_mode; over->name; over++) {
>   		if (!over->set)
>   			continue;
> @@ -375,6 +376,7 @@ static int igt_gem_coherency(void *arg)
>   		}
>   	}
>   unlock:
> +	intel_runtime_pm_put(i915);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   	kfree(offsets);
>   	return err;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 1c92560d35da..79f9bcaf0c05 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -636,6 +636,8 @@ static int igt_switch_to_kernel_context(void *arg)
>   	 */
>   
>   	mutex_lock(&i915->drm.struct_mutex);
> +	intel_runtime_pm_get(i915);
> +
>   	ctx = kernel_context(i915);
>   	if (IS_ERR(ctx)) {
>   		mutex_unlock(&i915->drm.struct_mutex);
> @@ -658,6 +660,8 @@ static int igt_switch_to_kernel_context(void *arg)
>   	GEM_TRACE_DUMP_ON(err);
>   	if (igt_flush_test(i915, I915_WAIT_LOCKED))
>   		err = -EIO;
> +
> +	intel_runtime_pm_put(i915);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   
>   	kernel_context_close(ctx);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index c4aac6141e04..07e557815308 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -342,6 +342,7 @@ static int live_nop_request(void *arg)
>   	 */
>   
>   	mutex_lock(&i915->drm.struct_mutex);
> +	intel_runtime_pm_get(i915);
>   
>   	for_each_engine(engine, i915, id) {
>   		struct i915_request *request = NULL;
> @@ -402,6 +403,7 @@ static int live_nop_request(void *arg)
>   	}
>   
>   out_unlock:
> +	intel_runtime_pm_put(i915);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   	return err;
>   }
> @@ -487,6 +489,7 @@ static int live_empty_request(void *arg)
>   	 */
>   
>   	mutex_lock(&i915->drm.struct_mutex);
> +	intel_runtime_pm_get(i915);
>   
>   	batch = empty_batch(i915);
>   	if (IS_ERR(batch)) {
> @@ -550,6 +553,7 @@ static int live_empty_request(void *arg)
>   	i915_vma_unpin(batch);
>   	i915_vma_put(batch);
>   out_unlock:
> +	intel_runtime_pm_put(i915);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   	return err;
>   }
> @@ -644,6 +648,7 @@ static int live_all_engines(void *arg)
>   	 */
>   
>   	mutex_lock(&i915->drm.struct_mutex);
> +	intel_runtime_pm_get(i915);
>   
>   	err = begin_live_test(&t, i915, __func__, "");
>   	if (err)
> @@ -726,6 +731,7 @@ static int live_all_engines(void *arg)
>   	i915_vma_unpin(batch);
>   	i915_vma_put(batch);
>   out_unlock:
> +	intel_runtime_pm_put(i915);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   	return err;
>   }
> @@ -747,6 +753,7 @@ static int live_sequential_engines(void *arg)
>   	 */
>   
>   	mutex_lock(&i915->drm.struct_mutex);
> +	intel_runtime_pm_get(i915);
>   
>   	err = begin_live_test(&t, i915, __func__, "");
>   	if (err)
> @@ -853,6 +860,7 @@ static int live_sequential_engines(void *arg)
>   		i915_request_put(request[id]);
>   	}
>   out_unlock:
> +	intel_runtime_pm_put(i915);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   	return err;
>   }
> diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c
> index 90ba88c972cf..0c0ab82b6228 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_guc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c
> @@ -142,6 +142,7 @@ static int igt_guc_clients(void *args)
>   
>   	GEM_BUG_ON(!HAS_GUC(dev_priv));
>   	mutex_lock(&dev_priv->drm.struct_mutex);
> +	intel_runtime_pm_get(dev_priv);
>   
>   	guc = &dev_priv->guc;
>   	if (!guc) {
> @@ -269,6 +270,7 @@ static int igt_guc_clients(void *args)
>   	guc_clients_create(guc);
>   	guc_clients_doorbell_init(guc);
>   unlock:
> +	intel_runtime_pm_put(dev_priv);
>   	mutex_unlock(&dev_priv->drm.struct_mutex);
>   	return err;
>   }
> @@ -287,6 +289,7 @@ static int igt_guc_doorbells(void *arg)
>   
>   	GEM_BUG_ON(!HAS_GUC(dev_priv));
>   	mutex_lock(&dev_priv->drm.struct_mutex);
> +	intel_runtime_pm_get(dev_priv);
>   
>   	guc = &dev_priv->guc;
>   	if (!guc) {
> @@ -379,6 +382,7 @@ static int igt_guc_doorbells(void *arg)
>   			guc_client_free(clients[i]);
>   		}
>   unlock:
> +	intel_runtime_pm_put(dev_priv);
>   	mutex_unlock(&dev_priv->drm.struct_mutex);
>   	return err;
>   }
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index 582566faef09..1aea7a8f2224 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -221,6 +221,7 @@ static int live_sanitycheck(void *arg)
>   		return 0;
>   
>   	mutex_lock(&i915->drm.struct_mutex);
> +	intel_runtime_pm_get(i915);
>   
>   	if (spinner_init(&spin, i915))
>   		goto err_unlock;
> @@ -261,6 +262,7 @@ static int live_sanitycheck(void *arg)
>   	spinner_fini(&spin);
>   err_unlock:
>   	igt_flush_test(i915, I915_WAIT_LOCKED);
> +	intel_runtime_pm_put(i915);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   	return err;
>   }
> @@ -278,6 +280,7 @@ static int live_preempt(void *arg)
>   		return 0;
>   
>   	mutex_lock(&i915->drm.struct_mutex);
> +	intel_runtime_pm_get(i915);
>   
>   	if (spinner_init(&spin_hi, i915))
>   		goto err_unlock;
> @@ -350,6 +353,7 @@ static int live_preempt(void *arg)
>   	spinner_fini(&spin_hi);
>   err_unlock:
>   	igt_flush_test(i915, I915_WAIT_LOCKED);
> +	intel_runtime_pm_put(i915);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   	return err;
>   }
> @@ -368,6 +372,7 @@ static int live_late_preempt(void *arg)
>   		return 0;
>   
>   	mutex_lock(&i915->drm.struct_mutex);
> +	intel_runtime_pm_get(i915);
>   
>   	if (spinner_init(&spin_hi, i915))
>   		goto err_unlock;
> @@ -440,6 +445,7 @@ static int live_late_preempt(void *arg)
>   	spinner_fini(&spin_hi);
>   err_unlock:
>   	igt_flush_test(i915, I915_WAIT_LOCKED);
> +	intel_runtime_pm_put(i915);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   	return err;
>   
> @@ -467,6 +473,7 @@ static int live_preempt_hang(void *arg)
>   		return 0;
>   
>   	mutex_lock(&i915->drm.struct_mutex);
> +	intel_runtime_pm_get(i915);
>   
>   	if (spinner_init(&spin_hi, i915))
>   		goto err_unlock;
> @@ -561,6 +568,7 @@ static int live_preempt_hang(void *arg)
>   	spinner_fini(&spin_hi);
>   err_unlock:
>   	igt_flush_test(i915, I915_WAIT_LOCKED);
> +	intel_runtime_pm_put(i915);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   	return err;
>   }
> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> index 0d39b3bf0c0d..d1a0923d2f38 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> @@ -44,7 +44,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
>   	if (err)
>   		goto err_obj;
>   
> +	intel_runtime_pm_get(engine->i915);
>   	rq = i915_request_alloc(engine, ctx);
> +	intel_runtime_pm_put(engine->i915);
>   	if (IS_ERR(rq)) {
>   		err = PTR_ERR(rq);
>   		goto err_pin;
> @@ -175,7 +177,10 @@ static int switch_to_scratch_context(struct intel_engine_cs *engine)
>   	if (IS_ERR(ctx))
>   		return PTR_ERR(ctx);
>   
> +	intel_runtime_pm_get(engine->i915);
>   	rq = i915_request_alloc(engine, ctx);
> +	intel_runtime_pm_put(engine->i915);
> +
>   	kernel_context_close(ctx);
>   	if (IS_ERR(rq))
>   		return PTR_ERR(rq);
> 

tl;dr - if it passes CI:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Should we have a rpm assert in i915_request_alloc? Or at least in the 
backend context pin if that's the only place where we can need it.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 04/40] drm/i915: Park the GPU on module load
  2018-09-19 19:55 ` [PATCH 04/40] drm/i915: Park the GPU on module load Chris Wilson
@ 2018-09-20 14:02   ` Tvrtko Ursulin
  2018-09-20 14:52     ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-20 14:02 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> Once we have flushed the first request through the system to both load a
> context and record the default state; tell the GPU to park and idle
> itself, putting itself immediately (hopefully at least) into a
> powersaving state, and allowing ourselves to start from known state
> after setting up all our bookkeeping.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c | 8 ++++++++
>   1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a94d5a308c4d..6b347ffb996b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5417,6 +5417,14 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
>   
>   	assert_kernel_context_is_current(i915);
>   
> +	/*
> +	 * Immediately park the GPU so that we enable powersaving and
> +	 * treat it as idle. The next time we issue a request, we will
> +	 * unpark and start using the engine->pinned_default_state, otherwise
> +	 * it is in limbo and an early reset may fail.
> +	 */
> +	__i915_gem_park(i915);
> +
>   	for_each_engine(engine, i915, id) {
>   		struct i915_vma *state;
>   		void *vaddr;
> 

Presumably the previous patch will make this pass CI?

I would prefer the parking is done from the caller level but we couldn't 
agree on this minor detail so anyway:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 02/40] drm: Fix syncobj handing of schedule() returning 0
  2018-09-19 19:55 ` [PATCH 02/40] drm: Fix syncobj handing of schedule() returning 0 Chris Wilson
@ 2018-09-20 14:13   ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-20 14:13 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> After schedule() returns 0, we must do one last check of COND to
> determine the reason for the wakeup with 0 jiffies remaining before
> reporting the timeout -- otherwise we may lose the signal due to
> scheduler delays.
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=106690
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/drm_syncobj.c | 41 +++++++++++++----------------------
>   1 file changed, 15 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index e254f97fed7d..5bcb3ef9b256 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -672,7 +672,6 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>   {
>   	struct syncobj_wait_entry *entries;
>   	struct dma_fence *fence;
> -	signed long ret;
>   	uint32_t signaled_count, i;
>   
>   	entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
> @@ -692,7 +691,7 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>   			if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>   				continue;
>   			} else {
> -				ret = -EINVAL;
> +				timeout = -EINVAL;
>   				goto cleanup_entries;
>   			}
>   		}
> @@ -704,12 +703,6 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>   		}
>   	}
>   
> -	/* Initialize ret to the max of timeout and 1.  That way, the
> -	 * default return value indicates a successful wait and not a
> -	 * timeout.
> -	 */
> -	ret = max_t(signed long, timeout, 1);
> -
>   	if (signaled_count == count ||
>   	    (signaled_count > 0 &&
>   	     !(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)))
> @@ -760,18 +753,17 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>   			goto done_waiting;
>   
>   		if (timeout == 0) {
> -			/* If we are doing a 0 timeout wait and we got
> -			 * here, then we just timed out.
> -			 */
> -			ret = 0;
> +			timeout = -ETIME;
>   			goto done_waiting;
>   		}
>   
> -		ret = schedule_timeout(ret);
> +		if (signal_pending(current)) {
> +			timeout = -ERESTARTSYS;
> +			goto done_waiting;
> +		}
>   
> -		if (ret > 0 && signal_pending(current))
> -			ret = -ERESTARTSYS;
> -	} while (ret > 0);
> +		timeout = schedule_timeout(timeout);
> +	} while (1);
>   
>   done_waiting:
>   	__set_current_state(TASK_RUNNING);
> @@ -788,7 +780,7 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>   	}
>   	kfree(entries);
>   
> -	return ret;
> +	return timeout;
>   }
>   
>   /**
> @@ -829,19 +821,16 @@ static int drm_syncobj_array_wait(struct drm_device *dev,
>   				  struct drm_syncobj **syncobjs)
>   {
>   	signed long timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec);
> -	signed long ret = 0;
>   	uint32_t first = ~0;
>   
> -	ret = drm_syncobj_array_wait_timeout(syncobjs,
> -					     wait->count_handles,
> -					     wait->flags,
> -					     timeout, &first);
> -	if (ret < 0)
> -		return ret;
> +	timeout = drm_syncobj_array_wait_timeout(syncobjs,
> +						 wait->count_handles,
> +						 wait->flags,
> +						 timeout, &first);
> +	if (timeout < 0)
> +		return timeout;
>   
>   	wait->first_signaled = first;
> -	if (ret == 0)
> -		return -ETIME;
>   	return 0;
>   }
>   
> 

LGTM.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 11/40] drm/i915/execlists: Onion unwind for logical_ring_init() failure
  2018-09-19 19:55 ` [PATCH 11/40] drm/i915/execlists: Onion unwind for logical_ring_init() failure Chris Wilson
@ 2018-09-20 14:18   ` Mika Kuoppala
  2018-09-20 14:21   ` Tvrtko Ursulin
  1 sibling, 0 replies; 106+ messages in thread
From: Mika Kuoppala @ 2018-09-20 14:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Fix up the error unwind for logical_ring_init() failing by moving the
> cleanup into the callers who own the various bits of state during
> initialisation.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 11/40] drm/i915/execlists: Onion unwind for logical_ring_init() failure
  2018-09-19 19:55 ` [PATCH 11/40] drm/i915/execlists: Onion unwind for logical_ring_init() failure Chris Wilson
  2018-09-20 14:18   ` Mika Kuoppala
@ 2018-09-20 14:21   ` Tvrtko Ursulin
  2018-09-20 19:59     ` Chris Wilson
  1 sibling, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-20 14:21 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> Fix up the error unwind for logical_ring_init() failing by moving the

Could you say in the commit what was broken?

Regards,

Tvrtko

> cleanup into the callers who own the various bits of state during
> initialisation.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 18 +++++++++++-------
>   1 file changed, 11 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b4448b05d78a..3edb417caa7b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2415,7 +2415,7 @@ static int logical_ring_init(struct intel_engine_cs *engine)
>   
>   	ret = intel_engine_init_common(engine);
>   	if (ret)
> -		goto error;
> +		return ret;
>   
>   	if (HAS_LOGICAL_RING_ELSQ(i915)) {
>   		execlists->submit_reg = i915->regs +
> @@ -2457,10 +2457,6 @@ static int logical_ring_init(struct intel_engine_cs *engine)
>   	reset_csb_pointers(execlists);
>   
>   	return 0;
> -
> -error:
> -	intel_logical_ring_cleanup(engine);
> -	return ret;
>   }
>   
>   int logical_render_ring_init(struct intel_engine_cs *engine)
> @@ -2483,10 +2479,14 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
>   	engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs;
>   	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz;
>   
> -	ret = intel_engine_create_scratch(engine, PAGE_SIZE);
> +	ret = logical_ring_init(engine);
>   	if (ret)
>   		return ret;
>   
> +	ret = intel_engine_create_scratch(engine, PAGE_SIZE);
> +	if (ret)
> +		goto err_cleanup_common;
> +
>   	ret = intel_init_workaround_bb(engine);
>   	if (ret) {
>   		/*
> @@ -2498,7 +2498,11 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
>   			  ret);
>   	}
>   
> -	return logical_ring_init(engine);
> +	return 0;
> +
> +err_cleanup_common:
> +	intel_engine_cleanup_common(engine);
> +	return ret;
>   }
>   
>   int logical_xcs_ring_init(struct intel_engine_cs *engine)
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 03/40] drm/i915/selftests: Live tests emit requests and so require rpm
  2018-09-20 14:01   ` Tvrtko Ursulin
@ 2018-09-20 14:47     ` Chris Wilson
  0 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-20 14:47 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-20 15:01:38)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > As we emit requests or touch HW directly for some of the live tests, the
> > requirement is that we hold the rpm wakeref before doing so. We want a
> > mix of granularity since we will want to test runtime suspend, so try to
> > mark up only the critical sections where we need rpm for the live test.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> Should we have a rpm assert in i915_request_alloc?

The intel_runtime_pm_get_noresume() we use there complains if we don't
have an intel rpm wakeref.

> Or at least in the 
> backend context pin if that's the only place where we can need it.

Yup, it spits one out when we touch HW without a wakeref.

Just it seems that wakerefs abound and we rarely catch ourselves out ;)
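
If we did want an explicit assert at the top of i915_request_alloc(), it
would presumably be little more than this (hypothetical placement, not
something this series adds):

	/* demand the caller already holds a runtime pm wakeref before the
	 * new request goes on to touch the HW */
	assert_rpm_wakelock_held(engine->i915);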
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 04/40] drm/i915: Park the GPU on module load
  2018-09-20 14:02   ` Tvrtko Ursulin
@ 2018-09-20 14:52     ` Chris Wilson
  0 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-20 14:52 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-20 15:02:36)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > Once we have flushed the first request through the system to both load a
> > context and record the default state; tell the GPU to park and idle
> > itself, putting itself immediately (hopefully at least) into a
> > powersaving state, and allowing ourselves to start from known state
> > after setting up all our bookkeeping.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem.c | 8 ++++++++
> >   1 file changed, 8 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index a94d5a308c4d..6b347ffb996b 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -5417,6 +5417,14 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
> >   
> >       assert_kernel_context_is_current(i915);
> >   
> > +     /*
> > +      * Immediately park the GPU so that we enable powersaving and
> > +      * treat it as idle. The next time we issue a request, we will
> > +      * unpark and start using the engine->pinned_default_state, otherwise
> > +      * it is in limbo and an early reset may fail.
> > +      */
> > +     __i915_gem_park(i915);
> > +
> >       for_each_engine(engine, i915, id) {
> >               struct i915_vma *state;
> >               void *vaddr;
> > 
> 
> Presumably the previous patch will make this pass CI?

Aye. There was an earlier series of just this pair to check that I had
caught all the missing wakerefs uncovered by idling on init.
 
> I would prefer the parking is done from the caller level but we couldn't 
> agree on this minor detail so anyway:
 
I wanted to postpone that idea until we have the rc6 setup refactored,
as I think that may open some more interesting avenues of how to handle
this.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 11/40] drm/i915/execlists: Onion unwind for logical_ring_init() failure
  2018-09-20 14:21   ` Tvrtko Ursulin
@ 2018-09-20 19:59     ` Chris Wilson
  2018-09-21 10:00       ` Tvrtko Ursulin
  0 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-20 19:59 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-20 15:21:47)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > Fix up the error unwind for logical_ring_init() failing by moving the
> 
> Could you say in the commit what was broken?

We didn't clean up all the state we allocated in the caller.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 11/40] drm/i915/execlists: Onion unwind for logical_ring_init() failure
  2018-09-20 19:59     ` Chris Wilson
@ 2018-09-21 10:00       ` Tvrtko Ursulin
  2018-09-21 10:01         ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-21 10:00 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 20/09/2018 20:59, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-20 15:21:47)
>>
>> On 19/09/2018 20:55, Chris Wilson wrote:
>>> Fix up the error unwind for logical_ring_init() failing by moving the
>>
>> Could you say in the commit what was broken?
> 
> We didn't clean up all the state we allocated in the caller.

That much was kind of obvious, but it would have been helpful, to the
reviewer at least if not to the commit message itself, to say exactly
what was left uncleaned.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 11/40] drm/i915/execlists: Onion unwind for logical_ring_init() failure
  2018-09-21 10:00       ` Tvrtko Ursulin
@ 2018-09-21 10:01         ` Chris Wilson
  0 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-21 10:01 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-21 11:00:06)
> 
> On 20/09/2018 20:59, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-09-20 15:21:47)
> >>
> >> On 19/09/2018 20:55, Chris Wilson wrote:
> >>> Fix up the error unwind for logical_ring_init() failing by moving the
> >>
> >> Could you say in the commit what was broken?
> > 
> > We didn't cleanup all the state we allocated in the caller.
> 
> That much was kind of obvious, but it would have been helpful, to the
> reviewer at least if not to the commit message itself, to say exactly
> what was left uncleaned.

I hope the addendum in the commitmsg was helpful.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 12/40] drm/i915/execlists: Assert the queue is non-empty on unsubmitting
  2018-09-19 19:55 ` [PATCH 12/40] drm/i915/execlists: Assert the queue is non-empty on unsubmitting Chris Wilson
@ 2018-09-24  9:07   ` Tvrtko Ursulin
  2018-09-25  7:41     ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-24  9:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> In the sequence
> 
> <0>[  531.960431] drv_self-4806    7.... 527402570us : intel_gpu_reset: engine_mask=1, ret=0, retry=0
> <0>[  531.960431] drv_self-4806    7.... 527402571us : execlists_reset: rcs0 request global=115de, current=71133

How come there is such an unbelievably huge delta between the guilty
request and the HWS?

> <0>[  531.960431] drv_self-4806    7d..1 527402571us : execlists_cancel_port_requests: rcs0:port0 global=71134 (fence 826b:198), (current 71133)
> <0>[  531.960431] drv_self-4806    7d..1 527402572us : execlists_cancel_port_requests: rcs0:port1 global=71135 (fence 826c:53), (current 71133)
> <0>[  531.960431] drv_self-4806    7d..1 527402572us : __i915_request_unsubmit: rcs0 fence 826c:53 <- global=71135, current 71133
> <0>[  531.960431] drv_self-4806    7d..1 527402579us : __i915_request_unsubmit: rcs0 fence 826b:198 <- global=71134, current 71133
> <0>[  531.960431] drv_self-4806    7.... 527402613us : intel_engine_cancel_stop_cs: rcs0
> <0>[  531.960431] drv_self-4806    7.... 527402624us : execlists_reset_finish: rcs0
> 
> we are missing the execlists_submission_tasklet() invocation before the
> execlists_reset_fini() implying that either the queue is empty, or we
> failed to schedule and run the tasklet on finish. Add an assert so we
> are sure that on unsubmitting the incomplete request after reset, the
> queue is indeed populated.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 1 +
>   1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 3edb417caa7b..e8de250c3413 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -344,6 +344,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   			last_prio = rq_prio(rq);
>   			p = lookup_priolist(engine, last_prio);
>   		}
> +		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>   
>   		GEM_BUG_ON(p->priority != rq_prio(rq));
>   		list_add(&rq->sched.link, &p->requests);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 13/40] drm/i915: Reserve some priority bits for internal use
  2018-09-19 19:55 ` [PATCH 13/40] drm/i915: Reserve some priority bits for internal use Chris Wilson
@ 2018-09-24  9:12   ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-24  9:12 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> In the next few patches, we will want to give a small priority boost to
> some requests/queues but not so much that we perturb the user controlled
> order. As such we shift the user priority bits higher leaving ourselves

It actually only prepares for the shifting.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

> a few low priority bits for our bumping.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h            | 2 +-
>   drivers/gpu/drm/i915/i915_gem_context.c    | 9 +++++----
>   drivers/gpu/drm/i915/i915_scheduler.h      | 6 ++++++
>   drivers/gpu/drm/i915/selftests/intel_lrc.c | 8 +++++---
>   4 files changed, 17 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7d4daa7412f1..b902bb96e0be 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3230,7 +3230,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj,
>   int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
>   				  unsigned int flags,
>   				  const struct i915_sched_attr *attr);
> -#define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX
> +#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
>   
>   int __must_check
>   i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index f772593b99ab..150d7a6b2bd3 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -337,7 +337,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>   	kref_init(&ctx->ref);
>   	list_add_tail(&ctx->link, &dev_priv->contexts.list);
>   	ctx->i915 = dev_priv;
> -	ctx->sched.priority = I915_PRIORITY_NORMAL;
> +	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
>   
>   	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
>   		struct intel_context *ce = &ctx->__engine[n];
> @@ -504,7 +504,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
>   	}
>   
>   	i915_gem_context_clear_bannable(ctx);
> -	ctx->sched.priority = prio;
> +	ctx->sched.priority = I915_USER_PRIORITY(prio);
>   	ctx->ring_size = PAGE_SIZE;
>   
>   	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
> @@ -879,7 +879,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   		args->value = i915_gem_context_is_bannable(ctx);
>   		break;
>   	case I915_CONTEXT_PARAM_PRIORITY:
> -		args->value = ctx->sched.priority;
> +		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
>   		break;
>   	default:
>   		ret = -EINVAL;
> @@ -948,7 +948,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   				 !capable(CAP_SYS_NICE))
>   				ret = -EPERM;
>   			else
> -				ctx->sched.priority = priority;
> +				ctx->sched.priority =
> +					I915_USER_PRIORITY(priority);
>   		}
>   		break;
>   
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 70a42220358d..7edfad0abfd7 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -19,6 +19,12 @@ enum {
>   	I915_PRIORITY_INVALID = INT_MIN
>   };
>   
> +#define I915_USER_PRIORITY_SHIFT 0
> +#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
> +
> +#define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
> +#define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
> +
>   struct i915_sched_attr {
>   	/**
>   	 * @priority: execution and service priority
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index 1aea7a8f2224..94ceb5f6c507 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -291,12 +291,14 @@ static int live_preempt(void *arg)
>   	ctx_hi = kernel_context(i915);
>   	if (!ctx_hi)
>   		goto err_spin_lo;
> -	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
> +	ctx_hi->sched.priority =
> +		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
>   
>   	ctx_lo = kernel_context(i915);
>   	if (!ctx_lo)
>   		goto err_ctx_hi;
> -	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
> +	ctx_lo->sched.priority =
> +		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
>   
>   	for_each_engine(engine, i915, id) {
>   		struct i915_request *rq;
> @@ -417,7 +419,7 @@ static int live_late_preempt(void *arg)
>   			goto err_wedged;
>   		}
>   
> -		attr.priority = I915_PRIORITY_MAX;
> +		attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
>   		engine->schedule(rq, &attr);
>   
>   		if (!wait_for_spinner(&spin_hi, rq)) {
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 14/40] drm/i915: Combine multiple internal plists into the same i915_priolist bucket
  2018-09-19 19:55 ` [PATCH 14/40] drm/i915: Combine multiple internal plists into the same i915_priolist bucket Chris Wilson
@ 2018-09-24 10:25   ` Tvrtko Ursulin
  2018-09-25  7:55     ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-24 10:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> As we are about to allow ourselves to slightly bump the user priority
> into a few different sublevels, packthose internal priority lists
> into the same i915_priolist to keep the rbtree compact and avoid having
> to allocate the default user priority even after the internal bumping.
> The downside to having an requests[] rather than a node per active list,
> is that we then have to walk over the empty higher priority lists. To
> compensate, we track the active buckets and use a small bitmap to skip
> over any inactive ones.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_engine_cs.c      |  6 +-
>   drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++-
>   drivers/gpu/drm/i915/intel_lrc.c            | 87 ++++++++++++++-------
>   drivers/gpu/drm/i915/intel_ringbuffer.h     | 13 ++-
>   4 files changed, 80 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 217ed3ee1cab..83f2f7774c1f 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1534,10 +1534,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   	count = 0;
>   	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
>   	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> -		struct i915_priolist *p =
> -			rb_entry(rb, typeof(*p), node);
> +		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> +		int i;
>   
> -		list_for_each_entry(rq, &p->requests, sched.link) {
> +		priolist_for_each_request(rq, p, i) {
>   			if (count++ < MAX_REQUESTS_TO_SHOW - 1)
>   				print_request(m, rq, "\t\tQ ");
>   			else
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 6f693ef62c64..8531bd917ec3 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -726,30 +726,28 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>   	while ((rb = rb_first_cached(&execlists->queue))) {
>   		struct i915_priolist *p = to_priolist(rb);
>   		struct i915_request *rq, *rn;
> +		int i;
>   
> -		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
> +		priolist_for_each_request_consume(rq, rn, p, i) {

Hm, the consume variant clears the bitmask every time, but we are not
certain yet that we will consume the request.

>   			if (last && rq->hw_context != last->hw_context) {
> -				if (port == last_port) {
> -					__list_del_many(&p->requests,
> -							&rq->sched.link);
> +				if (port == last_port)
>   					goto done;
> -				}
>   
>   				if (submit)
>   					port_assign(port, last);
>   				port++;
>   			}
>   
> -			INIT_LIST_HEAD(&rq->sched.link);
> +			list_del_init(&rq->sched.link);
>   
>   			__i915_request_submit(rq);
>   			trace_i915_request_in(rq, port_index(port, execlists));
> +
>   			last = rq;
>   			submit = true;
>   		}
>   
>   		rb_erase_cached(&p->node, &execlists->queue);
> -		INIT_LIST_HEAD(&p->requests);
>   		if (p->priority != I915_PRIORITY_NORMAL)
>   			kmem_cache_free(engine->i915->priorities, p);
>   	}
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index e8de250c3413..aeae82b5223c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -259,14 +259,49 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
>   	ce->lrc_desc = desc;
>   }
>   
> -static struct i915_priolist *
> +static void assert_priolists(struct intel_engine_execlists * const execlists,
> +			     int queue_priority)
> +{
> +	struct rb_node *rb;
> +	int last_prio, i;
> +
> +	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> +		return;
> +
> +	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
> +		   rb_first(&execlists->queue.rb_root));
> +
> +	last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
> +
> +	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> +		struct i915_priolist *p = to_priolist(rb);
> +
> +		GEM_BUG_ON(p->priority >= last_prio);
> +		last_prio = p->priority;
> +
> +		GEM_BUG_ON(!p->used);
> +		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
> +			if (list_empty(&p->requests[i]))
> +				continue;

Would it not also be interesting to assert the opposite, that the 
bitmask bit is clear when the list is empty?

> +
> +			GEM_BUG_ON(!(p->used & BIT(i)));
> +		}
> +	}

In general, won't this be a bit too expensive on debug builds, 
especially with priority/rq stress tests? It walks absolutely all 
priority levels times the bucket count on every lookup and dequeue. Or 
maybe we don't have any tests at the moment which instantiate that many 
levels.
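
Back of an envelope, assuming I have the uapi range right 
(I915_CONTEXT_MIN/MAX_USER_PRIORITY is -1023..1023): that is up to 
~2047 priolists in the tree, and with the later shift of two each one 
carries four buckets, so the assert is on the order of 8k list_empty() 
checks per lookup and dequeue in the worst case.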

> +}
> +
> +static struct list_head *
>   lookup_priolist(struct intel_engine_cs *engine, int prio)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct i915_priolist *p;
>   	struct rb_node **parent, *rb;
>   	bool first = true;
> +	int idx, i;
> +
> +	assert_priolists(execlists, INT_MAX);
>   
> +	/* buckets sorted from highest [in slot 0] to lowest priority */
> +	idx = I915_PRIORITY_COUNT - (prio & ~I915_PRIORITY_MASK) - 1;

0 - (prio & ~0) - 1 = -prio - 1, hm..?

Hm no.. I915_PRIORITY_COUNT with zero reserved internal bits is actually 
1.  So 1 - (prio & 0x0) - 1 = 0, ok phew..

Unintuitive for the count to be one with no reserved bits? Although it 
is only a temporary stage..
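
Writing it out for myself (please double-check my mask arithmetic), 
with SHIFT = 0:

	COUNT = 1, MASK = -1, so ~MASK = 0
	idx   = 1 - (prio & 0) - 1 = 0

and once SHIFT grows to 2 later in the series:

	COUNT = 4, MASK = -4, so ~MASK = 3
	idx   = 4 - (prio & 3) - 1 = 3 - (prio & 3)

i.e. the bare user level lands in the last slot and the fully boosted 
one in slot 0, which is dequeued first.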

> +	prio >>= I915_USER_PRIORITY_SHIFT;
>   	if (unlikely(execlists->no_priolist))
>   		prio = I915_PRIORITY_NORMAL;
>   
> @@ -283,7 +318,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
>   			parent = &rb->rb_right;
>   			first = false;
>   		} else {
> -			return p;
> +			goto out;
>   		}
>   	}
>   
> @@ -309,11 +344,15 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
>   	}
>   
>   	p->priority = prio;
> -	INIT_LIST_HEAD(&p->requests);
> +	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
> +		INIT_LIST_HEAD(&p->requests[i]);
>   	rb_link_node(&p->node, rb, parent);
>   	rb_insert_color_cached(&p->node, &execlists->queue, first);
> +	p->used = 0;
>   
> -	return p;
> +out:
> +	p->used |= BIT(idx);
> +	return &p->requests[idx];
>   }
>   
>   static void unwind_wa_tail(struct i915_request *rq)
> @@ -325,7 +364,7 @@ static void unwind_wa_tail(struct i915_request *rq)
>   static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   {
>   	struct i915_request *rq, *rn;
> -	struct i915_priolist *uninitialized_var(p);
> +	struct list_head *uninitialized_var(pl);
>   	int last_prio = I915_PRIORITY_INVALID;
>   
>   	lockdep_assert_held(&engine->timeline.lock);
> @@ -342,12 +381,11 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
>   		if (rq_prio(rq) != last_prio) {
>   			last_prio = rq_prio(rq);
> -			p = lookup_priolist(engine, last_prio);
> +			pl = lookup_priolist(engine, last_prio);
>   		}
>   		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>   
> -		GEM_BUG_ON(p->priority != rq_prio(rq));
> -		list_add(&rq->sched.link, &p->requests);
> +		list_add(&rq->sched.link, pl);
>   	}
>   }
>   
> @@ -665,8 +703,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	while ((rb = rb_first_cached(&execlists->queue))) {
>   		struct i915_priolist *p = to_priolist(rb);
>   		struct i915_request *rq, *rn;
> +		int i;
>   
> -		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
> +		priolist_for_each_request_consume(rq, rn, p, i) {
>   			/*
>   			 * Can we combine this request with the current port?
>   			 * It has to be the same context/ringbuffer and not
> @@ -685,11 +724,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   				 * combine this request with the last, then we
>   				 * are done.
>   				 */
> -				if (port == last_port) {
> -					__list_del_many(&p->requests,
> -							&rq->sched.link);
> +				if (port == last_port)
>   					goto done;
> -				}
>   
>   				/*
>   				 * If GVT overrides us we only ever submit
> @@ -699,11 +735,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   				 * request) to the second port.
>   				 */
>   				if (ctx_single_port_submission(last->hw_context) ||
> -				    ctx_single_port_submission(rq->hw_context)) {
> -					__list_del_many(&p->requests,
> -							&rq->sched.link);
> +				    ctx_single_port_submission(rq->hw_context))
>   					goto done;
> -				}
>   
>   				GEM_BUG_ON(last->hw_context == rq->hw_context);
>   
> @@ -714,15 +747,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   				GEM_BUG_ON(port_isset(port));
>   			}
>   
> -			INIT_LIST_HEAD(&rq->sched.link);
> +			list_del_init(&rq->sched.link);
> +
>   			__i915_request_submit(rq);
>   			trace_i915_request_in(rq, port_index(port, execlists));
> +
>   			last = rq;
>   			submit = true;
>   		}
>   
>   		rb_erase_cached(&p->node, &execlists->queue);
> -		INIT_LIST_HEAD(&p->requests);
>   		if (p->priority != I915_PRIORITY_NORMAL)
>   			kmem_cache_free(engine->i915->priorities, p);
>   	}
> @@ -746,6 +780,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	 */
>   	execlists->queue_priority =
>   		port != execlists->port ? rq_prio(last) : INT_MIN;
> +	assert_priolists(execlists, execlists->queue_priority);
>   
>   	if (submit) {
>   		port_assign(port, last);
> @@ -857,16 +892,16 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>   	/* Flush the queued requests to the timeline list (for retiring). */
>   	while ((rb = rb_first_cached(&execlists->queue))) {
>   		struct i915_priolist *p = to_priolist(rb);
> +		int i;
>   
> -		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
> -			INIT_LIST_HEAD(&rq->sched.link);
> +		priolist_for_each_request_consume(rq, rn, p, i) {
> +			list_del_init(&rq->sched.link);
>   
>   			dma_fence_set_error(&rq->fence, -EIO);
>   			__i915_request_submit(rq);
>   		}
>   
>   		rb_erase_cached(&p->node, &execlists->queue);
> -		INIT_LIST_HEAD(&p->requests);
>   		if (p->priority != I915_PRIORITY_NORMAL)
>   			kmem_cache_free(engine->i915->priorities, p);
>   	}
> @@ -1072,8 +1107,7 @@ static void queue_request(struct intel_engine_cs *engine,
>   			  struct i915_sched_node *node,
>   			  int prio)
>   {
> -	list_add_tail(&node->link,
> -		      &lookup_priolist(engine, prio)->requests);
> +	list_add_tail(&node->link, lookup_priolist(engine, prio));
>   }
>   
>   static void __update_queue(struct intel_engine_cs *engine, int prio)
> @@ -1143,7 +1177,7 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
>   static void execlists_schedule(struct i915_request *request,
>   			       const struct i915_sched_attr *attr)
>   {
> -	struct i915_priolist *uninitialized_var(pl);
> +	struct list_head *uninitialized_var(pl);
>   	struct intel_engine_cs *engine, *last;
>   	struct i915_dependency *dep, *p;
>   	struct i915_dependency stack;
> @@ -1242,8 +1276,7 @@ static void execlists_schedule(struct i915_request *request,
>   				pl = lookup_priolist(engine, prio);
>   				last = engine;
>   			}
> -			GEM_BUG_ON(pl->priority != prio);
> -			list_move_tail(&node->link, &pl->requests);
> +			list_move_tail(&node->link, pl);
>   		} else {
>   			/*
>   			 * If the request is not in the priolist queue because
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 2dfa585712c2..1534de5bb852 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -190,11 +190,22 @@ enum intel_engine_id {
>   };
>   
>   struct i915_priolist {
> +	struct list_head requests[I915_PRIORITY_COUNT];
>   	struct rb_node node;
> -	struct list_head requests;
> +	unsigned long used;
>   	int priority;
>   };
>   
> +#define priolist_for_each_request(it, plist, idx) \
> +	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
> +		list_for_each_entry(it, &(plist)->requests[idx], sched.link)
> +
> +#define priolist_for_each_request_consume(it, n, plist, idx) \
> +	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
> +		list_for_each_entry_safe(it, n, \
> +					 &(plist)->requests[idx - 1], \
> +					 sched.link)
> +
>   struct st_preempt_hang {
>   	struct completion completion;
>   	bool inject_hang;
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 15/40] drm/i915: Priority boost for new clients
  2018-09-19 19:55 ` [PATCH 15/40] drm/i915: Priority boost for new clients Chris Wilson
@ 2018-09-24 10:29   ` Tvrtko Ursulin
  2018-09-25  8:01     ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-24 10:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> Taken from an idea used for FQ_CODEL, we give the first request of a
> new request flow a small priority boost. These flows are likely to
> correspond with short, interactive tasks and so be more latency sensitive
> than the longer free running queues. As soon as the client has more than
> one request in the queue, further requests are not boosted and it settles
> down into ordinary steady state behaviour.  Such small kicks dramatically
> help combat the starvation issue, by allowing each client the opportunity
> to run even when the system is under heavy throughput load (within the
> constraints of the user selected priority).
> 
> v2: Mark the preempted request as the start of a new flow, to prevent a
> single client being continually gazumped by its peers.
> 
> Testcase: igt/benchmarks/rrul
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++--
>   drivers/gpu/drm/i915/i915_scheduler.h |  4 +++-
>   drivers/gpu/drm/i915/intel_lrc.c      | 25 +++++++++++++++++++------
>   3 files changed, 36 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index a492385b2089..56140ca054e8 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1127,8 +1127,20 @@ void i915_request_add(struct i915_request *request)
>   	 */
>   	local_bh_disable();
>   	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
> -	if (engine->schedule)
> -		engine->schedule(request, &request->gem_context->sched);
> +	if (engine->schedule) {
> +		struct i915_sched_attr attr = request->gem_context->sched;
> +
> +		/*
> +		 * Boost priorities to new clients (new request flows).
> +		 *
> +		 * Allow interactive/synchronous clients to jump ahead of
> +		 * the bulk clients. (FQ_CODEL)
> +		 */
> +		if (!prev || i915_request_completed(prev))
> +			attr.priority |= I915_PRIORITY_NEWCLIENT;
> +
> +		engine->schedule(request, &attr);
> +	}
>   	rcu_read_unlock();
>   	i915_sw_fence_commit(&request->submit);
>   	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 7edfad0abfd7..93e43e263d8c 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -19,12 +19,14 @@ enum {
>   	I915_PRIORITY_INVALID = INT_MIN
>   };
>   
> -#define I915_USER_PRIORITY_SHIFT 0
> +#define I915_USER_PRIORITY_SHIFT 1
>   #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>   
>   #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>   #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>   
> +#define I915_PRIORITY_NEWCLIENT	((u8)BIT(0))

Is the cast important and why?

> +
>   struct i915_sched_attr {
>   	/**
>   	 * @priority: execution and service priority
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index aeae82b5223c..ee9a656e549c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -363,9 +363,9 @@ static void unwind_wa_tail(struct i915_request *rq)
>   
>   static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   {
> -	struct i915_request *rq, *rn;
> +	struct i915_request *rq, *rn, *active = NULL;
>   	struct list_head *uninitialized_var(pl);
> -	int last_prio = I915_PRIORITY_INVALID;
> +	int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
>   
>   	lockdep_assert_held(&engine->timeline.lock);
>   
> @@ -373,19 +373,32 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   					 &engine->timeline.requests,
>   					 link) {
>   		if (i915_request_completed(rq))
> -			return;
> +			break;
>   
>   		__i915_request_unsubmit(rq);
>   		unwind_wa_tail(rq);
>   
>   		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> -		if (rq_prio(rq) != last_prio) {
> -			last_prio = rq_prio(rq);
> -			pl = lookup_priolist(engine, last_prio);
> +		if (rq_prio(rq) != prio) {
> +			prio = rq_prio(rq);
> +			pl = lookup_priolist(engine, prio);
>   		}
>   		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>   
>   		list_add(&rq->sched.link, pl);
> +
> +		active = rq;
> +	}
> +
> +	/*
> +	 * The active request is now effectively the start of a new client
> +	 * stream, so give it the equivalent small priority bump to prevent
> +	 * it being gazumped a second time by another peer.
> +	 */
> +	if (!(prio & I915_PRIORITY_NEWCLIENT)) {
> +		prio |= I915_PRIORITY_NEWCLIENT;
> +		list_move_tail(&active->sched.link,
> +			       lookup_priolist(engine, prio));
>   	}
>   }
>   
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 16/40] drm/i915: Pull scheduling under standalone lock
  2018-09-19 19:55 ` [PATCH 16/40] drm/i915: Pull scheduling under standalone lock Chris Wilson
@ 2018-09-24 11:19   ` Tvrtko Ursulin
  2018-09-25  8:19     ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-24 11:19 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> Currently, the backend scheduling code abuses struct_mutex in order to
> have a global lock to manipulate a temporary list (without widespread
> allocation) and to protect against list modifications. This is an
> extraneous coupling to struct_mutex and further can not extend beyond
> the local device.
> 
> Pull all the code that needs to be under the one true lock into
> i915_scheduler.c, and make it so.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/Makefile           |   1 +
>   drivers/gpu/drm/i915/i915_request.c     |  85 ------
>   drivers/gpu/drm/i915/i915_request.h     |   8 -
>   drivers/gpu/drm/i915/i915_scheduler.c   | 377 ++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_scheduler.h   |  25 ++
>   drivers/gpu/drm/i915/intel_display.c    |   3 +-
>   drivers/gpu/drm/i915/intel_lrc.c        | 268 +----------------
>   drivers/gpu/drm/i915/intel_ringbuffer.h |   5 +-
>   8 files changed, 411 insertions(+), 361 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/i915_scheduler.c
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 5794f102f9b8..ef1480c14e4e 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -75,6 +75,7 @@ i915-y += i915_cmd_parser.o \
>   	  i915_gemfs.o \
>   	  i915_query.o \
>   	  i915_request.o \
> +	  i915_scheduler.o \
>   	  i915_timeline.o \
>   	  i915_trace_points.o \
>   	  i915_vma.o \
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 56140ca054e8..d73ad490a261 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -111,91 +111,6 @@ i915_request_remove_from_client(struct i915_request *request)
>   	spin_unlock(&file_priv->mm.lock);
>   }
>   
> -static struct i915_dependency *
> -i915_dependency_alloc(struct drm_i915_private *i915)
> -{
> -	return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
> -}
> -
> -static void
> -i915_dependency_free(struct drm_i915_private *i915,
> -		     struct i915_dependency *dep)
> -{
> -	kmem_cache_free(i915->dependencies, dep);
> -}
> -
> -static void
> -__i915_sched_node_add_dependency(struct i915_sched_node *node,
> -				 struct i915_sched_node *signal,
> -				 struct i915_dependency *dep,
> -				 unsigned long flags)
> -{
> -	INIT_LIST_HEAD(&dep->dfs_link);
> -	list_add(&dep->wait_link, &signal->waiters_list);
> -	list_add(&dep->signal_link, &node->signalers_list);
> -	dep->signaler = signal;
> -	dep->flags = flags;
> -}
> -
> -static int
> -i915_sched_node_add_dependency(struct drm_i915_private *i915,
> -			       struct i915_sched_node *node,
> -			       struct i915_sched_node *signal)
> -{
> -	struct i915_dependency *dep;
> -
> -	dep = i915_dependency_alloc(i915);
> -	if (!dep)
> -		return -ENOMEM;
> -
> -	__i915_sched_node_add_dependency(node, signal, dep,
> -					 I915_DEPENDENCY_ALLOC);
> -	return 0;
> -}
> -
> -static void
> -i915_sched_node_fini(struct drm_i915_private *i915,
> -		     struct i915_sched_node *node)
> -{
> -	struct i915_dependency *dep, *tmp;
> -
> -	GEM_BUG_ON(!list_empty(&node->link));
> -
> -	/*
> -	 * Everyone we depended upon (the fences we wait to be signaled)
> -	 * should retire before us and remove themselves from our list.
> -	 * However, retirement is run independently on each timeline and
> -	 * so we may be called out-of-order.
> -	 */
> -	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
> -		GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler));
> -		GEM_BUG_ON(!list_empty(&dep->dfs_link));
> -
> -		list_del(&dep->wait_link);
> -		if (dep->flags & I915_DEPENDENCY_ALLOC)
> -			i915_dependency_free(i915, dep);
> -	}
> -
> -	/* Remove ourselves from everyone who depends upon us */
> -	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
> -		GEM_BUG_ON(dep->signaler != node);
> -		GEM_BUG_ON(!list_empty(&dep->dfs_link));
> -
> -		list_del(&dep->signal_link);
> -		if (dep->flags & I915_DEPENDENCY_ALLOC)
> -			i915_dependency_free(i915, dep);
> -	}
> -}
> -
> -static void
> -i915_sched_node_init(struct i915_sched_node *node)
> -{
> -	INIT_LIST_HEAD(&node->signalers_list);
> -	INIT_LIST_HEAD(&node->waiters_list);
> -	INIT_LIST_HEAD(&node->link);
> -	node->attr.priority = I915_PRIORITY_INVALID;
> -}
> -
>   static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
>   {
>   	struct intel_engine_cs *engine;
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 7fa94b024968..5f7361e0fca6 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -332,14 +332,6 @@ static inline bool i915_request_completed(const struct i915_request *rq)
>   	return __i915_request_completed(rq, seqno);
>   }
>   
> -static inline bool i915_sched_node_signaled(const struct i915_sched_node *node)
> -{
> -	const struct i915_request *rq =
> -		container_of(node, const struct i915_request, sched);
> -
> -	return i915_request_completed(rq);
> -}
> -
>   void i915_retire_requests(struct drm_i915_private *i915);
>   
>   /*
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> new file mode 100644
> index 000000000000..910ac7089596
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -0,0 +1,377 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#include <linux/mutex.h>
> +
> +#include "i915_drv.h"
> +#include "i915_request.h"
> +#include "i915_scheduler.h"
> +
> +static DEFINE_SPINLOCK(schedule_lock);

Any good excuses to not put it into device private straight away?
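
I would imagine something roughly like this (hand-waving the init site, 
i915_driver_init_early() or wherever fits best - untested sketch):

	/* i915_drv.h, in struct drm_i915_private */
	spinlock_t schedule_lock;

	/* i915_scheduler.c, e.g. in i915_schedule() */
	spin_lock(&rq->i915->schedule_lock);
	...
	spin_unlock(&rq->i915->schedule_lock);

with i915_sched_node_fini() taking it via the i915 pointer it is 
already passed (the __add_dependency helper would need one plumbed in).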

> +
> +static const struct i915_request *
> +node_to_request(const struct i915_sched_node *node)
> +{
> +	return container_of(node, const struct i915_request, sched);
> +}
> +
> +static inline bool node_signaled(const struct i915_sched_node *node)
> +{
> +	return i915_request_completed(node_to_request(node));
> +}
> +
> +void i915_sched_node_init(struct i915_sched_node *node)
> +{
> +	INIT_LIST_HEAD(&node->signalers_list);
> +	INIT_LIST_HEAD(&node->waiters_list);
> +	INIT_LIST_HEAD(&node->link);
> +	node->attr.priority = I915_PRIORITY_INVALID;
> +}
> +
> +static struct i915_dependency *
> +i915_dependency_alloc(struct drm_i915_private *i915)
> +{
> +	return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
> +}
> +
> +static void
> +i915_dependency_free(struct drm_i915_private *i915,
> +		     struct i915_dependency *dep)
> +{
> +	kmem_cache_free(i915->dependencies, dep);
> +}
> +
> +bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
> +				      struct i915_sched_node *signal,
> +				      struct i915_dependency *dep,
> +				      unsigned long flags)
> +{
> +	bool ret = false;
> +
> +	spin_lock(&schedule_lock);
> +
> +	if (!node_signaled(signal)) {
> +		INIT_LIST_HEAD(&dep->dfs_link);
> +		list_add(&dep->wait_link, &signal->waiters_list);
> +		list_add(&dep->signal_link, &node->signalers_list);
> +		dep->signaler = signal;
> +		dep->flags = flags;
> +
> +		ret = true;
> +	}
> +
> +	spin_unlock(&schedule_lock);
> +
> +	return ret;
> +}
> +
> +int i915_sched_node_add_dependency(struct drm_i915_private *i915,
> +				   struct i915_sched_node *node,
> +				   struct i915_sched_node *signal)
> +{
> +	struct i915_dependency *dep;
> +
> +	dep = i915_dependency_alloc(i915);
> +	if (!dep)
> +		return -ENOMEM;
> +
> +	if (!__i915_sched_node_add_dependency(node, signal, dep,
> +					      I915_DEPENDENCY_ALLOC))
> +		i915_dependency_free(i915, dep);
> +
> +	return 0;
> +}
> +
> +void i915_sched_node_fini(struct drm_i915_private *i915,
> +			  struct i915_sched_node *node)
> +{
> +	struct i915_dependency *dep, *tmp;
> +
> +	GEM_BUG_ON(!list_empty(&node->link));
> +
> +	spin_lock(&schedule_lock);
> +
> +	/*
> +	 * Everyone we depended upon (the fences we wait to be signaled)
> +	 * should retire before us and remove themselves from our list.
> +	 * However, retirement is run independently on each timeline and
> +	 * so we may be called out-of-order.
> +	 */
> +	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
> +		GEM_BUG_ON(!node_signaled(dep->signaler));
> +		GEM_BUG_ON(!list_empty(&dep->dfs_link));
> +
> +		list_del(&dep->wait_link);
> +		if (dep->flags & I915_DEPENDENCY_ALLOC)
> +			i915_dependency_free(i915, dep);
> +	}
> +
> +	/* Remove ourselves from everyone who depends upon us */
> +	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
> +		GEM_BUG_ON(dep->signaler != node);
> +		GEM_BUG_ON(!list_empty(&dep->dfs_link));
> +
> +		list_del(&dep->signal_link);
> +		if (dep->flags & I915_DEPENDENCY_ALLOC)
> +			i915_dependency_free(i915, dep);
> +	}
> +
> +	spin_unlock(&schedule_lock);
> +}
> +
> +static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +{
> +	return rb_entry(rb, struct i915_priolist, node);
> +}
> +
> +static void assert_priolists(struct intel_engine_execlists * const execlists,
> +			     int queue_priority)
> +{
> +	struct rb_node *rb;
> +	int last_prio, i;
> +
> +	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> +		return;
> +
> +	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
> +		   rb_first(&execlists->queue.rb_root));
> +
> +	last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
> +	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> +		const struct i915_priolist *p = to_priolist(rb);
> +
> +		GEM_BUG_ON(p->priority >= last_prio);
> +		last_prio = p->priority;
> +
> +		GEM_BUG_ON(!p->used);
> +		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
> +			if (list_empty(&p->requests[i]))
> +				continue;
> +
> +			GEM_BUG_ON(!(p->used & BIT(i)));
> +		}
> +	}
> +}
> +
> +struct list_head *
> +i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
> +{
> +	struct intel_engine_execlists * const execlists = &engine->execlists;
> +	struct i915_priolist *p;
> +	struct rb_node **parent, *rb;
> +	bool first = true;
> +	int idx, i;
> +
> +	lockdep_assert_held(&engine->timeline.lock);
> +	assert_priolists(execlists, INT_MAX);
> +
> +	/* buckets sorted from highest [in slot 0] to lowest priority */
> +	idx = I915_PRIORITY_COUNT - (prio & ~I915_PRIORITY_MASK) - 1;
> +	prio >>= I915_USER_PRIORITY_SHIFT;
> +	if (unlikely(execlists->no_priolist))
> +		prio = I915_PRIORITY_NORMAL;
> +
> +find_priolist:
> +	/* most positive priority is scheduled first, equal priorities fifo */
> +	rb = NULL;
> +	parent = &execlists->queue.rb_root.rb_node;
> +	while (*parent) {
> +		rb = *parent;
> +		p = to_priolist(rb);
> +		if (prio > p->priority) {
> +			parent = &rb->rb_left;
> +		} else if (prio < p->priority) {
> +			parent = &rb->rb_right;
> +			first = false;
> +		} else {
> +			goto out;
> +		}
> +	}
> +
> +	if (prio == I915_PRIORITY_NORMAL) {
> +		p = &execlists->default_priolist;
> +	} else {
> +		p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
> +		/* Convert an allocation failure to a priority bump */
> +		if (unlikely(!p)) {
> +			prio = I915_PRIORITY_NORMAL; /* recurses just once */
> +
> +			/* To maintain ordering with all rendering, after an
> +			 * allocation failure we have to disable all scheduling.
> +			 * Requests will then be executed in fifo, and schedule
> +			 * will ensure that dependencies are emitted in fifo.
> +			 * There will be still some reordering with existing
> +			 * requests, so if userspace lied about their
> +			 * dependencies that reordering may be visible.
> +			 */
> +			execlists->no_priolist = true;
> +			goto find_priolist;
> +		}
> +	}
> +
> +	p->priority = prio;
> +	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
> +		INIT_LIST_HEAD(&p->requests[i]);
> +	rb_link_node(&p->node, rb, parent);
> +	rb_insert_color_cached(&p->node, &execlists->queue, first);
> +	p->used = 0;
> +
> +out:
> +	p->used |= BIT(idx);
> +	return &p->requests[idx];
> +}
> +
> +static struct intel_engine_cs *
> +sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
> +{
> +	struct intel_engine_cs *engine = node_to_request(node)->engine;
> +
> +	GEM_BUG_ON(!locked);
> +
> +	if (engine != locked) {
> +		spin_unlock(&locked->timeline.lock);
> +		spin_lock(&engine->timeline.lock);
> +	}
> +
> +	return engine;
> +}
> +
> +void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
> +{
> +	struct list_head *uninitialized_var(pl);
> +	struct intel_engine_cs *engine, *last;
> +	struct i915_dependency *dep, *p;
> +	struct i915_dependency stack;
> +	const int prio = attr->priority;
> +	LIST_HEAD(dfs);
> +
> +	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
> +
> +	if (i915_request_completed(rq))
> +		return;
> +
> +	if (prio <= READ_ONCE(rq->sched.attr.priority))
> +		return;
> +
> +	/* Needed in order to use the temporary link inside i915_dependency */
> +	spin_lock(&schedule_lock);
> +
> +	stack.signaler = &rq->sched;
> +	list_add(&stack.dfs_link, &dfs);
> +
> +	/*
> +	 * Recursively bump all dependent priorities to match the new request.
> +	 *
> +	 * A naive approach would be to use recursion:
> +	 * static void update_priorities(struct i915_sched_node *node, prio) {
> +	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
> +	 *		update_priorities(dep->signal, prio)
> +	 *	queue_request(node);
> +	 * }
> +	 * but that may have unlimited recursion depth and so runs a very
> +	 * real risk of overrunning the kernel stack. Instead, we build
> +	 * a flat list of all dependencies starting with the current request.
> +	 * As we walk the list of dependencies, we add all of its dependencies
> +	 * to the end of the list (this may include an already visited
> +	 * request) and continue to walk onwards onto the new dependencies. The
> +	 * end result is a topological list of requests in reverse order, the
> +	 * last element in the list is the request we must execute first.
> +	 */
> +	list_for_each_entry(dep, &dfs, dfs_link) {
> +		struct i915_sched_node *node = dep->signaler;
> +
> +		/*
> +		 * Within an engine, there can be no cycle, but we may
> +		 * refer to the same dependency chain multiple times
> +		 * (redundant dependencies are not eliminated) and across
> +		 * engines.
> +		 */
> +		list_for_each_entry(p, &node->signalers_list, signal_link) {
> +			GEM_BUG_ON(p == dep); /* no cycles! */
> +
> +			if (node_signaled(p->signaler))
> +				continue;
> +
> +			GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority);
> +			if (prio > READ_ONCE(p->signaler->attr.priority))
> +				list_move_tail(&p->dfs_link, &dfs);
> +		}
> +	}
> +
> +	/*
> +	 * If we didn't need to bump any existing priorities, and we haven't
> +	 * yet submitted this request (i.e. there is no potential race with
> +	 * execlists_submit_request()), we can set our own priority and skip
> +	 * acquiring the engine locks.
> +	 */
> +	if (rq->sched.attr.priority == I915_PRIORITY_INVALID) {
> +		GEM_BUG_ON(!list_empty(&rq->sched.link));
> +		rq->sched.attr = *attr;
> +
> +		if (stack.dfs_link.next == stack.dfs_link.prev)
> +			goto out_unlock;
> +
> +		__list_del_entry(&stack.dfs_link);
> +	}
> +
> +	last = NULL;
> +	engine = rq->engine;
> +	spin_lock_irq(&engine->timeline.lock);
> +
> +	/* Fifo and depth-first replacement ensure our deps execute before us */
> +	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
> +		struct i915_sched_node *node = dep->signaler;
> +
> +		INIT_LIST_HEAD(&dep->dfs_link);
> +
> +		engine = sched_lock_engine(node, engine);
> +
> +		/* Recheck after acquiring the engine->timeline.lock */
> +		if (prio <= node->attr.priority || node_signaled(node))
> +			continue;
> +
> +		node->attr.priority = prio;
> +		if (!list_empty(&node->link)) {
> +			if (last != engine) {
> +				pl = i915_sched_lookup_priolist(engine, prio);
> +				last = engine;
> +			}
> +			list_move_tail(&node->link, pl);
> +		} else {
> +			/*
> +			 * If the request is not in the priolist queue because
> +			 * it is not yet runnable, then it doesn't contribute
> +			 * to our preemption decisions. On the other hand,
> +			 * if the request is on the HW, it too is not in the
> +			 * queue; but in that case we may still need to reorder
> +			 * the inflight requests.
> +			 */
> +			if (!i915_sw_fence_done(&node_to_request(node)->submit))
> +				continue;
> +		}
> +
> +		if (prio <= engine->execlists.queue_priority)
> +			continue;
> +
> +		/*
> +		 * If we are already the currently executing context, don't
> +		 * bother evaluating if we should preempt ourselves.
> +		 */
> +		if (node_to_request(node)->global_seqno &&
> +		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
> +				      node_to_request(node)->global_seqno))
> +			continue;
> +
> +		/* Defer (tasklet) submission until after all of our updates. */
> +		engine->execlists.queue_priority = prio;
> +		tasklet_hi_schedule(&engine->execlists.tasklet);
> +	}
> +
> +	spin_unlock_irq(&engine->timeline.lock);
> +
> +out_unlock:
> +	spin_unlock(&schedule_lock);
> +}
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 93e43e263d8c..8058c17ae96a 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -8,9 +8,14 @@
>   #define _I915_SCHEDULER_H_
>   
>   #include <linux/bitops.h>
> +#include <linux/kernel.h>
>   
>   #include <uapi/drm/i915_drm.h>
>   
> +struct drm_i915_private;
> +struct i915_request;
> +struct intel_engine_cs;
> +
>   enum {
>   	I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
>   	I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
> @@ -77,4 +82,24 @@ struct i915_dependency {
>   #define I915_DEPENDENCY_ALLOC BIT(0)
>   };
>   
> +void i915_sched_node_init(struct i915_sched_node *node);
> +
> +bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
> +				      struct i915_sched_node *signal,
> +				      struct i915_dependency *dep,
> +				      unsigned long flags);
> +
> +int i915_sched_node_add_dependency(struct drm_i915_private *i915,
> +				   struct i915_sched_node *node,
> +				   struct i915_sched_node *signal);
> +
> +void i915_sched_node_fini(struct drm_i915_private *i915,
> +			  struct i915_sched_node *node);
> +
> +void i915_schedule(struct i915_request *request,
> +		   const struct i915_sched_attr *attr);
> +
> +struct list_head *
> +i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
> +
>   #endif /* _I915_SCHEDULER_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index fbcc56caffb6..31a3fdf8f051 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -13169,13 +13169,12 @@ intel_prepare_plane_fb(struct drm_plane *plane,
>   
>   	ret = intel_plane_pin_fb(to_intel_plane_state(new_state));
>   
> -	fb_obj_bump_render_priority(obj);
> -
>   	mutex_unlock(&dev_priv->drm.struct_mutex);
>   	i915_gem_object_unpin_pages(obj);
>   	if (ret)
>   		return ret;
>   
> +	fb_obj_bump_render_priority(obj);
>   	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
>   
>   	if (!new_state->fence) { /* implicit fencing */
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index ee9a656e549c..74be9a49ef9e 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -259,102 +259,6 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
>   	ce->lrc_desc = desc;
>   }
>   
> -static void assert_priolists(struct intel_engine_execlists * const execlists,
> -			     int queue_priority)
> -{
> -	struct rb_node *rb;
> -	int last_prio, i;
> -
> -	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> -		return;
> -
> -	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
> -		   rb_first(&execlists->queue.rb_root));
> -
> -	last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
> -	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> -		struct i915_priolist *p = to_priolist(rb);
> -
> -		GEM_BUG_ON(p->priority >= last_prio);
> -		last_prio = p->priority;
> -
> -		GEM_BUG_ON(!p->used);
> -		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
> -			if (list_empty(&p->requests[i]))
> -				continue;
> -
> -			GEM_BUG_ON(!(p->used & BIT(i)));
> -		}
> -	}
> -}
> -
> -static struct list_head *
> -lookup_priolist(struct intel_engine_cs *engine, int prio)
> -{
> -	struct intel_engine_execlists * const execlists = &engine->execlists;
> -	struct i915_priolist *p;
> -	struct rb_node **parent, *rb;
> -	bool first = true;
> -	int idx, i;
> -
> -	assert_priolists(execlists, INT_MAX);
> -
> -	/* buckets sorted from highest [in slot 0] to lowest priority */
> -	idx = I915_PRIORITY_COUNT - (prio & ~I915_PRIORITY_MASK) - 1;
> -	prio >>= I915_USER_PRIORITY_SHIFT;
> -	if (unlikely(execlists->no_priolist))
> -		prio = I915_PRIORITY_NORMAL;
> -
> -find_priolist:
> -	/* most positive priority is scheduled first, equal priorities fifo */
> -	rb = NULL;
> -	parent = &execlists->queue.rb_root.rb_node;
> -	while (*parent) {
> -		rb = *parent;
> -		p = to_priolist(rb);
> -		if (prio > p->priority) {
> -			parent = &rb->rb_left;
> -		} else if (prio < p->priority) {
> -			parent = &rb->rb_right;
> -			first = false;
> -		} else {
> -			goto out;
> -		}
> -	}
> -
> -	if (prio == I915_PRIORITY_NORMAL) {
> -		p = &execlists->default_priolist;
> -	} else {
> -		p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
> -		/* Convert an allocation failure to a priority bump */
> -		if (unlikely(!p)) {
> -			prio = I915_PRIORITY_NORMAL; /* recurses just once */
> -
> -			/* To maintain ordering with all rendering, after an
> -			 * allocation failure we have to disable all scheduling.
> -			 * Requests will then be executed in fifo, and schedule
> -			 * will ensure that dependencies are emitted in fifo.
> -			 * There will be still some reordering with existing
> -			 * requests, so if userspace lied about their
> -			 * dependencies that reordering may be visible.
> -			 */
> -			execlists->no_priolist = true;
> -			goto find_priolist;
> -		}
> -	}
> -
> -	p->priority = prio;
> -	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
> -		INIT_LIST_HEAD(&p->requests[i]);
> -	rb_link_node(&p->node, rb, parent);
> -	rb_insert_color_cached(&p->node, &execlists->queue, first);
> -	p->used = 0;
> -
> -out:
> -	p->used |= BIT(idx);
> -	return &p->requests[idx];
> -}
> -
>   static void unwind_wa_tail(struct i915_request *rq)
>   {
>   	rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
> @@ -381,7 +285,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
>   		if (rq_prio(rq) != prio) {
>   			prio = rq_prio(rq);
> -			pl = lookup_priolist(engine, prio);
> +			pl = i915_sched_lookup_priolist(engine, prio);
>   		}
>   		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>   
> @@ -398,7 +302,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   	if (!(prio & I915_PRIORITY_NEWCLIENT)) {
>   		prio |= I915_PRIORITY_NEWCLIENT;
>   		list_move_tail(&active->sched.link,
> -			       lookup_priolist(engine, prio));
> +			       i915_sched_lookup_priolist(engine, prio));
>   	}
>   }
>   
> @@ -793,7 +697,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	 */
>   	execlists->queue_priority =
>   		port != execlists->port ? rq_prio(last) : INT_MIN;
> -	assert_priolists(execlists, execlists->queue_priority);
>   
>   	if (submit) {
>   		port_assign(port, last);
> @@ -1120,12 +1023,7 @@ static void queue_request(struct intel_engine_cs *engine,
>   			  struct i915_sched_node *node,
>   			  int prio)
>   {
> -	list_add_tail(&node->link, lookup_priolist(engine, prio));
> -}
> -
> -static void __update_queue(struct intel_engine_cs *engine, int prio)
> -{
> -	engine->execlists.queue_priority = prio;
> +	list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
>   }
>   
>   static void __submit_queue_imm(struct intel_engine_cs *engine)
> @@ -1144,7 +1042,7 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
>   static void submit_queue(struct intel_engine_cs *engine, int prio)
>   {
>   	if (prio > engine->execlists.queue_priority) {
> -		__update_queue(engine, prio);
> +		engine->execlists.queue_priority = prio;
>   		__submit_queue_imm(engine);
>   	}
>   }
> @@ -1167,162 +1065,6 @@ static void execlists_submit_request(struct i915_request *request)
>   	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }
>   
> -static struct i915_request *sched_to_request(struct i915_sched_node *node)
> -{
> -	return container_of(node, struct i915_request, sched);
> -}
> -
> -static struct intel_engine_cs *
> -sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
> -{
> -	struct intel_engine_cs *engine = sched_to_request(node)->engine;
> -
> -	GEM_BUG_ON(!locked);
> -
> -	if (engine != locked) {
> -		spin_unlock(&locked->timeline.lock);
> -		spin_lock(&engine->timeline.lock);
> -	}
> -
> -	return engine;
> -}
> -
> -static void execlists_schedule(struct i915_request *request,
> -			       const struct i915_sched_attr *attr)
> -{
> -	struct list_head *uninitialized_var(pl);
> -	struct intel_engine_cs *engine, *last;
> -	struct i915_dependency *dep, *p;
> -	struct i915_dependency stack;
> -	const int prio = attr->priority;
> -	LIST_HEAD(dfs);
> -
> -	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
> -
> -	if (i915_request_completed(request))
> -		return;
> -
> -	if (prio <= READ_ONCE(request->sched.attr.priority))
> -		return;
> -
> -	/* Need BKL in order to use the temporary link inside i915_dependency */
> -	lockdep_assert_held(&request->i915->drm.struct_mutex);
> -
> -	stack.signaler = &request->sched;
> -	list_add(&stack.dfs_link, &dfs);
> -
> -	/*
> -	 * Recursively bump all dependent priorities to match the new request.
> -	 *
> -	 * A naive approach would be to use recursion:
> -	 * static void update_priorities(struct i915_sched_node *node, prio) {
> -	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
> -	 *		update_priorities(dep->signal, prio)
> -	 *	queue_request(node);
> -	 * }
> -	 * but that may have unlimited recursion depth and so runs a very
> -	 * real risk of overrunning the kernel stack. Instead, we build
> -	 * a flat list of all dependencies starting with the current request.
> -	 * As we walk the list of dependencies, we add all of its dependencies
> -	 * to the end of the list (this may include an already visited
> -	 * request) and continue to walk onwards onto the new dependencies. The
> -	 * end result is a topological list of requests in reverse order, the
> -	 * last element in the list is the request we must execute first.
> -	 */
> -	list_for_each_entry(dep, &dfs, dfs_link) {
> -		struct i915_sched_node *node = dep->signaler;
> -
> -		/*
> -		 * Within an engine, there can be no cycle, but we may
> -		 * refer to the same dependency chain multiple times
> -		 * (redundant dependencies are not eliminated) and across
> -		 * engines.
> -		 */
> -		list_for_each_entry(p, &node->signalers_list, signal_link) {
> -			GEM_BUG_ON(p == dep); /* no cycles! */
> -
> -			if (i915_sched_node_signaled(p->signaler))
> -				continue;
> -
> -			GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority);
> -			if (prio > READ_ONCE(p->signaler->attr.priority))
> -				list_move_tail(&p->dfs_link, &dfs);
> -		}
> -	}
> -
> -	/*
> -	 * If we didn't need to bump any existing priorities, and we haven't
> -	 * yet submitted this request (i.e. there is no potential race with
> -	 * execlists_submit_request()), we can set our own priority and skip
> -	 * acquiring the engine locks.
> -	 */
> -	if (request->sched.attr.priority == I915_PRIORITY_INVALID) {
> -		GEM_BUG_ON(!list_empty(&request->sched.link));
> -		request->sched.attr = *attr;
> -		if (stack.dfs_link.next == stack.dfs_link.prev)
> -			return;
> -		__list_del_entry(&stack.dfs_link);
> -	}
> -
> -	last = NULL;
> -	engine = request->engine;
> -	spin_lock_irq(&engine->timeline.lock);
> -
> -	/* Fifo and depth-first replacement ensure our deps execute before us */
> -	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
> -		struct i915_sched_node *node = dep->signaler;
> -
> -		INIT_LIST_HEAD(&dep->dfs_link);
> -
> -		engine = sched_lock_engine(node, engine);
> -
> -		/* Recheck after acquiring the engine->timeline.lock */
> -		if (prio <= node->attr.priority)
> -			continue;
> -
> -		if (i915_sched_node_signaled(node))
> -			continue;
> -
> -		node->attr.priority = prio;
> -		if (!list_empty(&node->link)) {
> -			if (last != engine) {
> -				pl = lookup_priolist(engine, prio);
> -				last = engine;
> -			}
> -			list_move_tail(&node->link, pl);
> -		} else {
> -			/*
> -			 * If the request is not in the priolist queue because
> -			 * it is not yet runnable, then it doesn't contribute
> -			 * to our preemption decisions. On the other hand,
> -			 * if the request is on the HW, it too is not in the
> -			 * queue; but in that case we may still need to reorder
> -			 * the inflight requests.
> -			 */
> -			if (!i915_sw_fence_done(&sched_to_request(node)->submit))
> -				continue;
> -		}
> -
> -		if (prio <= engine->execlists.queue_priority)
> -			continue;
> -
> -		/*
> -		 * If we are already the currently executing context, don't
> -		 * bother evaluating if we should preempt ourselves.
> -		 */
> -		if (sched_to_request(node)->global_seqno &&
> -		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
> -				      sched_to_request(node)->global_seqno))
> -			continue;
> -
> -		/* Defer (tasklet) submission until after all of our updates. */
> -		__update_queue(engine, prio);
> -		tasklet_hi_schedule(&engine->execlists.tasklet);
> -	}
> -
> -	spin_unlock_irq(&engine->timeline.lock);
> -}
> -
>   static void execlists_context_destroy(struct intel_context *ce)
>   {
>   	GEM_BUG_ON(ce->pin_count);
> @@ -2360,7 +2102,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
>   {
>   	engine->submit_request = execlists_submit_request;
>   	engine->cancel_requests = execlists_cancel_requests;
> -	engine->schedule = execlists_schedule;
> +	engine->schedule = i915_schedule;
>   	engine->execlists.tasklet.func = execlists_submission_tasklet;
>   
>   	engine->reset.prepare = execlists_reset_prepare;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 1534de5bb852..f6ec48a75a69 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -498,11 +498,10 @@ struct intel_engine_cs {
>   	 */
>   	void		(*submit_request)(struct i915_request *rq);
>   
> -	/* Call when the priority on a request has changed and it and its
> +	/*
> +	 * Call when the priority on a request has changed and it and its
>   	 * dependencies may need rescheduling. Note the request itself may
>   	 * not be ready to run!
> -	 *
> -	 * Called under the struct_mutex.
>   	 */
>   	void		(*schedule)(struct i915_request *request,
>   				    const struct i915_sched_attr *attr);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 17/40] drm/i915: Priority boost for waiting clients
  2018-09-19 19:55 ` [PATCH 17/40] drm/i915: Priority boost for waiting clients Chris Wilson
@ 2018-09-24 11:29   ` Tvrtko Ursulin
  2018-09-25  9:00     ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-24 11:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> Latency is in the eye of the beholder. In the case where a client stops
> and waits for the gpu, give that request chain a small priority boost
> (not so that it overtakes higher priority clients, to preserve the
> external ordering) so that ideally the wait completes earlier.
> 
> Testcase: igt/gem_sync/switch-default
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c       |  5 +++-
>   drivers/gpu/drm/i915/i915_request.c   |  2 ++
>   drivers/gpu/drm/i915/i915_request.h   |  5 ++--
>   drivers/gpu/drm/i915/i915_scheduler.c | 34 ++++++++++++++++++++++-----
>   drivers/gpu/drm/i915/i915_scheduler.h |  5 +++-
>   5 files changed, 41 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 6b347ffb996b..2fa75f2a1980 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1748,6 +1748,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>   	 */
>   	err = i915_gem_object_wait(obj,
>   				   I915_WAIT_INTERRUPTIBLE |
> +				   I915_WAIT_PRIORITY |
>   				   (write_domain ? I915_WAIT_ALL : 0),
>   				   MAX_SCHEDULE_TIMEOUT,
>   				   to_rps_client(file));
> @@ -3749,7 +3750,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>   	start = ktime_get();
>   
>   	ret = i915_gem_object_wait(obj,
> -				   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
> +				   I915_WAIT_INTERRUPTIBLE |
> +				   I915_WAIT_PRIORITY |
> +				   I915_WAIT_ALL,
>   				   to_wait_timeout(args->timeout_ns),
>   				   to_rps_client(file));
>   
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index d73ad490a261..abd4dacbab8e 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1237,6 +1237,8 @@ long i915_request_wait(struct i915_request *rq,
>   		add_wait_queue(errq, &reset);
>   
>   	intel_wait_init(&wait);
> +	if (flags & I915_WAIT_PRIORITY)
> +		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
>   
>   restart:
>   	do {
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 5f7361e0fca6..90e9d170a0cd 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -277,8 +277,9 @@ long i915_request_wait(struct i915_request *rq,
>   	__attribute__((nonnull(1)));
>   #define I915_WAIT_INTERRUPTIBLE	BIT(0)
>   #define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
> -#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
> -#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
> +#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
> +#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
> +#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
>   
>   static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
>   					    u32 seqno);
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 910ac7089596..1423088dceff 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -239,7 +239,8 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
>   	return engine;
>   }
>   
> -void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
> +static void __i915_schedule(struct i915_request *rq,
> +			    const struct i915_sched_attr *attr)
>   {
>   	struct list_head *uninitialized_var(pl);
>   	struct intel_engine_cs *engine, *last;
> @@ -248,6 +249,8 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
>   	const int prio = attr->priority;
>   	LIST_HEAD(dfs);
>   
> +	/* Needed in order to use the temporary link inside i915_dependency */
> +	lockdep_assert_held(&schedule_lock);
>   	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
>   
>   	if (i915_request_completed(rq))
> @@ -256,9 +259,6 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
>   	if (prio <= READ_ONCE(rq->sched.attr.priority))
>   		return;
>   
> -	/* Needed in order to use the temporary link inside i915_dependency */
> -	spin_lock(&schedule_lock);
> -
>   	stack.signaler = &rq->sched;
>   	list_add(&stack.dfs_link, &dfs);
>   
> @@ -312,7 +312,7 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
>   		rq->sched.attr = *attr;
>   
>   		if (stack.dfs_link.next == stack.dfs_link.prev)
> -			goto out_unlock;
> +			return;
>   
>   		__list_del_entry(&stack.dfs_link);
>   	}
> @@ -371,7 +371,29 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
>   	}
>   
>   	spin_unlock_irq(&engine->timeline.lock);
> +}
>   
> -out_unlock:
> +void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
> +{
> +	spin_lock(&schedule_lock);
> +	__i915_schedule(rq, attr);
>   	spin_unlock(&schedule_lock);
>   }
> +
> +void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
> +{
> +	struct i915_sched_attr attr;
> +
> +	GEM_BUG_ON(bump & I915_PRIORITY_MASK);
> +
> +	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
> +		return;
> +
> +	spin_lock_bh(&schedule_lock);
> +
> +	attr = rq->sched.attr;
> +	attr.priority |= bump;
> +	__i915_schedule(rq, &attr);
> +
> +	spin_unlock_bh(&schedule_lock);
> +}
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 8058c17ae96a..cbfb64288c61 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -24,12 +24,13 @@ enum {
>   	I915_PRIORITY_INVALID = INT_MIN
>   };
>   
> -#define I915_USER_PRIORITY_SHIFT 1
> +#define I915_USER_PRIORITY_SHIFT 2
>   #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>   
>   #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>   #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>   
> +#define I915_PRIORITY_WAIT	((u8)BIT(1))
>   #define I915_PRIORITY_NEWCLIENT	((u8)BIT(0))

Put a comment here explaining the priority order is reversed in the 
internal range.
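
Something along these lines perhaps (my wording, adjust as you see fit):

	/*
	 * The internal bits are sub-levels within a single user priority:
	 * a larger internal value maps to a smaller requests[] index in
	 * i915_sched_lookup_priolist(), and slot 0 is dequeued first, so
	 * WAIT outranks NEWCLIENT which outranks the bare user level.
	 */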

With the new client protection against being repeatedly pre-empted 
added in the respective previous patch, I am okay with giving this a go.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

>   
>   struct i915_sched_attr {
> @@ -99,6 +100,8 @@ void i915_sched_node_fini(struct drm_i915_private *i915,
>   void i915_schedule(struct i915_request *request,
>   		   const struct i915_sched_attr *attr);
>   
> +void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump);
> +
>   struct list_head *
>   i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
>   
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 18/40] drm/i915: Report the number of closed vma held by each context in debugfs
  2018-09-19 19:55 ` [PATCH 18/40] drm/i915: Report the number of closed vma held by each context in debugfs Chris Wilson
@ 2018-09-24 11:57   ` Tvrtko Ursulin
  2018-09-25 12:20     ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-24 11:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> Include the total size of closed vma when reporting the per_ctx_stats of
> debugfs/i915_gem_objects.

Why do we need/want this?

> Whilst adjusting the context tracking, note that we can simply use our
> list of contexts in i915->contexts rather than circumlocute via
> dev->filelist and the per-file context idr.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c | 113 +++++++++++-----------------
>   1 file changed, 42 insertions(+), 71 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 2ac75bc10afa..6b5cc30f3e09 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -302,6 +302,7 @@ struct file_stats {
>   	u64 total, unbound;
>   	u64 global, shared;
>   	u64 active, inactive;
> +	u64 closed;
>   };
>   
>   static int per_file_stats(int id, void *ptr, void *data)
> @@ -336,6 +337,9 @@ static int per_file_stats(int id, void *ptr, void *data)
>   			stats->active += vma->node.size;
>   		else
>   			stats->inactive += vma->node.size;
> +
> +		if (i915_vma_is_closed(vma))
> +			stats->closed += vma->node.size;

We can have closed and active?

>   	}
>   
>   	return 0;
> @@ -343,7 +347,7 @@ static int per_file_stats(int id, void *ptr, void *data)
>   
>   #define print_file_stats(m, name, stats) do { \
>   	if (stats.count) \
> -		seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound)\n", \
> +		seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound, %llu closed)\n", \
>   			   name, \
>   			   stats.count, \
>   			   stats.total, \
> @@ -351,7 +355,8 @@ static int per_file_stats(int id, void *ptr, void *data)
>   			   stats.inactive, \
>   			   stats.global, \
>   			   stats.shared, \
> -			   stats.unbound); \
> +			   stats.unbound, \
> +			   stats.closed); \
>   } while (0)
>   
>   static void print_batch_pool_stats(struct seq_file *m,
> @@ -377,44 +382,44 @@ static void print_batch_pool_stats(struct seq_file *m,
>   	print_file_stats(m, "[k]batch pool", stats);
>   }
>   
> -static int per_file_ctx_stats(int idx, void *ptr, void *data)
> +static void print_context_stats(struct seq_file *m,
> +				struct drm_i915_private *i915)
>   {
> -	struct i915_gem_context *ctx = ptr;
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	for_each_engine(engine, ctx->i915, id) {
> -		struct intel_context *ce = to_intel_context(ctx, engine);
> +	struct file_stats kstats = {};
> +	struct i915_gem_context *ctx;
>   
> -		if (ce->state)
> -			per_file_stats(0, ce->state->obj, data);
> -		if (ce->ring)
> -			per_file_stats(0, ce->ring->vma->obj, data);
> -	}
> +	list_for_each_entry(ctx, &i915->contexts.list, link) {
> +		struct file_stats stats = { .file_priv = ctx->file_priv };
> +		struct intel_engine_cs *engine;
> +		enum intel_engine_id id;
>   
> -	return 0;
> -}
> +		for_each_engine(engine, i915, id) {
> +			struct intel_context *ce = to_intel_context(ctx, engine);
>   
> -static void print_context_stats(struct seq_file *m,
> -				struct drm_i915_private *dev_priv)
> -{
> -	struct drm_device *dev = &dev_priv->drm;
> -	struct file_stats stats;
> -	struct drm_file *file;
> +			if (ce->state)
> +				per_file_stats(0, ce->state->obj, &kstats);
> +			if (ce->ring)
> +				per_file_stats(0, ce->ring->vma->obj, &kstats);
> +		}
>   
> -	memset(&stats, 0, sizeof(stats));
> +		if (!IS_ERR_OR_NULL(stats.file_priv)) {
> +			struct drm_file *file = stats.file_priv->file;
> +			struct task_struct *task;
>   
> -	mutex_lock(&dev->struct_mutex);
> -	if (dev_priv->kernel_context)
> -		per_file_ctx_stats(0, dev_priv->kernel_context, &stats);
> +			spin_lock(&file->table_lock);
> +			idr_for_each(&file->object_idr, per_file_stats, &stats);

Headache-inducing diff... however, doesn't this over-account objects on 
account of walking the same file from multiple contexts?

> +			spin_unlock(&file->table_lock);
>   
> -	list_for_each_entry(file, &dev->filelist, lhead) {
> -		struct drm_i915_file_private *fpriv = file->driver_priv;
> -		idr_for_each(&fpriv->context_idr, per_file_ctx_stats, &stats);
> +			rcu_read_lock();
> +			task = pid_task(ctx->pid ?: file->pid, PIDTYPE_PID);
> +			print_file_stats(m,
> +					 task ? task->comm : "<unknown>",
> +					 stats);

And this as well looks like it'll end up duplicated.

> +			rcu_read_unlock();
> +		}
>   	}
> -	mutex_unlock(&dev->struct_mutex);
>   
> -	print_file_stats(m, "[k]contexts", stats);
> +	print_file_stats(m, "[k]contexts", kstats);
>   }
>   
>   static int i915_gem_object_info(struct seq_file *m, void *data)
> @@ -426,14 +431,9 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
>   	u64 size, mapped_size, purgeable_size, dpy_size, huge_size;
>   	struct drm_i915_gem_object *obj;
>   	unsigned int page_sizes = 0;
> -	struct drm_file *file;
>   	char buf[80];
>   	int ret;
>   
> -	ret = mutex_lock_interruptible(&dev->struct_mutex);
> -	if (ret)
> -		return ret;
> -
>   	seq_printf(m, "%u objects, %llu bytes\n",
>   		   dev_priv->mm.object_count,
>   		   dev_priv->mm.object_memory);

Noticed that we technically need mm.object_stat_lock here for an atomic 
readout, but I guess we don't care much.

> @@ -514,43 +514,14 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
>   					buf, sizeof(buf)));
>   
>   	seq_putc(m, '\n');
> -	print_batch_pool_stats(m, dev_priv);
> -	mutex_unlock(&dev->struct_mutex);
> -
> -	mutex_lock(&dev->filelist_mutex);
> -	print_context_stats(m, dev_priv);
> -	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
> -		struct file_stats stats;
> -		struct drm_i915_file_private *file_priv = file->driver_priv;
> -		struct i915_request *request;
> -		struct task_struct *task;
> -
> -		mutex_lock(&dev->struct_mutex);
>   
> -		memset(&stats, 0, sizeof(stats));
> -		stats.file_priv = file->driver_priv;
> -		spin_lock(&file->table_lock);
> -		idr_for_each(&file->object_idr, per_file_stats, &stats);
> -		spin_unlock(&file->table_lock);
> -		/*
> -		 * Although we have a valid reference on file->pid, that does
> -		 * not guarantee that the task_struct who called get_pid() is
> -		 * still alive (e.g. get_pid(current) => fork() => exit()).
> -		 * Therefore, we need to protect this ->comm access using RCU.
> -		 */
> -		request = list_first_entry_or_null(&file_priv->mm.request_list,
> -						   struct i915_request,
> -						   client_link);
> -		rcu_read_lock();
> -		task = pid_task(request && request->gem_context->pid ?
> -				request->gem_context->pid : file->pid,
> -				PIDTYPE_PID);
> -		print_file_stats(m, task ? task->comm : "<unknown>", stats);
> -		rcu_read_unlock();
> +	ret = mutex_lock_interruptible(&dev->struct_mutex);
> +	if (ret)
> +		return ret;
>   
> -		mutex_unlock(&dev->struct_mutex);
> -	}
> -	mutex_unlock(&dev->filelist_mutex);
> +	print_batch_pool_stats(m, dev_priv);
> +	print_context_stats(m, dev_priv);
> +	mutex_unlock(&dev->struct_mutex);
>   
>   	return 0;
>   }
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 19/40] drm/i915: Remove debugfs/i915_ppgtt_info
  2018-09-19 19:55 ` [PATCH 19/40] drm/i915: Remove debugfs/i915_ppgtt_info Chris Wilson
@ 2018-09-24 12:03   ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-24 12:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> The information presented here is not relevant to current development.
> We can either use the context information, but more often we want to
> inspect the active gpu state.
> 
> The ulterior motive is to eradicate dev->filelist.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c | 119 ----------------------------
>   1 file changed, 119 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 6b5cc30f3e09..39f319c49def 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2035,124 +2035,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
>   	return 0;
>   }
>   
> -static int per_file_ctx(int id, void *ptr, void *data)
> -{
> -	struct i915_gem_context *ctx = ptr;
> -	struct seq_file *m = data;
> -	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
> -
> -	if (!ppgtt) {
> -		seq_printf(m, "  no ppgtt for context %d\n",
> -			   ctx->user_handle);
> -		return 0;
> -	}
> -
> -	if (i915_gem_context_is_default(ctx))
> -		seq_puts(m, "  default context:\n");
> -	else
> -		seq_printf(m, "  context %d:\n", ctx->user_handle);
> -	ppgtt->debug_dump(ppgtt, m);
> -
> -	return 0;
> -}
> -
> -static void gen8_ppgtt_info(struct seq_file *m,
> -			    struct drm_i915_private *dev_priv)
> -{
> -	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	int i;
> -
> -	if (!ppgtt)
> -		return;
> -
> -	for_each_engine(engine, dev_priv, id) {
> -		seq_printf(m, "%s\n", engine->name);
> -		for (i = 0; i < 4; i++) {
> -			u64 pdp = I915_READ(GEN8_RING_PDP_UDW(engine, i));
> -			pdp <<= 32;
> -			pdp |= I915_READ(GEN8_RING_PDP_LDW(engine, i));
> -			seq_printf(m, "\tPDP%d 0x%016llx\n", i, pdp);
> -		}
> -	}
> -}
> -
> -static void gen6_ppgtt_info(struct seq_file *m,
> -			    struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	if (IS_GEN6(dev_priv))
> -		seq_printf(m, "GFX_MODE: 0x%08x\n", I915_READ(GFX_MODE));
> -
> -	for_each_engine(engine, dev_priv, id) {
> -		seq_printf(m, "%s\n", engine->name);
> -		if (IS_GEN7(dev_priv))
> -			seq_printf(m, "GFX_MODE: 0x%08x\n",
> -				   I915_READ(RING_MODE_GEN7(engine)));
> -		seq_printf(m, "PP_DIR_BASE: 0x%08x\n",
> -			   I915_READ(RING_PP_DIR_BASE(engine)));
> -		seq_printf(m, "PP_DIR_BASE_READ: 0x%08x\n",
> -			   I915_READ(RING_PP_DIR_BASE_READ(engine)));
> -		seq_printf(m, "PP_DIR_DCLV: 0x%08x\n",
> -			   I915_READ(RING_PP_DIR_DCLV(engine)));
> -	}
> -	if (dev_priv->mm.aliasing_ppgtt) {
> -		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
> -
> -		seq_puts(m, "aliasing PPGTT:\n");
> -		seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd.base.ggtt_offset);
> -
> -		ppgtt->debug_dump(ppgtt, m);
> -	}
> -
> -	seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK));
> -}
> -
> -static int i915_ppgtt_info(struct seq_file *m, void *data)
> -{
> -	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> -	struct drm_device *dev = &dev_priv->drm;
> -	struct drm_file *file;
> -	int ret;
> -
> -	mutex_lock(&dev->filelist_mutex);
> -	ret = mutex_lock_interruptible(&dev->struct_mutex);
> -	if (ret)
> -		goto out_unlock;
> -
> -	intel_runtime_pm_get(dev_priv);
> -
> -	if (INTEL_GEN(dev_priv) >= 8)
> -		gen8_ppgtt_info(m, dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 6)
> -		gen6_ppgtt_info(m, dev_priv);
> -
> -	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
> -		struct drm_i915_file_private *file_priv = file->driver_priv;
> -		struct task_struct *task;
> -
> -		task = get_pid_task(file->pid, PIDTYPE_PID);
> -		if (!task) {
> -			ret = -ESRCH;
> -			goto out_rpm;
> -		}
> -		seq_printf(m, "\nproc: %s\n", task->comm);
> -		put_task_struct(task);
> -		idr_for_each(&file_priv->context_idr, per_file_ctx,
> -			     (void *)(unsigned long)m);
> -	}
> -
> -out_rpm:
> -	intel_runtime_pm_put(dev_priv);
> -	mutex_unlock(&dev->struct_mutex);
> -out_unlock:
> -	mutex_unlock(&dev->filelist_mutex);
> -	return ret;
> -}
> -
>   static int count_irq_waiters(struct drm_i915_private *i915)
>   {
>   	struct intel_engine_cs *engine;
> @@ -4741,7 +4623,6 @@ static const struct drm_info_list i915_debugfs_list[] = {
>   	{"i915_context_status", i915_context_status, 0},
>   	{"i915_forcewake_domains", i915_forcewake_domains, 0},
>   	{"i915_swizzle_info", i915_swizzle_info, 0},
> -	{"i915_ppgtt_info", i915_ppgtt_info, 0},
>   	{"i915_llc", i915_llc, 0},
>   	{"i915_edp_psr_status", i915_edp_psr_status, 0},
>   	{"i915_energy_uJ", i915_energy_uJ, 0},
> 

Looks like it could only be useful during development, so hiding it under 
GEM_DEBUG would also work for me, if someone thinks they could still need 
it. Perhaps run the question past some people who have worked in the page 
table area?

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 22/40] drm/i915: Syntatic sugar for using intel_runtime_pm
  2018-09-19 19:55 ` [PATCH 22/40] drm/i915: Syntatic sugar for using intel_runtime_pm Chris Wilson
@ 2018-09-24 12:08   ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-24 12:08 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> Frequently, we use intel_runtime_pm_get/_put around a small block.
> Formalise that usage by providing a macro to define such a block with an
> automatic closure to scope the intel_runtime_pm wakeref to that block,
> i.e. macro abuse smelling of python.

Interesting and tempting, but I cannot shake the feeling it is maybe 
going a bit too far. We could add with_lock, with_mutex, 
with_runtime_pm_and_lock, and where does it end? :) I don't know... ping 
some more people on this one, I guess.
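
For anyone skimming the thread, the shape of the transformation (mirroring 
the pattern repeated throughout the diff; do_something() is just a made-up 
placeholder) is roughly:

	/* before */
	wakeref = intel_runtime_pm_get(i915);
	err = do_something(i915);
	intel_runtime_pm_put(i915, wakeref);

	/* after: the put happens automatically when leaving the block */
	err = 0; /* pre-set, the compiler cannot tell the body always runs */
	with_intel_runtime_pm(i915, wakeref)
		err = do_something(i915);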

Regards,

Tvrtko

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c           | 163 ++++++++----------
>   drivers/gpu/drm/i915/i915_gem.c               |  10 +-
>   drivers/gpu/drm/i915/i915_gem_gtt.c           |  23 ++-
>   drivers/gpu/drm/i915/i915_gem_shrinker.c      |  44 ++---
>   drivers/gpu/drm/i915/i915_pmu.c               |   7 +-
>   drivers/gpu/drm/i915/i915_sysfs.c             |   7 +-
>   drivers/gpu/drm/i915/intel_drv.h              |   8 +
>   drivers/gpu/drm/i915/intel_guc_log.c          |  26 ++-
>   drivers/gpu/drm/i915/intel_huc.c              |   7 +-
>   drivers/gpu/drm/i915/intel_panel.c            |  18 +-
>   drivers/gpu/drm/i915/intel_uncore.c           |  30 ++--
>   drivers/gpu/drm/i915/selftests/i915_gem.c     |  34 ++--
>   .../gpu/drm/i915/selftests/i915_gem_context.c |  12 +-
>   .../gpu/drm/i915/selftests/i915_gem_object.c  |  11 +-
>   .../drm/i915/selftests/intel_workarounds.c    |  12 +-
>   15 files changed, 193 insertions(+), 219 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index dbfe4e456d97..98fa216d19bb 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -952,9 +952,9 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
>   	struct i915_gpu_state *gpu;
>   	intel_wakeref_t wakeref;
>   
> -	wakeref = intel_runtime_pm_get(i915);
> -	gpu = i915_capture_gpu_state(i915);
> -	intel_runtime_pm_put(i915, wakeref);
> +	gpu = NULL;
> +	with_intel_runtime_pm(i915, wakeref)
> +		gpu = i915_capture_gpu_state(i915);
>   	if (!gpu)
>   		return -ENOMEM;
>   
> @@ -1015,9 +1015,8 @@ i915_next_seqno_set(void *data, u64 val)
>   	if (ret)
>   		return ret;
>   
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	ret = i915_gem_set_global_seqno(dev, val);
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref)
> +		ret = i915_gem_set_global_seqno(dev, val);
>   
>   	mutex_unlock(&dev->struct_mutex);
>   
> @@ -1305,17 +1304,15 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>   		return 0;
>   	}
>   
> -	wakeref = intel_runtime_pm_get(dev_priv);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		for_each_engine(engine, dev_priv, id) {
> +			acthd[id] = intel_engine_get_active_head(engine);
> +			seqno[id] = intel_engine_get_seqno(engine);
> +		}
>   
> -	for_each_engine(engine, dev_priv, id) {
> -		acthd[id] = intel_engine_get_active_head(engine);
> -		seqno[id] = intel_engine_get_seqno(engine);
> +		intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
>   	}
>   
> -	intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
> -
> -	intel_runtime_pm_put(dev_priv, wakeref);
> -
>   	if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer))
>   		seq_printf(m, "Hangcheck active, timer fires in %dms\n",
>   			   jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
> @@ -1591,18 +1588,16 @@ static int i915_drpc_info(struct seq_file *m, void *unused)
>   {
>   	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>   	intel_wakeref_t wakeref;
> -	int err;
> -
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -
> -	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> -		err = vlv_drpc_info(m);
> -	else if (INTEL_GEN(dev_priv) >= 6)
> -		err = gen6_drpc_info(m);
> -	else
> -		err = ironlake_drpc_info(m);
> +	int err = -ENODEV;
>   
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> +			err = vlv_drpc_info(m);
> +		else if (INTEL_GEN(dev_priv) >= 6)
> +			err = gen6_drpc_info(m);
> +		else
> +			err = ironlake_drpc_info(m);
> +	}
>   
>   	return err;
>   }
> @@ -2167,9 +2162,8 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data)
>   	p = drm_seq_file_printer(m);
>   	intel_uc_fw_dump(&dev_priv->huc.fw, &p);
>   
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref)
> +		seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
>   
>   	return 0;
>   }
> @@ -2179,7 +2173,6 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
>   	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>   	intel_wakeref_t wakeref;
>   	struct drm_printer p;
> -	u32 tmp, i;
>   
>   	if (!HAS_GUC(dev_priv))
>   		return -ENODEV;
> @@ -2187,22 +2180,23 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
>   	p = drm_seq_file_printer(m);
>   	intel_uc_fw_dump(&dev_priv->guc.fw, &p);
>   
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -
> -	tmp = I915_READ(GUC_STATUS);
> -
> -	seq_printf(m, "\nGuC status 0x%08x:\n", tmp);
> -	seq_printf(m, "\tBootrom status = 0x%x\n",
> -		(tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
> -	seq_printf(m, "\tuKernel status = 0x%x\n",
> -		(tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
> -	seq_printf(m, "\tMIA Core status = 0x%x\n",
> -		(tmp & GS_MIA_MASK) >> GS_MIA_SHIFT);
> -	seq_puts(m, "\nScratch registers:\n");
> -	for (i = 0; i < 16; i++)
> -		seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i)));
> -
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		u32 tmp = I915_READ(GUC_STATUS);
> +		u32 i;
> +
> +		seq_printf(m, "\nGuC status 0x%08x:\n", tmp);
> +		seq_printf(m, "\tBootrom status = 0x%x\n",
> +			   (tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
> +		seq_printf(m, "\tuKernel status = 0x%x\n",
> +			   (tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
> +		seq_printf(m, "\tMIA Core status = 0x%x\n",
> +			   (tmp & GS_MIA_MASK) >> GS_MIA_SHIFT);
> +		seq_puts(m, "\nScratch registers:\n");
> +		for (i = 0; i < 16; i++) {
> +			seq_printf(m, "\t%2d: \t0x%x\n",
> +				   i, I915_READ(SOFT_SCRATCH(i)));
> +		}
> +	}
>   
>   	return 0;
>   }
> @@ -2675,19 +2669,14 @@ static int i915_energy_uJ(struct seq_file *m, void *data)
>   	if (INTEL_GEN(dev_priv) < 6)
>   		return -ENODEV;
>   
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -
> -	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) {
> -		intel_runtime_pm_put(dev_priv, wakeref);
> +	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power))
>   		return -ENODEV;
> -	}
>   
>   	units = (power & 0x1f00) >> 8;
> -	power = I915_READ(MCH_SECP_NRG_STTS);
> -	power = (1000000 * power) >> units; /* convert to uJ */
> -
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref)
> +		power = I915_READ(MCH_SECP_NRG_STTS);
>   
> +	power = (1000000 * power) >> units; /* convert to uJ */
>   	seq_printf(m, "%llu", power);
>   
>   	return 0;
> @@ -3268,22 +3257,20 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf,
>   	struct seq_file *m = file->private_data;
>   	struct drm_i915_private *dev_priv = m->private;
>   	intel_wakeref_t wakeref;
> -	int ret;
>   	bool enable;
> +	int ret;
>   
>   	ret = kstrtobool_from_user(ubuf, len, &enable);
>   	if (ret < 0)
>   		return ret;
>   
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -
> -	if (!dev_priv->ipc_enabled && enable)
> -		DRM_INFO("Enabling IPC: WM will be proper only after next commit\n");
> -	dev_priv->wm.distrust_bios_wm = true;
> -	dev_priv->ipc_enabled = enable;
> -	intel_enable_ipc(dev_priv);
> -
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		if (!dev_priv->ipc_enabled && enable)
> +			DRM_INFO("Enabling IPC: WM will be proper only after next commit\n");
> +		dev_priv->wm.distrust_bios_wm = true;
> +		dev_priv->ipc_enabled = enable;
> +		intel_enable_ipc(dev_priv);
> +	}
>   
>   	return len;
>   }
> @@ -4127,16 +4114,13 @@ i915_cache_sharing_get(void *data, u64 *val)
>   {
>   	struct drm_i915_private *dev_priv = data;
>   	intel_wakeref_t wakeref;
> -	u32 snpcr;
> +	u32 snpcr = 0;
>   
>   	if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv)))
>   		return -ENODEV;
>   
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -
> -	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
> -
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref)
> +		snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
>   
>   	*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
>   
> @@ -4148,7 +4132,6 @@ i915_cache_sharing_set(void *data, u64 val)
>   {
>   	struct drm_i915_private *dev_priv = data;
>   	intel_wakeref_t wakeref;
> -	u32 snpcr;
>   
>   	if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv)))
>   		return -ENODEV;
> @@ -4156,16 +4139,17 @@ i915_cache_sharing_set(void *data, u64 val)
>   	if (val > 3)
>   		return -EINVAL;
>   
> -	wakeref = intel_runtime_pm_get(dev_priv);
>   	DRM_DEBUG_DRIVER("Manually setting uncore sharing to %llu\n", val);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		u32 snpcr;
> +
> +		/* Update the cache sharing policy here as well */
> +		snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
> +		snpcr &= ~GEN6_MBC_SNPCR_MASK;
> +		snpcr |= val << GEN6_MBC_SNPCR_SHIFT;
> +		I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
> +	}
>   
> -	/* Update the cache sharing policy here as well */
> -	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
> -	snpcr &= ~GEN6_MBC_SNPCR_MASK;
> -	snpcr |= (val << GEN6_MBC_SNPCR_SHIFT);
> -	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
> -
> -	intel_runtime_pm_put(dev_priv, wakeref);
>   	return 0;
>   }
>   
> @@ -4402,20 +4386,17 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
>   	sseu.max_eus_per_subslice =
>   		INTEL_INFO(dev_priv)->sseu.max_eus_per_subslice;
>   
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -
> -	if (IS_CHERRYVIEW(dev_priv)) {
> -		cherryview_sseu_device_status(dev_priv, &sseu);
> -	} else if (IS_BROADWELL(dev_priv)) {
> -		broadwell_sseu_device_status(dev_priv, &sseu);
> -	} else if (IS_GEN9(dev_priv)) {
> -		gen9_sseu_device_status(dev_priv, &sseu);
> -	} else if (INTEL_GEN(dev_priv) >= 10) {
> -		gen10_sseu_device_status(dev_priv, &sseu);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		if (IS_CHERRYVIEW(dev_priv))
> +			cherryview_sseu_device_status(dev_priv, &sseu);
> +		else if (IS_BROADWELL(dev_priv))
> +			broadwell_sseu_device_status(dev_priv, &sseu);
> +		else if (IS_GEN9(dev_priv))
> +			gen9_sseu_device_status(dev_priv, &sseu);
> +		else if (INTEL_GEN(dev_priv) >= 10)
> +			gen10_sseu_device_status(dev_priv, &sseu);
>   	}
>   
> -	intel_runtime_pm_put(dev_priv, wakeref);
> -
>   	i915_print_sseu_info(m, false, &sseu);
>   
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b6da68846164..233a65487c3f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -814,13 +814,13 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
>   
>   	i915_gem_chipset_flush(dev_priv);
>   
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	spin_lock_irq(&dev_priv->uncore.lock);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		spin_lock_irq(&dev_priv->uncore.lock);
>   
> -	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
> +		POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
>   
> -	spin_unlock_irq(&dev_priv->uncore.lock);
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +		spin_unlock_irq(&dev_priv->uncore.lock);
> +	}
>   }
>   
>   static void
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 04710d0ecf8c..ef38d09b6ce0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2720,9 +2720,8 @@ static int ggtt_bind_vma(struct i915_vma *vma,
>   	if (i915_gem_object_is_readonly(obj))
>   		pte_flags |= PTE_READ_ONLY;
>   
> -	wakeref = intel_runtime_pm_get(i915);
> -	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
> -	intel_runtime_pm_put(i915, wakeref);
> +	with_intel_runtime_pm(i915, wakeref)
> +		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
>   
>   	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
>   
> @@ -2741,9 +2740,8 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
>   	struct drm_i915_private *i915 = vma->vm->i915;
>   	intel_wakeref_t wakeref;
>   
> -	wakeref = intel_runtime_pm_get(i915);
> -	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
> -	intel_runtime_pm_put(i915, wakeref);
> +	with_intel_runtime_pm(i915, wakeref)
> +		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
>   }
>   
>   static int aliasing_gtt_bind_vma(struct i915_vma *vma,
> @@ -2777,9 +2775,10 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
>   	if (flags & I915_VMA_GLOBAL_BIND) {
>   		intel_wakeref_t wakeref;
>   
> -		wakeref = intel_runtime_pm_get(i915);
> -		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
> -		intel_runtime_pm_put(i915, wakeref);
> +		with_intel_runtime_pm(i915, wakeref) {
> +			vma->vm->insert_entries(vma->vm, vma,
> +						cache_level, pte_flags);
> +		}
>   	}
>   
>   	return 0;
> @@ -2790,11 +2789,11 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
>   	struct drm_i915_private *i915 = vma->vm->i915;
>   
>   	if (vma->flags & I915_VMA_GLOBAL_BIND) {
> +		struct i915_address_space *vm = vma->vm;
>   		intel_wakeref_t wakeref;
>   
> -		wakeref = intel_runtime_pm_get(i915);
> -		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
> -		intel_runtime_pm_put(i915, wakeref);
> +		with_intel_runtime_pm(i915, wakeref)
> +			vm->clear_range(vm, vma->node.start, vma->size);
>   	}
>   
>   	if (vma->flags & I915_VMA_LOCAL_BIND) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index f25e4c7c71b1..fe198824dba1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -299,14 +299,14 @@ i915_gem_shrink(struct drm_i915_private *i915,
>   unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
>   {
>   	intel_wakeref_t wakeref;
> -	unsigned long freed;
> +	unsigned long freed = 0;
>   
> -	wakeref = intel_runtime_pm_get(i915);
> -	freed = i915_gem_shrink(i915, -1UL, NULL,
> -				I915_SHRINK_BOUND |
> -				I915_SHRINK_UNBOUND |
> -				I915_SHRINK_ACTIVE);
> -	intel_runtime_pm_put(i915, wakeref);
> +	with_intel_runtime_pm(i915, wakeref) {
> +		freed = i915_gem_shrink(i915, -1UL, NULL,
> +					I915_SHRINK_BOUND |
> +					I915_SHRINK_UNBOUND |
> +					I915_SHRINK_ACTIVE);
> +	}
>   
>   	return freed;
>   }
> @@ -379,14 +379,14 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
>   	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
>   		intel_wakeref_t wakeref;
>   
> -		wakeref = intel_runtime_pm_get(i915);
> -		freed += i915_gem_shrink(i915,
> -					 sc->nr_to_scan - sc->nr_scanned,
> -					 &sc->nr_scanned,
> -					 I915_SHRINK_ACTIVE |
> -					 I915_SHRINK_BOUND |
> -					 I915_SHRINK_UNBOUND);
> -		intel_runtime_pm_put(i915, wakeref);
> +		with_intel_runtime_pm(i915, wakeref) {
> +			freed += i915_gem_shrink(i915,
> +						 sc->nr_to_scan - sc->nr_scanned,
> +						 &sc->nr_scanned,
> +						 I915_SHRINK_ACTIVE |
> +						 I915_SHRINK_BOUND |
> +						 I915_SHRINK_UNBOUND);
> +		}
>   	}
>   
>   	shrinker_unlock(i915, unlock);
> @@ -483,13 +483,13 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
>   	if (ret)
>   		goto out;
>   
> -	wakeref = intel_runtime_pm_get(i915);
> -	freed_pages += i915_gem_shrink(i915, -1UL, NULL,
> -				       I915_SHRINK_BOUND |
> -				       I915_SHRINK_UNBOUND |
> -				       I915_SHRINK_ACTIVE |
> -				       I915_SHRINK_VMAPS);
> -	intel_runtime_pm_put(i915, wakeref);
> +	with_intel_runtime_pm(i915, wakeref) {
> +		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
> +					       I915_SHRINK_BOUND |
> +					       I915_SHRINK_UNBOUND |
> +					       I915_SHRINK_ACTIVE |
> +					       I915_SHRINK_VMAPS);
> +	}
>   
>   	/* We also want to clear any cached iomaps as they wrap vmap */
>   	list_for_each_entry_safe(vma, next,
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 3d43fc9dd25d..b1cb2d3cae16 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -230,14 +230,11 @@ frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
>   
>   		val = dev_priv->gt_pm.rps.cur_freq;
>   		if (dev_priv->gt.awake) {
> -			intel_wakeref_t wakeref =
> -				intel_runtime_pm_get_if_in_use(dev_priv);
> +			intel_wakeref_t wakeref;
>   
> -			if (wakeref) {
> +			with_intel_runtime_pm_if_in_use(dev_priv, wakeref)
>   				val = intel_get_cagf(dev_priv,
>   						     I915_READ_NOTRACE(GEN6_RPSTAT1));
> -				intel_runtime_pm_put(dev_priv, wakeref);
> -			}
>   		}
>   
>   		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index a959aee208de..e126196413a4 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -43,11 +43,10 @@ static u32 calc_residency(struct drm_i915_private *dev_priv,
>   			  i915_reg_t reg)
>   {
>   	intel_wakeref_t wakeref;
> -	u64 res;
> +	u64 res = 0;
>   
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	res = intel_rc6_residency_us(dev_priv, reg);
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref)
> +		res = intel_rc6_residency_us(dev_priv, reg);
>   
>   	return DIV_ROUND_CLOSEST_ULL(res, 1000);
>   }
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 5d88b63a3d6b..fd85a937b237 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -2062,6 +2062,14 @@ intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915);
>   intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
>   intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
>   
> +#define with_intel_runtime_pm(i915, wf) \
> +	for (wf = intel_runtime_pm_get(i915); wf; \
> +	     intel_runtime_pm_put(i915, wf), wf = 0)
> +
> +#define with_intel_runtime_pm_if_in_use(i915, wf) \
> +	for (wf = intel_runtime_pm_get_if_in_use(i915); wf; \
> +	     intel_runtime_pm_put(i915, wf), wf = 0)
> +
>   void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915);
>   #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
>   void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref);
> diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
> index 20c0b36d748e..b53582c0c6c1 100644
> --- a/drivers/gpu/drm/i915/intel_guc_log.c
> +++ b/drivers/gpu/drm/i915/intel_guc_log.c
> @@ -444,9 +444,8 @@ static void guc_log_capture_logs(struct intel_guc_log *log)
>   	 * Generally device is expected to be active only at this
>   	 * time, so get/put should be really quick.
>   	 */
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	guc_action_flush_log_complete(guc);
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref)
> +		guc_action_flush_log_complete(guc);
>   }
>   
>   int intel_guc_log_create(struct intel_guc_log *log)
> @@ -507,7 +506,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
>   	struct intel_guc *guc = log_to_guc(log);
>   	struct drm_i915_private *dev_priv = guc_to_i915(guc);
>   	intel_wakeref_t wakeref;
> -	int ret;
> +	int ret = 0;
>   
>   	BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN != 0);
>   	GEM_BUG_ON(!log->vma);
> @@ -521,16 +520,14 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
>   
>   	mutex_lock(&dev_priv->drm.struct_mutex);
>   
> -	if (log->level == level) {
> -		ret = 0;
> +	if (log->level == level)
>   		goto out_unlock;
> -	}
>   
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	ret = guc_action_control_log(guc, GUC_LOG_LEVEL_IS_VERBOSE(level),
> -				     GUC_LOG_LEVEL_IS_ENABLED(level),
> -				     GUC_LOG_LEVEL_TO_VERBOSITY(level));
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref)
> +		ret = guc_action_control_log(guc,
> +					     GUC_LOG_LEVEL_IS_VERBOSE(level),
> +					     GUC_LOG_LEVEL_IS_ENABLED(level),
> +					     GUC_LOG_LEVEL_TO_VERBOSITY(level));
>   	if (ret) {
>   		DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret);
>   		goto out_unlock;
> @@ -611,9 +608,8 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
>   	 */
>   	flush_work(&log->relay.flush_work);
>   
> -	wakeref = intel_runtime_pm_get(i915);
> -	guc_action_flush_log(guc);
> -	intel_runtime_pm_put(i915, wakeref);
> +	with_intel_runtime_pm(i915, wakeref)
> +		guc_action_flush_log(guc);
>   
>   	/* GuC would have updated log buffer by now, so capture it */
>   	guc_log_capture_logs(log);
> diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c
> index a61c944c3036..fb69cf195253 100644
> --- a/drivers/gpu/drm/i915/intel_huc.c
> +++ b/drivers/gpu/drm/i915/intel_huc.c
> @@ -115,14 +115,13 @@ int intel_huc_check_status(struct intel_huc *huc)
>   {
>   	struct drm_i915_private *dev_priv = huc_to_i915(huc);
>   	intel_wakeref_t wakeref;
> -	u32 status;
> +	u32 status = 0;
>   
>   	if (!HAS_HUC(dev_priv))
>   		return -ENODEV;
>   
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref)
> +		status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
>   
>   	return status;
>   }
> diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
> index e57c42323b17..d36fccaccefa 100644
> --- a/drivers/gpu/drm/i915/intel_panel.c
> +++ b/drivers/gpu/drm/i915/intel_panel.c
> @@ -1204,17 +1204,19 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd)
>   	struct drm_device *dev = connector->base.dev;
>   	struct drm_i915_private *dev_priv = to_i915(dev);
>   	intel_wakeref_t wakeref;
> -	u32 hw_level;
> -	int ret;
> +	int ret = 0;
>   
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		u32 hw_level;
>   
> -	hw_level = intel_panel_get_backlight(connector);
> -	ret = scale_hw_to_user(connector, hw_level, bd->props.max_brightness);
> +		drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
>   
> -	drm_modeset_unlock(&dev->mode_config.connection_mutex);
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +		hw_level = intel_panel_get_backlight(connector);
> +		ret = scale_hw_to_user(connector,
> +				       hw_level, bd->props.max_brightness);
> +
> +		drm_modeset_unlock(&dev->mode_config.connection_mutex);
> +	}
>   
>   	return ret;
>   }
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 5b842318edbc..e611dfb2543e 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1696,21 +1696,21 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>   
>   	flags = reg->offset & (entry->size - 1);
>   
> -	wakeref = intel_runtime_pm_get(dev_priv);
> -	if (entry->size == 8 && flags == I915_REG_READ_8B_WA)
> -		reg->val = I915_READ64_2x32(entry->offset_ldw,
> -					    entry->offset_udw);
> -	else if (entry->size == 8 && flags == 0)
> -		reg->val = I915_READ64(entry->offset_ldw);
> -	else if (entry->size == 4 && flags == 0)
> -		reg->val = I915_READ(entry->offset_ldw);
> -	else if (entry->size == 2 && flags == 0)
> -		reg->val = I915_READ16(entry->offset_ldw);
> -	else if (entry->size == 1 && flags == 0)
> -		reg->val = I915_READ8(entry->offset_ldw);
> -	else
> -		ret = -EINVAL;
> -	intel_runtime_pm_put(dev_priv, wakeref);
> +	with_intel_runtime_pm(dev_priv, wakeref) {
> +		if (entry->size == 8 && flags == I915_REG_READ_8B_WA)
> +			reg->val = I915_READ64_2x32(entry->offset_ldw,
> +						    entry->offset_udw);
> +		else if (entry->size == 8 && flags == 0)
> +			reg->val = I915_READ64(entry->offset_ldw);
> +		else if (entry->size == 4 && flags == 0)
> +			reg->val = I915_READ(entry->offset_ldw);
> +		else if (entry->size == 2 && flags == 0)
> +			reg->val = I915_READ16(entry->offset_ldw);
> +		else if (entry->size == 1 && flags == 0)
> +			reg->val = I915_READ8(entry->offset_ldw);
> +		else
> +			ret = -EINVAL;
> +	}
>   
>   	return ret;
>   }
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
> index 896f0b37b34c..8db2720b8028 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
> @@ -98,26 +98,22 @@ static void pm_suspend(struct drm_i915_private *i915)
>   {
>   	intel_wakeref_t wakeref;
>   
> -	wakeref = intel_runtime_pm_get(i915);
> -
> -	i915_gem_suspend_gtt_mappings(i915);
> -	i915_gem_suspend_late(i915);
> -
> -	intel_runtime_pm_put(i915, wakeref);
> +	with_intel_runtime_pm(i915, wakeref) {
> +		i915_gem_suspend_gtt_mappings(i915);
> +		i915_gem_suspend_late(i915);
> +	}
>   }
>   
>   static void pm_hibernate(struct drm_i915_private *i915)
>   {
>   	intel_wakeref_t wakeref;
>   
> -	wakeref = intel_runtime_pm_get(i915);
> -
> -	i915_gem_suspend_gtt_mappings(i915);
> -
> -	i915_gem_freeze(i915);
> -	i915_gem_freeze_late(i915);
> +	with_intel_runtime_pm(i915, wakeref) {
> +		i915_gem_suspend_gtt_mappings(i915);
>   
> -	intel_runtime_pm_put(i915, wakeref);
> +		i915_gem_freeze(i915);
> +		i915_gem_freeze_late(i915);
> +	}
>   }
>   
>   static void pm_resume(struct drm_i915_private *i915)
> @@ -128,13 +124,11 @@ static void pm_resume(struct drm_i915_private *i915)
>   	 * Both suspend and hibernate follow the same wakeup path and assume
>   	 * that runtime-pm just works.
>   	 */
> -	wakeref = intel_runtime_pm_get(i915);
> -
> -	intel_engines_sanitize(i915);
> -	i915_gem_sanitize(i915);
> -	i915_gem_resume(i915);
> -
> -	intel_runtime_pm_put(i915, wakeref);
> +	with_intel_runtime_pm(i915, wakeref) {
> +		intel_engines_sanitize(i915);
> +		i915_gem_sanitize(i915);
> +		i915_gem_resume(i915);
> +	}
>   }
>   
>   static int igt_gem_suspend(void *arg)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index e5db71e3991d..c7c891a287f5 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -580,9 +580,9 @@ static int igt_ctx_exec(void *arg)
>   				}
>   			}
>   
> -			wakeref = intel_runtime_pm_get(i915);
> -			err = gpu_fill(obj, ctx, engine, dw);
> -			intel_runtime_pm_put(i915, wakeref);
> +			err = 0;
> +			with_intel_runtime_pm(i915, wakeref)
> +				err = gpu_fill(obj, ctx, engine, dw);
>   			if (err) {
>   				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
>   				       ndwords, dw, max_dwords(obj),
> @@ -683,9 +683,9 @@ static int igt_ctx_readonly(void *arg)
>   					i915_gem_object_set_readonly(obj);
>   			}
>   
> -			wakeref = intel_runtime_pm_get(i915);
> -			err = gpu_fill(obj, ctx, engine, dw);
> -			intel_runtime_pm_put(i915, wakeref);
> +			err = 0;
> +			with_intel_runtime_pm(i915, wakeref)
> +				err = gpu_fill(obj, ctx, engine, dw);
>   			if (err) {
>   				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
>   				       ndwords, dw, max_dwords(obj),
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> index 13d4dc5e8dfb..572016a45a59 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> @@ -506,9 +506,8 @@ static void disable_retire_worker(struct drm_i915_private *i915)
>   	if (!i915->gt.active_requests++) {
>   		intel_wakeref_t wakeref;
>   
> -		wakeref = intel_runtime_pm_get(i915);
> -		i915_gem_unpark(i915);
> -		intel_runtime_pm_put(i915, wakeref);
> +		with_intel_runtime_pm(i915, wakeref)
> +			i915_gem_unpark(i915);
>   	}
>   	mutex_unlock(&i915->drm.struct_mutex);
>   
> @@ -590,10 +589,10 @@ static int igt_mmap_offset_exhaustion(void *arg)
>   			goto out;
>   		}
>   
> +		err = 0;
>   		mutex_lock(&i915->drm.struct_mutex);
> -		wakeref = intel_runtime_pm_get(i915);
> -		err = make_obj_busy(obj);
> -		intel_runtime_pm_put(i915, wakeref);
> +		with_intel_runtime_pm(i915, wakeref)
> +			err = make_obj_busy(obj);
>   		mutex_unlock(&i915->drm.struct_mutex);
>   		if (err) {
>   			pr_err("[loop %d] Failed to busy the object\n", loop);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> index 0db63eb24dfc..e2b542c6bfb1 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> @@ -45,9 +45,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
>   	if (err)
>   		goto err_obj;
>   
> -	wakeref = intel_runtime_pm_get(engine->i915);
> -	rq = i915_request_alloc(engine, ctx);
> -	intel_runtime_pm_put(engine->i915, wakeref);
> +	rq = ERR_PTR(-ENODEV);
> +	with_intel_runtime_pm(engine->i915, wakeref)
> +		rq = i915_request_alloc(engine, ctx);
>   	if (IS_ERR(rq)) {
>   		err = PTR_ERR(rq);
>   		goto err_pin;
> @@ -179,9 +179,9 @@ static int switch_to_scratch_context(struct intel_engine_cs *engine)
>   	if (IS_ERR(ctx))
>   		return PTR_ERR(ctx);
>   
> -	wakeref = intel_runtime_pm_get(engine->i915);
> -	rq = i915_request_alloc(engine, ctx);
> -	intel_runtime_pm_put(engine->i915, wakeref);
> +	rq = ERR_PTR(-ENODEV);
> +	with_intel_runtime_pm(engine->i915, wakeref)
> +		rq = i915_request_alloc(engine, ctx);
>   
>   	kernel_context_close(ctx);
>   	if (IS_ERR(rq))
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 29/40] drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex
  2018-09-19 19:55 ` [PATCH 29/40] drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex Chris Wilson
@ 2018-09-24 13:04   ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-24 13:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> We have two classes of VM, global GTT and per-process GTT. In order to
> allow ourselves the freedom to mix both along call chains, distinguish
> the two classes with regards to their mutex and lockdep maps.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_gtt.c       | 10 +++++-----
>   drivers/gpu/drm/i915/i915_gem_gtt.h       |  2 ++
>   drivers/gpu/drm/i915/selftests/mock_gtt.c |  6 +++---
>   3 files changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index ef38d09b6ce0..719e1ac212c1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -523,8 +523,7 @@ static void vm_free_page(struct i915_address_space *vm, struct page *page)
>   	spin_unlock(&vm->free_pages.lock);
>   }
>   
> -static void i915_address_space_init(struct i915_address_space *vm,
> -				    struct drm_i915_private *dev_priv)
> +static void i915_address_space_init(struct i915_address_space *vm, int subclass)
>   {
>   	/*
>   	 * The vm->mutex must be reclaim safe (for use in the shrinker).
> @@ -532,6 +531,7 @@ static void i915_address_space_init(struct i915_address_space *vm,
>   	 * attempt holding the lock is immediately reported by lockdep.
>   	 */
>   	mutex_init(&vm->mutex);
> +	lockdep_set_subclass(&vm->mutex, subclass);
>   	i915_gem_shrinker_taints_mutex(&vm->mutex);
>   
>   	GEM_BUG_ON(!vm->total);
> @@ -1658,7 +1658,7 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>   	 */
>   	ppgtt->vm.has_read_only = !intel_vgpu_active(i915);
>   
> -	i915_address_space_init(&ppgtt->vm, i915);
> +	i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
>   
>   	/* There are only few exceptions for gen >=6. chv and bxt.
>   	 * And we are not sure about the latter so play safe for now.
> @@ -2165,7 +2165,7 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
>   
>   	ppgtt->base.vm.total = I915_PDES * GEN6_PTES * I915_GTT_PAGE_SIZE;
>   
> -	i915_address_space_init(&ppgtt->base.vm, i915);
> +	i915_address_space_init(&ppgtt->base.vm, VM_CLASS_PPGTT);
>   
>   	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
>   	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
> @@ -3607,7 +3607,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
>   	 * and beyond the end of the GTT if we do not provide a guard.
>   	 */
>   	mutex_lock(&dev_priv->drm.struct_mutex);
> -	i915_address_space_init(&ggtt->vm, dev_priv);
> +	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
>   
>   	ggtt->vm.is_ggtt = true;
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 7e2af5f4f39b..849a1f67b037 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -286,6 +286,8 @@ struct i915_address_space {
>   	bool closed;
>   
>   	struct mutex mutex; /* protects vma and our lists */
> +#define VM_CLASS_GGTT 0
> +#define VM_CLASS_PPGTT 1
>   
>   	struct i915_page_dma scratch_page;
>   	struct i915_page_table *scratch_pt;
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
> index 6ae418c76015..976c862b3842 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
> @@ -70,7 +70,7 @@ mock_ppgtt(struct drm_i915_private *i915,
>   	ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE);
>   	ppgtt->vm.file = ERR_PTR(-ENODEV);
>   
> -	i915_address_space_init(&ppgtt->vm, i915);
> +	i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
>   
>   	ppgtt->vm.clear_range = nop_clear_range;
>   	ppgtt->vm.insert_page = mock_insert_page;
> @@ -102,6 +102,7 @@ void mock_init_ggtt(struct drm_i915_private *i915)
>   	struct i915_ggtt *ggtt = &i915->ggtt;
>   
>   	ggtt->vm.i915 = i915;
> +	ggtt->vm.is_ggtt = true;
>   
>   	ggtt->gmadr = (struct resource) DEFINE_RES_MEM(0, 2048 * PAGE_SIZE);
>   	ggtt->mappable_end = resource_size(&ggtt->gmadr);
> @@ -117,9 +118,8 @@ void mock_init_ggtt(struct drm_i915_private *i915)
>   	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
>   	ggtt->vm.vma_ops.clear_pages = clear_pages;
>   
> -	i915_address_space_init(&ggtt->vm, i915);
>   
> -	ggtt->vm.is_ggtt = true;
> +	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
>   }
>   
>   void mock_fini_ggtt(struct drm_i915_private *i915)
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 32/40] drm/i915: Introduce the i915_user_extension_method
  2018-09-19 19:55 ` [PATCH 32/40] drm/i915: Introduce the i915_user_extension_method Chris Wilson
@ 2018-09-24 13:20   ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-24 13:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> An idea for extending uABI inspired by Vulkan's extension chains.
> Instead of expanding the data struct for each ioctl every time we need
> to add a new feature, define an extension chain instead. As we add
> optional interfaces to control the ioctl, we define a new extension
> struct that can be linked into the ioctl data only when required by the
> user. The key advantage being able to ignore large control structs for
> optional interfaces/extensions, while being able to process them in a
> consistent manner.
> 
> In comparison to other extensible ioctls, the key difference is the
> use of a linked chain of extension structs vs an array of tagged
> pointers. For example,
> 
> struct drm_amdgpu_cs_chunk {
>          __u32           chunk_id;
>          __u32           length_dw;
>          __u64           chunk_data;
> };
> 
> struct drm_amdgpu_cs_in {
>          __u32           ctx_id;
>          __u32           bo_list_handle;
>          __u32           num_chunks;
>          __u32           _pad;
>          __u64           chunks;
> };

Since there is a verbose example of what this is better than, or at least 
different from, I think an example of the new mechanism itself in the 
commit message would be preferred.
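
Something along these lines would do; only struct i915_user_extension 
itself is from the patch, the _ext_foo extension, its name token and the 
ioctl it hangs off are made-up placeholders:

	/* base struct from this patch, embedded at the start of every extension */
	struct i915_user_extension {
		__u64 next_extension;	/* userptr to the next extension, or 0 */
		__u64 name;		/* identifies which extension this is */
	};

	/* hypothetical extension to some ioctl */
	struct drm_i915_gem_whatever_ext_foo {
		struct i915_user_extension base;
		__u32 foo;
		__u32 pad;
	};

	/* userspace chains extensions and passes the head to the ioctl */
	struct drm_i915_gem_whatever_ext_foo foo = {
		.base.name = I915_GEM_WHATEVER_EXT_FOO,	/* made-up token */
		.foo = 1,
	};
	args.extensions = (uintptr_t)&foo.base;	/* next_extension stays 0 */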

> 
> allows userspace to pass in array of pointers to extension structs, but
> must therefore keep constructing that array along side the command stream.
> In dynamic situations like that, a linked list is preferred and does not
> similar from extra cache line misses as the extension structs themselves

s/similar/suffer/ ?

> must still be loaded separate to the chunks array.
> 
> v2: Apply the tail call optimisation directly to nip the worry of stack
> overflow in the bud.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/Makefile               |  1 +
>   drivers/gpu/drm/i915/i915_user_extensions.c | 37 +++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_user_extensions.h | 20 +++++++++++
>   include/uapi/drm/i915_drm.h                 | 20 +++++++++++
>   4 files changed, 78 insertions(+)
>   create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.c
>   create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 4654ef481b45..6ffeb60392f5 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -45,6 +45,7 @@ i915-y := i915_drv.o \
>   	  i915_syncmap.o \
>   	  i915_sw_fence.o \
>   	  i915_sysfs.o \
> +	  i915_user_extensions.o \
>   	  intel_csr.o \
>   	  intel_device_info.o \
>   	  intel_pm.o \
> diff --git a/drivers/gpu/drm/i915/i915_user_extensions.c b/drivers/gpu/drm/i915/i915_user_extensions.c
> new file mode 100644
> index 000000000000..00622f8ac468
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_user_extensions.c
> @@ -0,0 +1,37 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#include <linux/uaccess.h>
> +#include <uapi/drm/i915_drm.h>
> +
> +#include "i915_user_extensions.h"
> +
> +int i915_user_extensions(struct i915_user_extension __user *ext,
> +			 const i915_user_extension_fn *tbl,
> +			 unsigned long count,
> +			 void *data)

Should data be called base or something like that, to signify it is not 
per-extension but points back to the base ioctl?

> +{
> +	while (ext) {
> +		int err;
> +		u64 x;

s/x/name/ or id?

> +
> +		if (get_user(x, &ext->name))
> +			return -EFAULT;
> +
> +		err = -EINVAL;
> +		if (x < count && tbl[x])
> +			err = tbl[x](ext, data);
> +		if (err)
> +			return err;

Stopping on an unknown extension or error, hm... probably makes sense given 
the context of extension use cases, which is not like, for instance, the 
list of query ioctls, but something really specific to each individual 
base ioctl.

> +
> +		if (get_user(x, &ext->next_extension))
> +			return -EFAULT;
> +
> +		ext = u64_to_user_ptr(x);
> +	}
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_user_extensions.h b/drivers/gpu/drm/i915/i915_user_extensions.h
> new file mode 100644
> index 000000000000..313a510b068a
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_user_extensions.h
> @@ -0,0 +1,20 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#ifndef I915_USER_EXTENSIONS_H
> +#define I915_USER_EXTENSIONS_H
> +
> +struct i915_user_extension;
> +
> +typedef int (*i915_user_extension_fn)(struct i915_user_extension __user *ext,
> +				      void *data);
> +
> +int i915_user_extensions(struct i915_user_extension __user *ext,
> +			 const i915_user_extension_fn *tbl,
> +			 unsigned long count,
> +			 void *data);
> +
> +#endif /* I915_USER_EXTENSIONS_H */
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index a4446f452040..4fa7e54501cc 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -62,6 +62,26 @@ extern "C" {
>   #define I915_ERROR_UEVENT		"ERROR"
>   #define I915_RESET_UEVENT		"RESET"
>   
> +/*
> + * i915_user_extension: Base class for defining a chain of extensions
> + *
> + * Many interfaces need to grow over time. In most cases we can simply
> + * extend the struct and have userspace pass in more data. Another option,
> + * as demonstrated by Vulkan's approach to providing extensions for forward
> + * and backward compatibilty, is to use a list of optional structs to
> + * provide those extra details.
> + *
> + * The key advantage to using an extension chain is that it allows us to
> + * redefine the interface more easily than an ever growing struct of
> + * increasing complexity, and for large parts of that interface to be
> + * entirely optional. The downside is more pointer chasing; chasing across
> + * the __user boundary with pointers encapsulated inside u64.
> + */
> +struct i915_user_extension {
> +	__u64 next_extension;

next_extension_ptr ?

> +	__u64 name;

Would id be better instead of name? Name sounds too much like a string to me.

Regards,

Tvrtko

> +};
> +
>   /*
>    * MOCS indexes used for GPU surfaces, defining the cacheability of the
>    * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 33/40] drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone()
  2018-09-19 19:55 ` [PATCH 33/40] drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone() Chris Wilson
@ 2018-09-24 17:22   ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-24 17:22 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> A context encompasses the driver's view of process related state, and
> encapsulates the logical GPU state where available. Each context is
> currently equivalent to a process in CPU terms. Like with processes,
> sometimes the user wants a lighter encapsulation that shares some state
> with the parent process, for example two threads have unique register
> state but share the virtual memory mappings. We can support exactly the
> same principle using contexts where we may share the GTT but keep the
> logical GPU state distinct. This allows quicker switching between those
> contexts, and for userspace to allocate a single offset in the GTT and
> use it across multiple contexts. Like with clone(), in the future we may
> wish to allow userspace to select more features to copy across from the
> parent, but for now we only allow sharing of the GTT.
> 
> Note that if full per-process GTT is not supported on the harder, the

s/harder/hardware/

> GTT are already implicitly shared between contexts, and this request
> to create contexts with shared GTT fails. With full ppGTT, every fd
> (i.e. every process) is allocated a unique GTT so this request cannot be
> used to share GTT between processes/fds, it can only share GTT belonging
> to this fd.
> 
> Testcase: igt/gem_ctx_shared
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c       |  62 ++++-
>   drivers/gpu/drm/i915/i915_gem_gtt.c           |  19 +-
>   drivers/gpu/drm/i915/i915_gem_gtt.h           |  14 +-
>   drivers/gpu/drm/i915/selftests/huge_pages.c   |   1 -
>   .../gpu/drm/i915/selftests/i915_gem_context.c | 252 +++++++++++++-----
>   drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   1 -
>   drivers/gpu/drm/i915/selftests/mock_context.c |   2 +-
>   drivers/gpu/drm/i915/selftests/mock_gtt.c     |   2 +
>   include/uapi/drm/i915_drm.h                   |  11 +-
>   9 files changed, 279 insertions(+), 85 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 150d7a6b2bd3..da2ac10f8e8a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -110,6 +110,8 @@ static void lut_close(struct i915_gem_context *ctx)
>   		struct i915_vma *vma = rcu_dereference_raw(*slot);
>   
>   		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
> +
> +		vma->open_count--;
>   		__i915_gem_object_release_unless_active(vma->obj);
>   	}
>   	rcu_read_unlock();
> @@ -292,7 +294,7 @@ static void context_close(struct i915_gem_context *ctx)
>   	 */
>   	lut_close(ctx);
>   	if (ctx->ppgtt)
> -		i915_ppgtt_close(&ctx->ppgtt->vm);
> +		i915_ppgtt_close(ctx->ppgtt);
>   
>   	ctx->file_priv = ERR_PTR(-EBADF);
>   	i915_gem_context_put(ctx);
> @@ -399,9 +401,12 @@ static void __destroy_hw_context(struct i915_gem_context *ctx,
>   	context_close(ctx);
>   }
>   
> +#define CREATE_VM BIT(0)
> +
>   static struct i915_gem_context *
>   i915_gem_create_context(struct drm_i915_private *dev_priv,
> -			struct drm_i915_file_private *file_priv)
> +			struct drm_i915_file_private *file_priv,
> +			unsigned int flags)
>   {
>   	struct i915_gem_context *ctx;
>   
> @@ -414,7 +419,7 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
>   	if (IS_ERR(ctx))
>   		return ctx;
>   
> -	if (USES_FULL_PPGTT(dev_priv)) {
> +	if (flags & CREATE_VM && USES_FULL_PPGTT(dev_priv)) {
>   		struct i915_hw_ppgtt *ppgtt;
>   
>   		ppgtt = i915_ppgtt_create(dev_priv, file_priv);
> @@ -493,7 +498,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
>   	struct i915_gem_context *ctx;
>   	int err;
>   
> -	ctx = i915_gem_create_context(i915, NULL);
> +	ctx = i915_gem_create_context(i915, NULL, CREATE_VM);
>   	if (IS_ERR(ctx))
>   		return ctx;
>   
> @@ -620,7 +625,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
>   	idr_init(&file_priv->context_idr);
>   
>   	mutex_lock(&i915->drm.struct_mutex);
> -	ctx = i915_gem_create_context(i915, file_priv);
> +	ctx = i915_gem_create_context(i915, file_priv, CREATE_VM);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   	if (IS_ERR(ctx)) {
>   		idr_destroy(&file_priv->context_idr);
> @@ -778,10 +783,12 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>   				  struct drm_file *file)
>   {
>   	struct drm_i915_private *dev_priv = to_i915(dev);
> -	struct drm_i915_gem_context_create *args = data;
> +	struct drm_i915_gem_context_create_v2 *args = data;
>   	struct drm_i915_file_private *file_priv = file->driver_priv;
> +	struct i915_gem_context *share = NULL;
>   	struct i915_gem_context *ctx;
> -	int ret;
> +	unsigned int flags = CREATE_VM;
> +	int err;
>   
>   	if (!DRIVER_CAPS(dev_priv)->has_logical_contexts)
>   		return -ENODEV;
> @@ -789,6 +796,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>   	if (args->pad != 0)
>   		return -EINVAL;
>   
> +	if (args->flags & ~I915_GEM_CONTEXT_SHARE_GTT)
> +		return -EINVAL;
> +
>   	if (client_is_banned(file_priv)) {
>   		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
>   			  current->comm,
> @@ -797,21 +807,45 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>   		return -EIO;
>   	}
>   
> -	ret = i915_mutex_lock_interruptible(dev);
> -	if (ret)
> -		return ret;
> +	if (args->flags & I915_GEM_CONTEXT_SHARE_GTT) {
> +		share = i915_gem_context_lookup(file_priv, args->share_ctx);
> +		if (!share)
> +			return -ENOENT;
> +
> +		if (!share->ppgtt) {
> +			err = -ENODEV;
> +			goto out;
> +		}
> +
> +		flags &= ~CREATE_VM;
> +	}
>   
> -	ctx = i915_gem_create_context(dev_priv, file_priv);
> +	err = i915_mutex_lock_interruptible(dev);
> +	if (err)
> +		goto out;
> +
> +	ctx = i915_gem_create_context(dev_priv, file_priv, flags);
>   	mutex_unlock(&dev->struct_mutex);
> -	if (IS_ERR(ctx))
> -		return PTR_ERR(ctx);
> +	if (IS_ERR(ctx)) {
> +		err = PTR_ERR(ctx);
> +		goto out;
> +	}
> +
> +	if (!(flags & CREATE_VM)) {
> +		ctx->desc_template = share->desc_template;
> +		ctx->ppgtt = i915_ppgtt_get(share->ppgtt);
> +		i915_ppgtt_open(ctx->ppgtt);

i915_ppgtt_open/close are guarded by struct mutex?

As such it looks like open_count can drop to zero between the context
lookup and here, in which case the assert in i915_ppgtt_open would trigger.

Would it be acceptable to have something like i915_ppgtt_try_open?
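
Roughly what I have in mind, as an untested sketch (name made up):

        /* Like i915_ppgtt_open(), but fail gracefully if already closed. */
        static bool i915_ppgtt_try_open(struct i915_hw_ppgtt *ppgtt)
        {
                if (!ppgtt->open_count)
                        return false;

                ppgtt->open_count++;
                return true;
        }

so that the ioctl can bail out cleanly instead of hitting the GEM_BUG_ON
if it loses the race.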

> +	}
>   
>   	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
>   
>   	args->ctx_id = ctx->user_handle;
>   	DRM_DEBUG("HW context %d created\n", args->ctx_id);
>   
> -	return 0;
> +out:
> +	if (share)
> +		i915_gem_context_put(share);
> +	return err;
>   }
>   
>   int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index d1ca67459bc2..caf48a1a1c97 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1644,6 +1644,7 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>   		return ERR_PTR(-ENOMEM);
>   
>   	kref_init(&ppgtt->ref);
> +	ppgtt->open_count = 1;
>   
>   	ppgtt->vm.i915 = i915;
>   	ppgtt->vm.dma = &i915->drm.pdev->dev;
> @@ -2160,6 +2161,7 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
>   		return ERR_PTR(-ENOMEM);
>   
>   	kref_init(&ppgtt->base.ref);
> +	ppgtt->base.open_count = 1;
>   
>   	ppgtt->base.vm.i915 = i915;
>   	ppgtt->base.vm.dma = &i915->drm.pdev->dev;
> @@ -2285,10 +2287,21 @@ i915_ppgtt_create(struct drm_i915_private *i915,
>   	return ppgtt;
>   }
>   
> -void i915_ppgtt_close(struct i915_address_space *vm)
> +void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt)
>   {
> -	GEM_BUG_ON(vm->closed);
> -	vm->closed = true;
> +	GEM_BUG_ON(ppgtt->vm.closed);
> +
> +	ppgtt->open_count++;
> +}
> +
> +void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt)
> +{
> +	GEM_BUG_ON(!ppgtt->open_count);
> +	if (--ppgtt->open_count)
> +		return;
> +
> +	GEM_BUG_ON(ppgtt->vm.closed);
> +	ppgtt->vm.closed = true;
>   }
>   
>   static void ppgtt_destroy_vma(struct i915_address_space *vm)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 849a1f67b037..512a5a0ab820 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -408,6 +408,8 @@ struct i915_hw_ppgtt {
>   	struct kref ref;
>   
>   	unsigned long pd_dirty_rings;
> +	unsigned int open_count;
> +
>   	union {
>   		struct i915_pml4 pml4;		/* GEN8+ & 48b PPGTT */
>   		struct i915_page_directory_pointer pdp;	/* GEN8+ */
> @@ -626,12 +628,16 @@ int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv);
>   void i915_ppgtt_release(struct kref *kref);
>   struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
>   					struct drm_i915_file_private *fpriv);
> -void i915_ppgtt_close(struct i915_address_space *vm);
> -static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
> +
> +void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt);
> +void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt);
> +
> +static inline struct i915_hw_ppgtt *i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
>   {
> -	if (ppgtt)
> -		kref_get(&ppgtt->ref);
> +	kref_get(&ppgtt->ref);
> +	return ppgtt;
>   }
> +
>   static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
>   {
>   	if (ppgtt)
> diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
> index 4ab98db946c8..ad25860a3336 100644
> --- a/drivers/gpu/drm/i915/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
> @@ -1724,7 +1724,6 @@ int i915_gem_huge_page_mock_selftests(void)
>   	err = i915_subtests(tests, ppgtt);
>   
>   out_close:
> -	i915_ppgtt_close(&ppgtt->vm);
>   	i915_ppgtt_put(ppgtt);
>   
>   out_unlock:
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index c7c891a287f5..7ef3f9a3b727 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -131,7 +131,9 @@ static int live_nop_switch(void *arg)
>   	}
>   
>   	for (n = 0; n < nctx; n++) {
> -		ctx[n] = i915_gem_create_context(i915, file->driver_priv);
> +		ctx[n] = i915_gem_create_context(i915,
> +						 file->driver_priv,
> +						 CREATE_VM);
>   		if (IS_ERR(ctx[n])) {
>   			err = PTR_ERR(ctx[n]);
>   			goto out_unlock;
> @@ -317,10 +319,6 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
>   	if (IS_ERR(vma))
>   		return PTR_ERR(vma);
>   
> -	err = i915_gem_object_set_to_gtt_domain(obj, false);
> -	if (err)
> -		return err;
> -

Unrelated hunk?

>   	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
>   	if (err)
>   		return err;
> @@ -415,7 +413,8 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
>   	return 0;
>   }
>   
> -static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
> +static noinline int cpu_check(struct drm_i915_gem_object *obj,
> +			      unsigned int idx, unsigned int max)
>   {
>   	unsigned int n, m, needs_flush;
>   	int err;
> @@ -433,8 +432,8 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
>   
>   		for (m = 0; m < max; m++) {
>   			if (map[m] != m) {
> -				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
> -				       n, m, map[m], m);
> +				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x\n",
> +				       __builtin_return_address(0), idx, n, m, map[m], m);
>   				err = -EINVAL;
>   				goto out_unmap;
>   			}
> @@ -442,8 +441,9 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
>   
>   		for (; m < DW_PER_PAGE; m++) {
>   			if (map[m] != STACK_MAGIC) {
> -				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
> -				       n, m, map[m], STACK_MAGIC);
> +				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x\n",
> +				       __builtin_return_address(0), idx, n, m,
> +				       map[m], STACK_MAGIC);
>   				err = -EINVAL;
>   				goto out_unmap;
>   			}
> @@ -506,6 +506,10 @@ create_test_object(struct i915_gem_context *ctx,
>   		return ERR_PTR(err);
>   	}
>   
> +	err = i915_gem_object_set_to_gtt_domain(obj, false);
> +	if (err)
> +		return ERR_PTR(err);
> +

And these ones unrelated as well?

>   	list_add_tail(&obj->st_link, objects);
>   	return obj;
>   }
> @@ -521,12 +525,8 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
>   static int igt_ctx_exec(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
> -	struct drm_i915_gem_object *obj = NULL;
> -	struct drm_file *file;
> -	IGT_TIMEOUT(end_time);
> -	LIST_HEAD(objects);
> -	unsigned long ncontexts, ndwords, dw;
> -	bool first_shared_gtt = true;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
>   	int err = -ENODEV;
>   
>   	/*
> @@ -538,42 +538,169 @@ static int igt_ctx_exec(void *arg)
>   	if (!DRIVER_CAPS(i915)->has_logical_contexts)
>   		return 0;
>   
> -	file = mock_file(i915);
> -	if (IS_ERR(file))
> -		return PTR_ERR(file);
> +	for_each_engine(engine, i915, id) {
> +		struct drm_i915_gem_object *obj = NULL;
> +		struct drm_file *file;
> +		IGT_TIMEOUT(end_time);
> +		LIST_HEAD(objects);
> +		unsigned long ncontexts, ndwords, dw;
> +		bool first_shared_gtt = true;
> +
> +		if (!intel_engine_can_store_dword(engine))
> +			continue;
>   
> -	mutex_lock(&i915->drm.struct_mutex);
> +		if (!engine->context_size)
> +			continue; /* No logical context support in HW */
>   
> -	ncontexts = 0;
> -	ndwords = 0;
> -	dw = 0;
> -	while (!time_after(jiffies, end_time)) {
> -		struct intel_engine_cs *engine;
> -		struct i915_gem_context *ctx;
> -		unsigned int id;
>   
> -		if (first_shared_gtt) {
> -			ctx = __create_hw_context(i915, file->driver_priv);
> -			first_shared_gtt = false;
> -		} else {
> -			ctx = i915_gem_create_context(i915, file->driver_priv);
> +		file = mock_file(i915);
> +		if (IS_ERR(file))
> +			return PTR_ERR(file);
> +
> +		mutex_lock(&i915->drm.struct_mutex);
> +
> +		ncontexts = 0;
> +		ndwords = 0;
> +		dw = 0;
> +		while (!time_after(jiffies, end_time)) {
> +			struct i915_gem_context *ctx;
> +			intel_wakeref_t wakeref;
> +
> +			if (first_shared_gtt) {
> +				ctx = __create_hw_context(i915, file->driver_priv);
> +				first_shared_gtt = false;
> +			} else {
> +				ctx = i915_gem_create_context(i915,
> +							      file->driver_priv,
> +							      CREATE_VM);
> +			}
> +			if (IS_ERR(ctx)) {
> +				err = PTR_ERR(ctx);
> +				goto out_unlock;
> +			}
> +
> +			if (!obj) {
> +				obj = create_test_object(ctx, file, &objects);
> +				if (IS_ERR(obj)) {
> +					err = PTR_ERR(obj);
> +					goto out_unlock;
> +				}
> +			}
> +
> +			wakeref = intel_runtime_pm_get(i915);
> +			err = gpu_fill(obj, ctx, engine, dw);
> +			intel_runtime_pm_put(i915, wakeref);
> +			if (err) {
> +				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
> +				       ndwords, dw, max_dwords(obj),
> +				       engine->name, ctx->hw_id,
> +				       yesno(!!ctx->ppgtt), err);
> +				goto out_unlock;
> +			}
> +
> +			if (++dw == max_dwords(obj)) {
> +				obj = NULL;
> +				dw = 0;
> +			}
> +
> +			ndwords++;
> +			ncontexts++;
>   		}
> -		if (IS_ERR(ctx)) {
> -			err = PTR_ERR(ctx);
> +
> +		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
> +			ncontexts, engine->name, ndwords);
> +
> +		ncontexts = dw = 0;
> +		list_for_each_entry(obj, &objects, st_link) {
> +			unsigned int rem =
> +				min_t(unsigned int, ndwords - dw, max_dwords(obj));
> +
> +			err = cpu_check(obj, ncontexts++, rem);
> +			if (err)
> +				break;
> +
> +			dw += rem;
> +		}
> +
> +out_unlock:
> +		i915_gem_wait_for_idle(i915,
> +				       I915_WAIT_LOCKED,
> +				       MAX_SCHEDULE_TIMEOUT);
> +		mutex_unlock(&i915->drm.struct_mutex);
> +
> +		mock_file_free(i915, file);
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
> +static int igt_shared_ctx_exec(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	int err = -ENODEV;
> +
> +	/*
> +	 * Create a few different contexts with the same mm and write
> +	 * through each ctx using the GPU making sure those writes end
> +	 * up in the expected pages of our obj.
> +	 */
> +
> +	for_each_engine(engine, i915, id) {
> +		IGT_TIMEOUT(end_time);
> +		LIST_HEAD(objects);
> +		unsigned long ncontexts, ndwords, dw;
> +		struct drm_i915_gem_object *obj = NULL;
> +		struct drm_file *file;
> +		struct i915_gem_context *parent;
> +
> +		if (!intel_engine_can_store_dword(engine))
> +			continue;
> +
> +		file = mock_file(i915);
> +		if (IS_ERR(file))
> +			return PTR_ERR(file);
> +
> +		mutex_lock(&i915->drm.struct_mutex);
> +
> +		parent = i915_gem_create_context(i915, file->driver_priv,
> +						 CREATE_VM);
> +		if (IS_ERR(parent)) {
> +			err = PTR_ERR(parent);
> +			if (err == -ENODEV) /* no logical ctx support */
> +				err = 0;
>   			goto out_unlock;
>   		}
>   
> -		for_each_engine(engine, i915, id) {
> +		if (!parent->ppgtt) {
> +			err = 0;
> +			goto out_unlock;
> +		}
> +
> +		ncontexts = 0;
> +		ndwords = 0;
> +		dw = 0;
> +		while (!time_after(jiffies, end_time)) {
> +			struct i915_gem_context *ctx;
>   			intel_wakeref_t wakeref;
>   
> -			if (!engine->context_size)
> -				continue; /* No logical context support in HW */
> +			ctx = i915_gem_create_context(i915,
> +						      file->driver_priv,
> +						      0);
> +			if (IS_ERR(ctx)) {
> +				err = PTR_ERR(ctx);
> +				goto out_unlock;
> +			}
>   
> -			if (!intel_engine_can_store_dword(engine))
> -				continue;
> +			i915_ppgtt_open(parent->ppgtt);
> +			ctx->ppgtt = i915_ppgtt_get(parent->ppgtt);
> +			ctx->desc_template = parent->desc_template;
>   
>   			if (!obj) {
> -				obj = create_test_object(ctx, file, &objects);
> +				obj = create_test_object(parent, file, &objects);
>   				if (IS_ERR(obj)) {
>   					err = PTR_ERR(obj);
>   					goto out_unlock;
> @@ -595,32 +722,36 @@ static int igt_ctx_exec(void *arg)
>   				obj = NULL;
>   				dw = 0;
>   			}
> +
>   			ndwords++;
> +			ncontexts++;
>   		}
> -		ncontexts++;
> -	}
> -	pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n",
> -		ncontexts, INTEL_INFO(i915)->num_rings, ndwords);
> +		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
> +			ncontexts, engine->name, ndwords);
>   
> -	dw = 0;
> -	list_for_each_entry(obj, &objects, st_link) {
> -		unsigned int rem =
> -			min_t(unsigned int, ndwords - dw, max_dwords(obj));
> +		ncontexts = dw = 0;
> +		list_for_each_entry(obj, &objects, st_link) {
> +			unsigned int rem =
> +				min_t(unsigned int, ndwords - dw, max_dwords(obj));
>   
> -		err = cpu_check(obj, rem);
> -		if (err)
> -			break;
> +			err = cpu_check(obj, ncontexts++, rem);
> +			if (err)
> +				break;
>   
> -		dw += rem;
> -	}
> +			dw += rem;
> +		}
>   
>   out_unlock:
> -	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> -		err = -EIO;
> -	mutex_unlock(&i915->drm.struct_mutex);
> +		if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +			err = -EIO;
> +		mutex_unlock(&i915->drm.struct_mutex);
>   
> -	mock_file_free(i915, file);
> -	return err;
> +		mock_file_free(i915, file);
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
>   }
>   
>   static int igt_ctx_readonly(void *arg)
> @@ -648,7 +779,7 @@ static int igt_ctx_readonly(void *arg)
>   
>   	mutex_lock(&i915->drm.struct_mutex);
>   
> -	ctx = i915_gem_create_context(i915, file->driver_priv);
> +	ctx = i915_gem_create_context(i915, file->driver_priv, 0);
>   	if (IS_ERR(ctx)) {
>   		err = PTR_ERR(ctx);
>   		goto out_unlock;
> @@ -714,7 +845,7 @@ static int igt_ctx_readonly(void *arg)
>   		if (i915_gem_object_is_readonly(obj))
>   			num_writes = 0;
>   
> -		err = cpu_check(obj, num_writes);
> +		err = cpu_check(obj, 0, num_writes);
>   		if (err)
>   			break;
>   
> @@ -913,6 +1044,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
>   		SUBTEST(live_nop_switch),
>   		SUBTEST(igt_ctx_exec),
>   		SUBTEST(igt_ctx_readonly),
> +		SUBTEST(igt_shared_ctx_exec),
>   	};
>   	bool fake_alias = false;
>   	int err;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index 38414558e18b..0e0fc7eedba3 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -1020,7 +1020,6 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
>   
>   	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
>   
> -	i915_ppgtt_close(&ppgtt->vm);
>   	i915_ppgtt_put(ppgtt);
>   out_unlock:
>   	mutex_unlock(&dev_priv->drm.struct_mutex);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
> index d937bdff26f9..a7511ba98342 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.c
> @@ -92,7 +92,7 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)
>   {
>   	lockdep_assert_held(&i915->drm.struct_mutex);
>   
> -	return i915_gem_create_context(i915, file->driver_priv);
> +	return i915_gem_create_context(i915, file->driver_priv, CREATE_VM);
>   }
>   
>   struct i915_gem_context *
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
> index 976c862b3842..e0880d2c498a 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
> @@ -66,6 +66,8 @@ mock_ppgtt(struct drm_i915_private *i915,
>   		return NULL;
>   
>   	kref_init(&ppgtt->ref);
> +	ppgtt->open_count = 1;
> +
>   	ppgtt->vm.i915 = i915;
>   	ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE);
>   	ppgtt->vm.file = ERR_PTR(-ENODEV);
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 4fa7e54501cc..dc1c52f95cab 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -386,7 +386,7 @@ typedef struct _drm_i915_sarea {
>   #define DRM_IOCTL_I915_SET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
>   #define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
>   #define DRM_IOCTL_I915_GEM_WAIT		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait)
> -#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
> +#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_v2)
>   #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>   #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
>   #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
> @@ -1429,6 +1429,15 @@ struct drm_i915_gem_context_create {
>   	__u32 pad;
>   };
>   
> +struct drm_i915_gem_context_create_v2 {
> +	/*  output: id of new context*/
> +	__u32 ctx_id;
> +	__u32 flags;
> +#define I915_GEM_CONTEXT_SHARE_GTT 0x1
> +	__u32 share_ctx;
> +	__u32 pad;
> +};

Or

__u32 ctx_id;
__u32 share_ctx;
__u64 flags;

so that no padding is needed and we get double the number of flags? :)
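
As an aside, my understanding of the intended userspace flow, as an
illustrative sketch only (not from the patch):

        struct drm_i915_gem_context_create_v2 arg = {
                .flags = I915_GEM_CONTEXT_SHARE_GTT,
                .share_ctx = parent_ctx_id, /* context owned by the same fd */
        };

        drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &arg);
        /* arg.ctx_id now names the new context sharing the parent's ppGTT */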

Regards,

Tvrtko

> +
>   struct drm_i915_gem_context_destroy {
>   	__u32 ctx_id;
>   	__u32 pad;
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 12/40] drm/i915/execlists: Assert the queue is non-empty on unsubmitting
  2018-09-24  9:07   ` Tvrtko Ursulin
@ 2018-09-25  7:41     ` Chris Wilson
  2018-09-25  8:51       ` Tvrtko Ursulin
  0 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-25  7:41 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-24 10:07:11)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > In the sequence
> > 
> > <0>[  531.960431] drv_self-4806    7.... 527402570us : intel_gpu_reset: engine_mask=1, ret=0, retry=0
> > <0>[  531.960431] drv_self-4806    7.... 527402571us : execlists_reset: rcs0 request global=115de, current=71133
> 
> How such an unbelievably huge delta between the guilty request and hws?

1?

;)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 14/40] drm/i915: Combine multiple internal plists into the same i915_priolist bucket
  2018-09-24 10:25   ` Tvrtko Ursulin
@ 2018-09-25  7:55     ` Chris Wilson
  0 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-25  7:55 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-24 11:25:37)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > As we are about to allow ourselves to slightly bump the user priority
> > into a few different sublevels, pack those internal priority lists
> > into the same i915_priolist to keep the rbtree compact and avoid having
> > to allocate the default user priority even after the internal bumping.
> > The downside to having an requests[] rather than a node per active list,
> > is that we then have to walk over the empty higher priority lists. To
> > compensate, we track the active buckets and use a small bitmap to skip
> > over any inactive ones.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/intel_engine_cs.c      |  6 +-
> >   drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++-
> >   drivers/gpu/drm/i915/intel_lrc.c            | 87 ++++++++++++++-------
> >   drivers/gpu/drm/i915/intel_ringbuffer.h     | 13 ++-
> >   4 files changed, 80 insertions(+), 38 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> > index 217ed3ee1cab..83f2f7774c1f 100644
> > --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> > @@ -1534,10 +1534,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> >       count = 0;
> >       drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
> >       for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> > -             struct i915_priolist *p =
> > -                     rb_entry(rb, typeof(*p), node);
> > +             struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> > +             int i;
> >   
> > -             list_for_each_entry(rq, &p->requests, sched.link) {
> > +             priolist_for_each_request(rq, p, i) {
> >                       if (count++ < MAX_REQUESTS_TO_SHOW - 1)
> >                               print_request(m, rq, "\t\tQ ");
> >                       else
> > diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> > index 6f693ef62c64..8531bd917ec3 100644
> > --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> > @@ -726,30 +726,28 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
> >       while ((rb = rb_first_cached(&execlists->queue))) {
> >               struct i915_priolist *p = to_priolist(rb);
> >               struct i915_request *rq, *rn;
> > +             int i;
> >   
> > -             list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
> > +             priolist_for_each_request_consume(rq, rn, p, i) {
> 
> Hm, the consume variant clears the bitmask every time, but we are not
> yet certain we will consume the request.

We clear it after the loop, so when we take the early break the bitmask
is unaffected.
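
For reference, the consume variant is roughly of this shape (from memory,
see the patch for the exact definition):

        #define priolist_for_each_request_consume(rq, rn, p, i) \
                for (; (i = ffs((p)->used)); (p)->used &= ~BIT(i - 1)) \
                        list_for_each_entry_safe(rq, rn, \
                                                 &(p)->requests[i - 1], \
                                                 sched.link)

The bucket's bit is only cleared by the loop "increment" once that bucket
has been fully drained, so breaking out early leaves it set.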

> >                       if (last && rq->hw_context != last->hw_context) {
> > -                             if (port == last_port) {
> > -                                     __list_del_many(&p->requests,
> > -                                                     &rq->sched.link);
> > +                             if (port == last_port)
> >                                       goto done;
> > -                             }
> >   
> >                               if (submit)
> >                                       port_assign(port, last);
> >                               port++;
> >                       }
> >   
> > -                     INIT_LIST_HEAD(&rq->sched.link);
> > +                     list_del_init(&rq->sched.link);
> >   
> >                       __i915_request_submit(rq);
> >                       trace_i915_request_in(rq, port_index(port, execlists));
> > +
> >                       last = rq;
> >                       submit = true;
> >               }
> >   
> >               rb_erase_cached(&p->node, &execlists->queue);
> > -             INIT_LIST_HEAD(&p->requests);
> >               if (p->priority != I915_PRIORITY_NORMAL)
> >                       kmem_cache_free(engine->i915->priorities, p);
> >       }
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index e8de250c3413..aeae82b5223c 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -259,14 +259,49 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
> >       ce->lrc_desc = desc;
> >   }
> >   
> > -static struct i915_priolist *
> > +static void assert_priolists(struct intel_engine_execlists * const execlists,
> > +                          int queue_priority)
> > +{
> > +     struct rb_node *rb;
> > +     int last_prio, i;
> > +
> > +     if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> > +             return;
> > +
> > +     GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
> > +                rb_first(&execlists->queue.rb_root));
> > +
> > +     last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
>  >
> > +     for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> > +             struct i915_priolist *p = to_priolist(rb);
> > +
> > +             GEM_BUG_ON(p->priority >= last_prio);
> > +             last_prio = p->priority;
> > +
> > +             GEM_BUG_ON(!p->used);
> > +             for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
> > +                     if (list_empty(&p->requests[i]))
> > +                             continue;
> 
> Asserting that the bitmask bit is not set when the list is empty is not interesting?

We can have the bit set with an empty list after request promotion. That
seemed like the reasonable tradeoff rather than have a more complicated
check after each list_move to decide if the old bit needs to be cleared.

> > +
> > +                     GEM_BUG_ON(!(p->used & BIT(i)));
> > +             }
> > +     }
> 
> In general, won't this be a tiny bit too expensive on debug build and 
> with priority/rq stress tests? To walk absolutely all priority levels 
> times bucket count on every lookup and dequeue. Or maybe we don't have 
> any tests at the moment which instantiate that many levels.

Worth it though. Whenever we abuse the system, our ability to catch
errors depends on our sanity checks. I thought this was worth having the
reassurance that it worked :)

> > +}
> > +
> > +static struct list_head *
> >   lookup_priolist(struct intel_engine_cs *engine, int prio)
> >   {
> >       struct intel_engine_execlists * const execlists = &engine->execlists;
> >       struct i915_priolist *p;
> >       struct rb_node **parent, *rb;
> >       bool first = true;
> > +     int idx, i;
> > +
> > +     assert_priolists(execlists, INT_MAX);
> >   
> > +     /* buckets sorted from highest [in slot 0] to lowest priority */
> > +     idx = I915_PRIORITY_COUNT - (prio & ~I915_PRIORITY_MASK) - 1;
> 
> 0 - (prio & ~0) - 1 = -prio - 1, hm..?
> 
> Hm no.. I915_PRIORITY_COUNT with zero reserved internal bits is actually 
> 1.  So 1 - (prio & 0x0) - 1 = 0, ok phew..
> 
> Unintuitive for the count to be one with no reserved bits? Although it 
> is only a temporary stage..

Would it be preferable to use "~MASK - (prio & ~MASK)"?

Iirc, the mask is only used for the internal bits. I followed the pattern of
page_mask, but I think it would simplify things a bit to use INTERNAL_MASK.
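
To spell it out with the values from the next patch (SHIFT == 1, so
COUNT == 2 and the only internal bit is bit 0), assuming I am reading it
right:

        idx = I915_PRIORITY_COUNT - (prio & ~I915_PRIORITY_MASK) - 1;

        prio = I915_USER_PRIORITY(p)                           -> idx = 2 - 0 - 1 = 1
        prio = I915_USER_PRIORITY(p) | I915_PRIORITY_NEWCLIENT -> idx = 2 - 1 - 1 = 0

so the boosted request lands in the higher bucket (slot 0) within the
same user level.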
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 15/40] drm/i915: Priority boost for new clients
  2018-09-24 10:29   ` Tvrtko Ursulin
@ 2018-09-25  8:01     ` Chris Wilson
  2018-09-25  8:26       ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-25  8:01 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-24 11:29:52)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > Taken from an idea used for FQ_CODEL, we give the first request of a
> > new request flows a small priority boost. These flows are likely to
> > correspond with short, interactive tasks and so be more latency sensitive
> > than the longer free running queues. As soon as the client has more than
> > one request in the queue, further requests are not boosted and it settles
> > down into ordinary steady state behaviour.  Such small kicks dramatically
> > help combat the starvation issue, by allowing each client the opportunity
> > to run even when the system is under heavy throughput load (within the
> > constraints of the user selected priority).
> > 
> > v2: Mark the preempted request as the start of a new flow, to prevent a
> > single client being continually gazumped by its peers.
> > 
> > Testcase: igt/benchmarks/rrul
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++--
> >   drivers/gpu/drm/i915/i915_scheduler.h |  4 +++-
> >   drivers/gpu/drm/i915/intel_lrc.c      | 25 +++++++++++++++++++------
> >   3 files changed, 36 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index a492385b2089..56140ca054e8 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -1127,8 +1127,20 @@ void i915_request_add(struct i915_request *request)
> >        */
> >       local_bh_disable();
> >       rcu_read_lock(); /* RCU serialisation for set-wedged protection */
> > -     if (engine->schedule)
> > -             engine->schedule(request, &request->gem_context->sched);
> > +     if (engine->schedule) {
> > +             struct i915_sched_attr attr = request->gem_context->sched;
> > +
> > +             /*
> > +              * Boost priorities to new clients (new request flows).
> > +              *
> > +              * Allow interactive/synchronous clients to jump ahead of
> > +              * the bulk clients. (FQ_CODEL)
> > +              */
> > +             if (!prev || i915_request_completed(prev))
> > +                     attr.priority |= I915_PRIORITY_NEWCLIENT;
> > +
> > +             engine->schedule(request, &attr);
> > +     }
> >       rcu_read_unlock();
> >       i915_sw_fence_commit(&request->submit);
> >       local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
> > diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> > index 7edfad0abfd7..93e43e263d8c 100644
> > --- a/drivers/gpu/drm/i915/i915_scheduler.h
> > +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> > @@ -19,12 +19,14 @@ enum {
> >       I915_PRIORITY_INVALID = INT_MIN
> >   };
> >   
> > -#define I915_USER_PRIORITY_SHIFT 0
> > +#define I915_USER_PRIORITY_SHIFT 1
> >   #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
> >   
> >   #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
> >   #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
> >   
> > +#define I915_PRIORITY_NEWCLIENT      ((u8)BIT(0))
> 
> Is the cast important and why?

Unreliable memory says there was something iffy about the code generation
at one point.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 16/40] drm/i915: Pull scheduling under standalone lock
  2018-09-24 11:19   ` Tvrtko Ursulin
@ 2018-09-25  8:19     ` Chris Wilson
  2018-09-25  9:01       ` Tvrtko Ursulin
  0 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-25  8:19 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-24 12:19:39)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> > new file mode 100644
> > index 000000000000..910ac7089596
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> > @@ -0,0 +1,377 @@
> > +/*
> > + * SPDX-License-Identifier: MIT
> > + *
> > + * Copyright © 2018 Intel Corporation
> > + */
> > +
> > +#include <linux/mutex.h>
> > +
> > +#include "i915_drv.h"
> > +#include "i915_request.h"
> > +#include "i915_scheduler.h"
> > +
> > +static DEFINE_SPINLOCK(schedule_lock);
> 
> Any good excuses to not put it into device private straight away?

It can't be per-device, since it will need to be global across devices
as the dependency chains may cross devices and we will want to choose an
optimal order globally. That's my goal here, at least.

At some point, the horror of scalability vs priority inversion must then
be tackled. (But for now, I'd rather have priority inheritance and start
from a position of good qos and build scalability on top.)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 15/40] drm/i915: Priority boost for new clients
  2018-09-25  8:01     ` Chris Wilson
@ 2018-09-25  8:26       ` Chris Wilson
  2018-09-25  8:57         ` Tvrtko Ursulin
  2018-09-25 11:20         ` Michal Wajdeczko
  0 siblings, 2 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-25  8:26 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2018-09-25 09:01:06)
> Quoting Tvrtko Ursulin (2018-09-24 11:29:52)
> > 
> > On 19/09/2018 20:55, Chris Wilson wrote:
> > > diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> > > index 7edfad0abfd7..93e43e263d8c 100644
> > > --- a/drivers/gpu/drm/i915/i915_scheduler.h
> > > +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> > > @@ -19,12 +19,14 @@ enum {
> > >       I915_PRIORITY_INVALID = INT_MIN
> > >   };
> > >   
> > > -#define I915_USER_PRIORITY_SHIFT 0
> > > +#define I915_USER_PRIORITY_SHIFT 1
> > >   #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
> > >   
> > >   #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
> > >   #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
> > >   
> > > +#define I915_PRIORITY_NEWCLIENT      ((u8)BIT(0))
> > 
> > Is the cast important and why?
> 
> Unreliable memory says there was something iffy about the code generation
> at one point.

drivers/gpu/drm/i915/intel_lrc.c: In function ‘__unwind_incomplete_requests’:
drivers/gpu/drm/i915/intel_lrc.c:272:13: error: overflow in conversion from ‘long unsigned int’ to ‘int’ changes value from ‘18446744071562067969’ to ‘-2147483647’ [-Werror=overflow]
  int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
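
For reference, my reading of the warning: BIT(0) is 1UL, so the OR is
evaluated in unsigned long; INT_MIN sign-extends to 0xffffffff80000000,
OR 1 gives 0xffffffff80000001 (the 18446744071562067969 above), which
then overflows when assigned back to int. The (u8) cast keeps the whole
expression in plain int arithmetic, where INT_MIN | 1 is just
-2147483647.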

-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 34/40] drm/i915: Allow contexts to share a single timeline across all engines
  2018-09-19 19:55 ` [PATCH 34/40] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
@ 2018-09-25  8:45   ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-25  8:45 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> Previously, our view has been always to run the engines independently
> within a context. (Multiple engines happened before we had contexts and
> timelines, so they always operated independently and that behaviour
> persisted into contexts.) However, at the user level the context often
> represents a single timeline (e.g. GL contexts) and userspace must
> ensure that the individual engines are serialised to present that
> ordering to the client (or forgot about this detail entirely and hope no
> one notices - a fair ploy if the client can only directly control one
> engine themselves ;)
> 
> In the next patch, we will want to construct a set of engines that
> operate as one, that have a single timeline interwoven between them, to
> present a single virtual engine to the user. (They submit to the virtual
> engine, then we decide which engine to execute on.)
> 
> To that end, we want to be able to create contexts which have a single
> timeline (fence context) shared between all engines, rather than multiple
> timelines.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c | 26 ++++++++++++++++-
>   drivers/gpu/drm/i915/i915_gem_context.h |  3 ++
>   drivers/gpu/drm/i915/i915_request.c     | 10 +++++--
>   drivers/gpu/drm/i915/i915_request.h     |  5 +++-
>   drivers/gpu/drm/i915/i915_sw_fence.c    | 39 +++++++++++++++++++++----
>   drivers/gpu/drm/i915/i915_sw_fence.h    | 13 +++++++--
>   drivers/gpu/drm/i915/intel_lrc.c        |  5 +++-
>   include/uapi/drm/i915_drm.h             |  3 +-
>   8 files changed, 91 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index da2ac10f8e8a..a8570a07b3b7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -223,6 +223,9 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   			ce->ops->destroy(ce);
>   	}
>   
> +	if (ctx->timeline)
> +		i915_timeline_put(ctx->timeline);
> +
>   	kfree(ctx->name);
>   	put_pid(ctx->pid);
>   
> @@ -402,6 +405,7 @@ static void __destroy_hw_context(struct i915_gem_context *ctx,
>   }
>   
>   #define CREATE_VM BIT(0)
> +#define CREATE_TIMELINE BIT(1)
>   
>   static struct i915_gem_context *
>   i915_gem_create_context(struct drm_i915_private *dev_priv,
> @@ -412,6 +416,9 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
>   
>   	lockdep_assert_held(&dev_priv->drm.struct_mutex);
>   
> +	if (flags & CREATE_TIMELINE && !HAS_EXECLISTS(dev_priv))
> +		return ERR_PTR(-EINVAL);
> +
>   	/* Reap the most stale context */
>   	contexts_free_first(dev_priv);
>   
> @@ -434,6 +441,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
>   		ctx->desc_template = default_desc_template(dev_priv, ppgtt);
>   	}
>   
> +	if (flags & CREATE_TIMELINE) {
> +		struct i915_timeline *timeline;
> +
> +		timeline = i915_timeline_create(dev_priv, ctx->name);
> +		if (IS_ERR(timeline)) {
> +			__destroy_hw_context(ctx, file_priv);
> +			return ERR_CAST(timeline);
> +		}
> +
> +		ctx->timeline = timeline;
> +	}
> +
>   	trace_i915_context_create(ctx);
>   
>   	return ctx;
> @@ -796,7 +815,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>   	if (args->pad != 0)
>   		return -EINVAL;
>   
> -	if (args->flags & ~I915_GEM_CONTEXT_SHARE_GTT)
> +	if (args->flags &
> +	    ~(I915_GEM_CONTEXT_SHARE_GTT |
> +	      I915_GEM_CONTEXT_SINGLE_TIMELINE))
>   		return -EINVAL;
>   
>   	if (client_is_banned(file_priv)) {
> @@ -820,6 +841,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>   		flags &= ~CREATE_VM;
>   	}
>   
> +	if (args->flags & I915_GEM_CONTEXT_SINGLE_TIMELINE)
> +		flags |= CREATE_TIMELINE;
> +
>   	err = i915_mutex_lock_interruptible(dev);
>   	if (err)
>   		goto out;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 08165f6a0a84..770043449ba6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -41,6 +41,7 @@ struct drm_i915_private;
>   struct drm_i915_file_private;
>   struct i915_hw_ppgtt;
>   struct i915_request;
> +struct i915_timeline;
>   struct i915_vma;
>   struct intel_ring;
>   
> @@ -66,6 +67,8 @@ struct i915_gem_context {
>   	/** file_priv: owning file descriptor */
>   	struct drm_i915_file_private *file_priv;
>   
> +	struct i915_timeline *timeline;

Put a short comment please.
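
Even just something like:

        /* Timeline shared by all engines, only set for SINGLE_TIMELINE contexts. */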

> +
>   	/**
>   	 * @ppgtt: unique address space (GTT)
>   	 *
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index fc7ad8dbc36e..34d410cfa577 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1007,8 +1007,14 @@ void i915_request_add(struct i915_request *request)
>   	prev = i915_gem_active_raw(&timeline->last_request,
>   				   &request->i915->drm.struct_mutex);
>   	if (prev && !i915_request_completed(prev)) {
> -		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
> -					     &request->submitq);
> +		if (prev->engine == engine)
> +			i915_sw_fence_await_sw_fence(&request->submit,
> +						     &prev->submit,
> +						     &request->submitq);
> +		else
> +			__i915_sw_fence_await_dma_fence(&request->submit,
> +							&prev->fence,
> +							&request->dmaq);

Could be worth a comment on the conditional block to help future readers.
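
Something along these lines perhaps (only a sketch of the wording):

        /*
         * Within a single engine, execution follows submission order, so
         * chaining the submit fences is enough to keep the timeline
         * ordered. If the previous request in the timeline was sent to a
         * different engine, wait instead on its dma-fence before allowing
         * this request to be submitted.
         */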

>   		if (engine->schedule)
>   			__i915_sched_node_add_dependency(&request->sched,
>   							 &prev->sched,
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 90e9d170a0cd..68d36eeb5edb 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -116,7 +116,10 @@ struct i915_request {
>   	 * It is used by the driver to then queue the request for execution.
>   	 */
>   	struct i915_sw_fence submit;
> -	wait_queue_entry_t submitq;
> +	union {
> +		wait_queue_entry_t submitq;
> +		struct i915_sw_dma_fence_cb dmaq;

Drop a comment here as well explaining the duality.
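
Something like:

        /*
         * Only one of the two is used for any given request: submitq when
         * waiting on the previous request's submit fence (same engine),
         * dmaq when waiting on its dma-fence (different engine), hence
         * the union.
         */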

> +	};
>   	wait_queue_head_t execute;
>   
>   	/*
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
> index 6dbeed079ae5..b68883fe73c5 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.c
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.c
> @@ -362,11 +362,6 @@ int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
>   	return __i915_sw_fence_await_sw_fence(fence, signaler, NULL, gfp);
>   }
>   
> -struct i915_sw_dma_fence_cb {
> -	struct dma_fence_cb base;
> -	struct i915_sw_fence *fence;
> -};
> -
>   struct i915_sw_dma_fence_cb_timer {
>   	struct i915_sw_dma_fence_cb base;
>   	struct dma_fence *dma;
> @@ -483,6 +478,40 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
>   	return ret;
>   }
>   
> +static void __dma_i915_sw_fence_wake(struct dma_fence *dma,
> +				     struct dma_fence_cb *data)
> +{
> +	struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base);
> +
> +	i915_sw_fence_complete(cb->fence);
> +}
> +
> +int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
> +				    struct dma_fence *dma,
> +				    struct i915_sw_dma_fence_cb *cb)
> +{
> +	int ret;
> +
> +	debug_fence_assert(fence);
> +
> +	if (dma_fence_is_signaled(dma))
> +		return 0;
> +
> +	cb->fence = fence;
> +	i915_sw_fence_await(fence);
> +
> +	ret = dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake);
> +	if (ret == 0) {
> +		ret = 1;
> +	} else {
> +		i915_sw_fence_complete(fence);
> +		if (ret == -ENOENT) /* fence already signaled */
> +			ret = 0;
> +	}
> +
> +	return ret;
> +}

AFAICS there is scope to extract commonality between this and 
i915_sw_fence_await_dma_fence by perhaps having a common static function 
which takes the _cb and func pointers. Or something like:

static __i915_sw_fence_await_dma_fence(..., *cb, *func)
{
	...
}

i915_sw_fence_await_dma_fence(..)
{
	... alloc ..

	__i915_sw_fence_await_dma_fence(..., cb, i915_sw_fence_await);

	...
}

i915_sw_fence_await_dma_fence_builtin(...)
{
	_i915_sw_fence_await_dma_fence(..., &dma->cb, __dma_i915_sw_fence_wake);
}

Thoughts?

Regards,

Tvrtko

> +
>   int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
>   				    struct reservation_object *resv,
>   				    const struct dma_fence_ops *exclude,
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
> index fe2ef4dadfc6..914a734d49bc 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.h
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.h
> @@ -10,14 +10,13 @@
>   #ifndef _I915_SW_FENCE_H_
>   #define _I915_SW_FENCE_H_
>   
> +#include <linux/dma-fence.h>
>   #include <linux/gfp.h>
>   #include <linux/kref.h>
>   #include <linux/notifier.h> /* for NOTIFY_DONE */
>   #include <linux/wait.h>
>   
>   struct completion;
> -struct dma_fence;
> -struct dma_fence_ops;
>   struct reservation_object;
>   
>   struct i915_sw_fence {
> @@ -69,10 +68,20 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
>   int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
>   				     struct i915_sw_fence *after,
>   				     gfp_t gfp);
> +
> +struct i915_sw_dma_fence_cb {
> +	struct dma_fence_cb base;
> +	struct i915_sw_fence *fence;
> +};
> +
> +int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
> +				    struct dma_fence *dma,
> +				    struct i915_sw_dma_fence_cb *cb);
>   int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
>   				  struct dma_fence *dma,
>   				  unsigned long timeout,
>   				  gfp_t gfp);
> +
>   int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
>   				    struct reservation_object *resv,
>   				    const struct dma_fence_ops *exclude,
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 74be9a49ef9e..48a2bca7fec3 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2626,7 +2626,10 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>   		goto error_deref_obj;
>   	}
>   
> -	timeline = i915_timeline_create(ctx->i915, ctx->name);
> +	if (ctx->timeline)
> +		timeline = i915_timeline_get(ctx->timeline);
> +	else
> +		timeline = i915_timeline_create(ctx->i915, ctx->name);
>   	if (IS_ERR(timeline)) {
>   		ret = PTR_ERR(timeline);
>   		goto error_deref_obj;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index dc1c52f95cab..adb9fed86ef7 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1433,7 +1433,8 @@ struct drm_i915_gem_context_create_v2 {
>   	/*  output: id of new context*/
>   	__u32 ctx_id;
>   	__u32 flags;
> -#define I915_GEM_CONTEXT_SHARE_GTT 0x1
> +#define I915_GEM_CONTEXT_SHARE_GTT		0x1
> +#define I915_GEM_CONTEXT_SINGLE_TIMELINE	0x2
>   	__u32 share_ctx;
>   	__u32 pad;
>   };
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [Intel-gfx] [PATCH 35/40] drm/i915: Fix I915_EXEC_RING_MASK
  2018-09-19 19:55 ` [PATCH 35/40] drm/i915: Fix I915_EXEC_RING_MASK Chris Wilson
@ 2018-09-25  8:46   ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-25  8:46 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: stable


On 19/09/2018 20:55, Chris Wilson wrote:
> This was supposed to be a mask of all known rings, but it is being used
> by execbuffer to filter out invalid rings, and so is instead mapping high
> unused values onto valid rings. Instead of a mask of all known rings,
> we need it to be the mask of all possible rings.
> 
> Fixes: 549f7365820a ("drm/i915: Enable SandyBridge blitter ring")
> Fixes: de1add360522 ("drm/i915: Decouple execbuf uAPI from internal implementation")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: <stable@vger.kernel.org> # v4.6+
> ---
>   include/uapi/drm/i915_drm.h | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index adb9fed86ef7..65b0b84419f3 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -984,7 +984,7 @@ struct drm_i915_gem_execbuffer2 {
>   	 * struct drm_i915_gem_exec_fence *fences.
>   	 */
>   	__u64 cliprects_ptr;
> -#define I915_EXEC_RING_MASK              (7<<0)
> +#define I915_EXEC_RING_MASK              (0x3f)
>   #define I915_EXEC_DEFAULT                (0<<0)
>   #define I915_EXEC_RENDER                 (1<<0)
>   #define I915_EXEC_BSD                    (2<<0)
> 
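To make the failure mode concrete (if I follow the execbuf path
correctly): the engine is selected with args->flags & I915_EXEC_RING_MASK
and the result is then range-checked. With the old 3-bit mask a bogus
value such as 8 becomes 8 & 0x7 == 0, i.e. I915_EXEC_DEFAULT, and is
silently accepted, whereas with 0x3f it stays 8 and is rejected as an
unknown ring.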

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 12/40] drm/i915/execlists: Assert the queue is non-empty on unsubmitting
  2018-09-25  7:41     ` Chris Wilson
@ 2018-09-25  8:51       ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-25  8:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/09/2018 08:41, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-24 10:07:11)
>>
>> On 19/09/2018 20:55, Chris Wilson wrote:
>>> In the sequence
>>>
>>> <0>[  531.960431] drv_self-4806    7.... 527402570us : intel_gpu_reset: engine_mask=1, ret=0, retry=0
>>> <0>[  531.960431] drv_self-4806    7.... 527402571us : execlists_reset: rcs0 request global=115de, current=71133
>>
>> How such an unbelievably huge delta between the guilty request and hws?
> 
> 1?
> 
> ;)

Oh bloody dec vs hex again! :D
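
(For the record, 0x115de is 71134, so against current=71133 the delta
really is just the one request.)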

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 15/40] drm/i915: Priority boost for new clients
  2018-09-25  8:26       ` Chris Wilson
@ 2018-09-25  8:57         ` Tvrtko Ursulin
  2018-09-25  9:06           ` Chris Wilson
  2018-09-25 11:20         ` Michal Wajdeczko
  1 sibling, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-25  8:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/09/2018 09:26, Chris Wilson wrote:
> Quoting Chris Wilson (2018-09-25 09:01:06)
>> Quoting Tvrtko Ursulin (2018-09-24 11:29:52)
>>>
>>> On 19/09/2018 20:55, Chris Wilson wrote:
>>>> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
>>>> index 7edfad0abfd7..93e43e263d8c 100644
>>>> --- a/drivers/gpu/drm/i915/i915_scheduler.h
>>>> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
>>>> @@ -19,12 +19,14 @@ enum {
>>>>        I915_PRIORITY_INVALID = INT_MIN
>>>>    };
>>>>    
>>>> -#define I915_USER_PRIORITY_SHIFT 0
>>>> +#define I915_USER_PRIORITY_SHIFT 1
>>>>    #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>>>>    
>>>>    #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>>>>    #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>>>>    
>>>> +#define I915_PRIORITY_NEWCLIENT      ((u8)BIT(0))
>>>
>>> Is the cast important and why?
>>
>> Unreliable memory says there was something iffy about the code generation
>> at one point.
> 
> drivers/gpu/drm/i915/intel_lrc.c: In function ‘__unwind_incomplete_requests’:
> drivers/gpu/drm/i915/intel_lrc.c:272:13: error: overflow in conversion from ‘long unsigned int’ to ‘int’ changes value from ‘18446744071562067969’ to ‘-2147483647’ [-Werror=overflow]
>    int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;

So the correct cast would be (int)BIT(..), or maybe not use BIT at all,
for less confusion?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 17/40] drm/i915: Priority boost for waiting clients
  2018-09-24 11:29   ` Tvrtko Ursulin
@ 2018-09-25  9:00     ` Chris Wilson
  2018-09-25  9:07       ` Tvrtko Ursulin
  0 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-25  9:00 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-24 12:29:45)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > +#define I915_PRIORITY_WAIT   ((u8)BIT(1))
> >   #define I915_PRIORITY_NEWCLIENT     ((u8)BIT(0))
> 
> Put a comment here explaining the priority order is reversed in the 
> internal range.
> 
> With new client protection against being repeatedly pre-empted added in 
> a respective previous patch, I am okay that we give this a go.

Hmm, actually it is not reversed. So you would prefer new clients to
have a small priority boost over the stalling clients (which are in
effect under control of the user). Ok.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 16/40] drm/i915: Pull scheduling under standalone lock
  2018-09-25  8:19     ` Chris Wilson
@ 2018-09-25  9:01       ` Tvrtko Ursulin
  2018-09-25  9:10         ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-25  9:01 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/09/2018 09:19, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-24 12:19:39)
>>
>> On 19/09/2018 20:55, Chris Wilson wrote:
>>> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
>>> new file mode 100644
>>> index 000000000000..910ac7089596
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
>>> @@ -0,0 +1,377 @@
>>> +/*
>>> + * SPDX-License-Identifier: MIT
>>> + *
>>> + * Copyright © 2018 Intel Corporation
>>> + */
>>> +
>>> +#include <linux/mutex.h>
>>> +
>>> +#include "i915_drv.h"
>>> +#include "i915_request.h"
>>> +#include "i915_scheduler.h"
>>> +
>>> +static DEFINE_SPINLOCK(schedule_lock);
>>
>> Any good excuses to not put it into device private straight away?
> 
> It can't be per-device, since it will need to be global across devices
> as the dependency chains may cross devices and we will want to choose an
> optimal order globally. That's my goal here, at least.
> 
> At some point, the horror of scalability vs priority inversion must then
> be tackled. (But for now, I'd rather have priority inheritance and start
> from a position of good qos and build scalability on top.)

Hm.. questionable that we would want to break the isolation between
devices, but more importantly it is too early to know. The struct mutex
this replaces is certainly per device, so I'd expect that would have
been an argument to keep that organisation. At least let's not change
two things at once. Never mind, it is not a very important detail at
the moment.

Regards,

Tvrtko


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 15/40] drm/i915: Priority boost for new clients
  2018-09-25  8:57         ` Tvrtko Ursulin
@ 2018-09-25  9:06           ` Chris Wilson
  2018-09-25  9:08             ` Tvrtko Ursulin
  0 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-25  9:06 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-25 09:57:11)
> 
> On 25/09/2018 09:26, Chris Wilson wrote:
> > Quoting Chris Wilson (2018-09-25 09:01:06)
> >> Quoting Tvrtko Ursulin (2018-09-24 11:29:52)
> >>>
> >>> On 19/09/2018 20:55, Chris Wilson wrote:
> >>>> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> >>>> index 7edfad0abfd7..93e43e263d8c 100644
> >>>> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> >>>> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> >>>> @@ -19,12 +19,14 @@ enum {
> >>>>        I915_PRIORITY_INVALID = INT_MIN
> >>>>    };
> >>>>    
> >>>> -#define I915_USER_PRIORITY_SHIFT 0
> >>>> +#define I915_USER_PRIORITY_SHIFT 1
> >>>>    #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
> >>>>    
> >>>>    #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
> >>>>    #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
> >>>>    
> >>>> +#define I915_PRIORITY_NEWCLIENT      ((u8)BIT(0))
> >>>
> >>> Is the cast important and why?
> >>
> >> Unreliable memory says there was something iffy about the code generation
> >> at one point.
> > 
> > drivers/gpu/drm/i915/intel_lrc.c: In function ‘__unwind_incomplete_requests’:
> > drivers/gpu/drm/i915/intel_lrc.c:272:13: error: overflow in conversion from ‘long unsigned int’ to ‘int’ changes value from ‘18446744071562067969’ to ‘-2147483647’ [-Werror=overflow]
> >    int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
> 
> So correct cast would be (int)BIT(..), or maybe not use BIT for less 
> confusion?

It's a bit, I like them unsigned to avoid sign extension confusion most
of the time. (What am I saying, sign extension is already confusing and
no matter what you do, you always want the opposite.)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 17/40] drm/i915: Priority boost for waiting clients
  2018-09-25  9:00     ` Chris Wilson
@ 2018-09-25  9:07       ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-25  9:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/09/2018 10:00, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-24 12:29:45)
>>
>> On 19/09/2018 20:55, Chris Wilson wrote:
>>> +#define I915_PRIORITY_WAIT   ((u8)BIT(1))
>>>    #define I915_PRIORITY_NEWCLIENT     ((u8)BIT(0))
>>
>> Put a comment here explaining the priority order is reversed in the
>> internal range.
>>
>> With new client protection against being repeatedly pre-empted added in
>> a respective previous patch, I am okay that we give this a go.
> 
> Hmm, actually it is not reversed. So you would prefer new clients to
> have a small priority boost over the stalling clients (which are in
> effect under control of the user). Ok.

I thought it was reversed:

+	/* buckets sorted from highest [in slot 0] to lowest priority */
+	idx = I915_PRIORITY_MASK - (prio & I915_PRIORITY_MASK);
+	prio >>= I915_USER_PRIORITY_SHIFT;

But my two comments are not related - I just wanted a comment next to 
internal level definitions in case they are reversed. If they are not 
reversed it is fine without a comment. I guess the comment only applies 
to bucket organization, not the priority levels, that was my confusion..

It made sense to me that new clients would be more important than 
something stuck on a potentially long wait.. but I don't know, it's all 
guesswork.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread
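
To make the packing of user and internal priority bits discussed in the
thread above concrete, here is a small standalone sketch. It assumes a
final shift of 2 once both boost bits exist; the macro names mirror the
I915_* ones in the quoted hunks but are simplified locals, and the
program itself is made up purely for illustration:

	#include <stdio.h>

	#define USER_PRIORITY_SHIFT	2
	#define USER_PRIORITY(x)	((x) << USER_PRIORITY_SHIFT)
	#define PRIORITY_COUNT		(1 << USER_PRIORITY_SHIFT)
	#define PRIORITY_MASK		(-PRIORITY_COUNT)

	#define PRIORITY_WAIT		(1 << 1)
	#define PRIORITY_NEWCLIENT	(1 << 0)

	int main(void)
	{
		int base   = USER_PRIORITY(0);				/* 0 */
		int fresh  = USER_PRIORITY(0) | PRIORITY_NEWCLIENT;	/* 1 */
		int waited = USER_PRIORITY(0) | PRIORITY_WAIT;		/* 2 */
		int bumped = USER_PRIORITY(1);				/* 4 */

		/*
		 * More positive values are scheduled first, so a boost lifts
		 * a request above its peers at the same user priority while
		 * staying below the next user level: 0 < 1 < 2 < 4.
		 */
		printf("%d %d %d %d\n", base, fresh, waited, bumped);

		/* The two halves can be split back apart. */
		printf("user=%d internal=%d\n",
		       waited >> USER_PRIORITY_SHIFT,	/* 0 */
		       waited & ~PRIORITY_MASK);	/* 2 */
		return 0;
	}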

* Re: [PATCH 15/40] drm/i915: Priority boost for new clients
  2018-09-25  9:06           ` Chris Wilson
@ 2018-09-25  9:08             ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-25  9:08 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/09/2018 10:06, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-25 09:57:11)
>>
>> On 25/09/2018 09:26, Chris Wilson wrote:
>>> Quoting Chris Wilson (2018-09-25 09:01:06)
>>>> Quoting Tvrtko Ursulin (2018-09-24 11:29:52)
>>>>>
>>>>> On 19/09/2018 20:55, Chris Wilson wrote:
>>>>>> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
>>>>>> index 7edfad0abfd7..93e43e263d8c 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_scheduler.h
>>>>>> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
>>>>>> @@ -19,12 +19,14 @@ enum {
>>>>>>         I915_PRIORITY_INVALID = INT_MIN
>>>>>>     };
>>>>>>     
>>>>>> -#define I915_USER_PRIORITY_SHIFT 0
>>>>>> +#define I915_USER_PRIORITY_SHIFT 1
>>>>>>     #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>>>>>>     
>>>>>>     #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>>>>>>     #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>>>>>>     
>>>>>> +#define I915_PRIORITY_NEWCLIENT      ((u8)BIT(0))
>>>>>
>>>>> Is the cast important and why?
>>>>
>>>> Unreliable memory says there was something iffy about the code generation
>>>> at one point.
>>>
>>> drivers/gpu/drm/i915/intel_lrc.c: In function ‘__unwind_incomplete_requests’:
>>> drivers/gpu/drm/i915/intel_lrc.c:272:13: error: overflow in conversion from ‘long unsigned int’ to ‘int’ changes value from ‘18446744071562067969’ to ‘-2147483647’ [-Werror=overflow]
>>>     int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
>>
>> So correct cast would be (int)BIT(..), or maybe not use BIT for less
>> confusion?
> 
> It's a bit, I like them unsigned to avoid sign extension confusion most
> of the time. (What am I saying, sign extension is already confusing and
> no matter what you do, you always want the opposite.)

Okay, it's internal so no big deal either way.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 16/40] drm/i915: Pull scheduling under standalone lock
  2018-09-25  9:01       ` Tvrtko Ursulin
@ 2018-09-25  9:10         ` Chris Wilson
  2018-09-25  9:19           ` Tvrtko Ursulin
  0 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-25  9:10 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-25 10:01:21)
> 
> On 25/09/2018 09:19, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-09-24 12:19:39)
> >>
> >> On 19/09/2018 20:55, Chris Wilson wrote:
> >>> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> >>> new file mode 100644
> >>> index 000000000000..910ac7089596
> >>> --- /dev/null
> >>> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> >>> @@ -0,0 +1,377 @@
> >>> +/*
> >>> + * SPDX-License-Identifier: MIT
> >>> + *
> >>> + * Copyright © 2018 Intel Corporation
> >>> + */
> >>> +
> >>> +#include <linux/mutex.h>
> >>> +
> >>> +#include "i915_drv.h"
> >>> +#include "i915_request.h"
> >>> +#include "i915_scheduler.h"
> >>> +
> >>> +static DEFINE_SPINLOCK(schedule_lock);
> >>
> >> Any good excuses to not put it into device private straight away?
> > 
> > It can't be per-device, since it will need to be global across devices
> > as the dependency chains may cross devices and we will want to choose an
> > optimal order globally. That's my goal here, at least.
> > 
> > At some point, the horror of scalability vs priority inversion must then
> > be tackled. (But for now, I'd rather have priority inheritance and start
> > from a position of good qos and build scalability on top.)
> 
> Hm.. questionable that we would want to break the isolation between
> devices, but more importantly it is too early to know. The struct mutex
> this replaces is certainly per device, so I'd expect that would have
> been an argument to keep that organisation. At least let's not change
> two things at once. Never mind, it is not a very important detail at
> the moment.

The intention was to someday even handle priorities across dmabuf. Using
dfs_link was a mere convenience, to avoid needing a fancy allocation
scheme here and some ingenuity for list protection. The lack of priority
inheritance across devices is a user-visible problem today for
optimus/hybrid systems :(
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 16/40] drm/i915: Pull scheduling under standalone lock
  2018-09-25  9:10         ` Chris Wilson
@ 2018-09-25  9:19           ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-25  9:19 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/09/2018 10:10, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-25 10:01:21)
>>
>> On 25/09/2018 09:19, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-09-24 12:19:39)
>>>>
>>>> On 19/09/2018 20:55, Chris Wilson wrote:
>>>>> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
>>>>> new file mode 100644
>>>>> index 000000000000..910ac7089596
>>>>> --- /dev/null
>>>>> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
>>>>> @@ -0,0 +1,377 @@
>>>>> +/*
>>>>> + * SPDX-License-Identifier: MIT
>>>>> + *
>>>>> + * Copyright © 2018 Intel Corporation
>>>>> + */
>>>>> +
>>>>> +#include <linux/mutex.h>
>>>>> +
>>>>> +#include "i915_drv.h"
>>>>> +#include "i915_request.h"
>>>>> +#include "i915_scheduler.h"
>>>>> +
>>>>> +static DEFINE_SPINLOCK(schedule_lock);
>>>>
>>>> Any good excuses to not put it into device private straight away?
>>>
>>> It can't be per-device, since it will need to be global across devices
>>> as the dependency chains may cross devices and we will want to choose an
>>> optimal order globally. That's my goal here, at least.
>>>
>>> At some point, the horror of scalability vs priority inversion must then
>>> be tackled. (But for now, I'd rather have priority inheritance and start
>>> from a position of good qos and build scalability on top.)
>>
>> Hm.. questionable that we would want to break the isolation between
>> devices, but more importantly it is too early to know. The struct mutex
>> this replaces is certainly per device, so I'd expect that would have
>> been an argument to keep that organisation. At least let's not change
>> two things at once. Never mind, it is not a very important detail at
>> the moment.
> 
> The intention was to someday even handle priorities across dmabuf. Using
> dfs_link was a mere convenience, to avoid needing a fancy allocation
> scheme here and some ingenuity for list protection. The lack of priority
> inheritance across devices is a user-visible problem today for
> optimus/hybrid systems :(

Maybe, but solving that needs a protocol between drivers and I don't 
think a shared private lock in i915 is an interesting step towards it.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 15/40] drm/i915: Priority boost for new clients
  2018-09-25  8:26       ` Chris Wilson
  2018-09-25  8:57         ` Tvrtko Ursulin
@ 2018-09-25 11:20         ` Michal Wajdeczko
  1 sibling, 0 replies; 106+ messages in thread
From: Michal Wajdeczko @ 2018-09-25 11:20 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx, Chris Wilson

On Tue, 25 Sep 2018 10:26:57 +0200, Chris Wilson  
<chris@chris-wilson.co.uk> wrote:

> Quoting Chris Wilson (2018-09-25 09:01:06)
>> Quoting Tvrtko Ursulin (2018-09-24 11:29:52)
>> >
>> > On 19/09/2018 20:55, Chris Wilson wrote:
>> > > diff --git a/drivers/gpu/drm/i915/i915_scheduler.h  
>> b/drivers/gpu/drm/i915/i915_scheduler.h
>> > > index 7edfad0abfd7..93e43e263d8c 100644
>> > > --- a/drivers/gpu/drm/i915/i915_scheduler.h
>> > > +++ b/drivers/gpu/drm/i915/i915_scheduler.h
>> > > @@ -19,12 +19,14 @@ enum {
>> > >       I915_PRIORITY_INVALID = INT_MIN
>> > >   };
>> > >
>> > > -#define I915_USER_PRIORITY_SHIFT 0
>> > > +#define I915_USER_PRIORITY_SHIFT 1
>> > >   #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>> > >
>> > >   #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>> > >   #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>> > >
>> > > +#define I915_PRIORITY_NEWCLIENT      ((u8)BIT(0))
>> >
>> > Is the cast important and why?
>>
>> Unreliable memory says there was something iffy about the code  
>> generation
>> at one point.
>
> drivers/gpu/drm/i915/intel_lrc.c: In function  
> ‘__unwind_incomplete_requests’:
> drivers/gpu/drm/i915/intel_lrc.c:272:13: error: overflow in conversion  
> from ‘long unsigned int’ to ‘int’ changes value from  
> ‘18446744071562067969’ to ‘-2147483647’ [-Werror=overflow]
>   int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
>

If you plan to use I915_PRIORITY_NEWCLIENT in 'int' vars then you
should not use the BIT() macro, which returns 'unsigned long'.

As I915_USER_PRIORITY is already using explicit shift, maybe the
same can be done for I915_PRIORITY_NEWCLIENT:

	#define I915_PRIORITY_NEWCLIENT      (1 << 0)
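
A minimal standalone illustration of the promotions involved (just a
sketch: BIT() is copied from the kernel's 1UL-based definition, a 64-bit
build is assumed, and the PRIORITY_*/NEWCLIENT_* names below are made up
for the example rather than taken from the i915 tree):

	#include <limits.h>
	#include <stdio.h>

	#define BIT(nr)			(1UL << (nr))

	#define PRIORITY_INVALID	INT_MIN
	#define NEWCLIENT_UL		BIT(0)			/* unsigned long */
	#define NEWCLIENT_U8		((unsigned char)BIT(0))	/* promotes back to int */
	#define NEWCLIENT_INT		(1 << 0)		/* plain int */

	int main(void)
	{
		/*
		 * INT_MIN | NEWCLIENT_UL is evaluated in unsigned long
		 * (0xffffffff80000001 on 64-bit), so storing it in an int
		 * trips -Woverflow, which -Werror turns into the build
		 * failure quoted above.
		 */
		int bad = PRIORITY_INVALID | NEWCLIENT_UL;

		/* Both of these stay in int arithmetic and compile quietly. */
		int ok_cast = PRIORITY_INVALID | NEWCLIENT_U8;
		int ok_shift = PRIORITY_INVALID | NEWCLIENT_INT;

		printf("%d %d %d\n", bad, ok_cast, ok_shift);
		return 0;
	}

On a typical gcc build all three end up as -2147483647 at runtime; only
the first form upsets the compiler.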

Michal
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 18/40] drm/i915: Report the number of closed vma held by each context in debugfs
  2018-09-24 11:57   ` Tvrtko Ursulin
@ 2018-09-25 12:20     ` Chris Wilson
  0 siblings, 0 replies; 106+ messages in thread
From: Chris Wilson @ 2018-09-25 12:20 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-24 12:57:31)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > Include the total size of closed vma when reporting the per_ctx_stats of
> > debugfs/i915_gem_objects.
> 
> Why do we need/want this?

Removing extra locks, reducing struct_mutex coverage, and because it
would have shown a transient leak fixed a while back. That is the
raison d'être of this debugfs.
 
> > Whilst adjusting the context tracking, note that we can simply use our
> > list of contexts in i915->contexts rather than circumlocute via
> > dev->filelist and the per-file context idr.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_debugfs.c | 113 +++++++++++-----------------
> >   1 file changed, 42 insertions(+), 71 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 2ac75bc10afa..6b5cc30f3e09 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -302,6 +302,7 @@ struct file_stats {
> >       u64 total, unbound;
> >       u64 global, shared;
> >       u64 active, inactive;
> > +     u64 closed;
> >   };
> >   
> >   static int per_file_stats(int id, void *ptr, void *data)
> > @@ -336,6 +337,9 @@ static int per_file_stats(int id, void *ptr, void *data)
> >                       stats->active += vma->node.size;
> >               else
> >                       stats->inactive += vma->node.size;
> > +
> > +             if (i915_vma_is_closed(vma))
> > +                     stats->closed += vma->node.size;
> 
> We can have closed and active?

Yes.

> >       }
> >   
> >       return 0;
> > @@ -343,7 +347,7 @@ static int per_file_stats(int id, void *ptr, void *data)
> >   
> >   #define print_file_stats(m, name, stats) do { \
> >       if (stats.count) \
> > -             seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound)\n", \
> > +             seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound, %llu closed)\n", \
> >                          name, \
> >                          stats.count, \
> >                          stats.total, \
> > @@ -351,7 +355,8 @@ static int per_file_stats(int id, void *ptr, void *data)
> >                          stats.inactive, \
> >                          stats.global, \
> >                          stats.shared, \
> > -                        stats.unbound); \
> > +                        stats.unbound, \
> > +                        stats.closed); \
> >   } while (0)
> >   
> >   static void print_batch_pool_stats(struct seq_file *m,
> > @@ -377,44 +382,44 @@ static void print_batch_pool_stats(struct seq_file *m,
> >       print_file_stats(m, "[k]batch pool", stats);
> >   }
> >   
> > -static int per_file_ctx_stats(int idx, void *ptr, void *data)
> > +static void print_context_stats(struct seq_file *m,
> > +                             struct drm_i915_private *i915)
> >   {
> > -     struct i915_gem_context *ctx = ptr;
> > -     struct intel_engine_cs *engine;
> > -     enum intel_engine_id id;
> > -
> > -     for_each_engine(engine, ctx->i915, id) {
> > -             struct intel_context *ce = to_intel_context(ctx, engine);
> > +     struct file_stats kstats = {};
> > +     struct i915_gem_context *ctx;
> >   
> > -             if (ce->state)
> > -                     per_file_stats(0, ce->state->obj, data);
> > -             if (ce->ring)
> > -                     per_file_stats(0, ce->ring->vma->obj, data);
> > -     }
> > +     list_for_each_entry(ctx, &i915->contexts.list, link) {
> > +             struct file_stats stats = { .file_priv = ctx->file_priv };
> > +             struct intel_engine_cs *engine;
> > +             enum intel_engine_id id;
> >   
> > -     return 0;
> > -}
> > +             for_each_engine(engine, i915, id) {
> > +                     struct intel_context *ce = to_intel_context(ctx, engine);
> >   
> > -static void print_context_stats(struct seq_file *m,
> > -                             struct drm_i915_private *dev_priv)
> > -{
> > -     struct drm_device *dev = &dev_priv->drm;
> > -     struct file_stats stats;
> > -     struct drm_file *file;
> > +                     if (ce->state)
> > +                             per_file_stats(0, ce->state->obj, &kstats);
> > +                     if (ce->ring)
> > +                             per_file_stats(0, ce->ring->vma->obj, &kstats);
> > +             }
> >   
> > -     memset(&stats, 0, sizeof(stats));
> > +             if (!IS_ERR_OR_NULL(stats.file_priv)) {
> > +                     struct drm_file *file = stats.file_priv->file;
> > +                     struct task_struct *task;
> >   
> > -     mutex_lock(&dev->struct_mutex);
> > -     if (dev_priv->kernel_context)
> > -             per_file_ctx_stats(0, dev_priv->kernel_context, &stats);
> > +                     spin_lock(&file->table_lock);
> > +                     idr_for_each(&file->object_idr, per_file_stats, &stats);
> 
> Headache-inducing diff.. however, doesn't this over-account objects on
> account of walking the same file from multiple contexts?

Sure, but it doesn't matter since we are using this as an indication of
how many objects are usable from each context. We can go beyond the
original code and try to actually narrow it down to only objects in use
by a single context. That looks fiddly though, and since all I want is
rough usage to spot a leak, I'm not too concerned.

> 
> > +                     spin_unlock(&file->table_lock);
> >   
> > -     list_for_each_entry(file, &dev->filelist, lhead) {
> > -             struct drm_i915_file_private *fpriv = file->driver_priv;
> > -             idr_for_each(&fpriv->context_idr, per_file_ctx_stats, &stats);
> > +                     rcu_read_lock();
> > +                     task = pid_task(ctx->pid ?: file->pid, PIDTYPE_PID);
> > +                     print_file_stats(m,
> > +                                      task ? task->comm : "<unknown>",
> > +                                      stats);
> 
> And this as well looks like it'll end up duplicated.

Sure, it should be since it's reporting each context.
> 
> > +                     rcu_read_unlock();
> > +             }
> >       }
> > -     mutex_unlock(&dev->struct_mutex);
> >   
> > -     print_file_stats(m, "[k]contexts", stats);
> > +     print_file_stats(m, "[k]contexts", kstats);
> >   }
> >   
> >   static int i915_gem_object_info(struct seq_file *m, void *data)
> > @@ -426,14 +431,9 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
> >       u64 size, mapped_size, purgeable_size, dpy_size, huge_size;
> >       struct drm_i915_gem_object *obj;
> >       unsigned int page_sizes = 0;
> > -     struct drm_file *file;
> >       char buf[80];
> >       int ret;
> >   
> > -     ret = mutex_lock_interruptible(&dev->struct_mutex);
> > -     if (ret)
> > -             return ret;
> > -
> >       seq_printf(m, "%u objects, %llu bytes\n",
> >                  dev_priv->mm.object_count,
> >                  dev_priv->mm.object_memory);
> 
> Noticed we technically need mm.object_stat_lock here for atomic readout, 
> but I guess we don't care much.

Correct.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 37/40] drm/i915: Allow a context to define its set of engines
  2018-09-19 19:55 ` [PATCH 37/40] drm/i915: Allow a context to define its set of engines Chris Wilson
@ 2018-09-27 11:28   ` Tvrtko Ursulin
  2018-09-28 20:22     ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-27 11:28 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> Over the last few years, we have debated how to extend the user API to
> support an increase in the number of engines, that may be sparse and
> even be heterogeneous within a class (not all video decoders created
> equal). We settled on using (class, instance) tuples to identify a
> specific engine, with an API for the user to construct a map of engines
> to capabilities. Into this picture, we then add a challenge of virtual
> engines; one user engine that maps behind the scenes to any number of
> physical engines. To keep it general, we want the user to have full
> control over that mapping. To that end, we allow the user to constrain a
> context to define the set of engines that it can access, order fully
> controlled by the user via (class, instance). With such precise control
> in context setup, we can continue to use the existing execbuf uABI of
> specifying a single index; only now it doesn't automagically map onto
> the engines, it uses the user defined engine map from the context.
> 
> The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
> execbuf. Its use will be revealed in the next patch.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c    | 88 ++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_context.h    |  4 +
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 22 ++++--
>   include/uapi/drm/i915_drm.h                | 27 +++++++
>   4 files changed, 135 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index a8570a07b3b7..313471253f51 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -90,6 +90,7 @@
>   #include <drm/i915_drm.h>
>   #include "i915_drv.h"
>   #include "i915_trace.h"
> +#include "i915_user_extensions.h"
>   #include "intel_workarounds.h"
>   
>   #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
> @@ -223,6 +224,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   			ce->ops->destroy(ce);
>   	}
>   
> +	kfree(ctx->engines);
> +
>   	if (ctx->timeline)
>   		i915_timeline_put(ctx->timeline);
>   
> @@ -948,6 +951,87 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   	return ret;
>   }
>   
> +struct set_engines {
> +	struct i915_gem_context *ctx;
> +	struct intel_engine_cs **engines;
> +	unsigned int nengine;
> +};
> +
> +static const i915_user_extension_fn set_engines__extensions[] = {
> +};

This is OK unless someone one day gets the desire to make the extension 
namespace sparse. I was thinking on how to put some warnings in the code 
to warn about it, but I think that's for later. Namespace is also per 
parent ioctl, another thing which would perhaps need enforcing.

> +
> +static int set_engines(struct i915_gem_context *ctx,
> +		       struct drm_i915_gem_context_param *args)
> +{
> +	struct i915_context_param_engines __user *user;
> +	struct set_engines set = {
> +		.ctx = ctx,
> +		.engines = NULL,
> +		.nengine = -1,
> +	};
> +	unsigned int n;
> +	u64 size, extensions;

Size is u32 in the uAPI, so either that or unsigned int would do here.

> +	int err;
> +
> +	user = u64_to_user_ptr(args->value);
> +	size = args->size;
> +	if (!size)

args->sizes = sizeof(*user);

... if you want to allow size probing via set param, or we add a 
get_param (too much work for nothing?) and only allow probing from there?

> +		goto out;
> +
> +	if (size < sizeof(struct i915_context_param_engines))
> +		return -EINVAL;
> +
> +	size -= sizeof(struct i915_context_param_engines);
> +	if (size % sizeof(*user->class_instance))
> +		return -EINVAL;
> +
> +	set.nengine = size / sizeof(*user->class_instance);
> +	if (set.nengine == 0 || set.nengine >= I915_EXEC_RING_MASK)
> +		return -EINVAL;
> +
> +	set.engines = kmalloc_array(set.nengine + 1,
> +				    sizeof(*set.engines),
> +				    GFP_KERNEL);
> +	if (!set.engines)
> +		return -ENOMEM;
> +
> +	set.engines[0] = NULL;

/* Reserve the I915_EXEC_DEFAULT slot. */

> +	for (n = 0; n < set.nengine; n++) {
> +		u32 class, instance;

I will later recommend we use u16 for class/instance. I think we settled 
for that in the meantime.

> +
> +		if (get_user(class, &user->class_instance[n].class) ||
> +		    get_user(instance, &user->class_instance[n].instance)) {
> +			kfree(set.engines);
> +			return -EFAULT;
> +		}
> +
> +		set.engines[n + 1] =
> +			intel_engine_lookup_user(ctx->i915, class, instance);
> +		if (!set.engines[n + 1]) {
> +			kfree(set.engines);
> +			return -ENOENT;
> +		}
> +	}
> +
> +	err = -EFAULT;
> +	if (!get_user(extensions, &user->extensions))
> +		err = i915_user_extensions(u64_to_user_ptr(extensions),
> +					   set_engines__extensions,
> +					   ARRAY_SIZE(set_engines__extensions),
> +					   &set);
> +	if (err) {

Do we need a user extensions destructor table for a more 
compartmentalized design? This actually applies to the 
i915_user_extensions() implementation.. it would need to take two 
function tables I think and unwind until the one that failed.

> +		kfree(set.engines);
> +		return err;
> +	}
> +
> +out:
> +	kfree(ctx->engines);
> +	ctx->engines = set.engines;
> +	ctx->nengine = set.nengine + 1;
> +
> +	return 0;
> +}
> +
>   int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   				    struct drm_file *file)
>   {
> @@ -1011,6 +1095,10 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   		}
>   		break;
>   
> +	case I915_CONTEXT_PARAM_ENGINES:
> +		ret = set_engines(ctx, args);
> +		break;
> +
>   	default:
>   		ret = -EINVAL;
>   		break;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 770043449ba6..9f89119a6566 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -67,6 +67,8 @@ struct i915_gem_context {
>   	/** file_priv: owning file descriptor */
>   	struct drm_i915_file_private *file_priv;
>   
> +	struct intel_engine_cs **engines;
> +
>   	struct i915_timeline *timeline;
>   
>   	/**
> @@ -135,6 +137,8 @@ struct i915_gem_context {
>   #define CONTEXT_CLOSED			1
>   #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
>   
> +	unsigned int nengine;

Why put related fields apart?

> +
>   	/**
>   	 * @hw_id: - unique identifier for the context
>   	 *
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 109486a6ef07..faedfdca875d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2014,13 +2014,23 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
>   };
>   
>   static struct intel_engine_cs *
> -eb_select_engine(struct drm_i915_private *dev_priv,
> +eb_select_engine(struct i915_execbuffer *eb,
>   		 struct drm_file *file,
>   		 struct drm_i915_gem_execbuffer2 *args)
>   {
>   	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
>   	struct intel_engine_cs *engine;
>   
> +	if (eb->ctx->engines) {
> +		if (user_ring_id >= eb->ctx->nengine) {
> +			DRM_DEBUG("execbuf with unknown ring: %u\n",
> +				  user_ring_id);
> +			return NULL;
> +		}
> +
> +		return eb->ctx->engines[user_ring_id];
> +	}
> +
>   	if (user_ring_id > I915_USER_RINGS) {
>   		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
>   		return NULL;
> @@ -2033,11 +2043,11 @@ eb_select_engine(struct drm_i915_private *dev_priv,
>   		return NULL;
>   	}
>   
> -	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
> +	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(eb->i915)) {
>   		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
>   
>   		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
> -			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
> +			bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
>   		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
>   			   bsd_idx <= I915_EXEC_BSD_RING2) {
>   			bsd_idx >>= I915_EXEC_BSD_SHIFT;
> @@ -2048,9 +2058,9 @@ eb_select_engine(struct drm_i915_private *dev_priv,
>   			return NULL;
>   		}
>   
> -		engine = dev_priv->engine[_VCS(bsd_idx)];
> +		engine = eb->i915->engine[_VCS(bsd_idx)];
>   	} else {
> -		engine = dev_priv->engine[user_ring_map[user_ring_id]];
> +		engine = eb->i915->engine[user_ring_map[user_ring_id]];
>   	}
>   
>   	if (!engine) {
> @@ -2260,7 +2270,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	if (unlikely(err))
>   		goto err_destroy;
>   
> -	eb.engine = eb_select_engine(eb.i915, file, args);
> +	eb.engine = eb_select_engine(&eb, file, args);
>   	if (!eb.engine) {
>   		err = -EINVAL;
>   		goto err_engine;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 65b0b84419f3..d41b4c673af4 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1508,9 +1508,36 @@ struct drm_i915_gem_context_param {
>   #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
>   #define   I915_CONTEXT_DEFAULT_PRIORITY		0
>   #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
> +
> +/*
> + * I915_CONTEXT_PARAM_ENGINES:
> + *
> + * Bind this context to operate on this subset of available engines. Henceforth,
> + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
> + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
> + * and upwards. The array created is offset by 1, such that by default
> + * I915_EXEC_DEFAULT is left empty, to be filled in as directed. Slots 1...N
> + * are then filled in using the specified (class, instance).

Should we allow specifying the default index? I am wondering if that 
would make life easier for any userspace.

> + *
> + * Setting the number of engines bound to the context will revert back to
> + * default settings.
> + *
> + * See struct i915_context_param_engines.
> + */
> +#define I915_CONTEXT_PARAM_ENGINES	0x7
> +
>   	__u64 value;
>   };
>   
> +struct i915_context_param_engines {
> +	__u64 extensions;
> +
> +	struct {
> +		__u32 class; /* see enum drm_i915_gem_engine_class */
> +		__u32 instance;

u16 here as mentioned earlier.

> +	} class_instance[0];
> +};
> +
>   enum drm_i915_oa_format {
>   	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
>   	I915_OA_FORMAT_A29,	    /* HSW only */
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread
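
For reference, the engine-map uAPI reviewed above boils down to roughly
the following from userspace's point of view. This is only a sketch and
has not been run against the series: I915_CONTEXT_PARAM_ENGINES and the
parameter layout come from the patch and are not in released headers, so
they are open-coded here; set_engine_map() and sketch_param_engines are
made-up names; everything else is the stock i915 uapi.

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	#ifndef I915_CONTEXT_PARAM_ENGINES
	#define I915_CONTEXT_PARAM_ENGINES 0x7	/* value proposed in the patch */
	#endif

	/* Mirrors the proposed struct i915_context_param_engines. */
	struct sketch_param_engines {
		uint64_t extensions;
		struct {
			uint32_t class;
			uint32_t instance;
		} class_instance[2];
	};

	static int set_engine_map(int drm_fd, uint32_t ctx_id)
	{
		struct sketch_param_engines engines = {
			.extensions = 0,
			.class_instance = {
				{ I915_ENGINE_CLASS_RENDER, 0 }, /* execbuf index 1 */
				{ I915_ENGINE_CLASS_VIDEO,  0 }, /* execbuf index 2 */
			},
		};
		struct drm_i915_gem_context_param arg = {
			.ctx_id = ctx_id,
			.param = I915_CONTEXT_PARAM_ENGINES,
			.size = sizeof(engines),
			.value = (uintptr_t)&engines,
		};

		/* Slot 0 (I915_EXEC_DEFAULT) stays reserved and unused. */
		return ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
	}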

* Re: [PATCH 39/40] drm/i915/execlists: Refactor out can_merge_rq()
  2018-09-19 19:55 ` [PATCH 39/40] drm/i915/execlists: Refactor out can_merge_rq() Chris Wilson
@ 2018-09-27 11:32   ` Tvrtko Ursulin
  2018-09-28 20:11     ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-09-27 11:32 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> In the next patch, we add another user that wants to check whether
> requests can be merged into a single HW execution, and in the future we
> want to add more conditions under which requests from the same context
> cannot be merged. In preparation, extract out can_merge_rq().
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 12 ++++++++++--
>   1 file changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index be7dbdd7fc2c..679ce521be16 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -478,6 +478,15 @@ static bool can_merge_ctx(const struct intel_context *prev,
>   	return true;
>   }
>   
> +static bool can_merge_rq(const struct i915_request *prev,
> +			 const struct i915_request *next)
> +{
> +	if (!can_merge_ctx(prev->hw_context, next->hw_context))
> +		return false;
> +
> +	return true;

Not just return can_merge_ctx(..) ?

Regards,

Tvrtko

> +}
> +
>   static void port_assign(struct execlist_port *port, struct i915_request *rq)
>   {
>   	GEM_BUG_ON(rq == port_request(port));
> @@ -639,8 +648,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   			 * second request, and so we never need to tell the
>   			 * hardware about the first.
>   			 */
> -			if (last &&
> -			    !can_merge_ctx(rq->hw_context, last->hw_context)) {
> +			if (last && !can_merge_rq(rq, last)) {
>   				/*
>   				 * If we are on the second port and cannot
>   				 * combine this request with the last, then we
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 39/40] drm/i915/execlists: Refactor out can_merge_rq()
  2018-09-27 11:32   ` Tvrtko Ursulin
@ 2018-09-28 20:11     ` Chris Wilson
  2018-10-01  8:14       ` Tvrtko Ursulin
  0 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-28 20:11 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-27 12:32:54)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > In the next patch, we add another user that wants to check whether
> > requests can be merged into a single HW execution, and in the future we
> > want to add more conditions under which requests from the same context
> > cannot be merged. In preparation, extract out can_merge_rq().
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/intel_lrc.c | 12 ++++++++++--
> >   1 file changed, 10 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index be7dbdd7fc2c..679ce521be16 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -478,6 +478,15 @@ static bool can_merge_ctx(const struct intel_context *prev,
> >       return true;
> >   }
> >   
> > +static bool can_merge_rq(const struct i915_request *prev,
> > +                      const struct i915_request *next)
> > +{
> > +     if (!can_merge_ctx(prev->hw_context, next->hw_context))
> > +             return false;
> > +
> > +     return true;
> 
> Not just return can_merge_ctx(..) ?

Makes you think I have plans to add more tests here... If you look in
the archives you can even have a sneak peek ;)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 37/40] drm/i915: Allow a context to define its set of engines
  2018-09-27 11:28   ` Tvrtko Ursulin
@ 2018-09-28 20:22     ` Chris Wilson
  2018-10-01  8:30       ` Tvrtko Ursulin
  0 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-09-28 20:22 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-09-27 12:28:47)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > Over the last few years, we have debated how to extend the user API to
> > support an increase in the number of engines, that may be sparse and
> > even be heterogeneous within a class (not all video decoders created
> > equal). We settled on using (class, instance) tuples to identify a
> > specific engine, with an API for the user to construct a map of engines
> > to capabilities. Into this picture, we then add a challenge of virtual
> > engines; one user engine that maps behind the scenes to any number of
> > physical engines. To keep it general, we want the user to have full
> > control over that mapping. To that end, we allow the user to constrain a
> > context to define the set of engines that it can access, order fully
> > controlled by the user via (class, instance). With such precise control
> > in context setup, we can continue to use the existing execbuf uABI of
> > specifying a single index; only now it doesn't automagically map onto
> > the engines, it uses the user defined engine map from the context.
> > 
> > The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
> > execbuf. Its use will be revealed in the next patch.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_context.c    | 88 ++++++++++++++++++++++
> >   drivers/gpu/drm/i915/i915_gem_context.h    |  4 +
> >   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 22 ++++--
> >   include/uapi/drm/i915_drm.h                | 27 +++++++
> >   4 files changed, 135 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index a8570a07b3b7..313471253f51 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -90,6 +90,7 @@
> >   #include <drm/i915_drm.h>
> >   #include "i915_drv.h"
> >   #include "i915_trace.h"
> > +#include "i915_user_extensions.h"
> >   #include "intel_workarounds.h"
> >   
> >   #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
> > @@ -223,6 +224,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
> >                       ce->ops->destroy(ce);
> >       }
> >   
> > +     kfree(ctx->engines);
> > +
> >       if (ctx->timeline)
> >               i915_timeline_put(ctx->timeline);
> >   
> > @@ -948,6 +951,87 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
> >       return ret;
> >   }
> >   
> > +struct set_engines {
> > +     struct i915_gem_context *ctx;
> > +     struct intel_engine_cs **engines;
> > +     unsigned int nengine;
> > +};
> > +
> > +static const i915_user_extension_fn set_engines__extensions[] = {
> > +};
> 
> This is OK unless someone one day gets the desire to make the extension 
> namespace sparse. I was thinking on how to put some warnings in the code 
> to warn about it, but I think that's for later. Namespace is also per 
> parent ioctl, another thing which would perhaps need enforcing.

Unless you intend to go extremely sparse, I'd just leave it with the
usual [NAME1] = func1, and skipping over NULLs. We can always add
alternatives if need be (I'd actually been meaning to convert execbuf3
over to this scheme to flesh it out a bit more).

> > +static int set_engines(struct i915_gem_context *ctx,
> > +                    struct drm_i915_gem_context_param *args)
> > +{
> > +     struct i915_context_param_engines __user *user;
> > +     struct set_engines set = {
> > +             .ctx = ctx,
> > +             .engines = NULL,
> > +             .nengine = -1,
> > +     };
> > +     unsigned int n;
> > +     u64 size, extensions;
> 
> Size is u32 in the uAPI, so either that or unsigned int would do here.
> 
> > +     int err;
> > +
> > +     user = u64_to_user_ptr(args->value);
> > +     size = args->size;
> > +     if (!size)
> 
> args->sizes = sizeof(*user);
> 
> ... if you want to allow size probing via set param, or we add a 
> get_param (too much work for nothing?) and only allow probing from there?

It's a variable array, so a little trickier.

> > +             goto out;
> > +
> > +     if (size < sizeof(struct i915_context_param_engines))
> > +             return -EINVAL;
> > +
> > +     size -= sizeof(struct i915_context_param_engines);
> > +     if (size % sizeof(*user->class_instance))
> > +             return -EINVAL;
> > +
> > +     set.nengine = size / sizeof(*user->class_instance);
> > +     if (set.nengine == 0 || set.nengine >= I915_EXEC_RING_MASK)
> > +             return -EINVAL;
> > +
> > +     set.engines = kmalloc_array(set.nengine + 1,
> > +                                 sizeof(*set.engines),
> > +                                 GFP_KERNEL);
> > +     if (!set.engines)
> > +             return -ENOMEM;
> > +
> > +     set.engines[0] = NULL;
> 
> /* Reserve the I915_EXEC_DEFAULT slot. */
> 
> > +     for (n = 0; n < set.nengine; n++) {
> > +             u32 class, instance;
> 
> I will later recommend we use u16 for class/instance. I think we settled 
> for that in the meantime.
> 
> > +
> > +             if (get_user(class, &user->class_instance[n].class) ||
> > +                 get_user(instance, &user->class_instance[n].instance)) {
> > +                     kfree(set.engines);
> > +                     return -EFAULT;
> > +             }
> > +
> > +             set.engines[n + 1] =
> > +                     intel_engine_lookup_user(ctx->i915, class, instance);
> > +             if (!set.engines[n + 1]) {
> > +                     kfree(set.engines);
> > +                     return -ENOENT;
> > +             }
> > +     }
> > +
> > +     err = -EFAULT;
> > +     if (!get_user(extensions, &user->extensions))
> > +             err = i915_user_extensions(u64_to_user_ptr(extensions),
> > +                                        set_engines__extensions,
> > +                                        ARRAY_SIZE(set_engines__extensions),
> > +                                        &set);
> > +     if (err) {
> 
> Do we need a user extensions destructor table for a more 
> compartmentalized design? This actually applies to the 
> i915_user_extensions() implementation.. it would need to take two 
> function tables I think and unwind until the one that failed.

Yeah, dtor for unwind is intriguing.

> > +             kfree(set.engines);
> > +             return err;
> > +     }
> > +
> > +out:
> > +     kfree(ctx->engines);
> > +     ctx->engines = set.engines;
> > +     ctx->nengine = set.nengine + 1;
> > +
> > +     return 0;
> > +}
> > +
> >   int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> >                                   struct drm_file *file)
> >   {
> > @@ -1011,6 +1095,10 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> >               }
> >               break;
> >   
> > +     case I915_CONTEXT_PARAM_ENGINES:
> > +             ret = set_engines(ctx, args);
> > +             break;
> > +
> >       default:
> >               ret = -EINVAL;
> >               break;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> > index 770043449ba6..9f89119a6566 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> > @@ -67,6 +67,8 @@ struct i915_gem_context {
> >       /** file_priv: owning file descriptor */
> >       struct drm_i915_file_private *file_priv;
> >   
> > +     struct intel_engine_cs **engines;
> > +
> >       struct i915_timeline *timeline;
> >   
> >       /**
> > @@ -135,6 +137,8 @@ struct i915_gem_context {
> >   #define CONTEXT_CLOSED                      1
> >   #define CONTEXT_FORCE_SINGLE_SUBMISSION     2
> >   
> > +     unsigned int nengine;
> 
> Why put related fields apart?

Packing.

> 
> > +
> >       /**
> >        * @hw_id: - unique identifier for the context
> >        *
> > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > index 109486a6ef07..faedfdca875d 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > @@ -2014,13 +2014,23 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
> >   };
> >   
> >   static struct intel_engine_cs *
> > -eb_select_engine(struct drm_i915_private *dev_priv,
> > +eb_select_engine(struct i915_execbuffer *eb,
> >                struct drm_file *file,
> >                struct drm_i915_gem_execbuffer2 *args)
> >   {
> >       unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
> >       struct intel_engine_cs *engine;
> >   
> > +     if (eb->ctx->engines) {
> > +             if (user_ring_id >= eb->ctx->nengine) {
> > +                     DRM_DEBUG("execbuf with unknown ring: %u\n",
> > +                               user_ring_id);
> > +                     return NULL;
> > +             }
> > +
> > +             return eb->ctx->engines[user_ring_id];
> > +     }
> > +
> >       if (user_ring_id > I915_USER_RINGS) {
> >               DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
> >               return NULL;
> > @@ -2033,11 +2043,11 @@ eb_select_engine(struct drm_i915_private *dev_priv,
> >               return NULL;
> >       }
> >   
> > -     if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
> > +     if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(eb->i915)) {
> >               unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
> >   
> >               if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
> > -                     bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
> > +                     bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
> >               } else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
> >                          bsd_idx <= I915_EXEC_BSD_RING2) {
> >                       bsd_idx >>= I915_EXEC_BSD_SHIFT;
> > @@ -2048,9 +2058,9 @@ eb_select_engine(struct drm_i915_private *dev_priv,
> >                       return NULL;
> >               }
> >   
> > -             engine = dev_priv->engine[_VCS(bsd_idx)];
> > +             engine = eb->i915->engine[_VCS(bsd_idx)];
> >       } else {
> > -             engine = dev_priv->engine[user_ring_map[user_ring_id]];
> > +             engine = eb->i915->engine[user_ring_map[user_ring_id]];
> >       }
> >   
> >       if (!engine) {
> > @@ -2260,7 +2270,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> >       if (unlikely(err))
> >               goto err_destroy;
> >   
> > -     eb.engine = eb_select_engine(eb.i915, file, args);
> > +     eb.engine = eb_select_engine(&eb, file, args);
> >       if (!eb.engine) {
> >               err = -EINVAL;
> >               goto err_engine;
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 65b0b84419f3..d41b4c673af4 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -1508,9 +1508,36 @@ struct drm_i915_gem_context_param {
> >   #define   I915_CONTEXT_MAX_USER_PRIORITY    1023 /* inclusive */
> >   #define   I915_CONTEXT_DEFAULT_PRIORITY             0
> >   #define   I915_CONTEXT_MIN_USER_PRIORITY    -1023 /* inclusive */
> > +
> > +/*
> > + * I915_CONTEXT_PARAM_ENGINES:
> > + *
> > + * Bind this context to operate on this subset of available engines. Henceforth,
> > + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
> > + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
> > + * and upwards. The array created is offset by 1, such that by default
> > + * I915_EXEC_DEFAULT is left empty, to be filled in as directed. Slots 1...N
> > + * are then filled in using the specified (class, instance).
> 
> Should we allow specifying the default index? I am wondering if that 
> would make life easier for any userspace.

Could do, certainly gets rid of the clumsy nengines+1, but the skip for
unknown class may make up for the clumsiness :)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 39/40] drm/i915/execlists: Refactor out can_merge_rq()
  2018-09-28 20:11     ` Chris Wilson
@ 2018-10-01  8:14       ` Tvrtko Ursulin
  2018-10-01  8:18         ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-10-01  8:14 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 28/09/2018 21:11, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-27 12:32:54)
>>
>> On 19/09/2018 20:55, Chris Wilson wrote:
>>> In the next patch, we add another user that wants to check whether
>>> requests can be merged into a single HW execution, and in the future we
>>> want to add more conditions under which requests from the same context
>>> cannot be merged. In preparation, extract out can_merge_rq().
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    drivers/gpu/drm/i915/intel_lrc.c | 12 ++++++++++--
>>>    1 file changed, 10 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index be7dbdd7fc2c..679ce521be16 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -478,6 +478,15 @@ static bool can_merge_ctx(const struct intel_context *prev,
>>>        return true;
>>>    }
>>>    
>>> +static bool can_merge_rq(const struct i915_request *prev,
>>> +                      const struct i915_request *next)
>>> +{
>>> +     if (!can_merge_ctx(prev->hw_context, next->hw_context))
>>> +             return false;
>>> +
>>> +     return true;
>>
>> Not just return can_merge_ctx(..) ?
> 
> Makes you think I have plans to add more tests here... If you look in
> the archives you can even have a sneak peek ;)

I don't see anything in Virtual Engine / Frame Split work, where to look?

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 39/40] drm/i915/execlists: Refactor out can_merge_rq()
  2018-10-01  8:14       ` Tvrtko Ursulin
@ 2018-10-01  8:18         ` Chris Wilson
  2018-10-01 10:18           ` Tvrtko Ursulin
  0 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-10-01  8:18 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-10-01 09:14:52)
> 
> On 28/09/2018 21:11, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-09-27 12:32:54)
> >>
> >> On 19/09/2018 20:55, Chris Wilson wrote:
> >>> In the next patch, we add another user that wants to check whether
> >>> requests can be merged into a single HW execution, and in the future we
> >>> want to add more conditions under which requests from the same context
> >>> cannot be merged. In preparation, extract out can_merge_rq().
> >>>
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> ---
> >>>    drivers/gpu/drm/i915/intel_lrc.c | 12 ++++++++++--
> >>>    1 file changed, 10 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >>> index be7dbdd7fc2c..679ce521be16 100644
> >>> --- a/drivers/gpu/drm/i915/intel_lrc.c
> >>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> >>> @@ -478,6 +478,15 @@ static bool can_merge_ctx(const struct intel_context *prev,
> >>>        return true;
> >>>    }
> >>>    
> >>> +static bool can_merge_rq(const struct i915_request *prev,
> >>> +                      const struct i915_request *next)
> >>> +{
> >>> +     if (!can_merge_ctx(prev->hw_context, next->hw_context))
> >>> +             return false;
> >>> +
> >>> +     return true;
> >>
> >> Not just return can_merge_ctx(..) ?
> > 
> > Makes you think I have plans to add more tests here... If you look in
> > the archives you can even have a sneak peek ;)
> 
> I don't see anything in Virtual Engine / Frame Split work, where to look?

Per-context frequency control to allow the rps range to change between
requests.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 37/40] drm/i915: Allow a context to define its set of engines
  2018-09-28 20:22     ` Chris Wilson
@ 2018-10-01  8:30       ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-10-01  8:30 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 28/09/2018 21:22, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-27 12:28:47)
>>
>> On 19/09/2018 20:55, Chris Wilson wrote:
>>> Over the last few years, we have debated how to extend the user API to
>>> support an increase in the number of engines, that may be sparse and
>>> even be heterogeneous within a class (not all video decoders created
>>> equal). We settled on using (class, instance) tuples to identify a
>>> specific engine, with an API for the user to construct a map of engines
>>> to capabilities. Into this picture, we then add a challenge of virtual
>>> engines; one user engine that maps behind the scenes to any number of
>>> physical engines. To keep it general, we want the user to have full
>>> control over that mapping. To that end, we allow the user to constrain a
>>> context to define the set of engines that it can access, order fully
>>> controlled by the user via (class, instance). With such precise control
>>> in context setup, we can continue to use the existing execbuf uABI of
>>> specifying a single index; only now it doesn't automagically map onto
>>> the engines, it uses the user defined engine map from the context.
>>>
>>> The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
>>> execbuf. Its use will be revealed in the next patch.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    drivers/gpu/drm/i915/i915_gem_context.c    | 88 ++++++++++++++++++++++
>>>    drivers/gpu/drm/i915/i915_gem_context.h    |  4 +
>>>    drivers/gpu/drm/i915/i915_gem_execbuffer.c | 22 ++++--
>>>    include/uapi/drm/i915_drm.h                | 27 +++++++
>>>    4 files changed, 135 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>>> index a8570a07b3b7..313471253f51 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>> @@ -90,6 +90,7 @@
>>>    #include <drm/i915_drm.h>
>>>    #include "i915_drv.h"
>>>    #include "i915_trace.h"
>>> +#include "i915_user_extensions.h"
>>>    #include "intel_workarounds.h"
>>>    
>>>    #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
>>> @@ -223,6 +224,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>>>                        ce->ops->destroy(ce);
>>>        }
>>>    
>>> +     kfree(ctx->engines);
>>> +
>>>        if (ctx->timeline)
>>>                i915_timeline_put(ctx->timeline);
>>>    
>>> @@ -948,6 +951,87 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>>>        return ret;
>>>    }
>>>    
>>> +struct set_engines {
>>> +     struct i915_gem_context *ctx;
>>> +     struct intel_engine_cs **engines;
>>> +     unsigned int nengine;
>>> +};
>>> +
>>> +static const i915_user_extension_fn set_engines__extensions[] = {
>>> +};
>>
>> This is OK unless someone one day gets the desire to make the extension
>> namespace sparse. I was thinking on how to put some warnings in the code
>> to warn about it, but I think that's for later. Namespace is also per
>> parent ioctl, another thing which would perhaps need enforcing.
> 
> Unless you intend to go extremely sparse, I'd just leave it with the
> usual [NAME1] = func1, and skipping over NULLs. We can always add
> alternatives if need be (I'd actually been meaning to convert execbuf3
> over to this scheme to flesh it out a bit more).

It is fine for now, I was just thinking out loud how to protect us 
against the table accidentally growing huge one day.

>>> +static int set_engines(struct i915_gem_context *ctx,
>>> +                    struct drm_i915_gem_context_param *args)
>>> +{
>>> +     struct i915_context_param_engines __user *user;
>>> +     struct set_engines set = {
>>> +             .ctx = ctx,
>>> +             .engines = NULL,
>>> +             .nengine = -1,
>>> +     };
>>> +     unsigned int n;
>>> +     u64 size, extensions;
>>
>> Size is u32 in the uAPI, so either that or unsigned int would do here.
>>
>>> +     int err;
>>> +
>>> +     user = u64_to_user_ptr(args->value);
>>> +     size = args->size;
>>> +     if (!size)
>>
>> args->size = sizeof(*user);
>>
>> ... if you want to allow size probing via set param, or we add a
>> get_param (too much work for nothing?) and only allow probing from there?
> 
> It's a variable array, so a little trickier.

Indeed!

> 
>>> +             goto out;
>>> +
>>> +     if (size < sizeof(struct i915_context_param_engines))
>>> +             return -EINVAL;
>>> +
>>> +     size -= sizeof(struct i915_context_param_engines);
>>> +     if (size % sizeof(*user->class_instance))
>>> +             return -EINVAL;
>>> +
>>> +     set.nengine = size / sizeof(*user->class_instance);
>>> +     if (set.nengine == 0 || set.nengine >= I915_EXEC_RING_MASK)
>>> +             return -EINVAL;
>>> +
>>> +     set.engines = kmalloc_array(set.nengine + 1,
>>> +                                 sizeof(*set.engines),
>>> +                                 GFP_KERNEL);
>>> +     if (!set.engines)
>>> +             return -ENOMEM;
>>> +
>>> +     set.engines[0] = NULL;
>>
>> /* Reserve the I915_EXEC_DEFAULT slot. */
>>
>>> +     for (n = 0; n < set.nengine; n++) {
>>> +             u32 class, instance;
>>
>> I will later recommend we use u16 for class/instance. I think we settled
>> for that in the meantime.
>>
>>> +
>>> +             if (get_user(class, &user->class_instance[n].class) ||
>>> +                 get_user(instance, &user->class_instance[n].instance)) {
>>> +                     kfree(set.engines);
>>> +                     return -EFAULT;
>>> +             }
>>> +
>>> +             set.engines[n + 1] =
>>> +                     intel_engine_lookup_user(ctx->i915, class, instance);
>>> +             if (!set.engines[n + 1]) {
>>> +                     kfree(set.engines);
>>> +                     return -ENOENT;
>>> +             }
>>> +     }
>>> +
>>> +     err = -EFAULT;
>>> +     if (!get_user(extensions, &user->extensions))
>>> +             err = i915_user_extensions(u64_to_user_ptr(extensions),
>>> +                                        set_engines__extensions,
>>> +                                        ARRAY_SIZE(set_engines__extensions),
>>> +                                        &set);
>>> +     if (err) {
>>
> >> Do we need a user extensions destructor table for a more
>> compartmentalized design? This actually applies to the
>> i915_user_extensions() implementation.. it would need to take two
>> function tables I think and unwind until the one that failed.
> 
> Yeah, dtor for unwind is intriguing.
> 
>>> +             kfree(set.engines);
>>> +             return err;
>>> +     }
>>> +
>>> +out:
>>> +     kfree(ctx->engines);
>>> +     ctx->engines = set.engines;
>>> +     ctx->nengine = set.nengine + 1;
>>> +
>>> +     return 0;
>>> +}
>>> +
>>>    int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>>>                                    struct drm_file *file)
>>>    {
>>> @@ -1011,6 +1095,10 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>>>                }
>>>                break;
>>>    
>>> +     case I915_CONTEXT_PARAM_ENGINES:
>>> +             ret = set_engines(ctx, args);
>>> +             break;
>>> +
>>>        default:
>>>                ret = -EINVAL;
>>>                break;
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
>>> index 770043449ba6..9f89119a6566 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.h
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
>>> @@ -67,6 +67,8 @@ struct i915_gem_context {
>>>        /** file_priv: owning file descriptor */
>>>        struct drm_i915_file_private *file_priv;
>>>    
>>> +     struct intel_engine_cs **engines;
>>> +
>>>        struct i915_timeline *timeline;
>>>    
>>>        /**
>>> @@ -135,6 +137,8 @@ struct i915_gem_context {
>>>    #define CONTEXT_CLOSED                      1
>>>    #define CONTEXT_FORCE_SINGLE_SUBMISSION     2
>>>    
>>> +     unsigned int nengine;
>>
>> Why put related fields apart?
> 
> Packing.

Hm it feels too far. It's one per execbuf lookup so not that hot. Move 
the engines and nengine down to after struct rcu_head? And add comments 
against the members of course! :)

> 
>>
>>> +
>>>        /**
>>>         * @hw_id: - unique identifier for the context
>>>         *
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> index 109486a6ef07..faedfdca875d 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> @@ -2014,13 +2014,23 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
>>>    };
>>>    
>>>    static struct intel_engine_cs *
>>> -eb_select_engine(struct drm_i915_private *dev_priv,
>>> +eb_select_engine(struct i915_execbuffer *eb,
>>>                 struct drm_file *file,
>>>                 struct drm_i915_gem_execbuffer2 *args)
>>>    {
>>>        unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
>>>        struct intel_engine_cs *engine;
>>>    
>>> +     if (eb->ctx->engines) {
>>> +             if (user_ring_id >= eb->ctx->nengine) {
>>> +                     DRM_DEBUG("execbuf with unknown ring: %u\n",
>>> +                               user_ring_id);
>>> +                     return NULL;
>>> +             }
>>> +
>>> +             return eb->ctx->engines[user_ring_id];
>>> +     }
>>> +
>>>        if (user_ring_id > I915_USER_RINGS) {
>>>                DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
>>>                return NULL;
>>> @@ -2033,11 +2043,11 @@ eb_select_engine(struct drm_i915_private *dev_priv,
>>>                return NULL;
>>>        }
>>>    
>>> -     if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
>>> +     if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(eb->i915)) {
>>>                unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
>>>    
>>>                if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
>>> -                     bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
>>> +                     bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
>>>                } else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
>>>                           bsd_idx <= I915_EXEC_BSD_RING2) {
>>>                        bsd_idx >>= I915_EXEC_BSD_SHIFT;
>>> @@ -2048,9 +2058,9 @@ eb_select_engine(struct drm_i915_private *dev_priv,
>>>                        return NULL;
>>>                }
>>>    
>>> -             engine = dev_priv->engine[_VCS(bsd_idx)];
>>> +             engine = eb->i915->engine[_VCS(bsd_idx)];
>>>        } else {
>>> -             engine = dev_priv->engine[user_ring_map[user_ring_id]];
>>> +             engine = eb->i915->engine[user_ring_map[user_ring_id]];
>>>        }
>>>    
>>>        if (!engine) {
>>> @@ -2260,7 +2270,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>>        if (unlikely(err))
>>>                goto err_destroy;
>>>    
>>> -     eb.engine = eb_select_engine(eb.i915, file, args);
>>> +     eb.engine = eb_select_engine(&eb, file, args);
>>>        if (!eb.engine) {
>>>                err = -EINVAL;
>>>                goto err_engine;
>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>> index 65b0b84419f3..d41b4c673af4 100644
>>> --- a/include/uapi/drm/i915_drm.h
>>> +++ b/include/uapi/drm/i915_drm.h
>>> @@ -1508,9 +1508,36 @@ struct drm_i915_gem_context_param {
>>>    #define   I915_CONTEXT_MAX_USER_PRIORITY    1023 /* inclusive */
>>>    #define   I915_CONTEXT_DEFAULT_PRIORITY             0
>>>    #define   I915_CONTEXT_MIN_USER_PRIORITY    -1023 /* inclusive */
>>> +
>>> +/*
>>> + * I915_CONTEXT_PARAM_ENGINES:
>>> + *
>>> + * Bind this context to operate on this subset of available engines. Henceforth,
>>> + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
>>> + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
>>> + * and upwards. The array created is offset by 1, such that by default
>>> + * I915_EXEC_DEFAULT is left empty, to be filled in as directed. Slots 1...N
>>> + * are then filled in using the specified (class, instance).
>>
>> Should we allow specifying the default index? I am wondering if that
>> would make life easier to any userspace.
> 
> Could do, certainly gets rid of the clumsy nengines+1, but the skip for
> unknown class may make up for the clumsiness :)

Yeah I don't know. Userspace has to create a context at which point 
there isn't much to gain with the default index capability. So it's 
probably pointless.

I guess what I am thinking of is how we'll make IGT use new engines 
without too much churn, since we dropped the plain class/instance 
execbuf. But the default index idea doesn't help that anyway.
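
For illustration, a rough userspace sketch of driving this uAPI (the
struct layout is inferred from the quoted set_engines(), the field widths
are an assumption rather than a settled ABI, and fd/ctx_id are assumed to
come from context creation; drmIoctl() is libdrm's):

	/* Bind the context to two video engines: execbuf index 1 -> VCS0,
	 * index 2 -> VCS1, index 0 (I915_EXEC_DEFAULT) stays empty. */
	struct {
		__u64 extensions;
		struct { __u32 class; __u32 instance; } class_instance[2];
	} map = {
		.class_instance = {
			{ I915_ENGINE_CLASS_VIDEO, 0 },
			{ I915_ENGINE_CLASS_VIDEO, 1 },
		},
	};
	struct drm_i915_gem_context_param arg = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof(map),
		.value = (uintptr_t)&map,
	};

	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);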

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 39/40] drm/i915/execlists: Refactor out can_merge_rq()
  2018-10-01  8:18         ` Chris Wilson
@ 2018-10-01 10:18           ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-10-01 10:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 01/10/2018 09:18, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-10-01 09:14:52)
>>
>> On 28/09/2018 21:11, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-09-27 12:32:54)
>>>>
>>>> On 19/09/2018 20:55, Chris Wilson wrote:
>>>>> In the next patch, we add another user that wants to check whether
>>>>> requests can be merged into a single HW execution, and in the future we
>>>>> want to add more conditions under which requests from the same context
>>>>> cannot be merged. In preparation, extract out can_merge_rq().
>>>>>
>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> ---
>>>>>     drivers/gpu/drm/i915/intel_lrc.c | 12 ++++++++++--
>>>>>     1 file changed, 10 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>>>> index be7dbdd7fc2c..679ce521be16 100644
>>>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>>>> @@ -478,6 +478,15 @@ static bool can_merge_ctx(const struct intel_context *prev,
>>>>>         return true;
>>>>>     }
>>>>>     
>>>>> +static bool can_merge_rq(const struct i915_request *prev,
>>>>> +                      const struct i915_request *next)
>>>>> +{
>>>>> +     if (!can_merge_ctx(prev->hw_context, next->hw_context))
>>>>> +             return false;
>>>>> +
>>>>> +     return true;
>>>>
>>>> Not just return can_merge_ctx(..) ?
>>>
>>> Makes you think I have plans to add more tests here... If you look in
>>> the archives you can even have a sneak peek ;)
>>
>> I don't see anything in Virtual Engine / Frame Split work, where to look?
> 
> Per-context frequency control to allow the rps range to change between
> requests.

Who knows when that will be at the head of the queue, but OK, it's a 
style issue only so:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 38/40] drm/i915/execlists: Flush the CS events before unpinning
  2018-09-19 19:55 ` [PATCH 38/40] drm/i915/execlists: Flush the CS events before unpinning Chris Wilson
@ 2018-10-01 10:51   ` Tvrtko Ursulin
  2018-10-01 11:06     ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-10-01 10:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> Inside the execlists submission tasklet, we often make the mistake of
> assuming that everything beneath the request is available for use.
> However, the submission and the request live on two separate timelines,
> and the request contents may be freed from an early retirement before we
> have had a chance to run the submission tasklet (think ksoftirqd). To
> safeguard ourselves against any mistakes, flush the tasklet before we
> unpin the context if execlists still has a reference to this context.
> 
> References: 60367132a214 ("drm/i915: Avoid use-after-free of ctx in request tracepoints")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.h |  1 +
>   drivers/gpu/drm/i915/intel_lrc.c        | 32 ++++++++++++++++++++++++-
>   2 files changed, 32 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 9f89119a6566..1fd71dfdfa62 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -170,6 +170,7 @@ struct i915_gem_context {
>   	/** engine: per-engine logical HW state */
>   	struct intel_context {
>   		struct i915_gem_context *gem_context;
> +		struct intel_engine_cs *active;
>   		struct i915_vma *state;
>   		struct intel_ring *ring;
>   		u32 *lrc_reg_state;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 48a2bca7fec3..be7dbdd7fc2c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -282,6 +282,8 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   		__i915_request_unsubmit(rq);
>   		unwind_wa_tail(rq);
>   
> +		GEM_BUG_ON(rq->hw_context->active);
> +
>   		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
>   		if (rq_prio(rq) != prio) {
>   			prio = rq_prio(rq);
> @@ -427,8 +429,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
>   		rq = port_unpack(&port[n], &count);
>   		if (rq) {
>   			GEM_BUG_ON(count > !n);
> -			if (!count++)
> +			if (!count++) {
> +				GEM_BUG_ON(rq->hw_context->active);
>   				execlists_context_schedule_in(rq);
> +				rq->hw_context->active = engine;

Put it in execlists_context_schedule_in/out?
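
i.e. something along these lines (a sketch of the suggestion only, with
the helpers' existing bookkeeping elided):

static inline void
execlists_context_schedule_in(struct i915_request *rq)
{
	/* ... existing CONTEXT_SCHEDULE_IN bookkeeping ... */
	rq->hw_context->active = rq->engine;
}

static inline void
execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
{
	rq->hw_context->active = NULL;
	/* ... existing CONTEXT_SCHEDULE_OUT bookkeeping ... */
}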

Why does it have to be the engine pointer and not just a boolean? 
Because we don't have an engine backpointer in hw_context? Should we add 
it? I think I had occasionally wished we had it.. maybe too much work to 
evaluate what function prototypes we could clean up with it and whether 
it would be an overall gain.

> +			}
>   			port_set(&port[n], port_pack(rq, count));
>   			desc = execlists_update_context(rq);
>   			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
> @@ -734,6 +739,8 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
>   			  intel_engine_get_seqno(rq->engine));
>   
>   		GEM_BUG_ON(!execlists->active);
> +
> +		rq->hw_context->active = NULL;
>   		execlists_context_schedule_out(rq,
>   					       i915_request_completed(rq) ?
>   					       INTEL_CONTEXT_SCHEDULE_OUT :
> @@ -971,6 +978,7 @@ static void process_csb(struct intel_engine_cs *engine)
>   			 */
>   			GEM_BUG_ON(!i915_request_completed(rq));
>   
> +			rq->hw_context->active = NULL;
>   			execlists_context_schedule_out(rq,
>   						       INTEL_CONTEXT_SCHEDULE_OUT);
>   			i915_request_put(rq);
> @@ -1080,6 +1088,28 @@ static void execlists_context_destroy(struct intel_context *ce)
>   
>   static void execlists_context_unpin(struct intel_context *ce)
>   {
> +	struct intel_engine_cs *engine;
> +
> +	/*
> +	 * The tasklet may still be using a pointer to our state, via an
> +	 * old request. However, since we know we only unpin the context
> +	 * on retirement of the following request, we know that the last
> +	 * request referencing us will have had a completion CS interrupt.

Hm hm hm... my initial thought was that interrupts could be more delayed 
than breadcrumb writes (more than one context ahead), in which case the 
process_csb below could be premature and the assert would trigger. But I 
must be forgetting something since that would also mean we would 
prematurely unpin the context. So I must be forgetting something..

Regards,

Tvrtko

> +	 * If we see that it is still active, it means that the tasklet hasn't
> +	 * had the chance to run yet; let it run before we teardown the
> +	 * reference it may use.
> +	 */
> +	engine = READ_ONCE(ce->active);
> +	if (unlikely(engine)) {
> +		unsigned long flags;
> +
> +		spin_lock_irqsave(&engine->timeline.lock, flags);
> +		process_csb(engine);
> +		spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +
> +		GEM_BUG_ON(READ_ONCE(ce->active));
> +	}
> +
>   	i915_gem_context_unpin_hw_id(ce->gem_context);
>   
>   	intel_ring_unpin(ce->ring);
> 

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 38/40] drm/i915/execlists: Flush the CS events before unpinning
  2018-10-01 10:51   ` Tvrtko Ursulin
@ 2018-10-01 11:06     ` Chris Wilson
  2018-10-01 13:15       ` Tvrtko Ursulin
  0 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-10-01 11:06 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-10-01 11:51:23)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > Inside the execlists submission tasklet, we often make the mistake of
> > assuming that everything beneath the request is available for use.
> > However, the submission and the request live on two separate timelines,
> > and the request contents may be freed from an early retirement before we
> > have had a chance to run the submission tasklet (think ksoftirqd). To
> > safeguard ourselves against any mistakes, flush the tasklet before we
> > unpin the context if execlists still has a reference to this context.
> > 
> > References: 60367132a214 ("drm/i915: Avoid use-after-free of ctx in request tracepoints")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_context.h |  1 +
> >   drivers/gpu/drm/i915/intel_lrc.c        | 32 ++++++++++++++++++++++++-
> >   2 files changed, 32 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> > index 9f89119a6566..1fd71dfdfa62 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> > @@ -170,6 +170,7 @@ struct i915_gem_context {
> >       /** engine: per-engine logical HW state */
> >       struct intel_context {
> >               struct i915_gem_context *gem_context;
> > +             struct intel_engine_cs *active;
> >               struct i915_vma *state;
> >               struct intel_ring *ring;
> >               u32 *lrc_reg_state;
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index 48a2bca7fec3..be7dbdd7fc2c 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -282,6 +282,8 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
> >               __i915_request_unsubmit(rq);
> >               unwind_wa_tail(rq);
> >   
> > +             GEM_BUG_ON(rq->hw_context->active);
> > +
> >               GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> >               if (rq_prio(rq) != prio) {
> >                       prio = rq_prio(rq);
> > @@ -427,8 +429,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
> >               rq = port_unpack(&port[n], &count);
> >               if (rq) {
> >                       GEM_BUG_ON(count > !n);
> > -                     if (!count++)
> > +                     if (!count++) {
> > +                             GEM_BUG_ON(rq->hw_context->active);
> >                               execlists_context_schedule_in(rq);
> > +                             rq->hw_context->active = engine;
> 
> Put it in execlists_context_schedule_in/out?
> 
> Why does it have to be the engine pointer and not just a boolean? 
> Because we don't have an engine backpointer in hw_context? Should we add 
> it? I think I had occasionally wished we had it.. maybe too much work to 
> evaluate what function prototypes we could clean up with it and whether 
> it would be an overall gain.

It's a backpointer in hw_context for the purposes of being a backpointer
in hw_context. Its purpose is for:

	active = READ_ONCE(ve->context.active);
        if (active && active != engine) {
        	rb = rb_next(rb);
        	continue;
        }

> > +                     }
> >                       port_set(&port[n], port_pack(rq, count));
> >                       desc = execlists_update_context(rq);
> >                       GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
> > @@ -734,6 +739,8 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
> >                         intel_engine_get_seqno(rq->engine));
> >   
> >               GEM_BUG_ON(!execlists->active);
> > +
> > +             rq->hw_context->active = NULL;
> >               execlists_context_schedule_out(rq,
> >                                              i915_request_completed(rq) ?
> >                                              INTEL_CONTEXT_SCHEDULE_OUT :
> > @@ -971,6 +978,7 @@ static void process_csb(struct intel_engine_cs *engine)
> >                        */
> >                       GEM_BUG_ON(!i915_request_completed(rq));
> >   
> > +                     rq->hw_context->active = NULL;
> >                       execlists_context_schedule_out(rq,
> >                                                      INTEL_CONTEXT_SCHEDULE_OUT);
> >                       i915_request_put(rq);
> > @@ -1080,6 +1088,28 @@ static void execlists_context_destroy(struct intel_context *ce)
> >   
> >   static void execlists_context_unpin(struct intel_context *ce)
> >   {
> > +     struct intel_engine_cs *engine;
> > +
> > +     /*
> > +      * The tasklet may still be using a pointer to our state, via an
> > +      * old request. However, since we know we only unpin the context
> > +      * on retirement of the following request, we know that the last
> > +      * request referencing us will have had a completion CS interrupt.
> 
> Hm hm hm... my initial thought was that interrupts could be more delayed 
> than breadcrumb writes (more than one context ahead), in which case the 
> process_csb below could be premature and the assert would trigger. But I 
> must be forgetting something since that would also mean we would 
> prematurely unpin the context. So I must be forgetting something..

There will have been at least one CS event written because we have
switched contexts due to the unpin being one seqno behind. I have not
(yet) observed CS events being out of order with breadcrumb writes (and
we have very strict checking), so I am confident that a single process_csb()
is sufficient rather than a loop.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 106+ messages in thread

* Re: [PATCH 40/40] drm/i915: Load balancing across a virtual engine
  2018-09-19 19:55 ` [PATCH 40/40] drm/i915: Load balancing across a virtual engine Chris Wilson
@ 2018-10-01 11:37   ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-10-01 11:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/09/2018 20:55, Chris Wilson wrote:
> Having allowed the user to define a set of engines that they will want
> to only use, we go one step further and allow them to bind those engines
> into a single virtual instance. Submitting a batch to the virtual engine
> will then forward it to any one of the set in a manner as best to
> distribute load.  The virtual engine has a single timeline across all
> engines (it operates as a single queue), so it is not able to concurrently
> run batches across multiple engines by itself; that is left up to the user
> to submit multiple concurrent batches to multiple queues. Multiple users
> will be load balanced across the system.
> 
> The mechanism used for load balancing in this patch is a late greedy
> balancer. When a request is ready for execution, it is added to each
> engine's queue, and when an engine is ready for its next request it
> claims it from the virtual engine. The first engine to do so, wins, i.e.
> the request is executed at the earliest opportunity (idle moment) in the
> system.
> 
> As not all HW is created equal, the user is still able to skip the
> virtual engine and execute the batch on a specific engine, all within the
> same queue. It will then be executed in order on the correct engine,
> with execution on other virtual engines being moved away due to the load
> detection.

I think a few paragraphs describing the implementation are warranted. To 
help current and future readers. :)
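
As a toy model of the "late greedy" claim described above (an
illustration of the idea only, not the driver code; a pthread mutex
stands in for the siblings racing under the virtual engine's timeline
lock):

#include <stddef.h>
#include <pthread.h>

/* One pending request slot shared by all sibling engines. */
struct virtual_slot {
	pthread_mutex_t lock;	/* assume PTHREAD_MUTEX_INITIALIZER */
	void *request;		/* at most one ready request at a time */
};

/* Each sibling calls this at its next idle moment: the first to find the
 * slot non-empty wins the request, the others see NULL and carry on with
 * their own queues. */
static void *claim_request(struct virtual_slot *ve)
{
	void *rq;

	pthread_mutex_lock(&ve->lock);
	rq = ve->request;
	ve->request = NULL;
	pthread_mutex_unlock(&ve->lock);

	return rq;
}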


> A couple of areas for potential improvement left!
> 
> - The virtual engine always take priority over equal-priority tasks.
> Mostly broken up by applying FQ_CODEL rules for prioritising new clients,
> and hopefully the virtual and real engines are not then congested (i.e.
> all work is via virtual engines, or all work is to the real engine).
> 
> - We require the breadcrumb irq around every virtual engine request. For
> normal engines, we eliminate the need for the slow round trip via
> interrupt by using the submit fence and queueing in order. For virtual
> engines, we have to allow any job to transfer to a new ring, and cannot
> coalesce the submissions, so require the completion fence instead,
> forcing the persistent use of interrupts.
> 
> Other areas of improvement are more general, such as reducing lock
> contention, reducing dispatch overhead, looking at direct submission
> rather than bouncing around tasklets etc.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.h            |   5 +
>   drivers/gpu/drm/i915/i915_gem_context.c    |  85 +++-
>   drivers/gpu/drm/i915/i915_gem_context.h    |   1 +
>   drivers/gpu/drm/i915/i915_request.c        |   2 +-
>   drivers/gpu/drm/i915/i915_scheduler.c      |   1 +
>   drivers/gpu/drm/i915/i915_timeline.h       |   1 +
>   drivers/gpu/drm/i915/intel_engine_cs.c     |  48 +-
>   drivers/gpu/drm/i915/intel_lrc.c           | 500 ++++++++++++++++++++-
>   drivers/gpu/drm/i915/intel_lrc.h           |  16 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.h    |   8 +
>   drivers/gpu/drm/i915/selftests/intel_lrc.c | 216 +++++++++
>   include/uapi/drm/i915_drm.h                |  27 ++
>   12 files changed, 851 insertions(+), 59 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
> index 599c4f6eb1ea..69d137786700 100644
> --- a/drivers/gpu/drm/i915/i915_gem.h
> +++ b/drivers/gpu/drm/i915/i915_gem.h
> @@ -87,4 +87,9 @@ static inline bool __tasklet_is_enabled(const struct tasklet_struct *t)
>   	return !atomic_read(&t->count);
>   }
>   
> +static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
> +{
> +	return test_bit(TASKLET_STATE_SCHED, &t->state);
> +}
> +
>   #endif /* __I915_GEM_H__ */
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 313471253f51..0ee39772df03 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -91,6 +91,7 @@
>   #include "i915_drv.h"
>   #include "i915_trace.h"
>   #include "i915_user_extensions.h"
> +#include "intel_lrc.h"
>   #include "intel_workarounds.h"
>   
>   #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
> @@ -224,7 +225,10 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   			ce->ops->destroy(ce);
>   	}
>   
> -	kfree(ctx->engines);
> +	if (ctx->engines) {
> +		intel_virtual_engine_put(ctx->engines[0]);
> +		kfree(ctx->engines);
> +	}
>   
>   	if (ctx->timeline)
>   		i915_timeline_put(ctx->timeline);
> @@ -351,6 +355,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>   		struct intel_context *ce = &ctx->__engine[n];
>   
>   		ce->gem_context = ctx;
> +		ce->owner = dev_priv->engine[n];

So owner is actually the engine backpointer AFAICT, it doesn't change as 
the context moves around? Call it engine in that case?

>   	}
>   
>   	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> @@ -679,7 +684,8 @@ last_request_on_engine(struct i915_timeline *timeline,
>   
>   	rq = i915_gem_active_raw(&timeline->last_request,
>   				 &engine->i915->drm.struct_mutex);
> -	if (rq && rq->engine == engine) {
> +	if (rq &&
> +	    (rq->engine == engine || intel_engine_is_virtual(rq->engine))) {
>   		GEM_TRACE("last request for %s on engine %s: %llx:%d\n",
>   			  timeline->name, engine->name,
>   			  rq->fence.context, rq->fence.seqno);
> @@ -951,13 +957,82 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   	return ret;
>   }
>   
> +static int check_user_mbz64(u64 __user *user)
> +{
> +	u64 mbz;
> +
> +	if (get_user(mbz, user))
> +		return -EFAULT;
> +
> +	return mbz ? -EINVAL : 0;
> +}
> +
>   struct set_engines {
>   	struct i915_gem_context *ctx;
>   	struct intel_engine_cs **engines;
>   	unsigned int nengine;
>   };
>   
> +static int set_engines__load_balance(struct i915_user_extension __user *base,
> +				     void *data)
> +
> +{
> +	struct i915_context_engines_load_balance __user *ext =
> +		container_of(base, typeof(*ext) __user, base);
> +	const struct set_engines *set = data;
> +	struct intel_engine_cs *ve;
> +	unsigned int n;
> +	u64 mask;
> +	int err;
> +
> +	if (set->engines[0])
> +		return -EEXIST;
> +
> +	if (!HAS_EXECLISTS(set->ctx->i915))
> +		return -ENODEV;
> +
> +	if (USES_GUC_SUBMISSION(set->ctx->i915))
> +		return -ENODEV;
> +
> +	if (!set->ctx->timeline)
> +		return -EINVAL;
> +
> +	err = check_user_mbz64(&ext->flags);
> +	if (err)
> +		return err;
> +
> +	for (n = 0; n < ARRAY_SIZE(ext->mbz); n++) {
> +		err = check_user_mbz64(&ext->mbz[n]);
> +		if (err)
> +			return err;
> +	}
> +
> +	if (get_user(mask, &ext->engines_mask))
> +		return -EFAULT;
> +
> +	if (mask == ~0ull) {
> +		ve = intel_execlists_create_virtual(set->ctx,
> +						    set->engines + 1,
> +						    set->nengine);

You think it is worth special casing this instead of just letting the 
branch below handle it?

> +	} else {
> +		struct intel_engine_cs *stack[64];

This is a bit chunky stack allocation. We could limit to 
I915_NUM_ENGINES, if also limiting the set engines uAPI like that, to 
disallow specifying the same engine multiple times?

> +		int bit;
> +
> +		n = 0;
> +		for_each_set_bit(bit, (unsigned long *)&mask, set->nengine)
> +			stack[n++] = set->engines[bit + 1];
> +
> +		ve = intel_execlists_create_virtual(set->ctx, stack, n);
> +	}
> +	if (IS_ERR(ve))
> +		return PTR_ERR(ve);
> +
> +	set->engines[0] = ve;
> +	return 0;
> +}
> +
>   static const i915_user_extension_fn set_engines__extensions[] = {
> +	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
>   };
>   
>   static int set_engines(struct i915_gem_context *ctx,
> @@ -1020,12 +1095,16 @@ static int set_engines(struct i915_gem_context *ctx,
>   					   ARRAY_SIZE(set_engines__extensions),
>   					   &set);
>   	if (err) {
> +		intel_virtual_engine_put(set.engines[0]);
>   		kfree(set.engines);
>   		return err;
>   	}
>   
>   out:
> -	kfree(ctx->engines);
> +	if (ctx->engines) {
> +		intel_virtual_engine_put(ctx->engines[0]);
> +		kfree(ctx->engines);
> +	}
>   	ctx->engines = set.engines;
>   	ctx->nengine = set.nengine + 1;
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 1fd71dfdfa62..6bab994cecba 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -171,6 +171,7 @@ struct i915_gem_context {
>   	struct intel_context {
>   		struct i915_gem_context *gem_context;
>   		struct intel_engine_cs *active;
> +		struct intel_engine_cs *owner;
>   		struct i915_vma *state;
>   		struct intel_ring *ring;
>   		u32 *lrc_reg_state;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 34d410cfa577..fda5a936bcc0 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1007,7 +1007,7 @@ void i915_request_add(struct i915_request *request)
>   	prev = i915_gem_active_raw(&timeline->last_request,
>   				   &request->i915->drm.struct_mutex);
>   	if (prev && !i915_request_completed(prev)) {
> -		if (prev->engine == engine)
> +		if (prev->engine == engine && !intel_engine_is_virtual(engine))
>   			i915_sw_fence_await_sw_fence(&request->submit,
>   						     &prev->submit,
>   						     &request->submitq);
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 1423088dceff..cfb0a0353e15 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -335,6 +335,7 @@ static void __i915_schedule(struct i915_request *rq,
>   
>   		node->attr.priority = prio;
>   		if (!list_empty(&node->link)) {
> +			GEM_BUG_ON(intel_engine_is_virtual(engine));
>   			if (last != engine) {
>   				pl = i915_sched_lookup_priolist(engine, prio);
>   				last = engine;
> diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
> index a2c2c3ab5fb0..b5321dc2d5a5 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_timeline.h
> @@ -39,6 +39,7 @@ struct i915_timeline {
>   	spinlock_t lock;
>   #define TIMELINE_CLIENT 0 /* default subclass */
>   #define TIMELINE_ENGINE 1
> +#define TIMELINE_VIRTUAL 2
>   
>   	/**
>   	 * List of breadcrumbs associated with GPU requests currently
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index b122d82465d0..3659f7e68348 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1444,13 +1444,11 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   {
>   	const int MAX_REQUESTS_TO_SHOW = 8;
>   	struct intel_breadcrumbs * const b = &engine->breadcrumbs;
> -	const struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct i915_gpu_error * const error = &engine->i915->gpu_error;
> -	struct i915_request *rq, *last;
> +	struct i915_request *rq;
>   	intel_wakeref_t wakeref;
>   	unsigned long flags;
>   	struct rb_node *rb;
> -	int count;
>   
>   	if (header) {
>   		va_list ap;
> @@ -1515,49 +1513,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   	}
>   
>   	local_irq_save(flags);
> -	spin_lock(&engine->timeline.lock);
> -
> -	last = NULL;
> -	count = 0;
> -	list_for_each_entry(rq, &engine->timeline.requests, link) {
> -		if (count++ < MAX_REQUESTS_TO_SHOW - 1)
> -			print_request(m, rq, "\t\tE ");
> -		else
> -			last = rq;
> -	}
> -	if (last) {
> -		if (count > MAX_REQUESTS_TO_SHOW) {
> -			drm_printf(m,
> -				   "\t\t...skipping %d executing requests...\n",
> -				   count - MAX_REQUESTS_TO_SHOW);
> -		}
> -		print_request(m, last, "\t\tE ");
> -	}
> -
> -	last = NULL;
> -	count = 0;
> -	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
> -	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> -		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> -		int i;
> -
> -		priolist_for_each_request(rq, p, i) {
> -			if (count++ < MAX_REQUESTS_TO_SHOW - 1)
> -				print_request(m, rq, "\t\tQ ");
> -			else
> -				last = rq;
> -		}
> -	}
> -	if (last) {
> -		if (count > MAX_REQUESTS_TO_SHOW) {
> -			drm_printf(m,
> -				   "\t\t...skipping %d queued requests...\n",
> -				   count - MAX_REQUESTS_TO_SHOW);
> -		}
> -		print_request(m, last, "\t\tQ ");
> -	}
>   
> -	spin_unlock(&engine->timeline.lock);
> +	intel_execlists_show_requests(engine, m,
> +				      print_request, MAX_REQUESTS_TO_SHOW);
>   
>   	spin_lock(&b->rb_lock);
>   	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 679ce521be16..999fb13fe0e0 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -164,6 +164,29 @@
>   #define WA_TAIL_DWORDS 2
>   #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
>   
> +struct virtual_engine {
> +	struct intel_engine_cs base;
> +
> +	struct intel_context context;

hw_context?

> +	struct kref kref;
> +
> +	struct intel_engine_cs *bound;
> +
> +	struct i915_request *request;

Not rq? ;)

> +	struct ve_node {
> +		struct rb_node rb;
> +		int prio;
> +	} nodes[I915_NUM_ENGINES];
> +
> +	unsigned int count;

sibling_count / num_siblings? Hm.. or are they actually children?

> +	struct intel_engine_cs *siblings[0];
> +};

For the whole struct - we will need comments on the members to give some 
hints on the implementation, lifecycles and similar.

> +
> +static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
> +{
> +	return container_of(engine, struct virtual_engine, base);
> +}
> +
>   static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>   					    struct intel_engine_cs *engine,
>   					    struct intel_context *ce);
> @@ -276,6 +299,8 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   	list_for_each_entry_safe_reverse(rq, rn,
>   					 &engine->timeline.requests,
>   					 link) {
> +		struct intel_engine_cs *owner;
> +
>   		if (i915_request_completed(rq))
>   			break;
>   
> @@ -284,14 +309,20 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   
>   		GEM_BUG_ON(rq->hw_context->active);
>   
> -		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> -		if (rq_prio(rq) != prio) {
> -			prio = rq_prio(rq);
> -			pl = i915_sched_lookup_priolist(engine, prio);
> -		}
> -		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
> +		owner = rq->hw_context->owner;
> +		if (likely(owner == engine)) {
> +			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> +			if (rq_prio(rq) != prio) {
> +				prio = rq_prio(rq);
> +				pl = i915_sched_lookup_priolist(engine, prio);
> +			}
> +			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>   
> -		list_add(&rq->sched.link, pl);
> +			list_add(&rq->sched.link, pl);
> +		} else {
> +			rq->engine = owner;
> +			owner->submit_request(rq);

Comment here.

> +		}
>   
>   		active = rq;
>   	}
> @@ -301,7 +332,8 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   	 * stream, so give it the equivalent small priority bump to prevent
>   	 * it being gazumped a second time by another peer.
>   	 */
> -	if (!(prio & I915_PRIORITY_NEWCLIENT)) {
> +	if (!(prio & I915_PRIORITY_NEWCLIENT) &&
> +	    active->hw_context->owner == engine) {

And comment paragraph above for this special consideration.

>   		prio |= I915_PRIORITY_NEWCLIENT;
>   		list_move_tail(&active->sched.link,
>   			       i915_sched_lookup_priolist(engine, prio));
> @@ -538,6 +570,50 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
>   						  execlists));
>   }
>   
> +static void virtual_update_register_offsets(u32 *regs,
> +					    struct intel_engine_cs *engine)
> +{
> +	u32 base = engine->mmio_base;
> +
> +	regs[CTX_CONTEXT_CONTROL] =
> +		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(engine));
> +	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
> +	regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
> +	regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
> +	regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
> +
> +	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
> +	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
> +	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
> +	regs[CTX_SECOND_BB_HEAD_U] =
> +		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
> +	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
> +	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
> +
> +	regs[CTX_CTX_TIMESTAMP] =
> +		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
> +	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 3));
> +	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 3));
> +	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 2));
> +	regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 2));
> +	regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 1));
> +	regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 1));
> +	regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 0));
> +	regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 0));
> +
> +	if (engine->class == RENDER_CLASS) {
> +		regs[CTX_RCS_INDIRECT_CTX] =
> +			i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
> +		regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
> +			i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
> +		regs[CTX_BB_PER_CTX_PTR] =
> +			i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
> +
> +		regs[CTX_R_PWR_CLK_STATE] =
> +			i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
> +	}
> +}
> +
>   static void execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -547,6 +623,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	struct i915_request *last = port_request(port);
>   	struct rb_node *rb;
>   	bool submit = false;
> +	int prio;
>   
>   	/*
>   	 * Hardware submission is through 2 ports. Conceptually each port
> @@ -570,6 +647,31 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	 * and context switches) submission.
>   	 */
>   
> +restart_virtual_engine:
> +	prio = execlists->queue_priority;
> +	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +		struct i915_request *rq = READ_ONCE(ve->request);
> +		struct intel_engine_cs *active;
> +
> +		if (!rq) {
> +			rb_erase_cached(rb, &execlists->virtual);
> +			RB_CLEAR_NODE(rb);
> +			rb = rb_first_cached(&execlists->virtual);
> +			continue;
> +		}
> +
> +		active = READ_ONCE(ve->context.active);
> +		if (active && active != engine) {
> +			rb = rb_next(rb);
> +			continue;
> +		}
> +
> +		prio = max(prio, rq_prio(rq));
> +		break;
> +	}
> +
>   	if (last) {
>   		/*
>   		 * Don't resubmit or switch until all outstanding
> @@ -591,7 +693,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
>   			return;
>   
> -		if (need_preempt(engine, last, execlists->queue_priority)) {
> +		if (need_preempt(engine, last, prio)) {
>   			inject_preempt_context(engine);
>   			return;
>   		}
> @@ -631,6 +733,67 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		last->tail = last->wa_tail;
>   	}
>   
> +	if (rb) { /* XXX virtual is always taking precedence */
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +		struct i915_request *rq;
> +
> +		spin_lock(&ve->base.timeline.lock);
> +
> +		rq = ve->request;
> +		if (unlikely(!rq)) { /* lost the race to a sibling */
> +			spin_unlock(&ve->base.timeline.lock);
> +			goto restart_virtual_engine;
> +		}
> +
> +		if (rq_prio(rq) >= prio) {
> +			if (last && !can_merge_rq(rq, last)) {
> +				spin_unlock(&ve->base.timeline.lock);
> +				return;
> +			}
> +
> +			GEM_BUG_ON(rq->engine != &ve->base);
> +			ve->request = NULL;
> +			ve->base.execlists.queue_priority = INT_MIN;
> +			rb_erase_cached(rb, &execlists->virtual);
> +			RB_CLEAR_NODE(rb);
> +
> +			GEM_BUG_ON(rq->hw_context != &ve->context);
> +			rq->engine = engine;
> +
> +			if (engine != ve->bound) {
> +				u32 *regs = ve->context.lrc_reg_state;
> +				unsigned int n;
> +
> +				GEM_BUG_ON(READ_ONCE(ve->context.active));
> +				virtual_update_register_offsets(regs, engine);
> +				ve->bound = engine;
> +
> +				/*
> +				 * Move the bound engine to the top of the list
> +				 * for future execution. We then kick this
> +				 * tasklet first before checking others, so that
> +				 * we preferentially reuse this set of bound
> +				 * registers.
> +				 */
> +				for (n = 1; n < ve->count; n++) {
> +					if (ve->siblings[n] == engine) {
> +						swap(ve->siblings[n],
> +						     ve->siblings[0]);
> +						break;
> +					}
> +				}
> +			}
> +
> +			__i915_request_submit(rq);
> +			trace_i915_request_in(rq, port_index(port, execlists));
> +			submit = true;
> +			last = rq;
> +		}
> +
> +		spin_unlock(&ve->base.timeline.lock);
> +	}
> +
>   	while ((rb = rb_first_cached(&execlists->queue))) {
>   		struct i915_priolist *p = to_priolist(rb);
>   		struct i915_request *rq, *rn;
> @@ -2735,6 +2898,325 @@ void intel_lr_context_resume(struct drm_i915_private *i915)
>   	}
>   }
>   
> +static void virtual_engine_free(struct kref *kref)
> +{
> +	struct virtual_engine *ve = container_of(kref, typeof(*ve), kref);
> +	unsigned int n;
> +
> +	GEM_BUG_ON(ve->request);
> +	GEM_BUG_ON(ve->context.active);
> +
> +	for (n = 0; n < ve->count; n++) {
> +		struct intel_engine_cs *sibling = ve->siblings[n];
> +		struct rb_node *node = &ve->nodes[sibling->id].rb;
> +
> +		if (RB_EMPTY_NODE(node))
> +			continue;
> +
> +		spin_lock_irq(&sibling->timeline.lock);
> +
> +		if (!RB_EMPTY_NODE(node))
> +			rb_erase_cached(node, &sibling->execlists.virtual);
> +
> +		spin_unlock_irq(&sibling->timeline.lock);
> +	}
> +	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
> +
> +	if (ve->context.state)
> +		execlists_context_destroy(&ve->context);
> +
> +	intel_engine_cleanup_scratch(&ve->base);
> +	i915_timeline_fini(&ve->base.timeline);
> +	kfree(ve);
> +}
> +
> +static void virtual_context_unpin(struct intel_context *ce)
> +{
> +	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> +
> +	execlists_context_unpin(ce);
> +
> +	kref_put(&ve->kref, virtual_engine_free);

Hm.. freed on context complete? How does this work.. reading on.

> +}
> +
> +static const struct intel_context_ops virtual_context_ops = {
> +	.unpin = virtual_context_unpin,
> +};
> +
> +static struct intel_context *
> +virtual_context_pin(struct intel_engine_cs *engine,
> +		    struct i915_gem_context *ctx)
> +{
> +	struct virtual_engine *ve = to_virtual_engine(engine);
> +	struct intel_context *ce = &ve->context;
> +
> +	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
> +
> +	if (likely(ce->pin_count++))
> +		return ce;
> +	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
> +
> +	kref_get(&ve->kref);
> +	ce->ops = &virtual_context_ops;
> +
> +	if (!ve->bound)
> +		ve->bound = ve->siblings[0];
> +
> +	return __execlists_context_pin(ve->bound, ctx, ce);
> +}
> +
> +static void virtual_submission_tasklet(unsigned long data)
> +{
> +	struct virtual_engine * const ve = (struct virtual_engine *)data;
> +	unsigned int n;
> +	int prio;
> +
> +	prio = READ_ONCE(ve->base.execlists.queue_priority);
> +	if (prio == INT_MIN)
> +		return;

What is this checking for?

> +
> +	local_irq_disable();
> +	for (n = 0; READ_ONCE(ve->request) && n < ve->count; n++) {
> +		struct intel_engine_cs *sibling = ve->siblings[n];
> +		struct ve_node * const node = &ve->nodes[sibling->id];
> +		struct rb_node **parent, *rb;
> +		bool first;
> +
> +		spin_lock(&sibling->timeline.lock);
> +
> +		if (!RB_EMPTY_NODE(&node->rb)) {
> +			first = rb_first_cached(&sibling->execlists.virtual) == &node->rb;
> +			if (prio == node->prio || (prio > node->prio && first))
> +				goto submit_engine;
> +
> +			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
> +		}
> +
> +		rb = NULL;
> +		first = true;
> +		parent = &sibling->execlists.virtual.rb_root.rb_node;
> +		while (*parent) {
> +			struct ve_node *other;
> +
> +			rb = *parent;
> +			other = rb_entry(rb, typeof(*other), rb);
> +			if (prio > other->prio) {
> +				parent = &rb->rb_left;
> +			} else {
> +				parent = &rb->rb_right;
> +				first = false;
> +			}
> +		}
> +
> +		rb_link_node(&node->rb, rb, parent);
> +		rb_insert_color_cached(&node->rb,
> +				       &sibling->execlists.virtual,
> +				       first);
> +
> +submit_engine:
> +		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
> +		node->prio = prio;
> +		if (first && prio > sibling->execlists.queue_priority)
> +			tasklet_hi_schedule(&sibling->execlists.tasklet);
> +
> +		spin_unlock(&sibling->timeline.lock);
> +	}
> +	local_irq_enable();
> +}
> +
> +static void virtual_submit_request(struct i915_request *request)
> +{
> +	struct virtual_engine *ve = to_virtual_engine(request->engine);
> +
> +	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
> +
> +	GEM_BUG_ON(ve->request);
> +	ve->base.execlists.queue_priority = rq_prio(request);
> +	WRITE_ONCE(ve->request, request);
> +
> +	tasklet_schedule(&ve->base.execlists.tasklet);
> +}
> +
> +struct intel_engine_cs *
> +intel_execlists_create_virtual(struct i915_gem_context *ctx,
> +			       struct intel_engine_cs **siblings,
> +			       unsigned int count)
> +{
> +	struct virtual_engine *ve;
> +	unsigned int n;
> +	int err;
> +
> +	if (!count)
> +		return ERR_PTR(-EINVAL);
> +
> +	ve = kzalloc(sizeof(*ve) + count * sizeof(*ve->siblings), GFP_KERNEL);
> +	if (!ve)
> +		return ERR_PTR(-ENOMEM);
> +
> +	kref_init(&ve->kref);
> +	ve->base.i915 = ctx->i915;
> +	ve->base.id = -1;
> +	ve->base.class = OTHER_CLASS;
> +	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
> +	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
> +
> +	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");

Add fence.context to the name?

> +	i915_timeline_init(ctx->i915, &ve->base.timeline, ve->base.name);
> +	lockdep_set_subclass(&ve->base.timeline.lock, TIMELINE_VIRTUAL);
> +
> +	err = intel_engine_create_scratch(&ve->base, 4096);
> +	if (err)
> +		goto err_put;
> +
> +	ve->context.gem_context = ctx;
> +	ve->context.owner = &ve->base;
> +
> +	ve->base.context_pin = virtual_context_pin;
> +	ve->base.request_alloc = execlists_request_alloc;
> +
> +	ve->base.schedule = i915_schedule;
> +	ve->base.submit_request = virtual_submit_request;
> +
> +	ve->base.execlists.queue_priority = INT_MIN;
> +	tasklet_init(&ve->base.execlists.tasklet,
> +		     virtual_submission_tasklet,
> +		     (unsigned long)ve);
> +
> +	ve->count = count;
> +	for (n = 0; n < count; n++) {
> +		struct intel_engine_cs *sibling = siblings[n];
> +
> +		ve->siblings[n] = sibling;
> +
> +		if (sibling->execlists.tasklet.func != execlists_submission_tasklet) {
> +			err = -ENODEV;

Will this fail context creation during reset? By design or accident? I 
think the !guc and has_execlists are checked for at the top level so that 
should be enough.

> +			ve->count = n;
> +			goto err_put;
> +		}
> +
> +		if (RB_EMPTY_NODE(&ve->nodes[sibling->id].rb)) {
> +			err = -EINVAL;
> +			ve->count = n;
> +			goto err_put;
> +		}
> +
> +		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
> +
> +		if (ve->base.class != OTHER_CLASS) {
> +			if (ve->base.class != sibling->class) {
> +				err = -EINVAL;

Just a consequence of context image compatibility or do you have 
implementation limitations as well?

> +				ve->count = n;
> +				goto err_put;
> +			}
> +			continue;
> +		}
> +
> +		ve->base.class = sibling->class;
> +		snprintf(ve->base.name, sizeof(ve->base.name),
> +			 "v%dx%d", ve->base.class, count);

Oh the previous snprintf was just using it as temporary storage?

Fence context might be usable here as well.

> +		ve->base.context_size = sibling->context_size;
> +
> +		ve->base.emit_bb_start = sibling->emit_bb_start;
> +		ve->base.emit_flush = sibling->emit_flush;
> +		ve->base.emit_breadcrumb = sibling->emit_breadcrumb;
> +		ve->base.emit_breadcrumb_sz = sibling->emit_breadcrumb_sz;
> +	}
> +
> +	return &ve->base;
> +
> +err_put:
> +	virtual_engine_free(&ve->kref);
> +	return ERR_PTR(err);
> +}
> +
> +void intel_virtual_engine_put(struct intel_engine_cs *engine)
> +{
> +	if (!engine)
> +		return;
> +
> +	kref_put(&to_virtual_engine(engine)->kref, virtual_engine_free);
> +}
> +
> +void intel_execlists_show_requests(struct intel_engine_cs *engine,
> +				   struct drm_printer *m,
> +				   void (*show_request)(struct drm_printer *m,
> +							struct i915_request *rq,
> +							const char *prefix),
> +				   int max)
> +{
> +	const struct intel_engine_execlists *execlists = &engine->execlists;
> +	struct i915_request *rq, *last;
> +	struct rb_node *rb;
> +	int count;
> +
> +	spin_lock(&engine->timeline.lock);
> +
> +	last = NULL;
> +	count = 0;
> +	list_for_each_entry(rq, &engine->timeline.requests, link) {
> +		if (count++ < max - 1)
> +			show_request(m, rq, "\t\tE ");
> +		else
> +			last = rq;
> +	}
> +	if (last) {
> +		if (count > max) {
> +			drm_printf(m,
> +				   "\t\t...skipping %d executing requests...\n",
> +				   count - max);
> +		}
> +		show_request(m, last, "\t\tE ");
> +	}
> +
> +	last = NULL;
> +	count = 0;
> +	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
> +	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> +		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> +		int i;
> +
> +		priolist_for_each_request(rq, p, i) {
> +			if (count++ < max - 1)
> +				show_request(m, rq, "\t\tQ ");
> +			else
> +				last = rq;
> +		}
> +	}
> +	if (last) {
> +		if (count > max) {
> +			drm_printf(m,
> +				   "\t\t...skipping %d queued requests...\n",
> +				   count - max);
> +		}
> +		show_request(m, last, "\t\tQ ");
> +	}
> +
> +	last = NULL;
> +	count = 0;
> +	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +		struct i915_request *rq = READ_ONCE(ve->request);
> +
> +		if (rq) {
> +			if (count++ < max - 1)
> +				show_request(m, rq, "\t\tV ");
> +			else
> +				last = rq;
> +		}
> +	}
> +	if (last) {
> +		if (count > max) {
> +			drm_printf(m,
> +				   "\t\t...skipping %d virtual requests...\n",
> +				   count - max);
> +		}
> +		show_request(m, last, "\t\tV ");
> +	}
> +
> +	spin_unlock(&engine->timeline.lock);
> +}
> +
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>   #include "selftests/intel_lrc.c"
>   #endif
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index f5a5502ecf70..1d96d3bbca3c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -97,11 +97,25 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
>    */
>   #define LRC_HEADER_PAGES LRC_PPHWSP_PN
>   
> +struct drm_printer;
> +
>   struct drm_i915_private;
>   struct i915_gem_context;
>   
>   void intel_lr_context_resume(struct drm_i915_private *dev_priv);
> -
>   void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
>   
> +struct intel_engine_cs *
> +intel_execlists_create_virtual(struct i915_gem_context *ctx,
> +			       struct intel_engine_cs **siblings,
> +			       unsigned int count);
> +void intel_virtual_engine_put(struct intel_engine_cs *engine);
> +
> +void intel_execlists_show_requests(struct intel_engine_cs *engine,
> +				   struct drm_printer *m,
> +				   void (*show_request)(struct drm_printer *m,
> +							struct i915_request *rq,
> +							const char *prefix),
> +				   int max);
> +
>   #endif /* _INTEL_LRC_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index f6ec48a75a69..ec772d836f49 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -311,6 +311,7 @@ struct intel_engine_execlists {
>   	 * @queue: queue of requests, in priority lists
>   	 */
>   	struct rb_root_cached queue;
> +	struct rb_root_cached virtual;
>   
>   	/**
>   	 * @csb_read: control register for Context Switch buffer
> @@ -598,6 +599,7 @@ struct intel_engine_cs {
>   #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
>   #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
>   #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
> +#define I915_ENGINE_IS_VIRTUAL       BIT(3)
>   	unsigned int flags;
>   
>   	/*
> @@ -680,6 +682,12 @@ static inline bool __execlists_need_preempt(int prio, int last)
>   	return prio > max(0, last);
>   }
>   
> +static inline bool
> +intel_engine_is_virtual(const struct intel_engine_cs *engine)
> +{
> +	return engine->flags & I915_ENGINE_IS_VIRTUAL;
> +}
> +
>   static inline void
>   execlists_set_active(struct intel_engine_execlists *execlists,
>   		     unsigned int bit)
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index b6e11b58b3c9..0fb7a6f9aedb 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -4,6 +4,8 @@
>    * Copyright © 2018 Intel Corporation
>    */
>   
> +#include <linux/prime_numbers.h>
> +
>   #include "../i915_reset.h"
>   
>   #include "../i915_selftest.h"
> @@ -581,6 +583,219 @@ static int live_preempt_hang(void *arg)
>   	return err;
>   }
>   
> +struct live_test {
> +	struct drm_i915_private *i915;
> +	const char *func;
> +	const char *name;
> +
> +	unsigned int reset_count;
> +	bool wedge;
> +};
> +
> +static int begin_live_test(struct live_test *t,
> +			   struct drm_i915_private *i915,
> +			   const char *func,
> +			   const char *name)
> +{
> +	t->i915 = i915;
> +	t->func = func;
> +	t->name = name;
> +
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		return -EIO;
> +
> +	i915->gpu_error.missed_irq_rings = 0;
> +	t->reset_count = i915_reset_count(&i915->gpu_error);
> +
> +	return 0;
> +}
> +
> +static int end_live_test(struct live_test *t)
> +{
> +	struct drm_i915_private *i915 = t->i915;
> +
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		return -EIO;
> +
> +	if (t->reset_count != i915_reset_count(&i915->gpu_error)) {
> +		pr_err("%s(%s): GPU was reset %d times!\n",
> +		       t->func, t->name,
> +		       i915_reset_count(&i915->gpu_error) - t->reset_count);
> +		return -EIO;
> +	}
> +
> +	if (i915->gpu_error.missed_irq_rings) {
> +		pr_err("%s(%s): Missed interrupts on engines %lx\n",
> +		       t->func, t->name, i915->gpu_error.missed_irq_rings);
> +		return -EIO;
> +	}
> +
> +	return 0;
> +}
> +
> +static int nop_virtual_engine(struct drm_i915_private *i915,
> +			      struct intel_engine_cs **siblings,
> +			      unsigned int nsibling,
> +			      unsigned int nctx,
> +			      unsigned int flags)
> +#define CHAIN BIT(0)
> +{
> +	IGT_TIMEOUT(end_time);
> +	struct i915_request *request[16];
> +	struct i915_gem_context *ctx[16];
> +	struct intel_engine_cs *ve[16];
> +	unsigned long n, prime, nc;
> +	ktime_t times[2] = {};
> +	struct live_test t;
> +	int err;
> +
> +	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
> +
> +	for (n = 0; n < nctx; n++) {
> +		ctx[n] = kernel_context(i915);
> +		if (!ctx[n])
> +			return -ENOMEM;
> +
> +		ve[n] = intel_execlists_create_virtual(ctx[n],
> +						       siblings, nsibling);
> +		if (IS_ERR(ve[n]))
> +			return PTR_ERR(ve[n]);
> +	}
> +
> +	err = begin_live_test(&t, i915, __func__, ve[0]->name);
> +	if (err)
> +		goto out;
> +
> +	for_each_prime_number_from(prime, 1, 8192) {
> +		times[1] = ktime_get_raw();
> +
> +		if (flags & CHAIN) {
> +			for (nc = 0; nc < nctx; nc++) {
> +				for (n = 0; n < prime; n++) {
> +					request[nc] =
> +						i915_request_alloc(ve[nc], ctx[nc]);
> +					if (IS_ERR(request[nc])) {
> +						err = PTR_ERR(request[nc]);
> +						goto out;
> +					}
> +
> +					i915_request_add(request[nc]);
> +				}
> +			}
> +		} else {
> +			for (n = 0; n < prime; n++) {
> +				for (nc = 0; nc < nctx; nc++) {
> +					request[nc] =
> +						i915_request_alloc(ve[nc], ctx[nc]);
> +					if (IS_ERR(request[nc])) {
> +						err = PTR_ERR(request[nc]);
> +						goto out;
> +					}
> +
> +					i915_request_add(request[nc]);
> +				}
> +			}
> +		}
> +
> +		for (nc = 0; nc < nctx; nc++) {
> +			if (i915_request_wait(request[nc],
> +					      I915_WAIT_LOCKED,
> +					      HZ / 10) < 0) {
> +				pr_err("%s(%s): wait for %llx:%d timed out\n",
> +				       __func__, ve[0]->name,
> +				       request[nc]->fence.context,
> +				       request[nc]->fence.seqno);
> +
> +				GEM_TRACE("%s(%s) failed at request %llx:%d\n",
> +					  __func__, ve[0]->name,
> +					  request[nc]->fence.context,
> +					  request[nc]->fence.seqno);
> +				GEM_TRACE_DUMP();
> +				i915_gem_set_wedged(i915);
> +				break;
> +			}
> +		}
> +
> +		times[1] = ktime_sub(ktime_get_raw(), times[1]);
> +		if (prime == 1)
> +			times[0] = times[1];
> +
> +		if (__igt_timeout(end_time, NULL))
> +			break;
> +	}
> +
> +	err = end_live_test(&t);
> +	if (err)
> +		goto out;
> +
> +	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
> +		nctx, ve[0]->name, ktime_to_ns(times[0]),
> +		prime, div64_u64(ktime_to_ns(times[1]), prime));
> +
> +out:
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		err = -EIO;
> +
> +	for (nc = 0; nc < nctx; nc++) {
> +		intel_virtual_engine_put(ve[nc]);
> +		kernel_context_close(ctx[nc]);
> +	}
> +	return err;
> +}
> +
> +static int live_virtual_engine(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	unsigned int class, inst;
> +	int err = -ENODEV;
> +
> +	if (USES_GUC_SUBMISSION(i915))
> +		return 0;
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +
> +	for_each_engine(engine, i915, id) {
> +		err = nop_virtual_engine(i915, &engine, 1, 1, 0);
> +		if (err) {
> +			pr_err("Failed to wrap engine %s: err=%d\n",
> +			       engine->name, err);
> +			goto out_unlock;
> +		}
> +	}
> +
> +	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
> +		int nsibling, n;
> +
> +		nsibling = 0;
> +		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> +			if (!i915->engine_class[class][inst])
> +				break;
> +
> +			siblings[nsibling++] = i915->engine_class[class][inst];
> +		}
> +		if (nsibling < 2)
> +			continue;
> +
> +		for (n = 1; n <= nsibling + 1; n++) {
> +			err = nop_virtual_engine(i915, siblings, nsibling,
> +						 n, 0);
> +			if (err)
> +				goto out_unlock;
> +		}
> +
> +		err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
> +		if (err)
> +			goto out_unlock;
> +	}
> +
> +out_unlock:
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +}
> +
>   int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
> @@ -588,6 +803,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_preempt),
>   		SUBTEST(live_late_preempt),
>   		SUBTEST(live_preempt_hang),
> +		SUBTEST(live_virtual_engine),
>   	};
>   
>   	if (!HAS_EXECLISTS(i915))
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index d41b4c673af4..6dd2a89410e8 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1523,14 +1523,41 @@ struct drm_i915_gem_context_param {
>    * default settings.
>    *
>    * See struct i915_context_param_engines.
> + *
> + * Extensions:
> + *   i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
>    */
>   #define I915_CONTEXT_PARAM_ENGINES	0x7
>   
>   	__u64 value;
>   };
>   
> +/*
> + * i915_context_engines_load_balance:
> + *
> + * Enable load balancing across this set of engines.
> + *
> + * Into the I915_EXEC_DEFAULT slot, a virtual engine is created that when
> + * used will proxy the execbuffer request onto one of the set of engines
> + * in such a way as to distribute the load evenly across the set.
> + *
> + * The set of engines must be compatible (e.g. the same HW class) as they
> + * will share the same logical GPU context and ring.
> + *
> + * The context must be defined to use a single timeline for all engines.
> + */
> +struct i915_context_engines_load_balance {
> +	struct i915_user_extension base;
> +
> +	__u64 flags; /* all undefined flags must be zero */
> +	__u64 engines_mask;
> +
> +	__u64 mbz[4]; /* reserved for future use; must be zero */
> +};
> +
>   struct i915_context_param_engines {
>   	__u64 extensions;
> +#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
>   
>   	struct {
>   		__u32 class; /* see enum drm_i915_gem_engine_class */
> 
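
To check my understanding of the new uapi, userspace would drive this 
roughly as below (not tested, written against the patched header; the 
{class, instance} layout of the engine map is my guess, since the hunk 
above is truncated after 'class', and the extension chaining follows 
the i915_user_extension patch earlier in the series):

	#include <stdint.h>
	#include <string.h>
	#include <xf86drm.h>
	#include <i915_drm.h>	/* the patched header proposed above */

	/*
	 * Sketch only: set up a context whose default engine
	 * load-balances across both VCS instances.
	 */
	static int set_vcs_load_balance(int fd, uint32_t ctx_id)
	{
		struct i915_context_engines_load_balance balance;
		struct {
			uint64_t extensions;
			struct { uint32_t class, instance; } engines[2];
		} map;
		struct drm_i915_gem_context_param param;

		memset(&balance, 0, sizeof(balance));
		balance.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
		balance.engines_mask = ~0ull; /* spread over the whole map */

		memset(&map, 0, sizeof(map));
		map.extensions = (uintptr_t)&balance;
		map.engines[0].class = I915_ENGINE_CLASS_VIDEO;
		map.engines[0].instance = 0;
		map.engines[1].class = I915_ENGINE_CLASS_VIDEO;
		map.engines[1].instance = 1;

		memset(&param, 0, sizeof(param));
		param.ctx_id = ctx_id;
		param.param = I915_CONTEXT_PARAM_ENGINES;
		param.size = sizeof(map);
		param.value = (uintptr_t)&map;

		return drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &param);
	}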

This was just a first pass reading. Will need more passes to figure it 
all out.

Regards,

Tvrtko

* Re: [PATCH 38/40] drm/i915/execlists: Flush the CS events before unpinning
  2018-10-01 11:06     ` Chris Wilson
@ 2018-10-01 13:15       ` Tvrtko Ursulin
  2018-10-01 13:26         ` Chris Wilson
  0 siblings, 1 reply; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-10-01 13:15 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 01/10/2018 12:06, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-10-01 11:51:23)
>>
>> On 19/09/2018 20:55, Chris Wilson wrote:
>>> Inside the execlists submission tasklet, we often make the mistake of
>>> assuming that everything beneath the request is available for use.
>>> However, the submission and the request live on two separate timelines,
>>> and the request contents may be freed from an early retirement before we
>>> have had a chance to run the submission tasklet (think ksoftirqd). To
>>> safeguard ourselves against any mistakes, flush the tasklet before we
>>> unpin the context if execlists still has a reference to this context.
>>>
>>> References: 60367132a214 ("drm/i915: Avoid use-after-free of ctx in request tracepoints")
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_gem_context.h |  1 +
>>>    drivers/gpu/drm/i915/intel_lrc.c        | 32 ++++++++++++++++++++++++-
>>>    2 files changed, 32 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
>>> index 9f89119a6566..1fd71dfdfa62 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.h
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
>>> @@ -170,6 +170,7 @@ struct i915_gem_context {
>>>        /** engine: per-engine logical HW state */
>>>        struct intel_context {
>>>                struct i915_gem_context *gem_context;
>>> +             struct intel_engine_cs *active;
>>>                struct i915_vma *state;
>>>                struct intel_ring *ring;
>>>                u32 *lrc_reg_state;
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 48a2bca7fec3..be7dbdd7fc2c 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -282,6 +282,8 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>>>                __i915_request_unsubmit(rq);
>>>                unwind_wa_tail(rq);
>>>    
>>> +             GEM_BUG_ON(rq->hw_context->active);
>>> +
>>>                GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
>>>                if (rq_prio(rq) != prio) {
>>>                        prio = rq_prio(rq);
>>> @@ -427,8 +429,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
>>>                rq = port_unpack(&port[n], &count);
>>>                if (rq) {
>>>                        GEM_BUG_ON(count > !n);
>>> -                     if (!count++)
>>> +                     if (!count++) {
>>> +                             GEM_BUG_ON(rq->hw_context->active);
>>>                                execlists_context_schedule_in(rq);
>>> +                             rq->hw_context->active = engine;
>>
>> Put it in execlists_context_schedule_in/out?
>>
>> Why does it have to be the engine pointer and not just a boolean?
>> Because we don't have an engine backpointer in hw_context? Should we add
>> it? I think I had occasionally wished we had it.. maybe too much work to
>> evaluate what function prototypes we could clean up with it and whether
>> it would be an overall gain.
> 
> It's a backpointer in hw_context for the purposes of being a backpointer
> in hw_context. Its purpose is for:
> 
> 	active = READ_ONCE(ve->context.active);
>          if (active && active != engine) {
>          	rb = rb_next(rb);
>          	continue;
>          }

Yeah, I was asking why not call it 'engine' and have the active flag as a 
boolean. I guess because ce->engine will soon no longer be permanent.

> 
>>> +                     }
>>>                        port_set(&port[n], port_pack(rq, count));
>>>                        desc = execlists_update_context(rq);
>>>                        GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
>>> @@ -734,6 +739,8 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
>>>                          intel_engine_get_seqno(rq->engine));
>>>    
>>>                GEM_BUG_ON(!execlists->active);
>>> +
>>> +             rq->hw_context->active = NULL;
>>>                execlists_context_schedule_out(rq,
>>>                                               i915_request_completed(rq) ?
>>>                                               INTEL_CONTEXT_SCHEDULE_OUT :
>>> @@ -971,6 +978,7 @@ static void process_csb(struct intel_engine_cs *engine)
>>>                         */
>>>                        GEM_BUG_ON(!i915_request_completed(rq));
>>>    
>>> +                     rq->hw_context->active = NULL;
>>>                        execlists_context_schedule_out(rq,
>>>                                                       INTEL_CONTEXT_SCHEDULE_OUT);
>>>                        i915_request_put(rq);
>>> @@ -1080,6 +1088,28 @@ static void execlists_context_destroy(struct intel_context *ce)
>>>    
>>>    static void execlists_context_unpin(struct intel_context *ce)
>>>    {
>>> +     struct intel_engine_cs *engine;
>>> +
>>> +     /*
>>> +      * The tasklet may still be using a pointer to our state, via an
>>> +      * old request. However, since we know we only unpin the context
>>> +      * on retirement of the following request, we know that the last
>>> +      * request referencing us will have had a completion CS interrupt.
>>
>> Hm hm hm... my initial thought was that interrupts could be more delayed
>> than breadcrumb writes (more than one context ahead), in which case the
>> process_csb below could be premature and the assert would trigger. But I
>> must be forgetting something since that would also mean we would
>> prematurely unpin the context. So I must be forgetting something..
> 
> There will have been at least one CS event written because we have
> switched contexts due to the unpin being one seqno behind. I have not
> (yet) observed CS events being out of order with breadcrumb writes (and
> we have very strict checking) so confident that a single process_csb()
> is required rather than a loop.

I did not mean out of order but just delayed.

Say port 0 & 1 are both submitted, we observe seqno 1 & 2 as complete, 
but the ctx complete irq/handler has been delayed. We go to unpin ctx0 
(port0) but ce->active hasn't been cleared, as there has been no ctx 
complete yet, so
the assert triggers. Impossible in your experience?

Regards,

Tvrtko



* Re: [PATCH 38/40] drm/i915/execlists: Flush the CS events before unpinning
  2018-10-01 13:15       ` Tvrtko Ursulin
@ 2018-10-01 13:26         ` Chris Wilson
  2018-10-01 14:03           ` Tvrtko Ursulin
  0 siblings, 1 reply; 106+ messages in thread
From: Chris Wilson @ 2018-10-01 13:26 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-10-01 14:15:49)
> 
> On 01/10/2018 12:06, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-10-01 11:51:23)
> >>
> >> Hm hm hm... my initial thought was that interrupts could be more delayed
> >> than breadcrumb writes (more than one context ahead), in which case the
> >> process_csb below could be premature and the assert would trigger. But I
> >> must be forgetting something since that would also mean we would
> >> prematurely unpin the context. So I must be forgetting something..
> > 
> > There will have been at least one CS event written because we have
> > switched contexts due to the unpin being one seqno behind. I have not
> > (yet) observed CS events being out of order with breadcrumb writes (and
> > we have very strict checking) so confident that a single process_csb()
> > is required rather than a loop.
> 
> I did not mean out of order but just delayed.
> 
> Say port 0 & 1 are both submitted, we observe seqno 1 & 2 as complete, 
> but the ctx complete irq/handler has been delayed. We go to unpin ctx0 
> (port0) but ce->active hasn't been cleared, as there has been no ctx 
> complete yet, so
> the assert triggers. Impossible in your experience?

I have not seen anything to make me doubt that the CS interrupts, and more
importantly here, the CS events, can be delayed. It's the CS event itself
that we care about, and I think we are very safe in our assertion that
it is written on the context switch prior to the breadcrumb in the
second context.
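
Concretely, the tail of execlists_context_unpin() then boils down to 
something like this (the hunk was trimmed in the quoting above, so 
treat it as a sketch of the idea rather than the exact code):

static void execlists_context_unpin(struct intel_context *ce)
{
	struct intel_engine_cs *engine;

	/*
	 * Sketch of the flush under discussion: if the submission
	 * tasklet may still be referencing this context (ce->active is
	 * still set), drain the CS event buffer directly before tearing
	 * the context down. By the argument above, the CS event for our
	 * final request is already in the buffer by the time we get
	 * here, so one pass is enough.
	 */
	engine = READ_ONCE(ce->active);
	if (unlikely(engine)) {
		unsigned long flags;

		spin_lock_irqsave(&engine->timeline.lock, flags);
		process_csb(engine);
		spin_unlock_irqrestore(&engine->timeline.lock, flags);

		GEM_BUG_ON(READ_ONCE(ce->active));
	}

	/* ... followed by the existing unpin of the context state */
}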
-Chris

* Re: [PATCH 38/40] drm/i915/execlists: Flush the CS events before unpinning
  2018-10-01 13:26         ` Chris Wilson
@ 2018-10-01 14:03           ` Tvrtko Ursulin
  0 siblings, 0 replies; 106+ messages in thread
From: Tvrtko Ursulin @ 2018-10-01 14:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 01/10/2018 14:26, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-10-01 14:15:49)
>>
>> On 01/10/2018 12:06, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-10-01 11:51:23)
>>>>
>>>> Hm hm hm... my initial thought was that interrupts could be more delayed
>>>> than breadcrumb writes (more than one context ahead), in which case the
>>>> process_csb below could be premature and the assert would trigger. But I
>>>> must be forgetting something since that would also mean we would
>>>> prematurely unpin the context. So I must be forgetting something..
>>>
>>> There will have been at least one CS event written because we have
>>> switched contexts due to the unpin being one seqno behind. I have not
>>> (yet) observed CS events being out of order with breadcrumb writes (and
>>> we have very strict checking) so confident that a single process_csb()
>>> is required rather than a loop.
>>
>> I did not mean out of order but just delayed.
>>
>> Say port 0 & 1 are both submitted, we observe seqno 1 & 2 as complete,
>> but the ctx complete irq/handler has been delayed. We go to unpin ctx0
>> (port0) but ce->active hasn't been cleared, as there has been no ctx
>> complete yet, so
>> the assert triggers. Impossible in your experience?
> 
> I have not seen anything to make me doubt that the CS interrupts, and more
> importantly here, the CS events, can be delayed. It's the CS event itself
> that we care about, and I think we are very safe in our assertion that
> it is written on the context switch prior to the breadcrumb in the
> second context.

Ugh.. yes.. a direct call of the tasklet has exactly nothing to do with 
irq delivery. My confusion.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

end of thread, other threads:[~2018-10-01 14:03 UTC | newest]

Thread overview: 106+ messages
2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
2018-09-19 19:55 ` [PATCH 02/40] drm: Fix syncobj handing of schedule() returning 0 Chris Wilson
2018-09-20 14:13   ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 03/40] drm/i915/selftests: Live tests emit requests and so require rpm Chris Wilson
2018-09-20 14:01   ` Tvrtko Ursulin
2018-09-20 14:47     ` Chris Wilson
2018-09-19 19:55 ` [PATCH 04/40] drm/i915: Park the GPU on module load Chris Wilson
2018-09-20 14:02   ` Tvrtko Ursulin
2018-09-20 14:52     ` Chris Wilson
2018-09-19 19:55 ` [PATCH 05/40] drm/i915: Handle incomplete Z_FINISH for compressed error states Chris Wilson
2018-09-19 19:55   ` Chris Wilson
2018-09-19 19:55 ` [PATCH 06/40] drm/i915: Clear the error PTE just once on finish Chris Wilson
2018-09-19 19:55 ` [PATCH 07/40] drm/i915: Cache the error string Chris Wilson
2018-09-19 19:55 ` [PATCH 08/40] drm/i915/execlists: Avoid kicking priority on the current context Chris Wilson
2018-09-19 19:55 ` [PATCH 09/40] drm/i915/selftests: Free the batch along the contexts error path Chris Wilson
2018-09-20  8:30   ` Mika Kuoppala
2018-09-20  8:36     ` Chris Wilson
2018-09-20  9:19       ` Mika Kuoppala
2018-09-19 19:55 ` [PATCH 10/40] drm/i915/selftests: Basic stress test for rapid context switching Chris Wilson
2018-09-20 10:38   ` Mika Kuoppala
2018-09-20 10:46     ` Chris Wilson
2018-09-19 19:55 ` [PATCH 11/40] drm/i915/execlists: Onion unwind for logical_ring_init() failure Chris Wilson
2018-09-20 14:18   ` Mika Kuoppala
2018-09-20 14:21   ` Tvrtko Ursulin
2018-09-20 19:59     ` Chris Wilson
2018-09-21 10:00       ` Tvrtko Ursulin
2018-09-21 10:01         ` Chris Wilson
2018-09-19 19:55 ` [PATCH 12/40] drm/i915/execlists: Assert the queue is non-empty on unsubmitting Chris Wilson
2018-09-24  9:07   ` Tvrtko Ursulin
2018-09-25  7:41     ` Chris Wilson
2018-09-25  8:51       ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 13/40] drm/i915: Reserve some priority bits for internal use Chris Wilson
2018-09-24  9:12   ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 14/40] drm/i915: Combine multiple internal plists into the same i915_priolist bucket Chris Wilson
2018-09-24 10:25   ` Tvrtko Ursulin
2018-09-25  7:55     ` Chris Wilson
2018-09-19 19:55 ` [PATCH 15/40] drm/i915: Priority boost for new clients Chris Wilson
2018-09-24 10:29   ` Tvrtko Ursulin
2018-09-25  8:01     ` Chris Wilson
2018-09-25  8:26       ` Chris Wilson
2018-09-25  8:57         ` Tvrtko Ursulin
2018-09-25  9:06           ` Chris Wilson
2018-09-25  9:08             ` Tvrtko Ursulin
2018-09-25 11:20         ` Michal Wajdeczko
2018-09-19 19:55 ` [PATCH 16/40] drm/i915: Pull scheduling under standalone lock Chris Wilson
2018-09-24 11:19   ` Tvrtko Ursulin
2018-09-25  8:19     ` Chris Wilson
2018-09-25  9:01       ` Tvrtko Ursulin
2018-09-25  9:10         ` Chris Wilson
2018-09-25  9:19           ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 17/40] drm/i915: Priority boost for waiting clients Chris Wilson
2018-09-24 11:29   ` Tvrtko Ursulin
2018-09-25  9:00     ` Chris Wilson
2018-09-25  9:07       ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 18/40] drm/i915: Report the number of closed vma held by each context in debugfs Chris Wilson
2018-09-24 11:57   ` Tvrtko Ursulin
2018-09-25 12:20     ` Chris Wilson
2018-09-19 19:55 ` [PATCH 19/40] drm/i915: Remove debugfs/i915_ppgtt_info Chris Wilson
2018-09-24 12:03   ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 20/40] drm/i915: Track all held rpm wakerefs Chris Wilson
2018-09-19 19:55 ` [PATCH 21/40] drm/i915: Markup paired operations on wakerefs Chris Wilson
2018-09-19 19:55 ` [PATCH 22/40] drm/i915: Syntatic sugar for using intel_runtime_pm Chris Wilson
2018-09-24 12:08   ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 23/40] drm/i915: Markup paired operations on display power domains Chris Wilson
2018-09-19 19:55 ` [PATCH 24/40] drm/i915: Track the wakeref used to initialise " Chris Wilson
2018-09-19 19:55 ` [PATCH 25/40] drm/i915/dp: Markup pps lock power well Chris Wilson
2018-09-19 19:55 ` [PATCH 26/40] drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice Chris Wilson
2018-09-19 19:55 ` [PATCH 27/40] drm/i915: Mark up Ironlake ips with rpm wakerefs Chris Wilson
2018-09-19 19:55 ` [PATCH 28/40] drm/i915: Serialise concurrent calls to i915_gem_set_wedged() Chris Wilson
2018-09-19 19:55 ` [PATCH 29/40] drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex Chris Wilson
2018-09-24 13:04   ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 30/40] drm/i915: Pull all the reset functionality together into i915_reset.c Chris Wilson
2018-09-19 19:55 ` [PATCH 31/40] drm/i915: Make all GPU resets atomic Chris Wilson
2018-09-19 19:55 ` [PATCH 32/40] drm/i915: Introduce the i915_user_extension_method Chris Wilson
2018-09-24 13:20   ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 33/40] drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone() Chris Wilson
2018-09-24 17:22   ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 34/40] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
2018-09-25  8:45   ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 35/40] drm/i915: Fix I915_EXEC_RING_MASK Chris Wilson
2018-09-25  8:46   ` [Intel-gfx] " Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 36/40] drm/i915: Re-arrange execbuf so context is known before engine Chris Wilson
2018-09-19 19:55 ` [PATCH 37/40] drm/i915: Allow a context to define its set of engines Chris Wilson
2018-09-27 11:28   ` Tvrtko Ursulin
2018-09-28 20:22     ` Chris Wilson
2018-10-01  8:30       ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 38/40] drm/i915/execlists: Flush the CS events before unpinning Chris Wilson
2018-10-01 10:51   ` Tvrtko Ursulin
2018-10-01 11:06     ` Chris Wilson
2018-10-01 13:15       ` Tvrtko Ursulin
2018-10-01 13:26         ` Chris Wilson
2018-10-01 14:03           ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 39/40] drm/i915/execlists: Refactor out can_merge_rq() Chris Wilson
2018-09-27 11:32   ` Tvrtko Ursulin
2018-09-28 20:11     ` Chris Wilson
2018-10-01  8:14       ` Tvrtko Ursulin
2018-10-01  8:18         ` Chris Wilson
2018-10-01 10:18           ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 40/40] drm/i915: Load balancing across a virtual engine Chris Wilson
2018-10-01 11:37   ` Tvrtko Ursulin
2018-09-19 21:54 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/40] drm: Use default dma_fence hooks where possible for null syncobj Patchwork
2018-09-19 22:08 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-09-19 22:17 ` ✗ Fi.CI.BAT: failure " Patchwork
2018-09-20 13:34 ` [PATCH 01/40] " Tvrtko Ursulin
2018-09-20 13:40   ` Chris Wilson
2018-09-20 13:54     ` Tvrtko Ursulin
