All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds
@ 2019-08-26  7:21 Chris Wilson
  2019-08-26  7:21 ` [PATCH 02/28] drm/i915/selftests: Remove accidental serialization between gpu_fill Chris Wilson
                   ` (29 more replies)
  0 siblings, 30 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

To properly handle asynchronous migration of batch objects, we need to
couple the fences on the incoming batch into the request and should not
assume that they always start idle.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gt/selftest_workarounds.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index d06d68ac2a3b..999a98f00494 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -565,6 +565,14 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 				goto err_request;
 		}
 
+		i915_vma_lock(batch);
+		err = i915_request_await_object(rq, batch->obj, false);
+		if (err == 0)
+			err = i915_vma_move_to_active(batch, rq, 0);
+		i915_vma_unlock(batch);
+		if (err)
+			goto err_request;
+
 		err = engine->emit_bb_start(rq,
 					    batch->node.start, PAGE_SIZE,
 					    0);
@@ -850,6 +858,14 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
 			goto err_request;
 	}
 
+	i915_vma_lock(batch);
+	err = i915_request_await_object(rq, batch->obj, false);
+	if (err == 0)
+		err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
+	if (err)
+		goto err_request;
+
 	/* Perform the writes from an unprivileged "user" batch */
 	err = engine->emit_bb_start(rq, batch->node.start, 0, 0);
 
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 02/28] drm/i915/selftests: Remove accidental serialization between gpu_fill
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-27 15:54   ` Matthew Auld
  2019-08-26  7:21 ` [PATCH 03/28] drm/i915/selftests: Try to recycle context allocations Chris Wilson
                   ` (28 subsequent siblings)
  29 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

Upon object creation for live_gem_contexts, we fill the object with
known scratch and flush it out of the CPU cache. Before performing the
GPU fill, we don't need to flush it again and so avoid serialising with
previous fills.

However, we do need some throttling on the internal interfaces if we do
not want to run out of memory!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../drm/i915/gem/selftests/i915_gem_context.c | 83 ++++++++++++++++---
 1 file changed, 72 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 3adb60c2fd1f..758417d9f3c1 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -180,12 +180,6 @@ static int gpu_fill(struct intel_context *ce,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	i915_gem_object_lock(obj);
-	err = i915_gem_object_set_to_gtt_domain(obj, true);
-	i915_gem_object_unlock(obj);
-	if (err)
-		return err;
-
 	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
 	if (err)
 		return err;
@@ -343,6 +337,45 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
 	return npages / DW_PER_PAGE;
 }
 
+static void throttle_release(struct i915_request **q, int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		if (IS_ERR_OR_NULL(q[i]))
+			continue;
+
+		i915_request_put(fetch_and_zero(&q[i]));
+	}
+}
+
+static int throttle(struct intel_context *ce,
+		    struct i915_request **q, int count)
+{
+	int i;
+
+	if (!IS_ERR_OR_NULL(q[0])) {
+		if (i915_request_wait(q[0],
+				      I915_WAIT_INTERRUPTIBLE,
+				      MAX_SCHEDULE_TIMEOUT) < 0)
+			return -EINTR;
+
+		i915_request_put(q[0]);
+	}
+
+	for (i = 0; i < count - 1; i++)
+		q[i] = q[i + 1];
+
+	q[i] = intel_context_create_request(ce);
+	if (IS_ERR(q[i]))
+		return PTR_ERR(q[i]);
+
+	i915_request_get(q[i]);
+	i915_request_add(q[i]);
+
+	return 0;
+}
+
 static int igt_ctx_exec(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -362,6 +395,7 @@ static int igt_ctx_exec(void *arg)
 	for_each_engine(engine, i915, id) {
 		struct drm_i915_gem_object *obj = NULL;
 		unsigned long ncontexts, ndwords, dw;
+		struct i915_request *tq[5] = {};
 		struct igt_live_test t;
 		struct drm_file *file;
 		IGT_TIMEOUT(end_time);
@@ -408,13 +442,18 @@ static int igt_ctx_exec(void *arg)
 			}
 
 			err = gpu_fill(ce, obj, dw);
-			intel_context_put(ce);
-
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
 				       engine->name, ctx->hw_id,
 				       yesno(!!ctx->vm), err);
+				intel_context_put(ce);
+				goto out_unlock;
+			}
+
+			err = throttle(ce, tq, ARRAY_SIZE(tq));
+			if (err) {
+				intel_context_put(ce);
 				goto out_unlock;
 			}
 
@@ -425,6 +464,8 @@ static int igt_ctx_exec(void *arg)
 
 			ndwords++;
 			ncontexts++;
+
+			intel_context_put(ce);
 		}
 
 		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
@@ -443,6 +484,7 @@ static int igt_ctx_exec(void *arg)
 		}
 
 out_unlock:
+		throttle_release(tq, ARRAY_SIZE(tq));
 		if (igt_live_test_end(&t))
 			err = -EIO;
 		mutex_unlock(&i915->drm.struct_mutex);
@@ -460,6 +502,7 @@ static int igt_ctx_exec(void *arg)
 static int igt_shared_ctx_exec(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
+	struct i915_request *tq[5] = {};
 	struct i915_gem_context *parent;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
@@ -532,14 +575,20 @@ static int igt_shared_ctx_exec(void *arg)
 			}
 
 			err = gpu_fill(ce, obj, dw);
-			intel_context_put(ce);
-			kernel_context_close(ctx);
-
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
 				       engine->name, ctx->hw_id,
 				       yesno(!!ctx->vm), err);
+				intel_context_put(ce);
+				kernel_context_close(ctx);
+				goto out_test;
+			}
+
+			err = throttle(ce, tq, ARRAY_SIZE(tq));
+			if (err) {
+				intel_context_put(ce);
+				kernel_context_close(ctx);
 				goto out_test;
 			}
 
@@ -550,6 +599,9 @@ static int igt_shared_ctx_exec(void *arg)
 
 			ndwords++;
 			ncontexts++;
+
+			intel_context_put(ce);
+			kernel_context_close(ctx);
 		}
 		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
 			ncontexts, engine->name, ndwords);
@@ -571,6 +623,7 @@ static int igt_shared_ctx_exec(void *arg)
 		mutex_lock(&i915->drm.struct_mutex);
 	}
 out_test:
+	throttle_release(tq, ARRAY_SIZE(tq));
 	if (igt_live_test_end(&t))
 		err = -EIO;
 out_unlock:
@@ -1047,6 +1100,7 @@ static int igt_ctx_readonly(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct drm_i915_gem_object *obj = NULL;
+	struct i915_request *tq[5] = {};
 	struct i915_address_space *vm;
 	struct i915_gem_context *ctx;
 	unsigned long idx, ndwords, dw;
@@ -1118,6 +1172,12 @@ static int igt_ctx_readonly(void *arg)
 				goto out_unlock;
 			}
 
+			err = throttle(ce, tq, ARRAY_SIZE(tq));
+			if (err) {
+				i915_gem_context_unlock_engines(ctx);
+				goto out_unlock;
+			}
+
 			if (++dw == max_dwords(obj)) {
 				obj = NULL;
 				dw = 0;
@@ -1148,6 +1208,7 @@ static int igt_ctx_readonly(void *arg)
 	}
 
 out_unlock:
+	throttle_release(tq, ARRAY_SIZE(tq));
 	if (igt_live_test_end(&t))
 		err = -EIO;
 	mutex_unlock(&i915->drm.struct_mutex);
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 03/28] drm/i915/selftests: Try to recycle context allocations
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
  2019-08-26  7:21 ` [PATCH 02/28] drm/i915/selftests: Remove accidental serialization between gpu_fill Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-27 16:11   ` Matthew Auld
  2019-08-26  7:21 ` [PATCH 04/28] drm/i915/gtt: Downgrade Baytrail back to aliasing-ppgtt Chris Wilson
                   ` (27 subsequent siblings)
  29 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

igt_ctx_exec allocates a new context for each iteration, keeping them
all allocated until the end. Instead, release the local ctx reference at
the end of each iteration, allowing ourselves to reap those if under
memory pressure.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 758417d9f3c1..065f5bad466b 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -424,7 +424,7 @@ static int igt_ctx_exec(void *arg)
 			struct i915_gem_context *ctx;
 			struct intel_context *ce;
 
-			ctx = live_context(i915, file);
+			ctx = kernel_context(i915);
 			if (IS_ERR(ctx)) {
 				err = PTR_ERR(ctx);
 				goto out_unlock;
@@ -437,6 +437,7 @@ static int igt_ctx_exec(void *arg)
 				if (IS_ERR(obj)) {
 					err = PTR_ERR(obj);
 					intel_context_put(ce);
+					kernel_context_close(ctx);
 					goto out_unlock;
 				}
 			}
@@ -448,12 +449,14 @@ static int igt_ctx_exec(void *arg)
 				       engine->name, ctx->hw_id,
 				       yesno(!!ctx->vm), err);
 				intel_context_put(ce);
+				kernel_context_close(ctx);
 				goto out_unlock;
 			}
 
 			err = throttle(ce, tq, ARRAY_SIZE(tq));
 			if (err) {
 				intel_context_put(ce);
+				kernel_context_close(ctx);
 				goto out_unlock;
 			}
 
@@ -466,6 +469,7 @@ static int igt_ctx_exec(void *arg)
 			ncontexts++;
 
 			intel_context_put(ce);
+			kernel_context_close(ctx);
 		}
 
 		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 04/28] drm/i915/gtt: Downgrade Baytrail back to aliasing-ppgtt
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
  2019-08-26  7:21 ` [PATCH 02/28] drm/i915/selftests: Remove accidental serialization between gpu_fill Chris Wilson
  2019-08-26  7:21 ` [PATCH 03/28] drm/i915/selftests: Try to recycle context allocations Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 05/28] drm/i915/gtt: Downgrade Cherryview " Chris Wilson
                   ` (26 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

With the upcoming change in timing (dramatically reducing the latency
between manipulating the ppGTT and execution), no amount of tweaking
could save Baytrail; it would always fail to invalidate its TLB. Ville
was right, Baytrail is beyond hope.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 23 +++-------------------
 drivers/gpu/drm/i915/i915_pci.c            |  2 +-
 2 files changed, 4 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 601c16239fdf..fdddda75eb41 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1751,26 +1751,10 @@ static int switch_context(struct i915_request *rq)
 
 	if (vm) {
 		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-		int loops;
 
-		/*
-		 * Baytail takes a little more convincing that it really needs
-		 * to reload the PD between contexts. It is not just a little
-		 * longer, as adding more stalls after the load_pd_dir (i.e.
-		 * adding a long loop around flush_pd_dir) is not as effective
-		 * as reloading the PD umpteen times. 32 is derived from
-		 * experimentation (gem_exec_parallel/fds) and has no good
-		 * explanation.
-		 */
-		loops = 1;
-		if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915))
-			loops = 32;
-
-		do {
-			ret = load_pd_dir(rq, ppgtt);
-			if (ret)
-				goto err;
-		} while (--loops);
+		ret = load_pd_dir(rq, ppgtt);
+		if (ret)
+			return ret;
 
 		if (ppgtt->pd_dirty_engines & engine->mask) {
 			unwind_mm = engine->mask;
@@ -1832,7 +1816,6 @@ static int switch_context(struct i915_request *rq)
 err_mm:
 	if (unwind_mm)
 		i915_vm_to_ppgtt(vm)->pd_dirty_engines |= unwind_mm;
-err:
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 1974e4c78a43..4c120a2e92af 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -476,7 +476,7 @@ static const struct intel_device_info intel_valleyview_info = {
 	.has_rps = true,
 	.display.has_gmch = 1,
 	.display.has_hotplug = 1,
-	.ppgtt_type = INTEL_PPGTT_FULL,
+	.ppgtt_type = INTEL_PPGTT_ALIASING,
 	.ppgtt_size = 31,
 	.has_snoop = true,
 	.has_coherent_ggtt = false,
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 05/28] drm/i915/gtt: Downgrade Cherryview back to aliasing-ppgtt
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (2 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 04/28] drm/i915/gtt: Downgrade Baytrail back to aliasing-ppgtt Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 06/28] drm/i915: Protect our local workers against I915_FENCE_TIMEOUT Chris Wilson
                   ` (25 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

With the upcoming change in timing (dramatically reducing the latency
between manipulating the ppGTT and execution), no amount of tweaking
could save Cherryview; it would always fail to invalidate its TLB.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 69 +++++------------------------
 drivers/gpu/drm/i915/i915_pci.c     |  2 +-
 2 files changed, 11 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d42584439f51..1a1d68d16fd9 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1871,60 +1871,6 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 	return 0;
 }
 
-static int emit_pdps(struct i915_request *rq)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->hw_context->vm);
-	int err, i;
-	u32 *cs;
-
-	GEM_BUG_ON(intel_vgpu_active(rq->i915));
-
-	/*
-	 * Beware ye of the dragons, this sequence is magic!
-	 *
-	 * Small changes to this sequence can cause anything from
-	 * GPU hangs to forcewake errors and machine lockups!
-	 */
-
-	/* Flush any residual operations from the context load */
-	err = engine->emit_flush(rq, EMIT_FLUSH);
-	if (err)
-		return err;
-
-	/* Magic required to prevent forcewake errors! */
-	err = engine->emit_flush(rq, EMIT_INVALIDATE);
-	if (err)
-		return err;
-
-	cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	/* Ensure the LRI have landed before we invalidate & continue */
-	*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
-	for (i = GEN8_3LVL_PDPES; i--; ) {
-		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
-		u32 base = engine->mmio_base;
-
-		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
-		*cs++ = upper_32_bits(pd_daddr);
-		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
-		*cs++ = lower_32_bits(pd_daddr);
-	}
-	*cs++ = MI_NOOP;
-
-	intel_ring_advance(rq, cs);
-
-	/* Be doubly sure the LRI have landed before proceeding */
-	err = engine->emit_flush(rq, EMIT_FLUSH);
-	if (err)
-		return err;
-
-	/* Re-invalidate the TLB for luck */
-	return engine->emit_flush(rq, EMIT_INVALIDATE);
-}
-
 static int execlists_request_alloc(struct i915_request *request)
 {
 	int ret;
@@ -1947,10 +1893,7 @@ static int execlists_request_alloc(struct i915_request *request)
 	 */
 
 	/* Unconditionally invalidate GPU caches and TLBs. */
-	if (i915_vm_is_4lvl(request->hw_context->vm))
-		ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
-	else
-		ret = emit_pdps(request);
+	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
 	if (ret)
 		return ret;
 
@@ -3149,12 +3092,20 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
 	return indirect_ctx_offset;
 }
 
+static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
+{
+	if (i915_is_ggtt(vm))
+		return i915_vm_to_ggtt(vm)->alias;
+	else
+		return i915_vm_to_ppgtt(vm);
+}
+
 static void execlists_init_reg_state(u32 *regs,
 				     struct intel_context *ce,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring)
 {
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
+	struct i915_ppgtt *ppgtt = vm_alias(ce->vm);
 	bool rcs = engine->class == RENDER_CLASS;
 	u32 base = engine->mmio_base;
 
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4c120a2e92af..df518199fbfa 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -570,7 +570,7 @@ static const struct intel_device_info intel_cherryview_info = {
 	.has_rps = true,
 	.has_logical_ring_contexts = 1,
 	.display.has_gmch = 1,
-	.ppgtt_type = INTEL_PPGTT_FULL,
+	.ppgtt_type = INTEL_PPGTT_ALIASING,
 	.ppgtt_size = 32,
 	.has_reset_engine = 1,
 	.has_snoop = true,
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 06/28] drm/i915: Protect our local workers against I915_FENCE_TIMEOUT
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (3 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 05/28] drm/i915/gtt: Downgrade Cherryview " Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-28 17:01   ` Matthew Auld
  2019-08-26  7:21 ` [PATCH 07/28] drm/i915: Force PD reload on any PD update Chris Wilson
                   ` (24 subsequent siblings)
  29 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

Trust our own workers to not cause unnecessary delays and disable the
automatic timeout on their asynchronous fence waits. (Along the same
lines that we trust our own requests to complete eventually, if
necessary by force.)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 18865ce04e13..754a78364a63 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -924,7 +924,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 			ret = i915_request_await_request(rq, to_request(fence));
 		else
 			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
-							    I915_FENCE_TIMEOUT,
+							    fence->context ? I915_FENCE_TIMEOUT : 0,
 							    I915_FENCE_GFP);
 		if (ret < 0)
 			return ret;
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 07/28] drm/i915: Force PD reload on any PD update
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (4 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 06/28] drm/i915: Protect our local workers against I915_FENCE_TIMEOUT Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 08/28] drm/i915: Replace obj->pin_global with obj->frontbuffer Chris Wilson
                   ` (23 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

Use a serial to track address space updates as this is more robust than
a set of dirty flags stored on the vm (which may actually be shared by
multiple contexts). Bump the serial on any insertion in the vm, then
during request construction we can compare the current serial stashed
away inside the context against the current serial of the address space
and force a PD refresh as required.

The challenge wrt future async PD updates is making sure we flag the
change in address space prior to request construction.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  1 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |  1 +
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  5 ++-
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 40 ++++++++-----------
 drivers/gpu/drm/i915/i915_gem_gtt.c           | 16 +-------
 drivers/gpu/drm/i915/i915_gem_gtt.h           |  2 +-
 drivers/gpu/drm/i915/i915_vma.c               |  3 ++
 7 files changed, 27 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 1f735ca9b173..d7c17a8dadff 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -465,6 +465,7 @@ static void __apply_ppgtt(struct intel_context *ce, void *vm)
 {
 	i915_vm_put(ce->vm);
 	ce->vm = i915_vm_get(vm);
+	ce->vm_serial = 0;
 }
 
 static struct i915_address_space *
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index bf9cedfccbf0..0a610355affb 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -55,6 +55,7 @@ struct intel_context {
 
 	unsigned long flags;
 #define CONTEXT_ALLOC_BIT 0
+	unsigned int vm_serial;
 
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 1a1d68d16fd9..c8ca75eb79df 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1873,9 +1873,10 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 
 static int execlists_request_alloc(struct i915_request *request)
 {
+	struct intel_context *ce = request->hw_context;
 	int ret;
 
-	GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
+	GEM_BUG_ON(!intel_context_is_pinned(ce));
 
 	/*
 	 * Flush enough space to reduce the likelihood of waiting after
@@ -1897,6 +1898,8 @@ static int execlists_request_alloc(struct i915_request *request)
 	if (ret)
 		return ret;
 
+	ce->vm_serial = ce->vm->serial;
+
 	request->reserved_space -= EXECLISTS_REQUEST_SIZE;
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index fdddda75eb41..d6e2f5b3dd62 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1742,28 +1742,23 @@ static int remap_l3(struct i915_request *rq)
 static int switch_context(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
-	struct i915_address_space *vm = vm_alias(rq->hw_context);
-	unsigned int unwind_mm = 0;
+	struct intel_context *ce = rq->hw_context;
+	struct i915_address_space *vm = vm_alias(ce);
 	u32 hw_flags = 0;
 	int ret;
 
 	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
 	if (vm) {
-		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-
-		ret = load_pd_dir(rq, ppgtt);
+		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm));
 		if (ret)
 			return ret;
 
-		if (ppgtt->pd_dirty_engines & engine->mask) {
-			unwind_mm = engine->mask;
-			ppgtt->pd_dirty_engines &= ~unwind_mm;
+		if (vm->serial != ce->vm_serial)
 			hw_flags = MI_FORCE_RESTORE;
-		}
 	}
 
-	if (rq->hw_context->state) {
+	if (ce->state) {
 		GEM_BUG_ON(engine->id != RCS0);
 
 		/*
@@ -1778,17 +1773,21 @@ static int switch_context(struct i915_request *rq)
 
 		ret = mi_set_context(rq, hw_flags);
 		if (ret)
-			goto err_mm;
+			return ret;
 	}
 
+	ret = remap_l3(rq);
+	if (ret)
+		return ret;
+
 	if (vm) {
 		ret = engine->emit_flush(rq, EMIT_INVALIDATE);
 		if (ret)
-			goto err_mm;
+			return ret;
 
 		ret = flush_pd_dir(rq);
 		if (ret)
-			goto err_mm;
+			return ret;
 
 		/*
 		 * Not only do we need a full barrier (post-sync write) after
@@ -1800,23 +1799,16 @@ static int switch_context(struct i915_request *rq)
 		 */
 		ret = engine->emit_flush(rq, EMIT_INVALIDATE);
 		if (ret)
-			goto err_mm;
+			return ret;
 
 		ret = engine->emit_flush(rq, EMIT_FLUSH);
 		if (ret)
-			goto err_mm;
-	}
+			return ret;
 
-	ret = remap_l3(rq);
-	if (ret)
-		goto err_mm;
+		ce->vm_serial = vm->serial;
+	}
 
 	return 0;
-
-err_mm:
-	if (unwind_mm)
-		i915_vm_to_ppgtt(vm)->pd_dirty_engines |= unwind_mm;
-	return ret;
 }
 
 static int ring_request_alloc(struct i915_request *request)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0db82921fb38..86979f391dd0 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -823,17 +823,6 @@ release_pd_entry(struct i915_page_directory * const pd,
 	return free;
 }
 
-/*
- * PDE TLBs are a pain to invalidate on GEN8+. When we modify
- * the page table structures, we mark them dirty so that
- * context switching/execlist queuing code takes extra steps
- * to ensure that tlbs are flushed.
- */
-static void mark_tlbs_dirty(struct i915_ppgtt *ppgtt)
-{
-	ppgtt->pd_dirty_engines = ALL_ENGINES;
-}
-
 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
 	struct drm_i915_private *dev_priv = ppgtt->vm.i915;
@@ -1735,10 +1724,8 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 	}
 	spin_unlock(&pd->lock);
 
-	if (flush) {
-		mark_tlbs_dirty(&ppgtt->base);
+	if (flush)
 		gen6_ggtt_invalidate(vm->gt->ggtt);
-	}
 
 	goto out;
 
@@ -1833,7 +1820,6 @@ static int pd_vma_bind(struct i915_vma *vma,
 	gen6_for_all_pdes(pt, ppgtt->base.pd, pde)
 		gen6_write_pde(ppgtt, pde, pt);
 
-	mark_tlbs_dirty(&ppgtt->base);
 	gen6_ggtt_invalidate(ggtt);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index b97a47fc7a68..dcc3d4e88a45 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -305,6 +305,7 @@ struct i915_address_space {
 	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
 	u64 reserved;		/* size addr space reserved */
 
+	unsigned int serial;
 	bool closed;
 
 	struct mutex mutex; /* protects vma and our lists */
@@ -422,7 +423,6 @@ struct i915_ggtt {
 struct i915_ppgtt {
 	struct i915_address_space vm;
 
-	intel_engine_mask_t pd_dirty_engines;
 	struct i915_page_directory *pd;
 };
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index e0e677b2a3a9..ddb03cbcbf60 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -340,6 +340,9 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 	if (ret)
 		return ret;
 
+	/* Must be incremented prior to request construction */
+	vma->vm->serial++;
+
 	vma->flags |= bind_flags;
 	return 0;
 }
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 08/28] drm/i915: Replace obj->pin_global with obj->frontbuffer
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (5 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 07/28] drm/i915: Force PD reload on any PD update Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 09/28] dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling) Chris Wilson
                   ` (22 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

obj->pin_global was originally used as a means to keep the shrinker off
the active scanout, but we use the vma->pin_count itself for that and
the obj->frontbuffer to delay shrinking active framebuffers. The other
role that obj->pin_global gained was for spotting display objects inside
GEM and working harder to keep those coherent; for which we can again
simply inspect obj->frontbuffer directly.

Coming up next, we will want to manipulate the pin_global counter
outside of the principal locks, so would need to make pin_global atomic.
However, since obj->frontbuffer is already managed atomically, it makes
sense to use that as the primary key for display objects instead of having
pin_global.

Ville pointed out the principal difference is that obj->frontbuffer is
set for as long as an intel_framebuffer is attached to an object, but
obj->pin_global was only raised for as long as the object was active. In
practice, this means that we consider the object as being on the scanout
for longer than is strictly required, causing us to be more proactive in
flushing -- though it should be true that we would have flushed
eventually when the back became the front, except that on the flip path
that flush is async but when hit from another ioctl it will be
synchronous.

v2: i915_gem_object_is_framebuffer()

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 .../gpu/drm/i915/display/intel_frontbuffer.c  | 13 +++++--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 34 ++++++-------------
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  3 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  2 --
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  | 15 +++-----
 drivers/gpu/drm/i915/i915_debugfs.c           | 12 ++-----
 6 files changed, 29 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index 719379774fa5..fc40dc1fdbcc 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -220,11 +220,18 @@ static void frontbuffer_release(struct kref *ref)
 {
 	struct intel_frontbuffer *front =
 		container_of(ref, typeof(*front), ref);
+	struct drm_i915_gem_object *obj = front->obj;
+	struct i915_vma *vma;
 
-	front->obj->frontbuffer = NULL;
-	spin_unlock(&to_i915(front->obj->base.dev)->fb_tracking.lock);
+	spin_lock(&obj->vma.lock);
+	for_each_ggtt_vma(vma, obj)
+		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
+	spin_unlock(&obj->vma.lock);
 
-	i915_gem_object_put(front->obj);
+	obj->frontbuffer = NULL;
+	spin_unlock(&to_i915(obj->base.dev)->fb_tracking.lock);
+
+	i915_gem_object_put(obj);
 	kfree(front);
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 9c58e8fac1d9..6af740a5e3db 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -27,7 +27,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
 {
-	if (!READ_ONCE(obj->pin_global))
+	if (!i915_gem_object_is_framebuffer(obj))
 		return;
 
 	i915_gem_object_lock(obj);
@@ -422,12 +422,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 
 	assert_object_held(obj);
 
-	/* Mark the global pin early so that we account for the
-	 * display coherency whilst setting up the cache domains.
-	 */
-	obj->pin_global++;
-
-	/* The display engine is not coherent with the LLC cache on gen6.  As
+	/*
+	 * The display engine is not coherent with the LLC cache on gen6.  As
 	 * a result, we make sure that the pinning that is about to occur is
 	 * done with uncached PTEs. This is lowest common denominator for all
 	 * chipsets.
@@ -439,12 +435,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	ret = i915_gem_object_set_cache_level(obj,
 					      HAS_WT(to_i915(obj->base.dev)) ?
 					      I915_CACHE_WT : I915_CACHE_NONE);
-	if (ret) {
-		vma = ERR_PTR(ret);
-		goto err_unpin_global;
-	}
+	if (ret)
+		return ERR_PTR(ret);
 
-	/* As the user may map the buffer once pinned in the display plane
+	/*
+	 * As the user may map the buffer once pinned in the display plane
 	 * (e.g. libkms for the bootup splash), we have to ensure that we
 	 * always use map_and_fenceable for all scanout buffers. However,
 	 * it may simply be too big to fit into mappable, in which case
@@ -461,22 +456,19 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
 	if (IS_ERR(vma))
-		goto err_unpin_global;
+		return vma;
 
 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
 	__i915_gem_object_flush_for_display(obj);
 
-	/* It should now be out of any other write domains, and we can update
+	/*
+	 * It should now be out of any other write domains, and we can update
 	 * the domain values for our changes.
 	 */
 	obj->read_domains |= I915_GEM_DOMAIN_GTT;
 
 	return vma;
-
-err_unpin_global:
-	obj->pin_global--;
-	return vma;
 }
 
 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
@@ -514,12 +506,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 
 	assert_object_held(obj);
 
-	if (WARN_ON(obj->pin_global == 0))
-		return;
-
-	if (--obj->pin_global == 0)
-		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
-
 	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
 	i915_gem_object_bump_inactive_ggtt(obj);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 5efb9936e05b..29b9eddc4c7f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -406,7 +406,8 @@ static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 		return true;
 
-	return obj->pin_global; /* currently in use by HW, keep flushed */
+	/* Currently in use by HW (display engine)? Keep flushed. */
+	return i915_gem_object_is_framebuffer(obj);
 }
 
 static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index ede0eb4218a8..13b9dc0e1a89 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -152,8 +152,6 @@ struct drm_i915_gem_object {
 
 	/** Count of VMA actually bound by this object */
 	atomic_t bind_count;
-	/** Count of how many global VMA are currently pinned for use by HW */
-	unsigned int pin_global;
 
 	struct {
 		struct mutex lock; /* protects the pages and their use */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index edd21d14e64f..4e55cfc2b0dc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -61,7 +61,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 	if (!i915_gem_object_is_shrinkable(obj))
 		return false;
 
-	/* Only report true if by unbinding the object and putting its pages
+	/*
+	 * Only report true if by unbinding the object and putting its pages
 	 * we can actually make forward progress towards freeing physical
 	 * pages.
 	 *
@@ -72,16 +73,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 	if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count))
 		return false;
 
-	/* If any vma are "permanently" pinned, it will prevent us from
-	 * reclaiming the obj->mm.pages. We only allow scanout objects to claim
-	 * a permanent pin, along with a few others like the context objects.
-	 * To simplify the scan, and to avoid walking the list of vma under the
-	 * object, we just check the count of its permanently pinned.
-	 */
-	if (READ_ONCE(obj->pin_global))
-		return false;
-
-	/* We can only return physical pages to the system if we can either
+	/*
+	 * We can only return physical pages to the system if we can either
 	 * discard the contents (because the user has marked them as being
 	 * purgeable) or if we can move their contents out to swap.
 	 */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5c1a2b1e7d34..27acedd89785 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -77,11 +77,6 @@ static int i915_capabilities(struct seq_file *m, void *data)
 	return 0;
 }
 
-static char get_pin_flag(struct drm_i915_gem_object *obj)
-{
-	return obj->pin_global ? 'p' : ' ';
-}
-
 static char get_tiling_flag(struct drm_i915_gem_object *obj)
 {
 	switch (i915_gem_object_get_tiling(obj)) {
@@ -140,9 +135,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	struct i915_vma *vma;
 	int pin_count = 0;
 
-	seq_printf(m, "%pK: %c%c%c%c %8zdKiB %02x %02x %s%s%s",
+	seq_printf(m, "%pK: %c%c%c %8zdKiB %02x %02x %s%s%s",
 		   &obj->base,
-		   get_pin_flag(obj),
 		   get_tiling_flag(obj),
 		   get_global_flag(obj),
 		   get_pin_mapped_flag(obj),
@@ -221,8 +215,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	seq_printf(m, " (pinned x %d)", pin_count);
 	if (obj->stolen)
 		seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
-	if (obj->pin_global)
-		seq_printf(m, " (global)");
+	if (i915_gem_object_is_framebuffer(obj))
+		seq_printf(m, " (fb)");
 
 	engine = i915_gem_object_last_write_engine(obj);
 	if (engine)
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 09/28] dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (6 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 08/28] drm/i915: Replace obj->pin_global with obj->frontbuffer Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 10/28] drm/mm: Pack allocated/scanned boolean into a bitfield Chris Wilson
                   ` (21 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

Make dma_fence_enable_sw_signaling() behave like its
dma_fence_add_callback() and dma_fence_default_wait() counterparts and
perform the test to enable signaling under the fence->lock, along with
the action to do so. This ensures that should an implementation be trying
to flush the cb_list (by signaling) on retirement before freeing the
fence, it can do so in a race-free manner.

See also 0fc89b6802ba ("dma-fence: Simply wrap dma_fence_signal_locked
with dma_fence_signal").

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/dma-buf/dma-fence.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 2c136aee3e79..587727089134 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -285,19 +285,18 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence)
 {
 	unsigned long flags;
 
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return;
+
+	spin_lock_irqsave(fence->lock, flags);
 	if (!test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
 			      &fence->flags) &&
-	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
 	    fence->ops->enable_signaling) {
 		trace_dma_fence_enable_signal(fence);
-
-		spin_lock_irqsave(fence->lock, flags);
-
 		if (!fence->ops->enable_signaling(fence))
 			dma_fence_signal_locked(fence);
-
-		spin_unlock_irqrestore(fence->lock, flags);
 	}
+	spin_unlock_irqrestore(fence->lock, flags);
 }
 EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
 
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 10/28] drm/mm: Pack allocated/scanned boolean into a bitfield
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (7 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 09/28] dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling) Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-27  7:12   ` kbuild test robot
  2019-08-26  7:21 ` [PATCH 11/28] drm/i915: Make shrink/unshrink be atomic Chris Wilson
                   ` (20 subsequent siblings)
  29 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

The ulterior motive to switching the booleans over to bitops is to
allow use of the allocated flag as a bitlock.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/drm_mm.c                      | 36 +++++++++++--------
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  6 ++--
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c      |  2 +-
 drivers/gpu/drm/i915/i915_gem.c               | 16 ++++-----
 drivers/gpu/drm/i915/i915_gem_evict.c         |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  2 +-
 drivers/gpu/drm/i915/i915_vma.c               |  4 +--
 drivers/gpu/drm/i915/i915_vma.h               |  2 +-
 drivers/gpu/drm/selftests/test-drm_mm.c       | 14 ++++----
 include/drm/drm_mm.h                          |  7 ++--
 10 files changed, 49 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 4581c5387372..211967006cec 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -174,7 +174,7 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
 
 	node->__subtree_last = LAST(node);
 
-	if (hole_node->allocated) {
+	if (drm_mm_node_allocated(hole_node)) {
 		rb = &hole_node->rb;
 		while (rb) {
 			parent = rb_entry(rb, struct drm_mm_node, rb);
@@ -424,9 +424,9 @@ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
 
 	node->mm = mm;
 
+	__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 	list_add(&node->node_list, &hole->node_list);
 	drm_mm_interval_tree_add_node(hole, node);
-	node->allocated = true;
 	node->hole_size = 0;
 
 	rm_hole(hole);
@@ -543,9 +543,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 		node->color = color;
 		node->hole_size = 0;
 
+		__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 		list_add(&node->node_list, &hole->node_list);
 		drm_mm_interval_tree_add_node(hole, node);
-		node->allocated = true;
 
 		rm_hole(hole);
 		if (adj_start > hole_start)
@@ -561,6 +561,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 }
 EXPORT_SYMBOL(drm_mm_insert_node_in_range);
 
+static inline bool drm_mm_node_scanned_block(const struct drm_mm_node *node)
+{
+	return test_bit(DRM_MM_NODE_SCANNED_BIT, &node->flags);
+}
+
 /**
  * drm_mm_remove_node - Remove a memory node from the allocator.
  * @node: drm_mm_node to remove
@@ -574,8 +579,8 @@ void drm_mm_remove_node(struct drm_mm_node *node)
 	struct drm_mm *mm = node->mm;
 	struct drm_mm_node *prev_node;
 
-	DRM_MM_BUG_ON(!node->allocated);
-	DRM_MM_BUG_ON(node->scanned_block);
+	DRM_MM_BUG_ON(!drm_mm_node_allocated(node));
+	DRM_MM_BUG_ON(drm_mm_node_scanned_block(node));
 
 	prev_node = list_prev_entry(node, node_list);
 
@@ -584,11 +589,12 @@ void drm_mm_remove_node(struct drm_mm_node *node)
 
 	drm_mm_interval_tree_remove(node, &mm->interval_tree);
 	list_del(&node->node_list);
-	node->allocated = false;
 
 	if (drm_mm_hole_follows(prev_node))
 		rm_hole(prev_node);
 	add_hole(prev_node);
+
+	clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 }
 EXPORT_SYMBOL(drm_mm_remove_node);
 
@@ -605,7 +611,7 @@ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 {
 	struct drm_mm *mm = old->mm;
 
-	DRM_MM_BUG_ON(!old->allocated);
+	DRM_MM_BUG_ON(!drm_mm_node_allocated(old));
 
 	*new = *old;
 
@@ -622,8 +628,7 @@ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 				&mm->holes_addr);
 	}
 
-	old->allocated = false;
-	new->allocated = true;
+	clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &old->flags);
 }
 EXPORT_SYMBOL(drm_mm_replace_node);
 
@@ -731,9 +736,9 @@ bool drm_mm_scan_add_block(struct drm_mm_scan *scan,
 	u64 adj_start, adj_end;
 
 	DRM_MM_BUG_ON(node->mm != mm);
-	DRM_MM_BUG_ON(!node->allocated);
-	DRM_MM_BUG_ON(node->scanned_block);
-	node->scanned_block = true;
+	DRM_MM_BUG_ON(!drm_mm_node_allocated(node));
+	DRM_MM_BUG_ON(drm_mm_node_scanned_block(node));
+	__set_bit(DRM_MM_NODE_SCANNED_BIT, &node->flags);
 	mm->scan_active++;
 
 	/* Remove this block from the node_list so that we enlarge the hole
@@ -818,8 +823,7 @@ bool drm_mm_scan_remove_block(struct drm_mm_scan *scan,
 	struct drm_mm_node *prev_node;
 
 	DRM_MM_BUG_ON(node->mm != scan->mm);
-	DRM_MM_BUG_ON(!node->scanned_block);
-	node->scanned_block = false;
+	DRM_MM_BUG_ON(!drm_mm_node_scanned_block(node));
 
 	DRM_MM_BUG_ON(!node->mm->scan_active);
 	node->mm->scan_active--;
@@ -837,6 +841,8 @@ bool drm_mm_scan_remove_block(struct drm_mm_scan *scan,
 		      list_next_entry(node, node_list));
 	list_add(&node->node_list, &prev_node->node_list);
 
+	__clear_bit(DRM_MM_NODE_SCANNED_BIT, &node->flags);
+
 	return (node->start + node->size > scan->hit_start &&
 		node->start < scan->hit_end);
 }
@@ -917,7 +923,7 @@ void drm_mm_init(struct drm_mm *mm, u64 start, u64 size)
 
 	/* Clever trick to avoid a special case in the free hole tracking. */
 	INIT_LIST_HEAD(&mm->head_node.node_list);
-	mm->head_node.allocated = false;
+	mm->head_node.flags = 0;
 	mm->head_node.mm = mm;
 	mm->head_node.start = start + size;
 	mm->head_node.size = -size;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index b5f6937369ea..493f07806b08 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -902,7 +902,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
 	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
 	cache->has_fence = cache->gen < 4;
 	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
-	cache->node.allocated = false;
+	cache->node.flags = 0;
 	cache->rq = NULL;
 	cache->rq_size = 0;
 }
@@ -963,7 +963,7 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
 		io_mapping_unmap_atomic((void __iomem *)vaddr);
 
-		if (cache->node.allocated) {
+		if (drm_mm_node_allocated(&cache->node)) {
 			ggtt->vm.clear_range(&ggtt->vm,
 					     cache->node.start,
 					     cache->node.size);
@@ -1056,7 +1056,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 	}
 
 	offset = cache->node.start;
-	if (cache->node.allocated) {
+	if (drm_mm_node_allocated(&cache->node)) {
 		ggtt->vm.insert_page(&ggtt->vm,
 				     i915_gem_object_get_dma_address(obj, page),
 				     offset, I915_CACHE_NONE, 0);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 296a82603be0..07fc6f28abcd 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -401,7 +401,7 @@ static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt)
 {
 	struct drm_mm_node *node = &ggtt->uc_fw;
 
-	GEM_BUG_ON(!node->allocated);
+	GEM_BUG_ON(!drm_mm_node_allocated(node));
 	GEM_BUG_ON(upper_32_bits(node->start));
 	GEM_BUG_ON(upper_32_bits(node->start + node->size - 1));
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 95e7c52cf8ed..814f62fca727 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -351,12 +351,12 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 					       PIN_NOEVICT);
 	if (!IS_ERR(vma)) {
 		node.start = i915_ggtt_offset(vma);
-		node.allocated = false;
+		node.flags = 0;
 	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
 			goto out_unlock;
-		GEM_BUG_ON(!node.allocated);
+		GEM_BUG_ON(!drm_mm_node_allocated(&node));
 	}
 
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -393,7 +393,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		unsigned page_offset = offset_in_page(offset);
 		unsigned page_length = PAGE_SIZE - page_offset;
 		page_length = remain < page_length ? remain : page_length;
-		if (node.allocated) {
+		if (drm_mm_node_allocated(&node)) {
 			ggtt->vm.insert_page(&ggtt->vm,
 					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
 					     node.start, I915_CACHE_NONE, 0);
@@ -415,7 +415,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
 	mutex_lock(&i915->drm.struct_mutex);
-	if (node.allocated) {
+	if (drm_mm_node_allocated(&node)) {
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
 		remove_mappable_node(&node);
 	} else {
@@ -561,12 +561,12 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 					       PIN_NOEVICT);
 	if (!IS_ERR(vma)) {
 		node.start = i915_ggtt_offset(vma);
-		node.allocated = false;
+		node.flags = 0;
 	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
 			goto out_rpm;
-		GEM_BUG_ON(!node.allocated);
+		GEM_BUG_ON(!drm_mm_node_allocated(&node));
 	}
 
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -604,7 +604,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		unsigned int page_offset = offset_in_page(offset);
 		unsigned int page_length = PAGE_SIZE - page_offset;
 		page_length = remain < page_length ? remain : page_length;
-		if (node.allocated) {
+		if (drm_mm_node_allocated(&node)) {
 			/* flush the write before we modify the GGTT */
 			intel_gt_flush_ggtt_writes(ggtt->vm.gt);
 			ggtt->vm.insert_page(&ggtt->vm,
@@ -636,7 +636,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 out_unpin:
 	mutex_lock(&i915->drm.struct_mutex);
 	intel_gt_flush_ggtt_writes(ggtt->vm.gt);
-	if (node.allocated) {
+	if (drm_mm_node_allocated(&node)) {
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
 		remove_mappable_node(&node);
 	} else {
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 52c86c6e0673..7abcac3b5e2e 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -301,7 +301,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 			break;
 		}
 
-		GEM_BUG_ON(!node->allocated);
+		GEM_BUG_ON(!drm_mm_node_allocated(node));
 		vma = container_of(node, typeof(*vma), node);
 
 		/* If we are using coloring to insert guard pages between
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 86979f391dd0..b001d8b0f787 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2552,7 +2552,7 @@ static void i915_gtt_color_adjust(const struct drm_mm_node *node,
 				  u64 *start,
 				  u64 *end)
 {
-	if (node->allocated && node->color != color)
+	if (drm_mm_node_allocated(node) && node->color != color)
 		*start += I915_GTT_PAGE_SIZE;
 
 	/* Also leave a space between the unallocated reserved node after the
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index ddb03cbcbf60..49fe4a664d29 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -482,7 +482,7 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 
 static bool color_differs(struct drm_mm_node *node, unsigned long color)
 {
-	return node->allocated && node->color != color;
+	return drm_mm_node_allocated(node) && node->color != color;
 }
 
 bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level)
@@ -800,7 +800,7 @@ void i915_vma_reopen(struct i915_vma *vma)
 
 static void __i915_vma_destroy(struct i915_vma *vma)
 {
-	GEM_BUG_ON(vma->node.allocated);
+	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(vma->fence);
 
 	mutex_lock(&vma->vm->mutex);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 889fc7cb910a..af2ef0a51455 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -214,7 +214,7 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma)
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-	GEM_BUG_ON(!vma->node.allocated);
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(upper_32_bits(vma->node.start));
 	GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1));
 	return lower_32_bits(vma->node.start);
diff --git a/drivers/gpu/drm/selftests/test-drm_mm.c b/drivers/gpu/drm/selftests/test-drm_mm.c
index 388f9844f4ba..9aabe82dcd3a 100644
--- a/drivers/gpu/drm/selftests/test-drm_mm.c
+++ b/drivers/gpu/drm/selftests/test-drm_mm.c
@@ -854,7 +854,7 @@ static bool assert_contiguous_in_range(struct drm_mm *mm,
 
 	if (start > 0) {
 		node = __drm_mm_interval_first(mm, 0, start - 1);
-		if (node->allocated) {
+		if (drm_mm_node_allocated(node)) {
 			pr_err("node before start: node=%llx+%llu, start=%llx\n",
 			       node->start, node->size, start);
 			return false;
@@ -863,7 +863,7 @@ static bool assert_contiguous_in_range(struct drm_mm *mm,
 
 	if (end < U64_MAX) {
 		node = __drm_mm_interval_first(mm, end, U64_MAX);
-		if (node->allocated) {
+		if (drm_mm_node_allocated(node)) {
 			pr_err("node after end: node=%llx+%llu, end=%llx\n",
 			       node->start, node->size, end);
 			return false;
@@ -1156,12 +1156,12 @@ static void show_holes(const struct drm_mm *mm, int count)
 		struct drm_mm_node *next = list_next_entry(hole, node_list);
 		const char *node1 = NULL, *node2 = NULL;
 
-		if (hole->allocated)
+		if (drm_mm_node_allocated(hole))
 			node1 = kasprintf(GFP_KERNEL,
 					  "[%llx + %lld, color=%ld], ",
 					  hole->start, hole->size, hole->color);
 
-		if (next->allocated)
+		if (drm_mm_node_allocated(next))
 			node2 = kasprintf(GFP_KERNEL,
 					  ", [%llx + %lld, color=%ld]",
 					  next->start, next->size, next->color);
@@ -1900,18 +1900,18 @@ static void separate_adjacent_colors(const struct drm_mm_node *node,
 				     u64 *start,
 				     u64 *end)
 {
-	if (node->allocated && node->color != color)
+	if (drm_mm_node_allocated(node) && node->color != color)
 		++*start;
 
 	node = list_next_entry(node, node_list);
-	if (node->allocated && node->color != color)
+	if (drm_mm_node_allocated(node) && node->color != color)
 		--*end;
 }
 
 static bool colors_abutt(const struct drm_mm_node *node)
 {
 	if (!drm_mm_hole_follows(node) &&
-	    list_next_entry(node, node_list)->allocated) {
+	    drm_mm_node_allocated(list_next_entry(node, node_list))) {
 		pr_err("colors abutt; %ld [%llx + %llx] is next to %ld [%llx + %llx]!\n",
 		       node->color, node->start, node->size,
 		       list_next_entry(node, node_list)->color,
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 2c3bbb43c7d1..d7939c054259 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -168,8 +168,9 @@ struct drm_mm_node {
 	struct rb_node rb_hole_addr;
 	u64 __subtree_last;
 	u64 hole_size;
-	bool allocated : 1;
-	bool scanned_block : 1;
+	unsigned long flags;
+#define DRM_MM_NODE_ALLOCATED_BIT	0
+#define DRM_MM_NODE_SCANNED_BIT		1
 #ifdef CONFIG_DRM_DEBUG_MM
 	depot_stack_handle_t stack;
 #endif
@@ -253,7 +254,7 @@ struct drm_mm_scan {
  */
 static inline bool drm_mm_node_allocated(const struct drm_mm_node *node)
 {
-	return node->allocated;
+	return test_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 }
 
 /**
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 11/28] drm/i915: Make shrink/unshrink be atomic
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (8 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 10/28] drm/mm: Pack allocated/scanned boolean into a bitfield Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 12/28] drm/i915: Only track bound elements of the GTT Chris Wilson
                   ` (19 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

Add an atomic counter and always take the spinlock around the pin/unpin
events, so that we can perform the list manipulation concurrently.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    |  3 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_pages.c     |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  | 36 +++++++++++--------
 drivers/gpu/drm/i915/gt/intel_context.c       |  2 +-
 5 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 6af740a5e3db..f0c437b6e995 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -492,7 +492,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 
 		spin_lock_irqsave(&i915->mm.obj_lock, flags);
 
-		if (obj->mm.madv == I915_MADV_WILLNEED)
+		if (obj->mm.madv == I915_MADV_WILLNEED &&
+		    !atomic_read(&obj->mm.shrink_pin))
 			list_move_tail(&obj->mm.link, &i915->mm.shrink_list);
 
 		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 13b9dc0e1a89..b0550727e69a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -156,6 +156,7 @@ struct drm_i915_gem_object {
 	struct {
 		struct mutex lock; /* protects the pages and their use */
 		atomic_t pages_pin_count;
+		atomic_t shrink_pin;
 
 		struct sg_table *pages;
 		void *mapping;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 18f0ce0135c1..2e941f093a20 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -71,6 +71,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 			list = &i915->mm.shrink_list;
 		list_add_tail(&obj->mm.link, list);
 
+		atomic_set(&obj->mm.shrink_pin, 0);
 		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 	}
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 4e55cfc2b0dc..d2c05d752909 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -516,46 +516,52 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
 
 void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *i915 = obj_to_i915(obj);
+	unsigned long flags;
+
 	/*
 	 * We can only be called while the pages are pinned or when
 	 * the pages are released. If pinned, we should only be called
 	 * from a single caller under controlled conditions; and on release
 	 * only one caller may release us. Neither the two may cross.
 	 */
-	if (!list_empty(&obj->mm.link)) { /* pinned by caller */
-		struct drm_i915_private *i915 = obj_to_i915(obj);
-		unsigned long flags;
-
-		spin_lock_irqsave(&i915->mm.obj_lock, flags);
-		GEM_BUG_ON(list_empty(&obj->mm.link));
+	if (atomic_add_unless(&obj->mm.shrink_pin, 1, 0))
+		return;
 
+	spin_lock_irqsave(&i915->mm.obj_lock, flags);
+	if (!atomic_fetch_inc(&obj->mm.shrink_pin) &&
+	    !list_empty(&obj->mm.link)) {
 		list_del_init(&obj->mm.link);
 		i915->mm.shrink_count--;
 		i915->mm.shrink_memory -= obj->base.size;
-
-		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 	}
+	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 }
 
 static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
 					      struct list_head *head)
 {
+	struct drm_i915_private *i915 = obj_to_i915(obj);
+	unsigned long flags;
+
 	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
-	GEM_BUG_ON(!list_empty(&obj->mm.link));
+	if (!i915_gem_object_is_shrinkable(obj))
+		return;
 
-	if (i915_gem_object_is_shrinkable(obj)) {
-		struct drm_i915_private *i915 = obj_to_i915(obj);
-		unsigned long flags;
+	if (atomic_add_unless(&obj->mm.shrink_pin, -1, 1))
+		return;
 
-		spin_lock_irqsave(&i915->mm.obj_lock, flags);
-		GEM_BUG_ON(!kref_read(&obj->base.refcount));
+	spin_lock_irqsave(&i915->mm.obj_lock, flags);
+	GEM_BUG_ON(!kref_read(&obj->base.refcount));
+	if (atomic_dec_and_test(&obj->mm.shrink_pin)) {
+		GEM_BUG_ON(!list_empty(&obj->mm.link));
 
 		list_add_tail(&obj->mm.link, head);
 		i915->mm.shrink_count++;
 		i915->mm.shrink_memory += obj->base.size;
 
-		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 	}
+	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 }
 
 void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index f55691d151ae..c0495811f493 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -134,8 +134,8 @@ static int __context_pin_state(struct i915_vma *vma)
 
 static void __context_unpin_state(struct i915_vma *vma)
 {
-	__i915_vma_unpin(vma);
 	i915_vma_make_shrinkable(vma);
+	__i915_vma_unpin(vma);
 }
 
 static void __intel_context_retire(struct i915_active *active)
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 12/28] drm/i915: Only track bound elements of the GTT
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (9 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 11/28] drm/i915: Make shrink/unshrink be atomic Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 13/28] drm/i915: Make i915_vma.flags atomic_t for mutex reduction Chris Wilson
                   ` (18 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

The premise here is simply to avoid having to acquire the vm->mutex
inside vma create/destroy to update the vm->unbound_list, to avoid some
nasty lock recursions later.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c    |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c           | 23 ++++---------------
 drivers/gpu/drm/i915/i915_gem_gtt.h           |  5 ----
 drivers/gpu/drm/i915/i915_vma.c               | 12 ++--------
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  2 +-
 5 files changed, 8 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index aa533b4ab5f5..2e1bfd5e4adf 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -689,7 +689,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	__i915_vma_set_map_and_fenceable(vma);
 
 	mutex_lock(&ggtt->vm.mutex);
-	list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
+	list_add_tail(&vma->vm_link, &ggtt->vm.bound_list);
 	mutex_unlock(&ggtt->vm.mutex);
 
 	GEM_BUG_ON(i915_gem_object_is_shrinkable(obj));
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index b001d8b0f787..b5cc500eca23 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -505,19 +505,12 @@ static void i915_address_space_fini(struct i915_address_space *vm)
 
 static void ppgtt_destroy_vma(struct i915_address_space *vm)
 {
-	struct list_head *phases[] = {
-		&vm->bound_list,
-		&vm->unbound_list,
-		NULL,
-	}, **phase;
+	struct i915_vma *vma, *vn;
 
 	mutex_lock(&vm->i915->drm.struct_mutex);
-	for (phase = phases; *phase; phase++) {
-		struct i915_vma *vma, *vn;
-
-		list_for_each_entry_safe(vma, vn, *phase, vm_link)
-			i915_vma_destroy(vma);
-	}
+	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link)
+		i915_vma_destroy(vma);
+	GEM_BUG_ON(!list_empty(&vm->bound_list));
 	mutex_unlock(&vm->i915->drm.struct_mutex);
 }
 
@@ -528,9 +521,6 @@ static void __i915_vm_release(struct work_struct *work)
 
 	ppgtt_destroy_vma(vm);
 
-	GEM_BUG_ON(!list_empty(&vm->bound_list));
-	GEM_BUG_ON(!list_empty(&vm->unbound_list));
-
 	vm->cleanup(vm);
 	i915_address_space_fini(vm);
 
@@ -569,7 +559,6 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 
 	stash_init(&vm->free_pages);
 
-	INIT_LIST_HEAD(&vm->unbound_list);
 	INIT_LIST_HEAD(&vm->bound_list);
 }
 
@@ -1883,10 +1872,6 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 	INIT_LIST_HEAD(&vma->obj_link);
 	INIT_LIST_HEAD(&vma->closed_link);
 
-	mutex_lock(&vma->vm->mutex);
-	list_add(&vma->vm_link, &vma->vm->unbound_list);
-	mutex_unlock(&vma->vm->mutex);
-
 	return vma;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index dcc3d4e88a45..049db03c3b6d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -321,11 +321,6 @@ struct i915_address_space {
 	 */
 	struct list_head bound_list;
 
-	/**
-	 * List of vma that are not unbound.
-	 */
-	struct list_head unbound_list;
-
 	struct pagestash free_pages;
 
 	/* Global GTT */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 49fe4a664d29..f83500944f80 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -218,10 +218,6 @@ vma_create(struct drm_i915_gem_object *obj,
 
 	spin_unlock(&obj->vma.lock);
 
-	mutex_lock(&vm->mutex);
-	list_add(&vma->vm_link, &vm->unbound_list);
-	mutex_unlock(&vm->mutex);
-
 	return vma;
 
 err_vma:
@@ -662,7 +658,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level));
 
 	mutex_lock(&vma->vm->mutex);
-	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
 	mutex_unlock(&vma->vm->mutex);
 
 	if (vma->obj) {
@@ -690,7 +686,7 @@ i915_vma_remove(struct i915_vma *vma)
 
 	mutex_lock(&vma->vm->mutex);
 	drm_mm_remove_node(&vma->node);
-	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
+	list_del(&vma->vm_link);
 	mutex_unlock(&vma->vm->mutex);
 
 	/*
@@ -803,10 +799,6 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(vma->fence);
 
-	mutex_lock(&vma->vm->mutex);
-	list_del(&vma->vm_link);
-	mutex_unlock(&vma->vm->mutex);
-
 	if (vma->obj) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 31a51ca1ddcb..a90c9be95f8c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1237,7 +1237,7 @@ static void track_vma_bind(struct i915_vma *vma)
 	vma->pages = obj->mm.pages;
 
 	mutex_lock(&vma->vm->mutex);
-	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
 	mutex_unlock(&vma->vm->mutex);
 }
 
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 13/28] drm/i915: Make i915_vma.flags atomic_t for mutex reduction
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (10 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 12/28] drm/i915: Only track bound elements of the GTT Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 14/28] drm/i915: Mark up address spaces that may need to allocate Chris Wilson
                   ` (17 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

In preparation for reducing struct_mutex stranglehold around the vm,
make the vma.flags atomic so that we can acquire a pin on the vma
atomically before deciding if we need to take the mutex.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 14 ++---
 drivers/gpu/drm/i915/i915_vma.c            | 29 +++++-----
 drivers/gpu/drm/i915/i915_vma.h            | 63 +++++++++++++---------
 drivers/gpu/drm/i915/selftests/mock_gtt.c  |  4 +-
 6 files changed, 65 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index d7855dc5a5c5..0ef60dae23a7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -163,7 +163,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 
 		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
 			GEM_BUG_ON(i915_vma_is_active(vma));
-			vma->flags &= ~I915_VMA_PIN_MASK;
+			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
 			i915_vma_destroy(vma);
 		}
 		GEM_BUG_ON(!list_empty(&obj->vma.list));
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 2e1bfd5e4adf..0d81de1461b4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -685,7 +685,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
 	vma->pages = obj->mm.pages;
-	vma->flags |= I915_VMA_GLOBAL_BIND;
+	set_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
 	__i915_vma_set_map_and_fenceable(vma);
 
 	mutex_lock(&ggtt->vm.mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index b5cc500eca23..f62641f86545 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -155,7 +155,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 	u32 pte_flags;
 	int err;
 
-	if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
+	if (!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) {
 		err = vma->vm->allocate_va_range(vma->vm,
 						 vma->node.start, vma->size);
 		if (err)
@@ -1866,7 +1866,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 
 	vma->size = size;
 	vma->fence_size = size;
-	vma->flags = I915_VMA_GGTT;
+	atomic_set(&vma->flags, I915_VMA_GGTT);
 	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
 
 	INIT_LIST_HEAD(&vma->obj_link);
@@ -2425,7 +2425,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
 	 * upgrade to both bound if we bind either to avoid double-binding.
 	 */
-	vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
+	atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
 
 	return 0;
 }
@@ -2455,7 +2455,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	if (flags & I915_VMA_LOCAL_BIND) {
 		struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
 
-		if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
+		if (!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) {
 			ret = alias->vm.allocate_va_range(&alias->vm,
 							  vma->node.start,
 							  vma->size);
@@ -2483,7 +2483,7 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
 
-	if (vma->flags & I915_VMA_GLOBAL_BIND) {
+	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
 		struct i915_address_space *vm = vma->vm;
 		intel_wakeref_t wakeref;
 
@@ -2491,7 +2491,7 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
 			vm->clear_range(vm, vma->node.start, vma->size);
 	}
 
-	if (vma->flags & I915_VMA_LOCAL_BIND) {
+	if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) {
 		struct i915_address_space *vm =
 			&i915_vm_to_ggtt(vma->vm)->alias->vm;
 
@@ -3291,7 +3291,7 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
-		if (!(vma->flags & I915_VMA_GLOBAL_BIND))
+		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
 			continue;
 
 		mutex_unlock(&ggtt->vm.mutex);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index f83500944f80..8672bea8ac0c 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -171,7 +171,7 @@ vma_create(struct drm_i915_gem_object *obj,
 								i915_gem_object_get_stride(obj));
 		GEM_BUG_ON(!is_power_of_2(vma->fence_alignment));
 
-		vma->flags |= I915_VMA_GGTT;
+		__set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma));
 	}
 
 	spin_lock(&obj->vma.lock);
@@ -321,7 +321,8 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 	if (flags & PIN_USER)
 		bind_flags |= I915_VMA_LOCAL_BIND;
 
-	vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
+	vma_flags = atomic_read(&vma->flags);
+	vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
 	if (flags & PIN_UPDATE)
 		bind_flags |= vma_flags;
 	else
@@ -339,7 +340,7 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 	/* Must be incremented prior to request construction */
 	vma->vm->serial++;
 
-	vma->flags |= bind_flags;
+	atomic_or(bind_flags, &vma->flags);
 	return 0;
 }
 
@@ -358,7 +359,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	}
 
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-	GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0);
+	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));
 
 	ptr = vma->iomap;
 	if (ptr == NULL) {
@@ -471,9 +472,9 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 	mappable = vma->node.start + vma->fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end;
 
 	if (mappable && fenceable)
-		vma->flags |= I915_VMA_CAN_FENCE;
+		set_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
 	else
-		vma->flags &= ~I915_VMA_CAN_FENCE;
+		clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
 }
 
 static bool color_differs(struct drm_mm_node *node, unsigned long color)
@@ -546,7 +547,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	int ret;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
-	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
+	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 
 	size = max(size, vma->size);
@@ -680,7 +681,7 @@ static void
 i915_vma_remove(struct i915_vma *vma)
 {
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
+	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
 
 	vma->ops->clear_pages(vma);
 
@@ -711,7 +712,7 @@ i915_vma_remove(struct i915_vma *vma)
 int __i915_vma_do_pin(struct i915_vma *vma,
 		      u64 size, u64 alignment, u64 flags)
 {
-	const unsigned int bound = vma->flags;
+	const unsigned int bound = atomic_read(&vma->flags);
 	int ret;
 
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
@@ -734,9 +735,9 @@ int __i915_vma_do_pin(struct i915_vma *vma,
 	if (ret)
 		goto err_remove;
 
-	GEM_BUG_ON((vma->flags & I915_VMA_BIND_MASK) == 0);
+	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_BIND_MASK));
 
-	if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
+	if ((bound ^ atomic_read(&vma->flags)) & I915_VMA_GLOBAL_BIND)
 		__i915_vma_set_map_and_fenceable(vma);
 
 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
@@ -746,7 +747,7 @@ int __i915_vma_do_pin(struct i915_vma *vma,
 	if ((bound & I915_VMA_BIND_MASK) == 0) {
 		i915_vma_remove(vma);
 		GEM_BUG_ON(vma->pages);
-		GEM_BUG_ON(vma->flags & I915_VMA_BIND_MASK);
+		GEM_BUG_ON(atomic_read(&vma->flags) & I915_VMA_BIND_MASK);
 	}
 err_unpin:
 	__i915_vma_unpin(vma);
@@ -989,7 +990,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 		mutex_unlock(&vma->vm->mutex);
 
 		__i915_vma_iounmap(vma);
-		vma->flags &= ~I915_VMA_CAN_FENCE;
+		clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
 	}
 	GEM_BUG_ON(vma->fence);
 	GEM_BUG_ON(i915_vma_has_userfault(vma));
@@ -998,7 +999,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 		trace_i915_vma_unbind(vma);
 		vma->ops->unbind_vma(vma);
 	}
-	vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
+	atomic_and(~I915_VMA_BIND_MASK, &vma->flags);
 
 	i915_vma_remove(vma);
 
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index af2ef0a51455..02d7d815407c 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -72,7 +72,7 @@ struct i915_vma {
 	 * that exist in the ctx->handle_vmas LUT for this vma.
 	 */
 	atomic_t open_count;
-	unsigned long flags;
+	atomic_t flags;
 	/**
 	 * How many users have pinned this object in GTT space.
 	 *
@@ -97,18 +97,29 @@ struct i915_vma {
 	 * users.
 	 */
 #define I915_VMA_PIN_MASK 0xff
-#define I915_VMA_PIN_OVERFLOW	BIT(8)
+#define I915_VMA_PIN_OVERFLOW_BIT 8
+#define I915_VMA_PIN_OVERFLOW	((int)BIT(I915_VMA_PIN_OVERFLOW_BIT))
 
 	/** Flags and address space this VMA is bound to */
-#define I915_VMA_GLOBAL_BIND	BIT(9)
-#define I915_VMA_LOCAL_BIND	BIT(10)
-#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
+#define I915_VMA_GLOBAL_BIND_BIT 9
+#define I915_VMA_LOCAL_BIND_BIT 10
 
-#define I915_VMA_GGTT		BIT(11)
-#define I915_VMA_CAN_FENCE	BIT(12)
+#define I915_VMA_GLOBAL_BIND	((int)BIT(I915_VMA_GLOBAL_BIND_BIT))
+#define I915_VMA_LOCAL_BIND	((int)BIT(I915_VMA_LOCAL_BIND_BIT))
+
+#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | \
+			    I915_VMA_LOCAL_BIND | \
+			    I915_VMA_PIN_OVERFLOW)
+
+#define I915_VMA_GGTT_BIT	11
+#define I915_VMA_CAN_FENCE_BIT	12
 #define I915_VMA_USERFAULT_BIT	13
-#define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
-#define I915_VMA_GGTT_WRITE	BIT(14)
+#define I915_VMA_GGTT_WRITE_BIT	14
+
+#define I915_VMA_GGTT		((int)BIT(I915_VMA_GGTT_BIT))
+#define I915_VMA_CAN_FENCE	((int)BIT(I915_VMA_CAN_FENCE_BIT))
+#define I915_VMA_USERFAULT	((int)BIT(I915_VMA_USERFAULT_BIT))
+#define I915_VMA_GGTT_WRITE	((int)BIT(I915_VMA_GGTT_WRITE_BIT))
 
 	struct i915_active active;
 
@@ -162,48 +173,52 @@ int __must_check i915_vma_move_to_active(struct i915_vma *vma,
 					 struct i915_request *rq,
 					 unsigned int flags);
 
+#define __i915_vma_flags(v) ((unsigned long *)&(v)->flags)
+
 static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
 {
-	return vma->flags & I915_VMA_GGTT;
+	return test_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma));
 }
 
 static inline bool i915_vma_has_ggtt_write(const struct i915_vma *vma)
 {
-	return vma->flags & I915_VMA_GGTT_WRITE;
+	return test_bit(I915_VMA_GGTT_WRITE_BIT, __i915_vma_flags(vma));
 }
 
 static inline void i915_vma_set_ggtt_write(struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-	vma->flags |= I915_VMA_GGTT_WRITE;
+	set_bit(I915_VMA_GGTT_WRITE_BIT, __i915_vma_flags(vma));
 }
 
-static inline void i915_vma_unset_ggtt_write(struct i915_vma *vma)
+static inline bool i915_vma_unset_ggtt_write(struct i915_vma *vma)
 {
-	vma->flags &= ~I915_VMA_GGTT_WRITE;
+	return test_and_clear_bit(I915_VMA_GGTT_WRITE_BIT,
+				  __i915_vma_flags(vma));
 }
 
 void i915_vma_flush_writes(struct i915_vma *vma);
 
 static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
 {
-	return vma->flags & I915_VMA_CAN_FENCE;
+	return test_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
 }
 
 static inline bool i915_vma_set_userfault(struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
-	return __test_and_set_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
+	return __test_and_set_bit(I915_VMA_USERFAULT_BIT,
+				  __i915_vma_flags(vma));
 }
 
 static inline void i915_vma_unset_userfault(struct i915_vma *vma)
 {
-	return __clear_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
+	return __clear_bit(I915_VMA_USERFAULT_BIT, __i915_vma_flags(vma));
 }
 
 static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
 {
-	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
+	return test_bit(I915_VMA_USERFAULT_BIT, __i915_vma_flags(vma));
 }
 
 static inline bool i915_vma_is_closed(const struct i915_vma *vma)
@@ -330,7 +345,7 @@ i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	/* Pin early to prevent the shrinker/eviction logic from destroying
 	 * our vma as we insert and bind.
 	 */
-	if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) {
+	if (likely(((atomic_inc_return(&vma->flags) ^ flags) & I915_VMA_BIND_MASK) == 0)) {
 		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 		GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
 		return 0;
@@ -341,7 +356,7 @@ i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 
 static inline int i915_vma_pin_count(const struct i915_vma *vma)
 {
-	return vma->flags & I915_VMA_PIN_MASK;
+	return atomic_read(&vma->flags) & I915_VMA_PIN_MASK;
 }
 
 static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
@@ -351,13 +366,13 @@ static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
 
 static inline void __i915_vma_pin(struct i915_vma *vma)
 {
-	vma->flags++;
-	GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW);
+	atomic_inc(&vma->flags);
+	GEM_BUG_ON(atomic_read(&vma->flags) & I915_VMA_PIN_OVERFLOW);
 }
 
 static inline void __i915_vma_unpin(struct i915_vma *vma)
 {
-	vma->flags--;
+	atomic_dec(&vma->flags);
 }
 
 static inline void i915_vma_unpin(struct i915_vma *vma)
@@ -370,7 +385,7 @@ static inline void i915_vma_unpin(struct i915_vma *vma)
 static inline bool i915_vma_is_bound(const struct i915_vma *vma,
 				     unsigned int where)
 {
-	return vma->flags & where;
+	return atomic_read(&vma->flags) & where;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index e62a67e0f79c..366335981086 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -43,7 +43,7 @@ static int mock_bind_ppgtt(struct i915_vma *vma,
 			   u32 flags)
 {
 	GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND);
-	vma->flags |= I915_VMA_LOCAL_BIND;
+	set_bit(I915_VMA_LOCAL_BIND_BIT, __i915_vma_flags(vma));
 	return 0;
 }
 
@@ -86,7 +86,7 @@ static int mock_bind_ggtt(struct i915_vma *vma,
 			  enum i915_cache_level cache_level,
 			  u32 flags)
 {
-	vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
+	atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
 	return 0;
 }
 
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 14/28] drm/i915: Mark up address spaces that may need to allocate
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (11 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 13/28] drm/i915: Make i915_vma.flags atomic_t for mutex reduction Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 15/28] drm/i915: Pull i915_vma_pin under the vm->mutex Chris Wilson
                   ` (16 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

Since we cannot allocate underneath the vm->mutex (it is used in the
direct-reclaim paths), we need to shift the allocations off into a
mutexless worker with fence recursion prevention. To know when we need
this protection, we mark up the address spaces that do allocate before
insertion.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++
 drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f62641f86545..10545c64e27f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1500,6 +1500,7 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 			goto err_free_pd;
 	}
 
+	ppgtt->vm.bind_alloc = I915_VMA_LOCAL_BIND;
 	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
 	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
 	ppgtt->vm.clear_range = gen8_ppgtt_clear;
@@ -1944,6 +1945,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 	ppgtt_init(&ppgtt->base, &i915->gt);
 	ppgtt->base.vm.top = 1;
 
+	ppgtt->base.vm.bind_alloc = I915_VMA_LOCAL_BIND;
 	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
 	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
 	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
@@ -2575,6 +2577,7 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
 		goto err_ppgtt;
 
 	ggtt->alias = ppgtt;
+	ggtt->vm.bind_alloc |= ppgtt->vm.bind_alloc;
 
 	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
 	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 049db03c3b6d..46d915d7ffb8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -305,7 +305,9 @@ struct i915_address_space {
 	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
 	u64 reserved;		/* size addr space reserved */
 
+	unsigned int bind_alloc;
 	unsigned int serial;
+
 	bool closed;
 
 	struct mutex mutex; /* protects vma and our lists */
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 15/28] drm/i915: Pull i915_vma_pin under the vm->mutex
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (12 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 14/28] drm/i915: Mark up address spaces that may need to allocate Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 16/28] drm/i915: Push the i915_active.retire into a worker Chris Wilson
                   ` (15 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do the allocation and apply the PTE updates after we have
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/display/intel_display.c  |  30 +-
 drivers/gpu/drm/i915/display/intel_fbdev.c    |   8 +-
 drivers/gpu/drm/i915/display/intel_overlay.c  |  11 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  20 +-
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    |  19 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  13 +-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      |  21 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |  33 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |   5 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  67 ----
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c    |   5 +-
 drivers/gpu/drm/i915/gem/i915_gem_tiling.c    |  15 +-
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c   |  27 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  23 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c    |   2 -
 drivers/gpu/drm/i915/gt/intel_gt.c            |   5 +-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  19 +-
 drivers/gpu/drm/i915/gvt/aperture_gm.c        |  12 +-
 drivers/gpu/drm/i915/i915_active.c            |  66 ++++
 drivers/gpu/drm/i915/i915_active.h            |   2 +
 drivers/gpu/drm/i915/i915_active_types.h      |   5 +
 drivers/gpu/drm/i915/i915_gem.c               |  79 ++--
 drivers/gpu/drm/i915/i915_gem_evict.c         |  20 +-
 drivers/gpu/drm/i915/i915_gem_fence_reg.c     |   4 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c           | 100 +++--
 drivers/gpu/drm/i915/i915_gem_gtt.h           |  31 +-
 drivers/gpu/drm/i915/i915_perf.c              |  26 +-
 drivers/gpu/drm/i915/i915_vma.c               | 349 +++++++++++-------
 drivers/gpu/drm/i915/i915_vma.h               |  59 ++-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  36 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  88 +++--
 drivers/gpu/drm/i915/selftests/i915_vma.c     |   2 +
 32 files changed, 631 insertions(+), 571 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index ea2915dde6ab..6e74c33f2ec4 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2079,8 +2079,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 	unsigned int pinctl;
 	u32 alignment;
 
-	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
-
 	alignment = intel_surf_alignment(fb, 0);
 
 	/* Note that the w/a also requires 64 PTE of padding following the
@@ -2161,8 +2159,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 
 void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
 	i915_gem_object_lock(vma->obj);
 	if (flags & PLANE_HAS_FENCE)
 		i915_vma_unpin_fence(vma);
@@ -3063,12 +3059,10 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
 		return false;
 	}
 
-	mutex_lock(&dev->struct_mutex);
 	obj = i915_gem_object_create_stolen_for_preallocated(dev_priv,
 							     base_aligned,
 							     base_aligned,
 							     size_aligned);
-	mutex_unlock(&dev->struct_mutex);
 	if (!obj)
 		return false;
 
@@ -3230,13 +3224,11 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 	intel_state->color_plane[0].stride =
 		intel_fb_pitch(fb, 0, intel_state->base.rotation);
 
-	mutex_lock(&dev->struct_mutex);
 	intel_state->vma =
 		intel_pin_and_fence_fb_obj(fb,
 					   &intel_state->view,
 					   intel_plane_uses_fence(intel_state),
 					   &intel_state->flags);
-	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(intel_state->vma)) {
 		DRM_ERROR("failed to pin boot fb on pipe %d: %li\n",
 			  intel_crtc->pipe, PTR_ERR(intel_state->vma));
@@ -14330,8 +14322,6 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
  * bits.  Some older platforms need special physical address handling for
  * cursor planes.
  *
- * Must be called with struct_mutex held.
- *
  * Returns 0 on success, negative error code on failure.
  */
 int
@@ -14388,15 +14378,8 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 	if (ret)
 		return ret;
 
-	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
-	if (ret) {
-		i915_gem_object_unpin_pages(obj);
-		return ret;
-	}
-
 	ret = intel_plane_pin_fb(to_intel_plane_state(new_state));
 
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 	if (ret)
 		return ret;
@@ -14445,8 +14428,6 @@ intel_prepare_plane_fb(struct drm_plane *plane,
  * @old_state: the state from the previous modeset
  *
  * Cleans up a framebuffer that has just been removed from a plane.
- *
- * Must be called with struct_mutex held.
  */
 void
 intel_cleanup_plane_fb(struct drm_plane *plane,
@@ -14462,9 +14443,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
 	}
 
 	/* Should only be called after a successful intel_prepare_plane_fb()! */
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	intel_plane_unpin_fb(to_intel_plane_state(old_state));
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 }
 
 int
@@ -14664,7 +14643,6 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 			   u32 src_w, u32 src_h,
 			   struct drm_modeset_acquire_ctx *ctx)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
 	struct drm_plane_state *old_plane_state, *new_plane_state;
 	struct intel_plane *intel_plane = to_intel_plane(plane);
 	struct intel_crtc_state *crtc_state =
@@ -14730,13 +14708,9 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 	if (ret)
 		goto out_free;
 
-	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
-	if (ret)
-		goto out_free;
-
 	ret = intel_plane_pin_fb(to_intel_plane_state(new_plane_state));
 	if (ret)
-		goto out_unlock;
+		goto out_free;
 
 	intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_FLIP);
 	intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->fb),
@@ -14766,8 +14740,6 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 
 	intel_plane_unpin_fb(to_intel_plane_state(old_plane_state));
 
-out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 out_free:
 	if (new_crtc_state)
 		intel_crtc_destroy_state(crtc, &new_crtc_state->base);
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index d59eee5c5d9c..a3dea3f2dacd 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -204,7 +204,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 		sizes->fb_height = intel_fb->base.height;
 	}
 
-	mutex_lock(&dev->struct_mutex);
 	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	/* Pin the GGTT vma for our access via info->screen_base.
@@ -266,7 +265,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	ifbdev->vma_flags = flags;
 
 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-	mutex_unlock(&dev->struct_mutex);
 	vga_switcheroo_client_fb_set(pdev, info);
 	return 0;
 
@@ -274,7 +272,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	intel_unpin_fb_vma(vma, flags);
 out_unlock:
 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
 
@@ -291,11 +288,8 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev)
 
 	drm_fb_helper_fini(&ifbdev->helper);
 
-	if (ifbdev->vma) {
-		mutex_lock(&ifbdev->helper.dev->struct_mutex);
+	if (ifbdev->vma)
 		intel_unpin_fb_vma(ifbdev->vma, ifbdev->vma_flags);
-		mutex_unlock(&ifbdev->helper.dev->struct_mutex);
-	}
 
 	if (ifbdev->fb)
 		drm_framebuffer_remove(&ifbdev->fb->base);
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 29edfc343716..4f36557b3f3b 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -1303,15 +1303,11 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
 	struct i915_vma *vma;
 	int err;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	obj = i915_gem_object_create_stolen(i915, PAGE_SIZE);
 	if (obj == NULL)
 		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		goto err_unlock;
-	}
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
 
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
 	if (IS_ERR(vma)) {
@@ -1332,13 +1328,10 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
 	}
 
 	overlay->reg_bo = obj;
-	mutex_unlock(&i915->drm.struct_mutex);
 	return 0;
 
 err_put_bo:
 	i915_gem_object_put(obj);
-err_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index d7c17a8dadff..20ee89e120b3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -313,8 +313,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
 	release_hw_id(ctx);
-	if (ctx->vm)
-		i915_vm_put(ctx->vm);
 
 	free_engines(rcu_access_pointer(ctx->engines));
 	mutex_destroy(&ctx->engines_mutex);
@@ -379,9 +377,13 @@ void i915_gem_context_release(struct kref *ref)
 
 static void context_close(struct i915_gem_context *ctx)
 {
+	i915_gem_context_set_closed(ctx);
+
+	if (ctx->vm)
+		i915_vm_close(ctx->vm);
+
 	mutex_lock(&ctx->mutex);
 
-	i915_gem_context_set_closed(ctx);
 	ctx->file_priv = ERR_PTR(-EBADF);
 
 	/*
@@ -475,7 +477,7 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
 
 	GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
 
-	ctx->vm = i915_vm_get(vm);
+	ctx->vm = i915_vm_open(vm);
 	context_apply_all(ctx, __apply_ppgtt, vm);
 
 	return old;
@@ -489,7 +491,7 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
 
 	vm = __set_ppgtt(ctx, vm);
 	if (vm)
-		i915_vm_put(vm);
+		i915_vm_close(vm);
 }
 
 static void __set_timeline(struct intel_timeline **dst,
@@ -954,7 +956,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
 	if (ret < 0)
 		goto err_unlock;
 
-	i915_vm_get(vm);
+	i915_vm_open(vm);
 
 	args->size = 0;
 	args->value = ret;
@@ -974,7 +976,7 @@ static void set_ppgtt_barrier(void *data)
 	if (INTEL_GEN(old->i915) < 8)
 		gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old));
 
-	i915_vm_put(old);
+	i915_vm_close(old);
 }
 
 static int emit_ppgtt_update(struct i915_request *rq, void *data)
@@ -1094,8 +1096,8 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 				   set_ppgtt_barrier,
 				   old);
 	if (err) {
-		i915_vm_put(__set_ppgtt(ctx, old));
-		i915_vm_put(old);
+		i915_vm_close(__set_ppgtt(ctx, old));
+		i915_vm_close(old);
 	}
 
 unlock:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index f0c437b6e995..46a23409e1c0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -202,7 +202,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		    i915_gem_valid_gtt_space(vma, cache_level))
 			continue;
 
-		ret = i915_vma_unbind(vma);
+		ret = mutex_lock_interruptible(&vma->vm->mutex);
+		if (!ret) {
+			ret = i915_vma_unbind(vma);
+			mutex_unlock(&vma->vm->mutex);
+		}
 		if (ret)
 			return ret;
 
@@ -288,7 +292,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 			if (!drm_mm_node_allocated(&vma->node))
 				continue;
 
-			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+			/* Wait for an earlier async bind */
+			ret = i915_active_wait(&vma->active);
+			if (ret)
+				return ret;
+
+			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE, NULL);
 			if (ret)
 				return ret;
 		}
@@ -389,16 +398,11 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto out;
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		goto out;
-
 	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret == 0) {
 		ret = i915_gem_object_set_cache_level(obj, level);
 		i915_gem_object_unlock(obj);
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 
 out:
 	i915_gem_object_put(obj);
@@ -483,6 +487,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
 
+		GEM_BUG_ON(vma->vm != &i915->ggtt.vm);
 		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 	}
 	mutex_unlock(&i915->ggtt.vm.mutex);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 493f07806b08..068c7976d7e9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -549,8 +549,11 @@ eb_add_vma(struct i915_execbuffer *eb,
 		eb_unreserve_vma(vma, vma->exec_flags);
 
 		list_add_tail(&vma->exec_link, &eb->unbound);
-		if (drm_mm_node_allocated(&vma->node))
+		if (drm_mm_node_allocated(&vma->node)) {
+			mutex_lock(&vma->vm->mutex);
 			err = i915_vma_unbind(vma);
+			mutex_unlock(&vma->vm->mutex);
+		}
 		if (unlikely(err))
 			vma->exec_flags = NULL;
 	}
@@ -697,7 +700,9 @@ static int eb_reserve(struct i915_execbuffer *eb)
 
 		case 1:
 			/* Too fragmented, unbind everything and retry */
+			mutex_lock(&eb->context->vm->mutex);
 			err = i915_gem_evict_vm(eb->context->vm);
+			mutex_unlock(&eb->context->vm->mutex);
 			if (err)
 				return err;
 			break;
@@ -967,7 +972,9 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 			ggtt->vm.clear_range(&ggtt->vm,
 					     cache->node.start,
 					     cache->node.size);
+			mutex_lock(&ggtt->vm.mutex);
 			drm_mm_remove_node(&cache->node);
+			mutex_unlock(&ggtt->vm.mutex);
 		} else {
 			i915_vma_unpin((struct i915_vma *)cache->node.mm);
 		}
@@ -1042,11 +1049,13 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 					       PIN_NOEVICT);
 		if (IS_ERR(vma)) {
 			memset(&cache->node, 0, sizeof(cache->node));
+			mutex_lock(&ggtt->vm.mutex);
 			err = drm_mm_insert_node_in_range
 				(&ggtt->vm.mm, &cache->node,
 				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
 				 0, ggtt->mappable_end,
 				 DRM_MM_INSERT_LOW);
+			mutex_unlock(&ggtt->vm.mutex);
 			if (err) /* no inactive aperture space, use cpu reloc */
 				return NULL;
 		} else {
@@ -1388,7 +1397,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 		if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
 		    IS_GEN(eb->i915, 6)) {
 			err = i915_vma_bind(target, target->obj->cache_level,
-					    PIN_GLOBAL);
+					    PIN_GLOBAL, NULL);
 			if (WARN_ONCE(err,
 				      "Unexpected failure to bind target VMA!"))
 				return err;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 595539a09e38..0440dbfda295 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -251,16 +251,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 		goto err_rpm;
 	}
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		goto err_reset;
-
-	/* Access to snoopable pages through the GTT is incoherent. */
-	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
-		ret = -EFAULT;
-		goto err_unlock;
-	}
-
 	/* Now pin it into the GTT as needed */
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 				       PIN_MAPPABLE |
@@ -290,7 +280,13 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	}
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
-		goto err_unlock;
+		goto err_reset;
+	}
+
+	/* Access to snoopable pages through the GTT is incoherent. */
+	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
+		ret = -EFAULT;
+		goto err_unpin;
 	}
 
 	ret = i915_vma_pin_fence(vma);
@@ -319,13 +315,10 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 				   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
 
 	i915_vma_set_ggtt_write(vma);
-
 err_fence:
 	i915_vma_unpin_fence(vma);
 err_unpin:
 	__i915_vma_unpin(vma);
-err_unlock:
-	mutex_unlock(&dev->struct_mutex);
 err_reset:
 	intel_gt_reset_unlock(ggtt->vm.gt, srcu);
 err_rpm:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 0ef60dae23a7..dbf9be9a79f4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -155,21 +155,30 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	llist_for_each_entry_safe(obj, on, freed, freed) {
-		struct i915_vma *vma, *vn;
-
 		trace_i915_gem_object_destroy(obj);
 
-		mutex_lock(&i915->drm.struct_mutex);
-
-		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
-			GEM_BUG_ON(i915_vma_is_active(vma));
-			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
-			i915_vma_destroy(vma);
+		if (!list_empty(&obj->vma.list)) {
+			struct i915_vma *vma;
+
+			/*
+			 * Note that the vma keeps an object reference while it
+			 * is active, so it *should* not sleep while we destroy
+			 * it. Our debug code errs on the side of caution and
+			 * insists it *might*. For the moment, play along.
+			 */
+			spin_lock(&obj->vma.lock);
+			while ((vma = list_first_entry_or_null(&obj->vma.list,
+							       struct i915_vma,
+							       obj_link))) {
+				GEM_BUG_ON(vma->obj != obj);
+				spin_unlock(&obj->vma.lock);
+
+				i915_vma_destroy(vma);
+
+				spin_lock(&obj->vma.lock);
+			}
+			spin_unlock(&obj->vma.lock);
 		}
-		GEM_BUG_ON(!list_empty(&obj->vma.list));
-		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
-
-		mutex_unlock(&i915->drm.struct_mutex);
 
 		GEM_BUG_ON(atomic_read(&obj->bind_count));
 		GEM_BUG_ON(obj->userfault_count);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 29b9eddc4c7f..a78af25dce36 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -106,6 +106,11 @@ static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
 	dma_resv_lock(obj->base.resv, NULL);
 }
 
+static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
+{
+	return dma_resv_trylock(obj->base.resv);
+}
+
 static inline int
 i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index d2c05d752909..447ee2b1e222 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -16,40 +16,6 @@
 
 #include "i915_trace.h"
 
-static bool shrinker_lock(struct drm_i915_private *i915,
-			  unsigned int flags,
-			  bool *unlock)
-{
-	struct mutex *m = &i915->drm.struct_mutex;
-
-	switch (mutex_trylock_recursive(m)) {
-	case MUTEX_TRYLOCK_RECURSIVE:
-		*unlock = false;
-		return true;
-
-	case MUTEX_TRYLOCK_FAILED:
-		*unlock = false;
-		if (flags & I915_SHRINK_ACTIVE &&
-		    mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0)
-			*unlock = true;
-		return *unlock;
-
-	case MUTEX_TRYLOCK_SUCCESS:
-		*unlock = true;
-		return true;
-	}
-
-	BUG();
-}
-
-static void shrinker_unlock(struct drm_i915_private *i915, bool unlock)
-{
-	if (!unlock)
-		return;
-
-	mutex_unlock(&i915->drm.struct_mutex);
-}
-
 static bool swap_available(void)
 {
 	return get_nr_swap_pages() > 0;
@@ -155,10 +121,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	intel_wakeref_t wakeref = 0;
 	unsigned long count = 0;
 	unsigned long scanned = 0;
-	bool unlock;
-
-	if (!shrinker_lock(i915, shrink, &unlock))
-		return 0;
 
 	/*
 	 * When shrinking the active list, we should also consider active
@@ -268,8 +230,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	if (shrink & I915_SHRINK_BOUND)
 		intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 
-	shrinker_unlock(i915, unlock);
-
 	if (nr_scanned)
 		*nr_scanned += scanned;
 	return count;
@@ -339,13 +299,9 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 	struct drm_i915_private *i915 =
 		container_of(shrinker, struct drm_i915_private, mm.shrinker);
 	unsigned long freed;
-	bool unlock;
 
 	sc->nr_scanned = 0;
 
-	if (!shrinker_lock(i915, 0, &unlock))
-		return SHRINK_STOP;
-
 	freed = i915_gem_shrink(i915,
 				sc->nr_to_scan,
 				&sc->nr_scanned,
@@ -366,8 +322,6 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 		}
 	}
 
-	shrinker_unlock(i915, unlock);
-
 	return sc->nr_scanned ? freed : SHRINK_STOP;
 }
 
@@ -419,10 +373,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 	struct i915_vma *vma, *next;
 	unsigned long freed_pages = 0;
 	intel_wakeref_t wakeref;
-	bool unlock;
-
-	if (!shrinker_lock(i915, 0, &unlock))
-		return NOTIFY_DONE;
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
@@ -439,15 +389,11 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 		if (!vma->iomap || i915_vma_is_active(vma))
 			continue;
 
-		mutex_unlock(&i915->ggtt.vm.mutex);
 		if (i915_vma_unbind(vma) == 0)
 			freed_pages += count;
-		mutex_lock(&i915->ggtt.vm.mutex);
 	}
 	mutex_unlock(&i915->ggtt.vm.mutex);
 
-	shrinker_unlock(i915, unlock);
-
 	*(unsigned long *)ptr += freed_pages;
 	return NOTIFY_DONE;
 }
@@ -490,22 +436,9 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
 
 	fs_reclaim_acquire(GFP_KERNEL);
 
-	/*
-	 * As we invariably rely on the struct_mutex within the shrinker,
-	 * but have a complicated recursion dance, taint all the mutexes used
-	 * within the shrinker with the struct_mutex. For completeness, we
-	 * taint with all subclass of struct_mutex, even though we should
-	 * only need tainting by I915_MM_NORMAL to catch possible ABBA
-	 * deadlocks from using struct_mutex inside @mutex.
-	 */
-	mutex_acquire(&i915->drm.struct_mutex.dep_map,
-		      I915_MM_SHRINKER, 0, _RET_IP_);
-
 	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
 	mutex_release(&mutex->dep_map, 0, _RET_IP_);
 
-	mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
-
 	fs_reclaim_release(GFP_KERNEL);
 
 	if (unlock)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 0d81de1461b4..158ddd6e735f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -621,8 +621,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	if (!drm_mm_initialized(&dev_priv->mm.stolen))
 		return NULL;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n",
 			 &stolen_offset, &gtt_offset, &size);
 
@@ -674,11 +672,13 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	 * setting up the GTT space. The actual reservation will occur
 	 * later.
 	 */
+	mutex_lock(&ggtt->vm.mutex);
 	ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
 				   size, gtt_offset, obj->cache_level,
 				   0);
 	if (ret) {
 		DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n");
+		mutex_unlock(&ggtt->vm.mutex);
 		goto err_pages;
 	}
 
@@ -688,7 +688,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	set_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
 	__i915_vma_set_map_and_fenceable(vma);
 
-	mutex_lock(&ggtt->vm.mutex);
 	list_add_tail(&vma->vm_link, &ggtt->vm.bound_list);
 	mutex_unlock(&ggtt->vm.mutex);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index ca0c2f451742..b9cfae0e4435 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -181,22 +181,25 @@ static int
 i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
 			      int tiling_mode, unsigned int stride)
 {
+	struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
 	struct i915_vma *vma;
-	int ret;
+	int ret = 0;
 
 	if (tiling_mode == I915_TILING_NONE)
 		return 0;
 
+	mutex_lock(&ggtt->vm.mutex);
 	for_each_ggtt_vma(vma, obj) {
 		if (i915_vma_fence_prepare(vma, tiling_mode, stride))
 			continue;
 
 		ret = i915_vma_unbind(vma);
 		if (ret)
-			return ret;
+			break;
 	}
+	mutex_unlock(&ggtt->vm.mutex);
 
-	return 0;
+	return ret;
 }
 
 int
@@ -212,7 +215,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 
 	GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride));
 	GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE));
-	lockdep_assert_held(&i915->drm.struct_mutex);
 
 	if ((tiling | stride) == obj->tiling_and_stride)
 		return 0;
@@ -364,12 +366,7 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
-	err = mutex_lock_interruptible(&dev->struct_mutex);
-	if (err)
-		goto err;
-
 	err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride);
-	mutex_unlock(&dev->struct_mutex);
 
 	/* We have to maintain this existing ABI... */
 	args->stride = i915_gem_object_get_stride(obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 74da35611d7c..cd36236e3faf 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -92,7 +92,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 	struct i915_mmu_notifier *mn =
 		container_of(_mn, struct i915_mmu_notifier, mn);
 	struct interval_tree_node *it;
-	struct mutex *unlock = NULL;
 	unsigned long end;
 	int ret = 0;
 
@@ -129,33 +128,13 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 		}
 		spin_unlock(&mn->lock);
 
-		if (!unlock) {
-			unlock = &mn->mm->i915->drm.struct_mutex;
-
-			switch (mutex_trylock_recursive(unlock)) {
-			default:
-			case MUTEX_TRYLOCK_FAILED:
-				if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) {
-					i915_gem_object_put(obj);
-					return -EINTR;
-				}
-				/* fall through */
-			case MUTEX_TRYLOCK_SUCCESS:
-				break;
-
-			case MUTEX_TRYLOCK_RECURSIVE:
-				unlock = ERR_PTR(-EEXIST);
-				break;
-			}
-		}
-
 		ret = i915_gem_object_unbind(obj,
 					     I915_GEM_OBJECT_UNBIND_ACTIVE);
 		if (ret == 0)
 			ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
 		i915_gem_object_put(obj);
 		if (ret)
-			goto unlock;
+			return ret;
 
 		spin_lock(&mn->lock);
 
@@ -168,10 +147,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 	}
 	spin_unlock(&mn->lock);
 
-unlock:
-	if (!IS_ERR_OR_NULL(unlock))
-		mutex_unlock(unlock);
-
 	return ret;
 
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index c5cea4379216..cd771147b41f 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -25,6 +25,17 @@ static const unsigned int page_sizes[] = {
 	I915_GTT_PAGE_SIZE_4K,
 };
 
+static int unlocked_vma_unbind(struct i915_vma *vma)
+{
+	int ret;
+
+	mutex_lock(&vma->vm->mutex);
+	ret = i915_vma_unbind(vma);
+	mutex_unlock(&vma->vm->mutex);
+
+	return ret;
+}
+
 static unsigned int get_largest_page_size(struct drm_i915_private *i915,
 					  u64 rem)
 {
@@ -333,7 +344,11 @@ static int igt_check_page_sizes(struct i915_vma *vma)
 	struct drm_i915_private *i915 = vma->vm->i915;
 	unsigned int supported = INTEL_INFO(i915)->page_sizes;
 	struct drm_i915_gem_object *obj = vma->obj;
-	int err = 0;
+	int err;
+
+	err = i915_active_wait(&vma->active);
+	if (err)
+		return err;
 
 	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) {
 		pr_err("unsupported page_sizes.sg=%u, supported=%u\n",
@@ -526,7 +541,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
 		 * pages.
 		 */
 		for (offset = 4096; offset < page_size; offset += 4096) {
-			err = i915_vma_unbind(vma);
+			err = unlocked_vma_unbind(vma);
 			if (err) {
 				i915_vma_close(vma);
 				goto out_unpin;
@@ -941,7 +956,7 @@ static int __igt_write_huge(struct intel_context *ce,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	err = i915_vma_unbind(vma);
+	err = unlocked_vma_unbind(vma);
 	if (err)
 		goto out_vma_close;
 
@@ -1390,7 +1405,7 @@ static int igt_ppgtt_pin_update(void *arg)
 			goto out_unpin;
 		}
 
-		err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE);
+		err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE, NULL);
 		if (err)
 			goto out_unpin;
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 1d27babff0ce..9c217dfe96a9 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -205,7 +205,6 @@ static int igt_partial_tiling(void *arg)
 		goto out;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	if (1) {
@@ -318,7 +317,6 @@ next_tiling: ;
 
 out_unlock:
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 out:
 	i915_gem_object_put(obj);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index d48ec9a76ed1..c2afffb94474 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -207,11 +207,12 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
 		struct intel_uncore *uncore = gt->uncore;
+		unsigned long flags;
 
-		spin_lock_irq(&uncore->lock);
+		spin_lock_irqsave(&uncore->lock, flags);
 		intel_uncore_posting_read_fw(uncore,
 					     RING_HEAD(RENDER_RING_BASE));
-		spin_unlock_irq(&uncore->lock);
+		spin_unlock_irqrestore(&uncore->lock, flags);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index a0098fc35921..e53eea1050f8 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -1127,15 +1127,14 @@ static int evict_vma(void *data)
 {
 	struct evict_vma *arg = data;
 	struct i915_address_space *vm = arg->vma->vm;
-	struct drm_i915_private *i915 = vm->i915;
 	struct drm_mm_node evict = arg->vma->node;
 	int err;
 
 	complete(&arg->completion);
 
-	mutex_lock(&i915->drm.struct_mutex);
+	mutex_lock(&vm->mutex);
 	err = i915_gem_evict_for_node(vm, &evict, 0);
-	mutex_unlock(&i915->drm.struct_mutex);
+	mutex_unlock(&vm->mutex);
 
 	return err;
 }
@@ -1143,39 +1142,33 @@ static int evict_vma(void *data)
 static int evict_fence(void *data)
 {
 	struct evict_vma *arg = data;
-	struct drm_i915_private *i915 = arg->vma->vm->i915;
 	int err;
 
 	complete(&arg->completion);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	/* Mark the fence register as dirty to force the mmio update. */
 	err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512);
 	if (err) {
 		pr_err("Invalid Y-tiling settings; err:%d\n", err);
-		goto out_unlock;
+		return err;
 	}
 
 	err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE);
 	if (err) {
 		pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err);
-		goto out_unlock;
+		return err;
 	}
 
 	err = i915_vma_pin_fence(arg->vma);
 	i915_vma_unpin(arg->vma);
 	if (err) {
 		pr_err("Unable to pin Y-tiled fence; err:%d\n", err);
-		goto out_unlock;
+		return err;
 	}
 
 	i915_vma_unpin_fence(arg->vma);
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return err;
+	return 0;
 }
 
 static int __igt_reset_evict_vma(struct intel_gt *gt,
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 5ff2437b2998..d996bbc7ea59 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -61,14 +61,14 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
 		flags = PIN_MAPPABLE;
 	}
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	mutex_lock(&dev_priv->ggtt.vm.mutex);
 	mmio_hw_access_pre(dev_priv);
 	ret = i915_gem_gtt_insert(&dev_priv->ggtt.vm, node,
 				  size, I915_GTT_PAGE_SIZE,
 				  I915_COLOR_UNEVICTABLE,
 				  start, end, flags);
 	mmio_hw_access_post(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&dev_priv->ggtt.vm.mutex);
 	if (ret)
 		gvt_err("fail to alloc %s gm space from host\n",
 			high_gm ? "high" : "low");
@@ -98,9 +98,9 @@ static int alloc_vgpu_gm(struct intel_vgpu *vgpu)
 
 	return 0;
 out_free_aperture:
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	mutex_lock(&dev_priv->ggtt.vm.mutex);
 	drm_mm_remove_node(&vgpu->gm.low_gm_node);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&dev_priv->ggtt.vm.mutex);
 	return ret;
 }
 
@@ -108,10 +108,10 @@ static void free_vgpu_gm(struct intel_vgpu *vgpu)
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	mutex_lock(&dev_priv->ggtt.vm.mutex);
 	drm_mm_remove_node(&vgpu->gm.low_gm_node);
 	drm_mm_remove_node(&vgpu->gm.high_gm_node);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&dev_priv->ggtt.vm.mutex);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 48e16ad93bbd..5e9f826ff132 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -142,6 +142,7 @@ __active_retire(struct i915_active *ref)
 	if (!retire)
 		return;
 
+	GEM_BUG_ON(rcu_access_pointer(ref->excl));
 	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
 		GEM_BUG_ON(i915_active_request_isset(&it->base));
 		kmem_cache_free(global.slab_cache, it);
@@ -241,6 +242,8 @@ void __i915_active_init(struct drm_i915_private *i915,
 	ref->flags = 0;
 	ref->active = active;
 	ref->retire = retire;
+
+	ref->excl = NULL;
 	ref->tree = RB_ROOT;
 	ref->cache = NULL;
 	init_llist_head(&ref->preallocated_barriers);
@@ -337,6 +340,45 @@ int i915_active_ref(struct i915_active *ref,
 	return err;
 }
 
+static void excl_cb(struct dma_fence *f, struct dma_fence_cb *cb)
+{
+	struct i915_active *ref = container_of(cb, typeof(*ref), excl_cb);
+
+	RCU_INIT_POINTER(ref->excl, NULL);
+	dma_fence_put(f);
+
+	active_retire(ref);
+}
+
+void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
+{
+	GEM_BUG_ON(i915_active_is_idle(ref));
+
+	dma_fence_get(f);
+
+	rcu_read_lock();
+	if (rcu_access_pointer(ref->excl)) {
+		struct dma_fence *old;
+
+		old = dma_fence_get_rcu_safe(&ref->excl);
+		if (old) {
+			if (dma_fence_remove_callback(old, &ref->excl_cb))
+				atomic_dec(&ref->count);
+			dma_fence_put(old);
+		}
+	}
+	rcu_read_unlock();
+
+	atomic_inc(&ref->count);
+	rcu_assign_pointer(ref->excl, f);
+
+	if (dma_fence_add_callback(f, &ref->excl_cb, excl_cb)) {
+		RCU_INIT_POINTER(ref->excl, NULL);
+		atomic_dec(&ref->count);
+		dma_fence_put(f);
+	}
+}
+
 int i915_active_acquire(struct i915_active *ref)
 {
 	int err;
@@ -395,6 +437,25 @@ void i915_active_ungrab(struct i915_active *ref)
 	__active_ungrab(ref);
 }
 
+static int excl_wait(struct i915_active *ref)
+{
+	struct dma_fence *old;
+	int err = 0;
+
+	if (!rcu_access_pointer(ref->excl))
+		return 0;
+
+	rcu_read_lock();
+	old = dma_fence_get_rcu_safe(&ref->excl);
+	rcu_read_unlock();
+	if (old) {
+		err = dma_fence_wait(old, true);
+		dma_fence_put(old);
+	}
+
+	return err;
+}
+
 int i915_active_wait(struct i915_active *ref)
 {
 	struct active_node *it, *n;
@@ -415,6 +476,10 @@ int i915_active_wait(struct i915_active *ref)
 		return 0;
 	}
 
+	err = excl_wait(ref);
+	if (err)
+		goto out;
+
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
 		if (is_barrier(&it->base)) { /* unconnected idle-barrier */
 			err = -EBUSY;
@@ -426,6 +491,7 @@ int i915_active_wait(struct i915_active *ref)
 			break;
 	}
 
+out:
 	__active_retire(ref);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index f95058f99057..bc1c9d5d4f63 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -373,6 +373,8 @@ int i915_active_ref(struct i915_active *ref,
 		    struct intel_timeline *tl,
 		    struct i915_request *rq);
 
+void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
+
 int i915_active_wait(struct i915_active *ref);
 
 int i915_request_await_active(struct i915_request *rq,
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 1854e7d168c1..86e7a232ea3c 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -8,6 +8,7 @@
 #define _I915_ACTIVE_TYPES_H_
 
 #include <linux/atomic.h>
+#include <linux/dma-fence.h>
 #include <linux/llist.h>
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
@@ -51,6 +52,10 @@ struct i915_active {
 	struct mutex mutex;
 	atomic_t count;
 
+	/* Preallocated "exclusive" node */
+	struct dma_fence __rcu *excl;
+	struct dma_fence_cb excl_cb;
+
 	unsigned long flags;
 #define I915_ACTIVE_GRAB_BIT 0
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 814f62fca727..e30542e5408d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -62,20 +62,31 @@
 #include "intel_pm.h"
 
 static int
-insert_mappable_node(struct i915_ggtt *ggtt,
-                     struct drm_mm_node *node, u32 size)
+insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size)
 {
+	int err;
+
+	err = mutex_lock_interruptible(&ggtt->vm.mutex);
+	if (err)
+		return err;
+
 	memset(node, 0, sizeof(*node));
-	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
-					   size, 0, I915_COLOR_UNEVICTABLE,
-					   0, ggtt->mappable_end,
-					   DRM_MM_INSERT_LOW);
+	err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
+					  size, 0, I915_COLOR_UNEVICTABLE,
+					  0, ggtt->mappable_end,
+					  DRM_MM_INSERT_LOW);
+
+	mutex_unlock(&ggtt->vm.mutex);
+
+	return err;
 }
 
 static void
-remove_mappable_node(struct drm_mm_node *node)
+remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node)
 {
+	mutex_lock(&ggtt->vm.mutex);
 	drm_mm_remove_node(node);
+	mutex_unlock(&ggtt->vm.mutex);
 }
 
 int
@@ -87,7 +98,8 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	struct i915_vma *vma;
 	u64 pinned;
 
-	mutex_lock(&ggtt->vm.mutex);
+	if (mutex_lock_interruptible(&ggtt->vm.mutex))
+		return -EINTR;
 
 	pinned = ggtt->vm.reserved;
 	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
@@ -109,8 +121,6 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 	LIST_HEAD(still_in_list);
 	int ret = 0;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	spin_lock(&obj->vma.lock);
 	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
 						       struct i915_vma,
@@ -120,8 +130,15 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 
 		ret = -EBUSY;
 		if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
-		    !i915_vma_is_active(vma))
-			ret = i915_vma_unbind(vma);
+		    !i915_vma_is_active(vma)) {
+			struct i915_address_space *vm = vma->vm;
+
+			ret = mutex_lock_interruptible(&vm->mutex);
+			if (!ret) {
+				ret = i915_vma_unbind(vma);
+				mutex_unlock(&vm->mutex);
+			}
+		}
 
 		spin_lock(&obj->vma.lock);
 	}
@@ -338,10 +355,6 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	u64 remain, offset;
 	int ret;
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	vma = ERR_PTR(-ENODEV);
 	if (!i915_gem_object_is_tiled(obj))
@@ -355,12 +368,10 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
-			goto out_unlock;
+			goto out_rpm;
 		GEM_BUG_ON(!drm_mm_node_allocated(&node));
 	}
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret)
 		goto out_unpin;
@@ -414,17 +425,14 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 
 	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
-	mutex_lock(&i915->drm.struct_mutex);
 	if (drm_mm_node_allocated(&node)) {
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
-		remove_mappable_node(&node);
+		remove_mappable_node(ggtt, &node);
 	} else {
 		i915_vma_unpin(vma);
 	}
-out_unlock:
+out_rpm:
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	return ret;
 }
 
@@ -531,10 +539,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 	void __user *user_data;
 	int ret;
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	if (i915_gem_object_has_struct_page(obj)) {
 		/*
 		 * Avoid waking the device up if we can fallback, as
@@ -544,10 +548,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		 * using the cache bypass of indirect GGTT access.
 		 */
 		wakeref = intel_runtime_pm_get_if_in_use(rpm);
-		if (!wakeref) {
-			ret = -EFAULT;
-			goto out_unlock;
-		}
+		if (!wakeref)
+			return -EFAULT;
 	} else {
 		/* No backing pages, no fallback, we must force GGTT access */
 		wakeref = intel_runtime_pm_get(rpm);
@@ -569,8 +571,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(!drm_mm_node_allocated(&node));
 	}
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret)
 		goto out_unpin;
@@ -634,18 +634,15 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 
 	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
-	mutex_lock(&i915->drm.struct_mutex);
 	intel_gt_flush_ggtt_writes(ggtt->vm.gt);
 	if (drm_mm_node_allocated(&node)) {
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
-		remove_mappable_node(&node);
+		remove_mappable_node(ggtt, &node);
 	} else {
 		i915_vma_unpin(vma);
 	}
 out_rpm:
 	intel_runtime_pm_put(rpm, wakeref);
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return ret;
 }
 
@@ -967,8 +964,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	if (flags & PIN_MAPPABLE &&
 	    (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
 		/* If the required space is larger than the available
@@ -1022,7 +1017,9 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 		     i915_ggtt_offset(vma), alignment,
 		     !!(flags & PIN_MAPPABLE),
 		     i915_vma_is_map_and_fenceable(vma));
+		mutex_lock(&vma->vm->mutex);
 		ret = i915_vma_unbind(vma);
+		mutex_unlock(&vma->vm->mutex);
 		if (ret)
 			return ERR_PTR(ret);
 	}
@@ -1328,7 +1325,9 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 		 * from the GTT to prevent such accidents and reclaim the
 		 * space.
 		 */
+		mutex_lock(&state->vm->mutex);
 		err = i915_vma_unbind(state);
+		mutex_unlock(&state->vm->mutex);
 		if (err)
 			goto out;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 7abcac3b5e2e..44f5b638fa43 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -47,8 +47,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * bound by their active reference.
 	 */
 	return i915_gem_wait_for_idle(i915,
-				      I915_WAIT_INTERRUPTIBLE |
-				      I915_WAIT_LOCKED,
+				      I915_WAIT_INTERRUPTIBLE,
 				      MAX_SCHEDULE_TIMEOUT);
 }
 
@@ -104,7 +103,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	struct i915_vma *active;
 	int ret;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
 	trace_i915_gem_evict(vm, min_size, alignment, flags);
 
 	/*
@@ -127,15 +126,6 @@ i915_gem_evict_something(struct i915_address_space *vm,
 				    min_size, alignment, cache_level,
 				    start, end, mode);
 
-	/*
-	 * Retire before we search the active list. Although we have
-	 * reasonable accuracy in our retirement lists, we may have
-	 * a stray pin (preventing eviction) that can only be resolved by
-	 * retiring.
-	 */
-	if (!(flags & PIN_NONBLOCK))
-		i915_retire_requests(dev_priv);
-
 search_again:
 	active = NULL;
 	INIT_LIST_HEAD(&eviction_list);
@@ -269,7 +259,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 	bool check_color;
 	int ret = 0;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
 	GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
 
@@ -375,7 +365,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	struct i915_vma *vma, *next;
 	int ret;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
 	trace_i915_gem_evict_vm(vm);
 
 	/* Switch back to the default context in order to unpin
@@ -390,7 +380,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	}
 
 	INIT_LIST_HEAD(&eviction_list);
-	mutex_lock(&vm->mutex);
 	list_for_each_entry(vma, &vm->bound_list, vm_link) {
 		if (i915_vma_is_pinned(vma))
 			continue;
@@ -398,7 +387,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 		__i915_vma_pin(vma);
 		list_add(&vma->evict_link, &eviction_list);
 	}
-	mutex_unlock(&vm->mutex);
 
 	ret = 0;
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 615a9f4ef30c..1e93f2aafc80 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -331,13 +331,15 @@ static struct i915_fence_reg *fence_find(struct drm_i915_private *i915)
 	return ERR_PTR(-EDEADLK);
 }
 
-static int __i915_vma_pin_fence(struct i915_vma *vma)
+int __i915_vma_pin_fence(struct i915_vma *vma)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
 	struct i915_fence_reg *fence;
 	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
 	int err;
 
+	lockdep_assert_held(&vma->vm->mutex);
+
 	/* Just update our place in the LRU if our fence is getting reused. */
 	if (vma->fence) {
 		fence = vma->fence;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 10545c64e27f..c904b7885523 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -150,16 +150,18 @@ static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
 
 static int ppgtt_bind_vma(struct i915_vma *vma,
 			  enum i915_cache_level cache_level,
-			  u32 unused)
+			  u32 flags)
 {
 	u32 pte_flags;
 	int err;
 
-	if (!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) {
+	if (flags & I915_VMA_ALLOC) {
 		err = vma->vm->allocate_va_range(vma->vm,
 						 vma->node.start, vma->size);
 		if (err)
 			return err;
+
+		set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
 	}
 
 	/* Applicable to VLV, and gen8+ */
@@ -167,6 +169,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 	if (i915_gem_object_is_readonly(vma->obj))
 		pte_flags |= PTE_READ_ONLY;
 
+	GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)));
 	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
 	wmb();
 
@@ -175,7 +178,8 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 
 static void ppgtt_unbind_vma(struct i915_vma *vma)
 {
-	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
+	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
+		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
 }
 
 static int ppgtt_set_pages(struct i915_vma *vma)
@@ -503,15 +507,25 @@ static void i915_address_space_fini(struct i915_address_space *vm)
 	mutex_destroy(&vm->mutex);
 }
 
-static void ppgtt_destroy_vma(struct i915_address_space *vm)
+void __i915_vm_close(struct i915_address_space *vm)
 {
 	struct i915_vma *vma, *vn;
 
-	mutex_lock(&vm->i915->drm.struct_mutex);
-	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link)
+	mutex_lock(&vm->mutex);
+	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
+		struct drm_i915_gem_object *obj = vma->obj;
+
+		if (!kref_get_unless_zero(&obj->base.refcount))
+			continue;
+
+		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+		WARN_ON(i915_vma_unbind(vma));
 		i915_vma_destroy(vma);
+
+		i915_gem_object_put(obj);
+	}
 	GEM_BUG_ON(!list_empty(&vm->bound_list));
-	mutex_unlock(&vm->i915->drm.struct_mutex);
+	mutex_unlock(&vm->mutex);
 }
 
 static void __i915_vm_release(struct work_struct *work)
@@ -519,8 +533,6 @@ static void __i915_vm_release(struct work_struct *work)
 	struct i915_address_space *vm =
 		container_of(work, struct i915_address_space, rcu.work);
 
-	ppgtt_destroy_vma(vm);
-
 	vm->cleanup(vm);
 	i915_address_space_fini(vm);
 
@@ -535,7 +547,6 @@ void i915_vm_release(struct kref *kref)
 	GEM_BUG_ON(i915_is_ggtt(vm));
 	trace_i915_ppgtt_release(vm);
 
-	vm->closed = true;
 	queue_rcu_work(vm->i915->wq, &vm->rcu);
 }
 
@@ -543,6 +554,7 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 {
 	kref_init(&vm->ref);
 	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
+	atomic_set(&vm->open, 1);
 
 	/*
 	 * The vm->mutex must be reclaim safe (for use in the shrinker).
@@ -1769,12 +1781,8 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 {
 	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
-	struct drm_i915_private *i915 = vm->i915;
 
-	/* FIXME remove the struct_mutex to bring the locking under control */
-	mutex_lock(&i915->drm.struct_mutex);
 	i915_vma_destroy(ppgtt->vma);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	gen6_ppgtt_free_pd(ppgtt);
 	free_scratch(vm);
@@ -1861,7 +1869,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 
 	i915_active_init(i915, &vma->active, NULL, NULL);
 
-	vma->vm = &ggtt->vm;
+	vma->vm = i915_vm_get(&ggtt->vm);
 	vma->ops = &pd_vma_ops;
 	vma->private = ppgtt;
 
@@ -1881,7 +1889,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base)
 	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
 	int err;
 
-	GEM_BUG_ON(ppgtt->base.vm.closed);
+	GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));
 
 	/*
 	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
@@ -2457,14 +2465,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	if (flags & I915_VMA_LOCAL_BIND) {
 		struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
 
-		if (!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) {
+		if (flags & I915_VMA_ALLOC) {
 			ret = alias->vm.allocate_va_range(&alias->vm,
 							  vma->node.start,
 							  vma->size);
 			if (ret)
 				return ret;
+
+			set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
 		}
 
+		GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
+				     __i915_vma_flags(vma)));
 		alias->vm.insert_entries(&alias->vm, vma,
 					 cache_level, pte_flags);
 	}
@@ -2594,22 +2606,16 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
 
 static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
 {
-	struct drm_i915_private *i915 = ggtt->vm.i915;
 	struct i915_ppgtt *ppgtt;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	ppgtt = fetch_and_zero(&ggtt->alias);
 	if (!ppgtt)
-		goto out;
+		return;
 
 	i915_vm_put(&ppgtt->vm);
 
 	ggtt->vm.vma_ops.bind_vma   = ggtt_bind_vma;
 	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
-
-out:
-	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
@@ -2726,15 +2732,14 @@ int i915_init_ggtt(struct drm_i915_private *i915)
 
 static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
 {
-	struct drm_i915_private *i915 = ggtt->vm.i915;
 	struct i915_vma *vma, *vn;
 
-	ggtt->vm.closed = true;
+	atomic_set(&ggtt->vm.open, 0);
 
 	rcu_barrier(); /* flush the RCU'ed__i915_vm_release */
-	flush_workqueue(i915->wq);
+	flush_workqueue(ggtt->vm.i915->wq);
 
-	mutex_lock(&i915->drm.struct_mutex);
+	mutex_lock(&ggtt->vm.mutex);
 
 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
 		WARN_ON(i915_vma_unbind(vma));
@@ -2743,15 +2748,12 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
 		drm_mm_remove_node(&ggtt->error_capture);
 
 	ggtt_release_guc_top(ggtt);
-
-	if (drm_mm_initialized(&ggtt->vm.mm)) {
-		intel_vgt_deballoon(ggtt);
-		i915_address_space_fini(&ggtt->vm);
-	}
+	intel_vgt_deballoon(ggtt);
 
 	ggtt->vm.cleanup(&ggtt->vm);
 
-	mutex_unlock(&i915->drm.struct_mutex);
+	mutex_unlock(&ggtt->vm.mutex);
+	i915_address_space_fini(&ggtt->vm);
 
 	arch_phys_wc_del(ggtt->mtrr);
 	io_mapping_fini(&ggtt->iomap);
@@ -3180,9 +3182,6 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
 static int ggtt_init_hw(struct i915_ggtt *ggtt)
 {
 	struct drm_i915_private *i915 = ggtt->vm.i915;
-	int ret = 0;
-
-	mutex_lock(&i915->drm.struct_mutex);
 
 	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
 
@@ -3198,18 +3197,14 @@ static int ggtt_init_hw(struct i915_ggtt *ggtt)
 				ggtt->gmadr.start,
 				ggtt->mappable_end)) {
 		ggtt->vm.cleanup(&ggtt->vm);
-		ret = -EIO;
-		goto out;
+		return -EIO;
 	}
 
 	ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
 
 	i915_ggtt_init_fences(ggtt);
 
-out:
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return ret;
+	return 0;
 }
 
 /**
@@ -3281,6 +3276,7 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
 {
 	struct i915_vma *vma, *vn;
 	bool flush = false;
+	int open;
 
 	intel_gt_check_and_clear_faults(ggtt->vm.gt);
 
@@ -3288,7 +3284,9 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
 
 	/* First fill our portion of the GTT with scratch pages */
 	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
-	ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
+
+	/* Skip rewriting PTE on VMA unbind. */
+	open = atomic_xchg(&ggtt->vm.open, 0);
 
 	/* clflush objects bound into the GGTT and rebind them. */
 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
@@ -3297,24 +3295,19 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
 		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
 			continue;
 
-		mutex_unlock(&ggtt->vm.mutex);
-
 		if (!i915_vma_unbind(vma))
-			goto lock;
+			continue;
 
 		WARN_ON(i915_vma_bind(vma,
 				      obj ? obj->cache_level : 0,
-				      PIN_UPDATE));
+				      PIN_UPDATE, NULL));
 		if (obj) { /* only used during resume => exclusive access */
 			flush |= fetch_and_zero(&obj->write_domain);
 			obj->read_domains |= I915_GEM_DOMAIN_GTT;
 		}
-
-lock:
-		mutex_lock(&ggtt->vm.mutex);
 	}
 
-	ggtt->vm.closed = false;
+	atomic_set(&ggtt->vm.open, open);
 	ggtt->invalidate(ggtt);
 
 	mutex_unlock(&ggtt->vm.mutex);
@@ -3706,7 +3699,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	u64 offset;
 	int err;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
+
 	GEM_BUG_ON(!size);
 	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(alignment && !is_power_of_2(alignment));
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 46d915d7ffb8..d0e7e5f9e19e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -308,7 +308,7 @@ struct i915_address_space {
 	unsigned int bind_alloc;
 	unsigned int serial;
 
-	bool closed;
+	atomic_t open;
 
 	struct mutex mutex; /* protects vma and our lists */
 #define VM_CLASS_GGTT 0
@@ -574,6 +574,35 @@ static inline void i915_vm_put(struct i915_address_space *vm)
 	kref_put(&vm->ref, i915_vm_release);
 }
 
+static inline struct i915_address_space *
+i915_vm_open(struct i915_address_space *vm)
+{
+	GEM_BUG_ON(!atomic_read(&vm->open));
+	atomic_inc(&vm->open);
+	return i915_vm_get(vm);
+}
+
+static inline bool
+i915_vm_tryopen(struct i915_address_space *vm)
+{
+	if (atomic_add_unless(&vm->open, 1, 0))
+		return i915_vm_get(vm);
+
+	return false;
+}
+
+void __i915_vm_close(struct i915_address_space *vm);
+
+static inline void
+i915_vm_close(struct i915_address_space *vm)
+{
+	GEM_BUG_ON(!atomic_read(&vm->open));
+	if (atomic_dec_and_test(&vm->open))
+		__i915_vm_close(vm);
+
+	i915_vm_put(vm);
+}
+
 int gen6_ppgtt_pin(struct i915_ppgtt *base);
 void gen6_ppgtt_unpin(struct i915_ppgtt *base);
 void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e42b86827d6b..e6aa56d82f9b 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1204,15 +1204,10 @@ static int i915_oa_read(struct i915_perf_stream *stream,
 static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
 {
 	struct i915_gem_engines_iter it;
-	struct drm_i915_private *i915 = stream->dev_priv;
 	struct i915_gem_context *ctx = stream->ctx;
 	struct intel_context *ce;
 	int err;
 
-	err = i915_mutex_lock_interruptible(&i915->drm);
-	if (err)
-		return ERR_PTR(err);
-
 	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
 		if (ce->engine->class != RENDER_CLASS)
 			continue;
@@ -1229,10 +1224,6 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
 	}
 	i915_gem_context_unlock_engines(ctx);
 
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (err)
-		return ERR_PTR(err);
-
 	return stream->pinned_ctx;
 }
 
@@ -1347,15 +1338,9 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
 static void
 free_oa_buffer(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *i915 = stream->dev_priv;
-
-	mutex_lock(&i915->drm.struct_mutex);
-
 	i915_vma_unpin_and_release(&stream->oa_buffer.vma,
 				   I915_VMA_RELEASE_MAP);
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	stream->oa_buffer.vaddr = NULL;
 }
 
@@ -1510,18 +1495,13 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
 	if (WARN_ON(stream->oa_buffer.vma))
 		return -ENODEV;
 
-	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
-	if (ret)
-		return ret;
-
 	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
 	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
 
 	bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
 	if (IS_ERR(bo)) {
 		DRM_ERROR("Failed to allocate OA buffer\n");
-		ret = PTR_ERR(bo);
-		goto unlock;
+		return PTR_ERR(bo);
 	}
 
 	i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
@@ -1545,7 +1525,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
 			 i915_ggtt_offset(stream->oa_buffer.vma),
 			 stream->oa_buffer.vaddr);
 
-	goto unlock;
+	return 0;
 
 err_unpin:
 	__i915_vma_unpin(vma);
@@ -1556,8 +1536,6 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
 	stream->oa_buffer.vaddr = NULL;
 	stream->oa_buffer.vma = NULL;
 
-unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 8672bea8ac0c..afd9d7a36b2a 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -32,6 +32,7 @@
 
 #include "i915_drv.h"
 #include "i915_globals.h"
+#include "i915_sw_fence_work.h"
 #include "i915_trace.h"
 #include "i915_vma.h"
 
@@ -110,7 +111,7 @@ vma_create(struct drm_i915_gem_object *obj,
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	vma->vm = vm;
+	vma->vm = i915_vm_get(vm);
 	vma->ops = &vm->vma_ops;
 	vma->obj = obj;
 	vma->resv = obj->base.resv;
@@ -261,8 +262,6 @@ vma_lookup(struct drm_i915_gem_object *obj,
  * Once created, the VMA is kept until either the object is freed, or the
  * address space is closed.
  *
- * Must be called with struct_mutex held.
- *
  * Returns the vma, or an error pointer.
  */
 struct i915_vma *
@@ -273,7 +272,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 
 	GEM_BUG_ON(view && !i915_is_ggtt(vm));
-	GEM_BUG_ON(vm->closed);
+	GEM_BUG_ON(!atomic_read(&vm->open));
 
 	spin_lock(&obj->vma.lock);
 	vma = vma_lookup(obj, vm, view);
@@ -287,18 +286,63 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 	return vma;
 }
 
+struct i915_vma_work {
+	struct dma_fence_work base;
+	struct i915_vma *vma;
+	enum i915_cache_level cache_level;
+	unsigned int flags;
+};
+
+static int __vma_bind(struct dma_fence_work *work)
+{
+	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
+	struct i915_vma *vma = vw->vma;
+	int err;
+
+	err = vma->ops->bind_vma(vma, vw->cache_level, vw->flags);
+	if (err)
+		atomic_or(I915_VMA_ERROR, &vma->flags);
+
+	if (vma->obj)
+		atomic_dec(&vma->obj->mm.pages_pin_count);
+
+	return err;
+}
+
+static const struct dma_fence_work_ops bind_ops = {
+	.name = "bind",
+	.work = __vma_bind,
+};
+
+struct i915_vma_work *i915_vma_work(void)
+{
+	struct i915_vma_work *vw;
+
+	vw = kzalloc(sizeof(*vw), GFP_KERNEL);
+	if (!vw)
+		return NULL;
+
+	dma_fence_work_init(&vw->base, &bind_ops);
+	vw->base.dma.error = -EAGAIN; /* disable the worker by default */
+
+	return vw;
+}
+
 /**
  * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
  * @vma: VMA to map
  * @cache_level: mapping cache level
  * @flags: flags like global or local mapping
+ * @work: preallocated worker for allocating and binding the PTE
  *
  * DMA addresses are taken from the scatter-gather table of this object (or of
  * this VMA in case of non-default GGTT views) and PTE entries set up.
  * Note that DMA addresses are also the only part of the SG table we care about.
  */
-int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
-		  u32 flags)
+int i915_vma_bind(struct i915_vma *vma,
+		  enum i915_cache_level cache_level,
+		  u32 flags,
+		  struct i915_vma_work *work)
 {
 	u32 bind_flags;
 	u32 vma_flags;
@@ -315,11 +359,8 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 	if (GEM_DEBUG_WARN_ON(!flags))
 		return -EINVAL;
 
-	bind_flags = 0;
-	if (flags & PIN_GLOBAL)
-		bind_flags |= I915_VMA_GLOBAL_BIND;
-	if (flags & PIN_USER)
-		bind_flags |= I915_VMA_LOCAL_BIND;
+	bind_flags = flags;
+	bind_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
 
 	vma_flags = atomic_read(&vma->flags);
 	vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
@@ -333,9 +374,34 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 	GEM_BUG_ON(!vma->pages);
 
 	trace_i915_vma_bind(vma, bind_flags);
-	ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
-	if (ret)
-		return ret;
+	if (work && (bind_flags & ~vma_flags) & vma->vm->bind_alloc) {
+		work->vma = vma;
+		work->cache_level = cache_level;
+		work->flags = bind_flags | I915_VMA_ALLOC;
+
+		if (!i915_vma_trylock(vma))
+			return -EAGAIN;
+
+		/*
+		 * Note we only want to chain up to the migration fence on
+		 * the pages (not the object itself). As we don't track that,
+		 * yet, we have to use the exclusive fence instead.
+		 */
+		dma_fence_work_chain(&work->base, dma_resv_get_excl(vma->resv));
+		dma_resv_add_excl_fence(vma->resv, &work->base.dma);
+		i915_vma_unlock(vma);
+
+		i915_active_set_exclusive(&vma->active, &work->base.dma);
+		work->base.dma.error = 0; /* enable the queue_work() */
+
+		if (vma->obj)
+			atomic_inc(&vma->obj->mm.pages_pin_count);
+	} else {
+		GEM_BUG_ON((bind_flags & ~vma_flags) & vma->vm->bind_alloc);
+		ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
+		if (ret)
+			return ret;
+	}
 
 	/* Must be incremented prior to request construction */
 	vma->vm->serial++;
@@ -351,9 +417,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 
 	/* Access through the GTT requires the device to be awake. */
 	assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm);
-
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-	if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
+	if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
 		err = -ENODEV;
 		goto err;
 	}
@@ -371,7 +435,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 			goto err;
 		}
 
-		vma->iomap = ptr;
+		if (unlikely(cmpxchg(&vma->iomap, NULL, ptr)))
+			io_mapping_unmap(ptr);
 	}
 
 	__i915_vma_pin(vma);
@@ -391,18 +456,12 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 
 void i915_vma_flush_writes(struct i915_vma *vma)
 {
-	if (!i915_vma_has_ggtt_write(vma))
-		return;
-
-	intel_gt_flush_ggtt_writes(vma->vm->gt);
-
-	i915_vma_unset_ggtt_write(vma);
+	if (i915_vma_unset_ggtt_write(vma))
+		intel_gt_flush_ggtt_writes(vma->vm->gt);
 }
 
 void i915_vma_unpin_iomap(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
 	GEM_BUG_ON(vma->iomap == NULL);
 
 	i915_vma_flush_writes(vma);
@@ -541,7 +600,6 @@ static void assert_bind_count(const struct drm_i915_gem_object *obj)
 static int
 i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	struct drm_i915_private *dev_priv = vma->vm->i915;
 	unsigned int cache_level;
 	u64 start, end;
 	int ret;
@@ -567,7 +625,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 
 	end = vma->vm->total;
 	if (flags & PIN_MAPPABLE)
-		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
+		end = min_t(u64, end, i915_vm_to_ggtt(vma->vm)->mappable_end);
 	if (flags & PIN_ZONE_4G)
 		end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
 	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
@@ -583,35 +641,21 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 		return -ENOSPC;
 	}
 
-	if (vma->obj) {
-		ret = i915_gem_object_pin_pages(vma->obj);
-		if (ret)
-			return ret;
-
+	cache_level = 0;
+	if (vma->obj)
 		cache_level = vma->obj->cache_level;
-	} else {
-		cache_level = 0;
-	}
-
-	GEM_BUG_ON(vma->pages);
-
-	ret = vma->ops->set_pages(vma);
-	if (ret)
-		goto err_unpin;
 
 	if (flags & PIN_OFFSET_FIXED) {
 		u64 offset = flags & PIN_OFFSET_MASK;
 		if (!IS_ALIGNED(offset, alignment) ||
-		    range_overflows(offset, size, end)) {
-			ret = -EINVAL;
-			goto err_clear;
-		}
+		    range_overflows(offset, size, end))
+			return -EINVAL;
 
 		ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
 					   size, offset, cache_level,
 					   flags);
 		if (ret)
-			goto err_clear;
+			return ret;
 	} else {
 		/*
 		 * We only support huge gtt pages through the 48b PPGTT,
@@ -650,7 +694,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 					  size, alignment, cache_level,
 					  start, end, flags);
 		if (ret)
-			goto err_clear;
+			return ret;
 
 		GEM_BUG_ON(vma->node.start < start);
 		GEM_BUG_ON(vma->node.start + vma->node.size > end);
@@ -658,23 +702,15 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level));
 
-	mutex_lock(&vma->vm->mutex);
 	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
-	mutex_unlock(&vma->vm->mutex);
 
 	if (vma->obj) {
+		atomic_inc(&vma->obj->mm.pages_pin_count);
 		atomic_inc(&vma->obj->bind_count);
 		assert_bind_count(vma->obj);
 	}
 
 	return 0;
-
-err_clear:
-	vma->ops->clear_pages(vma);
-err_unpin:
-	if (vma->obj)
-		i915_gem_object_unpin_pages(vma->obj);
-	return ret;
 }
 
 static void
@@ -683,12 +719,7 @@ i915_vma_remove(struct i915_vma *vma)
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
 
-	vma->ops->clear_pages(vma);
-
-	mutex_lock(&vma->vm->mutex);
-	drm_mm_remove_node(&vma->node);
 	list_del(&vma->vm_link);
-	mutex_unlock(&vma->vm->mutex);
 
 	/*
 	 * Since the unbound list is global, only move to that list if
@@ -707,51 +738,131 @@ i915_vma_remove(struct i915_vma *vma)
 		i915_gem_object_unpin_pages(obj);
 		assert_bind_count(obj);
 	}
+
+	drm_mm_remove_node(&vma->node);
 }
 
-int __i915_vma_do_pin(struct i915_vma *vma,
-		      u64 size, u64 alignment, u64 flags)
+/**
+ * i915_vma_pin - pin a vma, inserting and binding it as required
+ * @vma: VMA to pin
+ * @size: requested size, forwarded to i915_vma_insert()
+ * @alignment: requested alignment, forwarded to i915_vma_insert()
+ * @flags: PIN_* flags; the PIN_GLOBAL/PIN_USER bits select the binding
+ *
+ * A pin is taken speculatively up front; if the vma's bind bits already
+ * match the request we return with that pin held. Otherwise the backing
+ * pages are acquired, the vma is inserted into its address space if it had
+ * no pages yet, and it is bound under vm->mutex. Every failure path must
+ * drop the speculative pin again via err_unpin.
+ *
+ * Returns 0 on success, or a negative error code.
+ */
+int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	const unsigned int bound = atomic_read(&vma->flags);
-	int ret;
+	struct i915_vma_work *work = NULL;
+	bool insert = false;
+	unsigned int bound;
+	int err;
+
+	BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW);
+	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
+	BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
+
+	GEM_BUG_ON(flags & I915_VMA_PIN_OVERFLOW);
+	GEM_BUG_ON(flags & I915_VMA_ERROR);
+
+	bound = atomic_inc_return(&vma->flags);
+	if (likely(((bound ^ flags) & I915_VMA_BIND_MASK) == 0)) {
+		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+		GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
+		return 0;
+	}
 
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
-	GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
+	if (bound & I915_VMA_ERROR) {
+		err = -ENOMEM;
+		goto err_unpin;
+	}
 
 	if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
-		ret = -EBUSY;
+		err = -ENOMEM;
 		goto err_unpin;
 	}
 
-	if ((bound & I915_VMA_BIND_MASK) == 0) {
-		ret = i915_vma_insert(vma, size, alignment, flags);
-		if (ret)
-			goto err_unpin;
+	/* Failure here must still drop the pin taken by atomic_inc_return() */
+	if (vma->obj) {
+		err = i915_gem_object_pin_pages(vma->obj);
+		if (err)
+			goto err_unpin;
 	}
-	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
-	ret = i915_vma_bind(vma, vma->obj ? vma->obj->cache_level : 0, flags);
-	if (ret)
-		goto err_remove;
+	if (!vma->pages) {
+		GEM_BUG_ON(vma->pages);
+		err = vma->ops->set_pages(vma);
+		if (err)
+			goto err_pages;
 
-	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_BIND_MASK));
+		GEM_BUG_ON(!vma->pages);
+		insert = true;
+	}
 
-	if ((bound ^ atomic_read(&vma->flags)) & I915_VMA_GLOBAL_BIND)
-		__i915_vma_set_map_and_fenceable(vma);
+	if (flags & PIN_USER) {
+		work = i915_vma_work();
+		if (!work) {
+			err = -ENOMEM;
+			goto err_clear;
+		}
+	}
 
-	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
-	return 0;
+	err = i915_active_acquire(&vma->active);
+	if (err)
+		goto err_fence;
+
+	err = mutex_lock_interruptible(&vma->vm->mutex);
+	if (err)
+		goto err_active;
+
+	if (insert) {
+		err = i915_vma_insert(vma, size, alignment, flags);
+		if (err)
+			goto err_unlock;
+	}
 
-err_remove:
-	if ((bound & I915_VMA_BIND_MASK) == 0) {
-		i915_vma_remove(vma);
+	err = i915_vma_bind(vma,
+			    vma->obj ? vma->obj->cache_level : 0,
+			    flags, work);
+	if (err == 0) {
+		GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_BIND_MASK));
+
+		if ((bound ^ atomic_read(&vma->flags)) & I915_VMA_GLOBAL_BIND)
+			__i915_vma_set_map_and_fenceable(vma);
+
+		GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
+
+		__i915_vma_pin(vma);
+	} else {
+		if (insert)
+			i915_vma_remove(vma);
+	}
+
+err_unlock:
+	mutex_unlock(&vma->vm->mutex);
+err_active:
+	i915_active_release(&vma->active);
+err_fence:
+	if (work)
+		dma_fence_work_commit(&work->base);
+err_clear:
+	if (err && insert) {
+		vma->ops->clear_pages(vma);
 		GEM_BUG_ON(vma->pages);
 		GEM_BUG_ON(atomic_read(&vma->flags) & I915_VMA_BIND_MASK);
 	}
+err_pages:
+	if (vma->obj)
+		i915_gem_object_unpin_pages(vma->obj);
 err_unpin:
 	__i915_vma_unpin(vma);
-	return ret;
+	return err;
 }
 
 void i915_vma_close(struct i915_vma *vma)
@@ -795,10 +890,18 @@ void i915_vma_reopen(struct i915_vma *vma)
 	__i915_vma_remove_closed(vma);
 }
 
-static void __i915_vma_destroy(struct i915_vma *vma)
+void i915_vma_destroy(struct i915_vma *vma)
 {
-	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
-	GEM_BUG_ON(vma->fence);
+	__i915_vma_remove_closed(vma);
+
+	if (drm_mm_node_allocated(&vma->node)) {
+		mutex_lock(&vma->vm->mutex);
+		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+		WARN_ON(i915_vma_unbind(vma));
+		mutex_unlock(&vma->vm->mutex);
+		GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
+	}
+	GEM_BUG_ON(i915_vma_is_active(vma));
 
 	if (vma->obj) {
 		struct drm_i915_gem_object *obj = vma->obj;
@@ -809,36 +912,36 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 		spin_unlock(&obj->vma.lock);
 	}
 
-	i915_active_fini(&vma->active);
+	i915_vm_put(vma->vm);
 
+	i915_active_fini(&vma->active);
 	i915_vma_free(vma);
 }
 
-void i915_vma_destroy(struct i915_vma *vma)
-{
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
-	GEM_BUG_ON(i915_vma_is_pinned(vma));
-
-	__i915_vma_remove_closed(vma);
-
-	WARN_ON(i915_vma_unbind(vma));
-	GEM_BUG_ON(i915_vma_is_active(vma));
-
-	__i915_vma_destroy(vma);
-}
-
 void i915_vma_parked(struct drm_i915_private *i915)
 {
 	struct i915_vma *vma, *next;
 
 	spin_lock_irq(&i915->gt.closed_lock);
 	list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) {
+		struct drm_i915_gem_object *obj = vma->obj;
+		struct i915_address_space *vm = vma->vm;
+
 		list_del_init(&vma->closed_link);
+		if (!i915_vm_tryopen(vm))
+			continue;
+
+		if (!kref_get_unless_zero(&obj->base.refcount))
+			obj = NULL;
+
 		spin_unlock_irq(&i915->gt.closed_lock);
 
-		i915_vma_destroy(vma);
+		if (obj) {
+			i915_vma_destroy(vma);
+			i915_gem_object_put(obj);
+		}
 
+		i915_vm_close(vm);
 		spin_lock_irq(&i915->gt.closed_lock);
 	}
 	spin_unlock_irq(&i915->gt.closed_lock);
@@ -930,34 +1033,15 @@ int i915_vma_unbind(struct i915_vma *vma)
 {
 	int ret;
 
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vma->vm->mutex);
 
 	/*
 	 * First wait upon any activity as retiring the request may
 	 * have side-effects such as unpinning or even unbinding this vma.
 	 */
-	might_sleep();
-	if (i915_vma_is_active(vma)) {
-		/*
-		 * When a closed VMA is retired, it is unbound - eek.
-		 * In order to prevent it from being recursively closed,
-		 * take a pin on the vma so that the second unbind is
-		 * aborted.
-		 *
-		 * Even more scary is that the retire callback may free
-		 * the object (last active vma). To prevent the explosion
-		 * we defer the actual object free to a worker that can
-		 * only proceed once it acquires the struct_mutex (which
-		 * we currently hold, therefore it cannot free this object
-		 * before we are finished).
-		 */
-		__i915_vma_pin(vma);
-		ret = i915_active_wait(&vma->active);
-		__i915_vma_unpin(vma);
-		if (ret)
-			return ret;
-	}
-	GEM_BUG_ON(i915_vma_is_active(vma));
+	ret = i915_active_wait(&vma->active);
+	if (ret)
+		return ret;
 
 	if (i915_vma_is_pinned(vma)) {
 		vma_print_allocator(vma, "is pinned");
@@ -978,16 +1062,12 @@ int i915_vma_unbind(struct i915_vma *vma)
 		GEM_BUG_ON(i915_vma_has_ggtt_write(vma));
 
 		/* release the fence reg _after_ flushing */
-		mutex_lock(&vma->vm->mutex);
 		ret = i915_vma_revoke_fence(vma);
-		mutex_unlock(&vma->vm->mutex);
 		if (ret)
 			return ret;
 
 		/* Force a pagefault for domain tracking on next user access */
-		mutex_lock(&vma->vm->mutex);
 		i915_vma_revoke_mmap(vma);
-		mutex_unlock(&vma->vm->mutex);
 
 		__i915_vma_iounmap(vma);
 		clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
@@ -995,12 +1075,13 @@ int i915_vma_unbind(struct i915_vma *vma)
 	GEM_BUG_ON(vma->fence);
 	GEM_BUG_ON(i915_vma_has_userfault(vma));
 
-	if (likely(!vma->vm->closed)) {
+	if (likely(atomic_read(&vma->vm->open))) {
 		trace_i915_vma_unbind(vma);
 		vma->ops->unbind_vma(vma);
 	}
 	atomic_and(~I915_VMA_BIND_MASK, &vma->flags);
 
+	vma->ops->clear_pages(vma);
 	i915_vma_remove(vma);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 02d7d815407c..3e350b9c39bc 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -73,6 +73,7 @@ struct i915_vma {
 	 */
 	atomic_t open_count;
 	atomic_t flags;
+
 	/**
 	 * How many users have pinned this object in GTT space.
 	 *
@@ -103,18 +104,24 @@ struct i915_vma {
 	/** Flags and address space this VMA is bound to */
 #define I915_VMA_GLOBAL_BIND_BIT 9
 #define I915_VMA_LOCAL_BIND_BIT 10
+#define I915_VMA_ERROR_BIT	11
 
 #define I915_VMA_GLOBAL_BIND	((int)BIT(I915_VMA_GLOBAL_BIND_BIT))
 #define I915_VMA_LOCAL_BIND	((int)BIT(I915_VMA_LOCAL_BIND_BIT))
+#define I915_VMA_ERROR		((int)BIT(I915_VMA_ERROR_BIT))
 
 #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | \
 			    I915_VMA_LOCAL_BIND | \
+			    I915_VMA_ERROR | \
 			    I915_VMA_PIN_OVERFLOW)
 
-#define I915_VMA_GGTT_BIT	11
-#define I915_VMA_CAN_FENCE_BIT	12
-#define I915_VMA_USERFAULT_BIT	13
-#define I915_VMA_GGTT_WRITE_BIT	14
+#define I915_VMA_ALLOC_BIT	12
+#define I915_VMA_ALLOC		((int)BIT(I915_VMA_ALLOC_BIT))
+
+#define I915_VMA_GGTT_BIT	13
+#define I915_VMA_CAN_FENCE_BIT	14
+#define I915_VMA_USERFAULT_BIT	15
+#define I915_VMA_GGTT_WRITE_BIT	16
 
 #define I915_VMA_GGTT		((int)BIT(I915_VMA_GGTT_BIT))
 #define I915_VMA_CAN_FENCE	((int)BIT(I915_VMA_CAN_FENCE_BIT))
@@ -308,8 +315,12 @@ i915_vma_compare(struct i915_vma *vma,
 	return memcmp(&vma->ggtt_view.partial, &view->partial, view->type);
 }
 
-int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
-		  u32 flags);
+struct i915_vma_work *i915_vma_work(void);
+int i915_vma_bind(struct i915_vma *vma,
+		  enum i915_cache_level cache_level,
+		  u32 flags,
+		  struct i915_vma_work *work);
+
 bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level);
 bool i915_vma_misplaced(const struct i915_vma *vma,
 			u64 size, u64 alignment, u64 flags);
@@ -328,32 +339,19 @@ static inline void i915_vma_lock(struct i915_vma *vma)
 	dma_resv_lock(vma->resv, NULL);
 }
 
-static inline void i915_vma_unlock(struct i915_vma *vma)
+static inline bool i915_vma_trylock(struct i915_vma *vma)
 {
-	dma_resv_unlock(vma->resv);
+	return dma_resv_trylock(vma->resv);
 }
 
-int __i915_vma_do_pin(struct i915_vma *vma,
-		      u64 size, u64 alignment, u64 flags);
-static inline int __must_check
-i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+static inline void i915_vma_unlock(struct i915_vma *vma)
 {
-	BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW);
-	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
-	BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
-
-	/* Pin early to prevent the shrinker/eviction logic from destroying
-	 * our vma as we insert and bind.
-	 */
-	if (likely(((atomic_inc_return(&vma->flags) ^ flags) & I915_VMA_BIND_MASK) == 0)) {
-		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-		GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
-		return 0;
-	}
-
-	return __i915_vma_do_pin(vma, size, alignment, flags);
+	dma_resv_unlock(vma->resv);
 }
 
+int __must_check
+i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
+
 static inline int i915_vma_pin_count(const struct i915_vma *vma)
 {
 	return atomic_read(&vma->flags) & I915_VMA_PIN_MASK;
@@ -397,8 +395,6 @@ static inline bool i915_vma_is_bound(const struct i915_vma *vma,
  * the caller must call i915_vma_unpin_iomap to relinquish the pinning
  * after the iomapping is no longer required.
  *
- * Callers must hold the struct_mutex.
- *
  * Returns a valid iomapped pointer or ERR_PTR.
  */
 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
@@ -410,8 +406,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
  *
  * Unpins the previously iomapped VMA from i915_vma_pin_iomap().
  *
- * Callers must hold the struct_mutex. This function is only valid to be
- * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap().
+ * This function is only valid to be called on a VMA previously
+ * iomapped by the caller with i915_vma_pin_iomap().
  */
 void i915_vma_unpin_iomap(struct i915_vma *vma);
 
@@ -439,6 +435,8 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma)
 int __must_check i915_vma_pin_fence(struct i915_vma *vma);
 int __must_check i915_vma_revoke_fence(struct i915_vma *vma);
 
+int __i915_vma_pin_fence(struct i915_vma *vma);
+
 static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
 {
 	GEM_BUG_ON(atomic_read(&vma->fence->pin_count) <= 0);
@@ -456,7 +454,6 @@ static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
 static inline void
 i915_vma_unpin_fence(struct i915_vma *vma)
 {
-	/* lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); */
 	if (vma->fence)
 		__i915_vma_unpin_fence(vma);
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index cb30c669b1b7..ba6064147173 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -106,14 +106,11 @@ static int populate_ggtt(struct drm_i915_private *i915,
 
 static void unpin_ggtt(struct drm_i915_private *i915)
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct i915_vma *vma;
 
-	mutex_lock(&ggtt->vm.mutex);
 	list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)
 		if (vma->obj->mm.quirked)
 			i915_vma_unpin(vma);
-	mutex_unlock(&ggtt->vm.mutex);
 }
 
 static void cleanup_objects(struct drm_i915_private *i915,
@@ -127,11 +124,7 @@ static void cleanup_objects(struct drm_i915_private *i915,
 		i915_gem_object_put(obj);
 	}
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_drain_freed_objects(i915);
-
-	mutex_lock(&i915->drm.struct_mutex);
 }
 
 static int igt_evict_something(void *arg)
@@ -148,10 +141,12 @@ static int igt_evict_something(void *arg)
 		goto cleanup;
 
 	/* Everything is pinned, nothing should happen */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_something(&ggtt->vm,
 				       I915_GTT_PAGE_SIZE, 0, 0,
 				       0, U64_MAX,
 				       0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err != -ENOSPC) {
 		pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n",
 		       err);
@@ -161,10 +156,12 @@ static int igt_evict_something(void *arg)
 	unpin_ggtt(i915);
 
 	/* Everything is unpinned, we should be able to evict something */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_something(&ggtt->vm,
 				       I915_GTT_PAGE_SIZE, 0, 0,
 				       0, U64_MAX,
 				       0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n",
 		       err);
@@ -230,7 +227,9 @@ static int igt_evict_for_vma(void *arg)
 		goto cleanup;
 
 	/* Everything is pinned, nothing should happen */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err != -ENOSPC) {
 		pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n",
 		       err);
@@ -240,7 +239,9 @@ static int igt_evict_for_vma(void *arg)
 	unpin_ggtt(i915);
 
 	/* Everything is unpinned, we should be able to evict the node */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_for_node returned err=%d\n",
 		       err);
@@ -317,7 +318,9 @@ static int igt_evict_for_cache_color(void *arg)
 	i915_vma_unpin(vma);
 
 	/* Remove just the second vma */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("[0]i915_gem_evict_for_node returned err=%d\n", err);
 		goto cleanup;
@@ -328,7 +331,9 @@ static int igt_evict_for_cache_color(void *arg)
 	 */
 	target.color = I915_CACHE_L3_LLC;
 
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (!err) {
 		pr_err("[1]i915_gem_evict_for_node returned err=%d\n", err);
 		err = -EINVAL;
@@ -358,7 +363,9 @@ static int igt_evict_vm(void *arg)
 		goto cleanup;
 
 	/* Everything is pinned, nothing should happen */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_vm(&ggtt->vm);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
 		       err);
@@ -367,7 +374,9 @@ static int igt_evict_vm(void *arg)
 
 	unpin_ggtt(i915);
 
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_vm(&ggtt->vm);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
 		       err);
@@ -408,11 +417,11 @@ static int igt_evict_contexts(void *arg)
 	if (!HAS_FULL_PPGTT(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	/* Reserve a block so that we know we have enough to fit a few rq */
 	memset(&hole, 0, sizeof(hole));
+	mutex_lock(&i915->ggtt.vm.mutex);
 	err = i915_gem_gtt_insert(&i915->ggtt.vm, &hole,
 				  PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE,
 				  0, i915->ggtt.vm.total,
@@ -425,7 +434,9 @@ static int igt_evict_contexts(void *arg)
 	do {
 		struct reserved *r;
 
+		mutex_unlock(&i915->ggtt.vm.mutex);
 		r = kcalloc(1, sizeof(*r), GFP_KERNEL);
+		mutex_lock(&i915->ggtt.vm.mutex);
 		if (!r) {
 			err = -ENOMEM;
 			goto out_locked;
@@ -445,7 +456,7 @@ static int igt_evict_contexts(void *arg)
 		count++;
 	} while (1);
 	drm_mm_remove_node(&hole);
-	mutex_unlock(&i915->drm.struct_mutex);
+	mutex_unlock(&i915->ggtt.vm.mutex);
 	pr_info("Filled GGTT with %lu 1MiB nodes\n", count);
 
 	/* Overfill the GGTT with context objects and so try to evict one. */
@@ -508,7 +519,7 @@ static int igt_evict_contexts(void *arg)
 			break;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
+	mutex_lock(&i915->ggtt.vm.mutex);
 out_locked:
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
@@ -522,8 +533,8 @@ static int igt_evict_contexts(void *arg)
 	}
 	if (drm_mm_node_allocated(&hole))
 		drm_mm_remove_node(&hole);
+	mutex_unlock(&i915->ggtt.vm.mutex);
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
 }
@@ -545,12 +556,9 @@ int i915_gem_evict_mock_selftests(void)
 	if (!i915)
 		return -ENOMEM;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		err = i915_subtests(tests, i915);
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	drm_dev_put(&i915->drm);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index a90c9be95f8c..3373f20fb144 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -35,16 +35,7 @@
 
 static void cleanup_freed_objects(struct drm_i915_private *i915)
 {
-	/*
-	 * As we may hold onto the struct_mutex for inordinate lengths of
-	 * time, the NMI khungtaskd detector may fire for the free objects
-	 * worker.
-	 */
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_drain_freed_objects(i915);
-
-	mutex_lock(&i915->drm.struct_mutex);
 }
 
 static void fake_free_pages(struct drm_i915_gem_object *obj,
@@ -318,6 +309,17 @@ static int lowlevel_hole(struct drm_i915_private *i915,
 	return 0;
 }
 
+static int unlocked_vma_unbind(struct i915_vma *vma)
+{
+	int ret;
+
+	mutex_lock(&vma->vm->mutex);
+	ret = i915_vma_unbind(vma);
+	mutex_unlock(&vma->vm->mutex);
+
+	return ret;
+}
+
 static void close_object_list(struct list_head *objects,
 			      struct i915_address_space *vm)
 {
@@ -329,7 +331,7 @@ static void close_object_list(struct list_head *objects,
 
 		vma = i915_vma_instance(obj, vm, NULL);
 		if (!IS_ERR(vma))
-			ignored = i915_vma_unbind(vma);
+			ignored = unlocked_vma_unbind(vma);
 		/* Only ppgtt vma may be closed before the object is freed */
 		if (!IS_ERR(vma) && !i915_vma_is_ggtt(vma))
 			i915_vma_close(vma);
@@ -444,7 +446,7 @@ static int fill_hole(struct drm_i915_private *i915,
 						goto err;
 					}
 
-					err = i915_vma_unbind(vma);
+					err = unlocked_vma_unbind(vma);
 					if (err) {
 						pr_err("%s(%s) (forward) unbind of vma.node=%llx + %llx failed with err=%d\n",
 						       __func__, p->name, vma->node.start, vma->node.size,
@@ -517,7 +519,7 @@ static int fill_hole(struct drm_i915_private *i915,
 						goto err;
 					}
 
-					err = i915_vma_unbind(vma);
+					err = unlocked_vma_unbind(vma);
 					if (err) {
 						pr_err("%s(%s) (backward) unbind of vma.node=%llx + %llx failed with err=%d\n",
 						       __func__, p->name, vma->node.start, vma->node.size,
@@ -604,7 +606,7 @@ static int walk_hole(struct drm_i915_private *i915,
 				goto err_close;
 			}
 
-			err = i915_vma_unbind(vma);
+			err = unlocked_vma_unbind(vma);
 			if (err) {
 				pr_err("%s unbind failed at %llx + %llx  with err=%d\n",
 				       __func__, addr, vma->size, err);
@@ -685,13 +687,13 @@ static int pot_hole(struct drm_i915_private *i915,
 				pr_err("%s incorrect at %llx + %llx\n",
 				       __func__, addr, vma->size);
 				i915_vma_unpin(vma);
-				err = i915_vma_unbind(vma);
+				err = unlocked_vma_unbind(vma);
 				err = -EINVAL;
 				goto err;
 			}
 
 			i915_vma_unpin(vma);
-			err = i915_vma_unbind(vma);
+			err = unlocked_vma_unbind(vma);
 			GEM_BUG_ON(err);
 		}
 
@@ -789,13 +791,13 @@ static int drunk_hole(struct drm_i915_private *i915,
 				pr_err("%s incorrect at %llx + %llx\n",
 				       __func__, addr, BIT_ULL(size));
 				i915_vma_unpin(vma);
-				err = i915_vma_unbind(vma);
+				err = unlocked_vma_unbind(vma);
 				err = -EINVAL;
 				goto err;
 			}
 
 			i915_vma_unpin(vma);
-			err = i915_vma_unbind(vma);
+			err = unlocked_vma_unbind(vma);
 			GEM_BUG_ON(err);
 
 			if (igt_timeout(end_time,
@@ -867,7 +869,7 @@ static int __shrink_hole(struct drm_i915_private *i915,
 			pr_err("%s incorrect at %llx + %llx\n",
 			       __func__, addr, size);
 			i915_vma_unpin(vma);
-			err = i915_vma_unbind(vma);
+			err = unlocked_vma_unbind(vma);
 			err = -EINVAL;
 			break;
 		}
@@ -875,6 +877,15 @@ static int __shrink_hole(struct drm_i915_private *i915,
 		i915_vma_unpin(vma);
 		addr += size;
 
+		/*
+		 * Since we are injecting allocation faults at random intervals,
+		 * wait for this allocation to complete before we change the
+		 * faultinjection.
+		 */
+		err = i915_active_wait(&vma->active);
+		if (err)
+			break;
+
 		if (igt_timeout(end_time,
 				"%s timed out at ofset %llx [%llx - %llx]\n",
 				__func__, addr, hole_start, hole_end)) {
@@ -1008,21 +1019,19 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	ppgtt = i915_ppgtt_create(dev_priv);
 	if (IS_ERR(ppgtt)) {
 		err = PTR_ERR(ppgtt);
-		goto out_unlock;
+		goto out_free;
 	}
 	GEM_BUG_ON(offset_in_page(ppgtt->vm.total));
-	GEM_BUG_ON(ppgtt->vm.closed);
+	GEM_BUG_ON(!atomic_read(&ppgtt->vm.open));
 
 	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
 
 	i915_vm_put(&ppgtt->vm);
-out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+out_free:
 	mock_file_free(dev_priv, file);
 	return err;
 }
@@ -1085,7 +1094,6 @@ static int exercise_ggtt(struct drm_i915_private *i915,
 	IGT_TIMEOUT(end_time);
 	int err = 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
 restart:
 	list_sort(NULL, &ggtt->vm.mm.hole_stack, sort_holes);
 	drm_mm_for_each_hole(node, &ggtt->vm.mm, hole_start, hole_end) {
@@ -1106,7 +1114,6 @@ static int exercise_ggtt(struct drm_i915_private *i915,
 		last = hole_end;
 		goto restart;
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
 }
@@ -1148,13 +1155,9 @@ static int igt_ggtt_page(void *arg)
 	unsigned int *order, n;
 	int err;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		goto out_unlock;
-	}
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
 
 	err = i915_gem_object_pin_pages(obj);
 	if (err)
@@ -1222,8 +1225,6 @@ static int igt_ggtt_page(void *arg)
 	i915_gem_object_unpin_pages(obj);
 out_free:
 	i915_gem_object_put(obj);
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -1330,11 +1331,13 @@ static int igt_gtt_reserve(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
 					   obj->base.size,
 					   total,
 					   obj->cache_level,
 					   0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1380,11 +1383,13 @@ static int igt_gtt_reserve(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
 					   obj->base.size,
 					   total,
 					   obj->cache_level,
 					   0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1414,7 +1419,7 @@ static int igt_gtt_reserve(void *arg)
 			goto out;
 		}
 
-		err = i915_vma_unbind(vma);
+		err = unlocked_vma_unbind(vma);
 		if (err) {
 			pr_err("i915_vma_unbind failed with err=%d!\n", err);
 			goto out;
@@ -1424,11 +1429,13 @@ static int igt_gtt_reserve(void *arg)
 				       2*I915_GTT_PAGE_SIZE,
 				       I915_GTT_MIN_ALIGNMENT);
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
 					   obj->base.size,
 					   offset,
 					   obj->cache_level,
 					   0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1497,11 +1504,13 @@ static int igt_gtt_insert(void *arg)
 
 	/* Check a couple of obviously invalid requests */
 	for (ii = invalid_insert; ii->size; ii++) {
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_insert(&ggtt->vm, &tmp,
 					  ii->size, ii->alignment,
 					  I915_COLOR_UNEVICTABLE,
 					  ii->start, ii->end,
 					  0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err != -ENOSPC) {
 			pr_err("Invalid i915_gem_gtt_insert(.size=%llx, .alignment=%llx, .start=%llx, .end=%llx) succeeded (err=%d)\n",
 			       ii->size, ii->alignment, ii->start, ii->end,
@@ -1537,10 +1546,12 @@ static int igt_gtt_insert(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
 					  obj->base.size, 0, obj->cache_level,
 					  0, ggtt->vm.total,
 					  0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err == -ENOSPC) {
 			/* maxed out the GGTT space */
 			i915_gem_object_put(obj);
@@ -1589,16 +1600,18 @@ static int igt_gtt_insert(void *arg)
 		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 		offset = vma->node.start;
 
-		err = i915_vma_unbind(vma);
+		err = unlocked_vma_unbind(vma);
 		if (err) {
 			pr_err("i915_vma_unbind failed with err=%d!\n", err);
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
 					  obj->base.size, 0, obj->cache_level,
 					  0, ggtt->vm.total,
 					  0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1642,10 +1655,12 @@ static int igt_gtt_insert(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
 					  obj->base.size, 0, obj->cache_level,
 					  0, ggtt->vm.total,
 					  0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1689,8 +1704,9 @@ int i915_gem_gtt_mock_selftests(void)
 	}
 	mock_init_ggtt(i915, ggtt);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	err = i915_subtests(tests, ggtt);
+
+	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index a5bec0a4cdcc..53aafb9a3d64 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -337,7 +337,9 @@ static int igt_vma_pin1(void *arg)
 
 		if (!err) {
 			i915_vma_unpin(vma);
+			mutex_lock(&ggtt->vm.mutex);
 			err = i915_vma_unbind(vma);
+			mutex_unlock(&ggtt->vm.mutex);
 			if (err) {
 				pr_err("Failed to unbind single page from GGTT, err=%d\n", err);
 				goto out;
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 16/28] drm/i915: Push the i915_active.retire into a worker
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (13 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 15/28] drm/i915: Pull i915_vma_pin under the vm->mutex Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 17/28] drm/i915: Coordinate i915_active with its own mutex Chris Wilson
                   ` (14 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

As we need to use a mutex to serialise i915_active activation
(because we want to allow the callback to sleep), we need to push the
i915_active.retire into a worker callback in case we need to retire
from an atomic context.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 .../gpu/drm/i915/display/intel_frontbuffer.c  |  4 ++-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  1 +
 drivers/gpu/drm/i915/gt/intel_context.c       |  2 ++
 drivers/gpu/drm/i915/gt/intel_engine_pool.c   |  1 +
 drivers/gpu/drm/i915/gt/intel_timeline.c      |  1 +
 drivers/gpu/drm/i915/i915_active.c            | 34 ++++++++++++++++---
 drivers/gpu/drm/i915/i915_active_types.h      | 13 ++++++-
 drivers/gpu/drm/i915/i915_vma.c               |  2 ++
 drivers/gpu/drm/i915/selftests/i915_active.c  |  6 ++--
 9 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index fc40dc1fdbcc..6428b8dd70d3 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -206,6 +206,7 @@ static int frontbuffer_active(struct i915_active *ref)
 	return 0;
 }
 
+__i915_active_call
 static void frontbuffer_retire(struct i915_active *ref)
 {
 	struct intel_frontbuffer *front =
@@ -257,7 +258,8 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj)
 	kref_init(&front->ref);
 	atomic_set(&front->bits, 0);
 	i915_active_init(i915, &front->write,
-			 frontbuffer_active, frontbuffer_retire);
+			 frontbuffer_active,
+			 i915_active_may_sleep(frontbuffer_retire));
 
 	spin_lock(&i915->fb_tracking.lock);
 	if (obj->frontbuffer) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 20ee89e120b3..e903f0d993fd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -849,6 +849,7 @@ struct context_barrier_task {
 	void *data;
 };
 
+__i915_active_call
 static void cb_retire(struct i915_active *base)
 {
 	struct context_barrier_task *cb = container_of(base, typeof(*cb), base);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index c0495811f493..ae7c2689ef30 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -138,6 +138,7 @@ static void __context_unpin_state(struct i915_vma *vma)
 	__i915_vma_unpin(vma);
 }
 
+__i915_active_call
 static void __intel_context_retire(struct i915_active *active)
 {
 	struct intel_context *ce = container_of(active, typeof(*ce), active);
@@ -150,6 +151,7 @@ static void __intel_context_retire(struct i915_active *active)
 
 	intel_timeline_unpin(ce->timeline);
 	intel_ring_unpin(ce->ring);
+
 	intel_context_put(ce);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
index 4cd54c569911..36fa8b7e524d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -61,6 +61,7 @@ static int pool_active(struct i915_active *ref)
 	return 0;
 }
 
+__i915_active_call
 static void pool_retire(struct i915_active *ref)
 {
 	struct intel_engine_pool_node *node =
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 9cb01d9828f1..d824bca43d55 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -136,6 +136,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
 	kfree(cl);
 }
 
+__i915_active_call
 static void __cacheline_retire(struct i915_active *active)
 {
 	struct intel_timeline_cacheline *cl =
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 5e9f826ff132..fba77d284fba 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -128,6 +128,7 @@ __active_retire(struct i915_active *ref)
 	bool retire = false;
 
 	lockdep_assert_held(&ref->mutex);
+	GEM_BUG_ON(i915_active_is_idle(ref));
 
 	/* return the unused nodes to our slabcache -- flushing the allocator */
 	if (atomic_dec_and_test(&ref->count)) {
@@ -153,6 +154,19 @@ __active_retire(struct i915_active *ref)
 		ref->retire(ref);
 }
 
+static void
+active_work(struct work_struct *wrk)
+{
+	struct i915_active *ref = container_of(wrk, typeof(*ref), work);
+
+	GEM_BUG_ON(!atomic_read(&ref->count));
+	if (atomic_add_unless(&ref->count, -1, 1))
+		return;
+
+	mutex_lock(&ref->mutex);
+	__active_retire(ref);
+}
+
 static void
 active_retire(struct i915_active *ref)
 {
@@ -160,8 +174,13 @@ active_retire(struct i915_active *ref)
 	if (atomic_add_unless(&ref->count, -1, 1))
 		return;
 
-	/* One active may be flushed from inside the acquire of another */
-	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
+	/* If we are inside interrupt context (fence signaling), defer */
+	if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS ||
+	    !mutex_trylock(&ref->mutex)) {
+		queue_work(system_unbound_wq, &ref->work);
+		return;
+	}
+
 	__active_retire(ref);
 }
 
@@ -236,12 +255,16 @@ void __i915_active_init(struct drm_i915_private *i915,
 			void (*retire)(struct i915_active *ref),
 			struct lock_class_key *key)
 {
+	unsigned long bits;
+
 	debug_active_init(ref);
 
 	ref->i915 = i915;
 	ref->flags = 0;
 	ref->active = active;
-	ref->retire = retire;
+	ref->retire = ptr_unpack_bits(retire, &bits, 2);
+	if (bits & I915_ACTIVE_MAY_SLEEP)
+		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
 
 	ref->excl = NULL;
 	ref->tree = RB_ROOT;
@@ -249,6 +272,7 @@ void __i915_active_init(struct drm_i915_private *i915,
 	init_llist_head(&ref->preallocated_barriers);
 	atomic_set(&ref->count, 0);
 	__mutex_init(&ref->mutex, "i915_active", key);
+	INIT_WORK(&ref->work, active_work);
 }
 
 static bool ____active_del_barrier(struct i915_active *ref,
@@ -499,6 +523,7 @@ int i915_active_wait(struct i915_active *ref)
 	if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE))
 		return -EINTR;
 
+	flush_work(&ref->work);
 	if (!i915_active_is_idle(ref))
 		return -EBUSY;
 
@@ -543,8 +568,9 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 void i915_active_fini(struct i915_active *ref)
 {
 	debug_active_fini(ref);
-	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
 	GEM_BUG_ON(atomic_read(&ref->count));
+	GEM_BUG_ON(work_pending(&ref->work));
+	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
 	mutex_destroy(&ref->mutex);
 }
 #endif
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 86e7a232ea3c..021167f0004d 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -13,6 +13,9 @@
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
 #include <linux/rcupdate.h>
+#include <linux/workqueue.h>
+
+#include "i915_utils.h"
 
 struct drm_i915_private;
 struct i915_active_request;
@@ -44,6 +47,11 @@ struct i915_active_request {
 
 struct active_node;
 
+#define I915_ACTIVE_MAY_SLEEP BIT(0)
+
+#define __i915_active_call __aligned(4)
+#define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2)
+
 struct i915_active {
 	struct drm_i915_private *i915;
 
@@ -57,11 +65,14 @@ struct i915_active {
 	struct dma_fence_cb excl_cb;
 
 	unsigned long flags;
-#define I915_ACTIVE_GRAB_BIT 0
+#define I915_ACTIVE_RETIRE_SLEEPS BIT(0)
+#define I915_ACTIVE_GRAB_BIT 1
 
 	int (*active)(struct i915_active *ref);
 	void (*retire)(struct i915_active *ref);
 
+	struct work_struct work;
+
 	struct llist_head preallocated_barriers;
 };
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index afd9d7a36b2a..730c01cfa36f 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -91,6 +91,7 @@ static int __i915_vma_active(struct i915_active *ref)
 	return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT;
 }
 
+__i915_active_call
 static void __i915_vma_retire(struct i915_active *ref)
 {
 	i915_vma_put(active_to_vma(ref));
@@ -1048,6 +1049,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 		return -EBUSY;
 	}
 
+	GEM_BUG_ON(i915_vma_is_active(vma));
 	if (!drm_mm_node_allocated(&vma->node))
 		return 0;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 77d844ac8b71..d5ac9944d093 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -121,7 +121,7 @@ __live_active_setup(struct drm_i915_private *i915)
 	}
 
 	i915_active_release(&active->base);
-	if (active->retired && count) {
+	if (READ_ONCE(active->retired) && count) {
 		pr_err("i915_active retired before submission!\n");
 		err = -EINVAL;
 	}
@@ -161,7 +161,7 @@ static int live_active_wait(void *arg)
 	}
 
 	i915_active_wait(&active->base);
-	if (!active->retired) {
+	if (!READ_ONCE(active->retired)) {
 		pr_err("i915_active not retired after waiting!\n");
 		err = -EINVAL;
 	}
@@ -200,7 +200,7 @@ static int live_active_retire(void *arg)
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
 
-	if (!active->retired) {
+	if (!READ_ONCE(active->retired)) {
 		pr_err("i915_active not retired after flushing!\n");
 		err = -EINVAL;
 	}
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 17/28] drm/i915: Coordinate i915_active with its own mutex
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (14 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 16/28] drm/i915: Push the i915_active.retire into a worker Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 18/28] drm/i915: Move idle barrier cleanup into engine-pm Chris Wilson
                   ` (13 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

Forgo the struct_mutex serialisation for i915_active, and interpose its
own mutex handling for active/retire.

This is a multi-layered sleight-of-hand. First, we had to ensure that no
active/retire callbacks accidentally inverted the mutex ordering rules,
nor assumed that they were themselves serialised by struct_mutex. More
challenging, though, is the rule for updating elements of the active
rbtree. Instead of the whole i915_active being serialised by
struct_mutex, allocations/rotations of the tree are now serialised by
the i915_active.mutex and individual nodes are serialised by the caller
using the intel_timeline.mutex (we need to use nested spinlocks to
interact with the dma_fence callback lists).

The pain point here is that instead of a single mutex around execbuf, we
now have to take a mutex for each active tracker (one for each vma,
context, etc) and a couple of spinlocks for each fence update. The
improvement in fine-grained locking, allowing for multiple concurrent
clients (eventually!), should be worth it in typical loads.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/display/intel_frontbuffer.c  |   2 +-
 drivers/gpu/drm/i915/display/intel_overlay.c  |   5 +-
 .../gpu/drm/i915/gem/i915_gem_client_blt.c    |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   8 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |   2 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |   9 +-
 drivers/gpu/drm/i915/gt/intel_context.c       |   6 +-
 drivers/gpu/drm/i915/gt/intel_engine_pool.c   |   2 +-
 drivers/gpu/drm/i915/gt/intel_engine_pool.h   |   2 +-
 drivers/gpu/drm/i915/gt/intel_reset.c         |  10 +-
 drivers/gpu/drm/i915/gt/intel_timeline.c      |   9 +-
 .../gpu/drm/i915/gt/intel_timeline_types.h    |   2 +-
 drivers/gpu/drm/i915/gt/selftest_context.c    |  16 +-
 drivers/gpu/drm/i915/gt/selftest_lrc.c        |  10 +-
 .../gpu/drm/i915/gt/selftests/mock_timeline.c |   2 +-
 drivers/gpu/drm/i915/gvt/scheduler.c          |   3 -
 drivers/gpu/drm/i915/i915_active.c            | 256 ++++++--------
 drivers/gpu/drm/i915/i915_active.h            | 315 ++++--------------
 drivers/gpu/drm/i915/i915_active_types.h      |  20 +-
 drivers/gpu/drm/i915/i915_gem.c               |  42 ++-
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   3 +-
 drivers/gpu/drm/i915/i915_gpu_error.c         |   4 +-
 drivers/gpu/drm/i915/i915_request.c           |  39 +--
 drivers/gpu/drm/i915/i915_request.h           |   1 -
 drivers/gpu/drm/i915/i915_vma.c               |   8 +-
 drivers/gpu/drm/i915/selftests/i915_active.c  |  36 +-
 27 files changed, 256 insertions(+), 559 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index 6428b8dd70d3..84b164f31895 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -257,7 +257,7 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj)
 	front->obj = obj;
 	kref_init(&front->ref);
 	atomic_set(&front->bits, 0);
-	i915_active_init(i915, &front->write,
+	i915_active_init(&front->write,
 			 frontbuffer_active,
 			 i915_active_may_sleep(frontbuffer_retire));
 
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 4f36557b3f3b..544e953342ea 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -230,7 +230,7 @@ alloc_request(struct intel_overlay *overlay, void (*fn)(struct intel_overlay *))
 	if (IS_ERR(rq))
 		return rq;
 
-	err = i915_active_ref(&overlay->last_flip, rq->timeline, rq);
+	err = i915_active_ref(&overlay->last_flip, rq->timeline, &rq->fence);
 	if (err) {
 		i915_request_add(rq);
 		return ERR_PTR(err);
@@ -1360,8 +1360,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
 	overlay->contrast = 75;
 	overlay->saturation = 146;
 
-	i915_active_init(dev_priv,
-			 &overlay->last_flip,
+	i915_active_init(&overlay->last_flip,
 			 NULL, intel_overlay_last_flip_retire);
 
 	ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv));
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index f99920652751..c345d3d62df1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -211,7 +211,7 @@ static void clear_pages_worker(struct work_struct *work)
 	 * keep track of the GPU activity within this vma/request, and
 	 * propagate the signal from the request to w->dma.
 	 */
-	err = i915_active_ref(&vma->active, rq->timeline, rq);
+	err = i915_active_ref(&vma->active, rq->timeline, &rq->fence);
 	if (err)
 		goto out_request;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e903f0d993fd..90aed4d163b3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -869,20 +869,18 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 				void (*task)(void *data),
 				void *data)
 {
-	struct drm_i915_private *i915 = ctx->i915;
 	struct context_barrier_task *cb;
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
 	int err = 0;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(!task);
 
 	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
 	if (!cb)
 		return -ENOMEM;
 
-	i915_active_init(i915, &cb->base, NULL, cb_retire);
+	i915_active_init(&cb->base, NULL, cb_retire);
 	err = i915_active_acquire(&cb->base);
 	if (err) {
 		kfree(cb);
@@ -914,7 +912,9 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 		if (emit)
 			err = emit(rq, data);
 		if (err == 0)
-			err = i915_active_ref(&cb->base, rq->timeline, rq);
+			err = i915_active_ref(&cb->base,
+					      rq->timeline,
+					      &rq->fence);
 
 		i915_request_add(rq);
 		if (err)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 068c7976d7e9..2f10c1ca705f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1272,7 +1272,7 @@ relocate_entry(struct i915_vma *vma,
 
 	if (!eb->reloc_cache.vaddr &&
 	    (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
-	     !dma_resv_test_signaled_rcu(vma->resv, true))) {
+	     i915_vma_is_active(vma))) {
 		const unsigned int gen = eb->reloc_cache.gen;
 		unsigned int len;
 		u32 *batch;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index b0550727e69a..03e1e3206ab3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -8,6 +8,7 @@
 #define __I915_GEM_OBJECT_TYPES_H__
 
 #include <drm/drm_gem.h>
+#include <uapi/drm/i915_drm.h>
 
 #include "i915_active.h"
 #include "i915_selftest.h"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 92e53c25424c..92558fa47108 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -16,14 +16,11 @@ static void call_idle_barriers(struct intel_engine_cs *engine)
 	struct llist_node *node, *next;
 
 	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
-		struct i915_active_request *active =
+		struct dma_fence_cb *cb =
 			container_of((struct list_head *)node,
-				     typeof(*active), link);
+				     typeof(*cb), node);
 
-		INIT_LIST_HEAD(&active->link);
-		RCU_INIT_POINTER(active->request, NULL);
-
-		active->retire(active, NULL);
+		cb->func(NULL, cb);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index ae7c2689ef30..57e13c6f59c5 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -240,7 +240,7 @@ intel_context_init(struct intel_context *ce,
 
 	mutex_init(&ce->pin_mutex);
 
-	i915_active_init(ctx->i915, &ce->active,
+	i915_active_init(&ce->active,
 			 __intel_context_active, __intel_context_retire);
 }
 
@@ -307,7 +307,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 			return err;
 
 		/* Queue this switch after current activity by this context. */
-		err = i915_active_request_set(&tl->last_request, rq);
+		err = i915_active_fence_set(&tl->last_request, rq);
 		mutex_unlock(&tl->mutex);
 		if (err)
 			return err;
@@ -321,7 +321,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 	 * words transfer the pinned ce object to tracked active request.
 	 */
 	GEM_BUG_ON(i915_active_is_idle(&ce->active));
-	return i915_active_ref(&ce->active, rq->timeline, rq);
+	return i915_active_ref(&ce->active, rq->timeline, &rq->fence);
 }
 
 struct i915_request *intel_context_create_request(struct intel_context *ce)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
index 36fa8b7e524d..25b05c3db637 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -95,7 +95,7 @@ node_create(struct intel_engine_pool *pool, size_t sz)
 		return ERR_PTR(-ENOMEM);
 
 	node->pool = pool;
-	i915_active_init(engine->i915, &node->active, pool_active, pool_retire);
+	i915_active_init(&node->active, pool_active, pool_retire);
 
 	obj = i915_gem_object_create_internal(engine->i915, sz);
 	if (IS_ERR(obj)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
index 8d069efd9457..7e83459b8da6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
@@ -18,7 +18,7 @@ static inline int
 intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
 			      struct i915_request *rq)
 {
-	return i915_active_ref(&node->active, rq->timeline, rq);
+	return i915_active_ref(&node->active, rq->timeline, &rq->fence);
 }
 
 static inline void
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index b9d84d52e986..4825c82aefee 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -814,10 +814,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 	 */
 	spin_lock_irqsave(&timelines->lock, flags);
 	list_for_each_entry(tl, &timelines->active_list, link) {
-		struct i915_request *rq;
+		struct dma_fence *fence;
 
-		rq = i915_active_request_get_unlocked(&tl->last_request);
-		if (!rq)
+		fence = i915_active_fence_get(&tl->last_request);
+		if (!fence)
 			continue;
 
 		spin_unlock_irqrestore(&timelines->lock, flags);
@@ -829,8 +829,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 		 * (I915_FENCE_TIMEOUT) so this wait should not be unbounded
 		 * in the worst case.
 		 */
-		dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
-		i915_request_put(rq);
+		dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
+		dma_fence_put(fence);
 
 		/* Restart iteration after droping lock */
 		spin_lock_irqsave(&timelines->lock, flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index d824bca43d55..75d896167cfb 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -178,8 +178,7 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
 	cl->hwsp = hwsp;
 	cl->vaddr = page_pack_bits(vaddr, cacheline);
 
-	i915_active_init(hwsp->gt->i915, &cl->active,
-			 __cacheline_active, __cacheline_retire);
+	i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);
 
 	return cl;
 }
@@ -255,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
 
 	mutex_init(&timeline->mutex);
 
-	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
+	INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
@@ -443,7 +442,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
 	 * free it after the current request is retired, which ensures that
 	 * all writes into the cacheline from previous requests are complete.
 	 */
-	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq);
+	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
 	if (err)
 		goto err_cacheline;
 
@@ -494,7 +493,7 @@ int intel_timeline_get_seqno(struct intel_timeline *tl,
 static int cacheline_ref(struct intel_timeline_cacheline *cl,
 			 struct i915_request *rq)
 {
-	return i915_active_ref(&cl->active, rq->timeline, rq);
+	return i915_active_ref(&cl->active, rq->timeline, &rq->fence);
 }
 
 int intel_timeline_read_hwsp(struct i915_request *from,
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 2b1baf2fcc8e..6d7ac129ce8a 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -63,7 +63,7 @@ struct intel_timeline {
 	 * the request using i915_active_request_get_request_rcu(), or hold the
 	 * struct_mutex.
 	 */
-	struct i915_active_request last_request;
+	struct i915_active_fence last_request;
 
 	/**
 	 * We track the most recent seqno that we wait on in every context so
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 9d1ea26c7a2d..1420533e8fd5 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -41,24 +41,20 @@ static int context_sync(struct intel_context *ce)
 
 	mutex_lock(&tl->mutex);
 	do {
-		struct i915_request *rq;
+		struct dma_fence *fence;
 		long timeout;
 
-		rcu_read_lock();
-		rq = rcu_dereference(tl->last_request.request);
-		if (rq)
-			rq = i915_request_get_rcu(rq);
-		rcu_read_unlock();
-		if (!rq)
+		fence = i915_active_fence_get(&tl->last_request);
+		if (!fence)
 			break;
 
-		timeout = i915_request_wait(rq, 0, HZ / 10);
+		timeout = dma_fence_wait_timeout(fence, false, HZ / 10);
 		if (timeout < 0)
 			err = timeout;
 		else
-			i915_request_retire_upto(rq);
+			i915_request_retire_upto(to_request(fence));
 
-		i915_request_put(rq);
+		dma_fence_put(fence);
 	} while (!err);
 	mutex_unlock(&tl->mutex);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index d791158988d6..aca1b3a9c5de 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -984,9 +984,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
 	if (!rq)
 		return NULL;
 
-	INIT_LIST_HEAD(&rq->active_list);
 	rq->engine = engine;
 
+	spin_lock_init(&rq->lock);
+	INIT_LIST_HEAD(&rq->fence.cb_list);
+	rq->fence.lock = &rq->lock;
+	rq->fence.ops = &i915_fence_ops;
+
 	i915_sched_node_init(&rq->sched);
 
 	/* mark this request as permanently incomplete */
@@ -1079,8 +1083,8 @@ static int live_suppress_wait_preempt(void *arg)
 				}
 
 				/* Disable NEWCLIENT promotion */
-				__i915_active_request_set(&rq[i]->timeline->last_request,
-							  dummy);
+				__i915_active_fence_set(&rq[i]->timeline->last_request,
+							&dummy->fence);
 				i915_request_add(rq[i]);
 			}
 
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
index 598170efcaf6..2a77c051f36a 100644
--- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
@@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context)
 
 	mutex_init(&timeline->mutex);
 
-	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
+	INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 8940fa8d391a..6beb753b1ea1 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -385,11 +385,8 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct i915_request *rq;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	if (workload->req)
 		return 0;
 
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index fba77d284fba..2ecb78cb677e 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -12,8 +12,6 @@
 #include "i915_active.h"
 #include "i915_globals.h"
 
-#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
-
 /*
  * Active refs memory management
  *
@@ -27,35 +25,35 @@ static struct i915_global_active {
 } global;
 
 struct active_node {
-	struct i915_active_request base;
+	struct i915_active_fence base;
 	struct i915_active *ref;
 	struct rb_node node;
 	u64 timeline;
 };
 
 static inline struct active_node *
-node_from_active(struct i915_active_request *active)
+node_from_active(struct i915_active_fence *active)
 {
 	return container_of(active, struct active_node, base);
 }
 
 #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)
 
-static inline bool is_barrier(const struct i915_active_request *active)
+static inline bool is_barrier(const struct i915_active_fence *active)
 {
-	return IS_ERR(rcu_access_pointer(active->request));
+	return IS_ERR(rcu_access_pointer(active->fence));
 }
 
 static inline struct llist_node *barrier_to_ll(struct active_node *node)
 {
 	GEM_BUG_ON(!is_barrier(&node->base));
-	return (struct llist_node *)&node->base.link;
+	return (struct llist_node *)&node->base.cb.node;
 }
 
 static inline struct intel_engine_cs *
 __barrier_to_engine(struct active_node *node)
 {
-	return (struct intel_engine_cs *)READ_ONCE(node->base.link.prev);
+	return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
 }
 
 static inline struct intel_engine_cs *
@@ -68,7 +66,7 @@ barrier_to_engine(struct active_node *node)
 static inline struct active_node *barrier_from_ll(struct llist_node *x)
 {
 	return container_of((struct list_head *)x,
-			    struct active_node, base.link);
+			    struct active_node, base.cb.node);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)
@@ -145,13 +143,16 @@ __active_retire(struct i915_active *ref)
 
 	GEM_BUG_ON(rcu_access_pointer(ref->excl));
 	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
-		GEM_BUG_ON(i915_active_request_isset(&it->base));
+		GEM_BUG_ON(i915_active_fence_isset(&it->base));
 		kmem_cache_free(global.slab_cache, it);
 	}
 
 	/* After the final retire, the entire struct may be freed */
 	if (ref->retire)
 		ref->retire(ref);
+
+	/* ... except if you wait on it, you must manage your own references! */
+	wake_up_var(ref);
 }
 
 static void
@@ -185,12 +186,13 @@ active_retire(struct i915_active *ref)
 }
 
 static void
-node_retire(struct i915_active_request *base, struct i915_request *rq)
+node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
-	active_retire(node_from_active(base)->ref);
+	i915_active_fence_cb(fence, cb);
+	active_retire(container_of(cb, struct active_node, base.cb)->ref);
 }
 
-static struct i915_active_request *
+static struct i915_active_fence *
 active_instance(struct i915_active *ref, struct intel_timeline *tl)
 {
 	struct active_node *node, *prealloc;
@@ -234,7 +236,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
 	}
 
 	node = prealloc;
-	i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire);
+	__i915_active_fence_init(&node->base, &tl->mutex, NULL, node_retire);
 	node->ref = ref;
 	node->timeline = idx;
 
@@ -249,8 +251,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
 	return &node->base;
 }
 
-void __i915_active_init(struct drm_i915_private *i915,
-			struct i915_active *ref,
+void __i915_active_init(struct i915_active *ref,
 			int (*active)(struct i915_active *ref),
 			void (*retire)(struct i915_active *ref),
 			struct lock_class_key *key)
@@ -259,7 +260,6 @@ void __i915_active_init(struct drm_i915_private *i915,
 
 	debug_active_init(ref);
 
-	ref->i915 = i915;
 	ref->flags = 0;
 	ref->active = active;
 	ref->retire = ptr_unpack_bits(retire, &bits, 2);
@@ -325,9 +325,9 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node)
 
 int i915_active_ref(struct i915_active *ref,
 		    struct intel_timeline *tl,
-		    struct i915_request *rq)
+		    struct dma_fence *fence)
 {
-	struct i915_active_request *active;
+	struct i915_active_fence *active;
 	int err;
 
 	lockdep_assert_held(&tl->mutex);
@@ -350,14 +350,11 @@ int i915_active_ref(struct i915_active *ref,
 		 * request that we want to emit on the kernel_context.
 		 */
 		__active_del_barrier(ref, node_from_active(active));
-		RCU_INIT_POINTER(active->request, NULL);
-		INIT_LIST_HEAD(&active->link);
-	} else {
-		if (!i915_active_request_isset(active))
-			atomic_inc(&ref->count);
+		RCU_INIT_POINTER(active->fence, NULL);
+		atomic_dec(&ref->count);
 	}
-	GEM_BUG_ON(!atomic_read(&ref->count));
-	__i915_active_request_set(active, rq);
+	if (!__i915_active_fence_set(active, fence))
+		atomic_inc(&ref->count);
 
 out:
 	i915_active_release(ref);
@@ -427,143 +424,53 @@ int i915_active_acquire(struct i915_active *ref)
 	return err;
 }
 
-void i915_active_release(struct i915_active *ref)
+bool i915_active_acquire_if_busy(struct i915_active *ref)
 {
 	debug_active_assert(ref);
-	active_retire(ref);
-}
 
-static void __active_ungrab(struct i915_active *ref)
-{
-	clear_and_wake_up_bit(I915_ACTIVE_GRAB_BIT, &ref->flags);
+	return atomic_add_unless(&ref->count, 1, 0);
 }
 
-bool i915_active_trygrab(struct i915_active *ref)
+void i915_active_release(struct i915_active *ref)
 {
 	debug_active_assert(ref);
-
-	if (test_and_set_bit(I915_ACTIVE_GRAB_BIT, &ref->flags))
-		return false;
-
-	if (!atomic_add_unless(&ref->count, 1, 0)) {
-		__active_ungrab(ref);
-		return false;
-	}
-
-	return true;
-}
-
-void i915_active_ungrab(struct i915_active *ref)
-{
-	GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags));
-
 	active_retire(ref);
-	__active_ungrab(ref);
-}
-
-static int excl_wait(struct i915_active *ref)
-{
-	struct dma_fence *old;
-	int err = 0;
-
-	if (!rcu_access_pointer(ref->excl))
-		return 0;
-
-	rcu_read_lock();
-	old = dma_fence_get_rcu_safe(&ref->excl);
-	rcu_read_unlock();
-	if (old) {
-		err = dma_fence_wait(old, true);
-		dma_fence_put(old);
-	}
-
-	return err;
 }
 
 int i915_active_wait(struct i915_active *ref)
 {
 	struct active_node *it, *n;
-	int err;
+	int err = 0;
 
 	might_sleep();
-	might_lock(&ref->mutex);
 
-	if (i915_active_is_idle(ref))
+	if (!i915_active_acquire_if_busy(ref))
 		return 0;
 
-	err = mutex_lock_interruptible(&ref->mutex);
-	if (err)
-		return err;
-
-	if (!atomic_add_unless(&ref->count, 1, 0)) {
-		mutex_unlock(&ref->mutex);
-		return 0;
-	}
+	/* Flush lazy signals */
+	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+		struct dma_fence *fence;
 
-	err = excl_wait(ref);
-	if (err)
-		goto out;
+		if (is_barrier(&it->base)) /* unconnected idle barrier */
+			continue;
 
-	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		if (is_barrier(&it->base)) { /* unconnected idle-barrier */
-			err = -EBUSY;
-			break;
+		fence = i915_active_fence_get(&it->base);
+		if (fence) {
+			dma_fence_enable_sw_signaling(fence);
+			dma_fence_put(fence);
 		}
-
-		err = i915_active_request_retire(&it->base, BKL(ref));
-		if (err)
-			break;
 	}
-
-out:
-	__active_retire(ref);
+	/* Any fence added after the wait begins will not be auto-signaled */
+	i915_active_release(ref);
 	if (err)
 		return err;
 
-	if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE))
+	if (wait_var_event_interruptible(ref, i915_active_is_idle(ref)))
 		return -EINTR;
 
-	flush_work(&ref->work);
-	if (!i915_active_is_idle(ref))
-		return -EBUSY;
-
 	return 0;
 }
 
-int i915_request_await_active_request(struct i915_request *rq,
-				      struct i915_active_request *active)
-{
-	struct i915_request *barrier =
-		i915_active_request_raw(active, &rq->i915->drm.struct_mutex);
-
-	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
-}
-
-int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
-{
-	struct active_node *it, *n;
-	int err;
-
-	if (RB_EMPTY_ROOT(&ref->tree))
-		return 0;
-
-	/* await allocates and so we need to avoid hitting the shrinker */
-	err = i915_active_acquire(ref);
-	if (err)
-		return err;
-
-	mutex_lock(&ref->mutex);
-	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		err = i915_request_await_active_request(rq, &it->base);
-		if (err)
-			break;
-	}
-	mutex_unlock(&ref->mutex);
-
-	i915_active_release(ref);
-	return err;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 void i915_active_fini(struct i915_active *ref)
 {
@@ -577,7 +484,7 @@ void i915_active_fini(struct i915_active *ref)
 
 static inline bool is_idle_barrier(struct active_node *node, u64 idx)
 {
-	return node->timeline == idx && !i915_active_request_isset(&node->base);
+	return node->timeline == idx && !i915_active_fence_isset(&node->base);
 }
 
 static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
@@ -697,13 +604,13 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 			node->base.lock =
 				&engine->kernel_context->timeline->mutex;
 #endif
-			RCU_INIT_POINTER(node->base.request, NULL);
-			node->base.retire = node_retire;
+			RCU_INIT_POINTER(node->base.fence, NULL);
+			node->base.cb.func = node_retire;
 			node->timeline = idx;
 			node->ref = ref;
 		}
 
-		if (!i915_active_request_isset(&node->base)) {
+		if (!i915_active_fence_isset(&node->base)) {
 			/*
 			 * Mark this as being *our* unconnected proto-node.
 			 *
@@ -713,8 +620,8 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 			 * and then we can use the rb_node and list pointers
 			 * for our tracking of the pending barrier.
 			 */
-			RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
-			node->base.link.prev = (void *)engine;
+			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
+			node->base.cb.node.prev = (void *)engine;
 			atomic_inc(&ref->count);
 		}
 
@@ -781,25 +688,65 @@ void i915_request_add_active_barriers(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
 	struct llist_node *node, *next;
+	unsigned long flags;
 
 	GEM_BUG_ON(intel_engine_is_virtual(engine));
 	GEM_BUG_ON(rq->timeline != engine->kernel_context->timeline);
 
+	node = llist_del_all(&engine->barrier_tasks);
+	if (!node)
+		return;
 	/*
 	 * Attach the list of proto-fences to the in-flight request such
 	 * that the parent i915_active will be released when this request
 	 * is retired.
 	 */
-	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
-		RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq);
+	spin_lock_irqsave(&rq->lock, flags);
+	llist_for_each_safe(node, next, node) {
+		RCU_INIT_POINTER(barrier_from_ll(node)->base.fence, &rq->fence);
 		smp_wmb(); /* serialise with reuse_idle_barrier */
-		list_add_tail((struct list_head *)node, &rq->active_list);
+		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
 	}
+	spin_unlock_irqrestore(&rq->lock, flags);
 }
 
-int i915_active_request_set(struct i915_active_request *active,
-			    struct i915_request *rq)
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+#define active_is_held(active) lockdep_is_held((active)->lock)
+#else
+#define active_is_held(active) true
+#endif
+
+struct dma_fence *
+__i915_active_fence_set(struct i915_active_fence *active,
+			struct dma_fence *fence)
 {
+	struct dma_fence *prev;
+	unsigned long flags;
+
+	/* NB: updates must be serialised by an outer timeline mutex */
+	spin_lock_irqsave(fence->lock, flags);
+	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
+
+	prev = rcu_dereference_protected(active->fence, active_is_held(active));
+	if (prev) {
+		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+		__list_del_entry(&active->cb.node);
+		spin_unlock(prev->lock); /* serialise with prev->cb_list */
+		prev = rcu_access_pointer(active->fence);
+	}
+
+	rcu_assign_pointer(active->fence, fence);
+	list_add_tail(&active->cb.node, &fence->cb_list);
+
+	spin_unlock_irqrestore(fence->lock, flags);
+
+	return prev;
+}
+
+int i915_active_fence_set(struct i915_active_fence *active,
+			  struct i915_request *rq)
+{
+	struct dma_fence *fence;
 	int err;
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
@@ -807,18 +754,25 @@ int i915_active_request_set(struct i915_active_request *active,
 #endif
 
 	/* Must maintain ordering wrt previous active requests */
-	err = i915_request_await_active_request(rq, active);
-	if (err)
-		return err;
+	rcu_read_lock();
+	fence = __i915_active_fence_set(active, &rq->fence);
+	if (fence)
+		fence = dma_fence_get_rcu(fence);
+	rcu_read_unlock();
+
+	if (fence) {
+		err = i915_request_await_dma_fence(rq, fence);
+		dma_fence_put(fence);
+		if (err)
+			return err;
+	}
 
-	__i915_active_request_set(active, rq);
 	return 0;
 }
 
-void i915_active_retire_noop(struct i915_active_request *active,
-			     struct i915_request *request)
+void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
-	/* Space left intentionally blank */
+	i915_active_fence_cb(fence, cb);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index bc1c9d5d4f63..05e94876c73d 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -10,7 +10,10 @@
 #include <linux/lockdep.h>
 
 #include "i915_active_types.h"
-#include "i915_request.h"
+
+struct i915_request;
+struct intel_engine_cs;
+struct intel_timeline;
 
 /*
  * We treat requests as fences. This is not be to confused with our
@@ -28,308 +31,108 @@
  * write access so that we can perform concurrent read operations between
  * the CPU and GPU engines, as well as waiting for all rendering to
  * complete, or waiting for the last GPU user of a "fence register". The
- * object then embeds a #i915_active_request to track the most recent (in
+ * object then embeds a #i915_active_fence to track the most recent (in
  * retirement order) request relevant for the desired mode of access.
- * The #i915_active_request is updated with i915_active_request_set() to
+ * The #i915_active_fence is updated with i915_active_fence_set() to
  * track the most recent fence request, typically this is done as part of
  * i915_vma_move_to_active().
  *
- * When the #i915_active_request completes (is retired), it will
+ * When the #i915_active_fence completes (is retired), it will
  * signal its completion to the owner through a callback as well as mark
- * itself as idle (i915_active_request.request == NULL). The owner
+ * itself as idle (i915_active_fence.fence == NULL). The owner
  * can then perform any action, such as delayed freeing of an active
  * resource including itself.
  */
 
-void i915_active_retire_noop(struct i915_active_request *active,
-			     struct i915_request *request);
+void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb);
 
 /**
- * i915_active_request_init - prepares the activity tracker for use
+ * __i915_active_fence_init - prepares the activity tracker for use
  * @active - the active tracker
- * @rq - initial request to track, can be NULL
+ * @fence - initial fence to track, can be NULL
  * @func - a callback when then the tracker is retired (becomes idle),
  *         can be NULL
  *
- * i915_active_request_init() prepares the embedded @active struct for use as
- * an activity tracker, that is for tracking the last known active request
- * associated with it. When the last request becomes idle, when it is retired
+ * __i915_active_fence_init() prepares the embedded @active struct for use as
+ * an activity tracker, that is for tracking the last known active fence
+ * associated with it. When the last fence becomes idle, when it is retired
  * after completion, the optional callback @func is invoked.
  */
 static inline void
-i915_active_request_init(struct i915_active_request *active,
+__i915_active_fence_init(struct i915_active_fence *active,
 			 struct mutex *lock,
-			 struct i915_request *rq,
-			 i915_active_retire_fn retire)
+			 void *fence,
+			 dma_fence_func_t fn)
 {
-	RCU_INIT_POINTER(active->request, rq);
-	INIT_LIST_HEAD(&active->link);
-	active->retire = retire ?: i915_active_retire_noop;
+	RCU_INIT_POINTER(active->fence, fence);
+	active->cb.func = fn ?: i915_active_noop;
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 	active->lock = lock;
 #endif
 }
 
-#define INIT_ACTIVE_REQUEST(name, lock) \
-	i915_active_request_init((name), (lock), NULL, NULL)
-
-/**
- * i915_active_request_set - updates the tracker to watch the current request
- * @active - the active tracker
- * @request - the request to watch
- *
- * __i915_active_request_set() watches the given @request for completion. Whilst
- * that @request is busy, the @active reports busy. When that @request is
- * retired, the @active tracker is updated to report idle.
- */
-static inline void
-__i915_active_request_set(struct i915_active_request *active,
-			  struct i915_request *request)
-{
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
-	lockdep_assert_held(active->lock);
-#endif
-	list_move(&active->link, &request->active_list);
-	rcu_assign_pointer(active->request, request);
-}
+#define INIT_ACTIVE_FENCE(A, LOCK) \
+	__i915_active_fence_init((A), (LOCK), NULL, NULL)
 
-int __must_check
-i915_active_request_set(struct i915_active_request *active,
-			struct i915_request *rq);
+struct dma_fence *
+__i915_active_fence_set(struct i915_active_fence *active,
+			struct dma_fence *fence);
 
 /**
- * i915_active_request_raw - return the active request
+ * i915_active_fence_set - updates the tracker to watch the current fence
  * @active - the active tracker
+ * @rq - the request to watch
  *
- * i915_active_request_raw() returns the current request being tracked, or NULL.
- * It does not obtain a reference on the request for the caller, so the caller
- * must hold struct_mutex.
+ * i915_active_fence_set() watches the given @rq for completion. While
+ * that @rq is busy, the @active reports busy. When that @rq is signaled
+ * (or else retired) the @active tracker is updated to report idle.
  */
-static inline struct i915_request *
-i915_active_request_raw(const struct i915_active_request *active,
-			struct mutex *mutex)
-{
-	return rcu_dereference_protected(active->request,
-					 lockdep_is_held(mutex));
-}
-
-/**
- * i915_active_request_peek - report the active request being monitored
- * @active - the active tracker
- *
- * i915_active_request_peek() returns the current request being tracked if
- * still active, or NULL. It does not obtain a reference on the request
- * for the caller, so the caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_active_request_peek(const struct i915_active_request *active,
-			 struct mutex *mutex)
-{
-	struct i915_request *request;
-
-	request = i915_active_request_raw(active, mutex);
-	if (!request || i915_request_completed(request))
-		return NULL;
-
-	return request;
-}
-
-/**
- * i915_active_request_get - return a reference to the active request
- * @active - the active tracker
- *
- * i915_active_request_get() returns a reference to the active request, or NULL
- * if the active tracker is idle. The caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_active_request_get(const struct i915_active_request *active,
-			struct mutex *mutex)
-{
-	return i915_request_get(i915_active_request_peek(active, mutex));
-}
-
-/**
- * __i915_active_request_get_rcu - return a reference to the active request
- * @active - the active tracker
- *
- * __i915_active_request_get() returns a reference to the active request,
- * or NULL if the active tracker is idle. The caller must hold the RCU read
- * lock, but the returned pointer is safe to use outside of RCU.
- */
-static inline struct i915_request *
-__i915_active_request_get_rcu(const struct i915_active_request *active)
-{
-	/*
-	 * Performing a lockless retrieval of the active request is super
-	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
-	 * slab of request objects will not be freed whilst we hold the
-	 * RCU read lock. It does not guarantee that the request itself
-	 * will not be freed and then *reused*. Viz,
-	 *
-	 * Thread A			Thread B
-	 *
-	 * rq = active.request
-	 *				retire(rq) -> free(rq);
-	 *				(rq is now first on the slab freelist)
-	 *				active.request = NULL
-	 *
-	 *				rq = new submission on a new object
-	 * ref(rq)
-	 *
-	 * To prevent the request from being reused whilst the caller
-	 * uses it, we take a reference like normal. Whilst acquiring
-	 * the reference we check that it is not in a destroyed state
-	 * (refcnt == 0). That prevents the request being reallocated
-	 * whilst the caller holds on to it. To check that the request
-	 * was not reallocated as we acquired the reference we have to
-	 * check that our request remains the active request across
-	 * the lookup, in the same manner as a seqlock. The visibility
-	 * of the pointer versus the reference counting is controlled
-	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
-	 *
-	 * In the middle of all that, we inspect whether the request is
-	 * complete. Retiring is lazy so the request may be completed long
-	 * before the active tracker is updated. Querying whether the
-	 * request is complete is far cheaper (as it involves no locked
-	 * instructions setting cachelines to exclusive) than acquiring
-	 * the reference, so we do it first. The RCU read lock ensures the
-	 * pointer dereference is valid, but does not ensure that the
-	 * seqno nor HWS is the right one! However, if the request was
-	 * reallocated, that means the active tracker's request was complete.
-	 * If the new request is also complete, then both are and we can
-	 * just report the active tracker is idle. If the new request is
-	 * incomplete, then we acquire a reference on it and check that
-	 * it remained the active request.
-	 *
-	 * It is then imperative that we do not zero the request on
-	 * reallocation, so that we can chase the dangling pointers!
-	 * See i915_request_alloc().
-	 */
-	do {
-		struct i915_request *request;
-
-		request = rcu_dereference(active->request);
-		if (!request || i915_request_completed(request))
-			return NULL;
-
-		/*
-		 * An especially silly compiler could decide to recompute the
-		 * result of i915_request_completed, more specifically
-		 * re-emit the load for request->fence.seqno. A race would catch
-		 * a later seqno value, which could flip the result from true to
-		 * false. Which means part of the instructions below might not
-		 * be executed, while later on instructions are executed. Due to
-		 * barriers within the refcounting the inconsistency can't reach
-		 * past the call to i915_request_get_rcu, but not executing
-		 * that while still executing i915_request_put() creates
-		 * havoc enough.  Prevent this with a compiler barrier.
-		 */
-		barrier();
-
-		request = i915_request_get_rcu(request);
-
-		/*
-		 * What stops the following rcu_access_pointer() from occurring
-		 * before the above i915_request_get_rcu()? If we were
-		 * to read the value before pausing to get the reference to
-		 * the request, we may not notice a change in the active
-		 * tracker.
-		 *
-		 * The rcu_access_pointer() is a mere compiler barrier, which
-		 * means both the CPU and compiler are free to perform the
-		 * memory read without constraint. The compiler only has to
-		 * ensure that any operations after the rcu_access_pointer()
-		 * occur afterwards in program order. This means the read may
-		 * be performed earlier by an out-of-order CPU, or adventurous
-		 * compiler.
-		 *
-		 * The atomic operation at the heart of
-		 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
-		 * atomic_inc_not_zero() which is only a full memory barrier
-		 * when successful. That is, if i915_request_get_rcu()
-		 * returns the request (and so with the reference counted
-		 * incremented) then the following read for rcu_access_pointer()
-		 * must occur after the atomic operation and so confirm
-		 * that this request is the one currently being tracked.
-		 *
-		 * The corresponding write barrier is part of
-		 * rcu_assign_pointer().
-		 */
-		if (!request || request == rcu_access_pointer(active->request))
-			return rcu_pointer_handoff(request);
-
-		i915_request_put(request);
-	} while (1);
-}
-
+int __must_check
+i915_active_fence_set(struct i915_active_fence *active,
+		      struct i915_request *rq);
 /**
- * i915_active_request_get_unlocked - return a reference to the active request
+ * i915_active_fence_get - return a reference to the active fence
  * @active - the active tracker
  *
- * i915_active_request_get_unlocked() returns a reference to the active request,
+ * i915_active_fence_get() returns a reference to the active fence,
  * or NULL if the active tracker is idle. The reference is obtained under RCU,
  * so no locking is required by the caller.
  *
- * The reference should be freed with i915_request_put().
+ * The reference should be freed with dma_fence_put().
  */
-static inline struct i915_request *
-i915_active_request_get_unlocked(const struct i915_active_request *active)
+static inline struct dma_fence *
+i915_active_fence_get(struct i915_active_fence *active)
 {
-	struct i915_request *request;
+	struct dma_fence *fence;
 
 	rcu_read_lock();
-	request = __i915_active_request_get_rcu(active);
+	fence = dma_fence_get_rcu_safe(&active->fence);
 	rcu_read_unlock();
 
-	return request;
+	return fence;
 }
 
 /**
- * i915_active_request_isset - report whether the active tracker is assigned
+ * i915_active_fence_isset - report whether the active tracker is assigned
  * @active - the active tracker
  *
- * i915_active_request_isset() returns true if the active tracker is currently
- * assigned to a request. Due to the lazy retiring, that request may be idle
+ * i915_active_fence_isset() returns true if the active tracker is currently
+ * assigned to a fence. Due to the lazy retiring, that fence may be idle
  * and this may report stale information.
  */
 static inline bool
-i915_active_request_isset(const struct i915_active_request *active)
+i915_active_fence_isset(const struct i915_active_fence *active)
 {
-	return rcu_access_pointer(active->request);
+	return rcu_access_pointer(active->fence);
 }
 
-/**
- * i915_active_request_retire - waits until the request is retired
- * @active - the active request on which to wait
- *
- * i915_active_request_retire() waits until the request is completed,
- * and then ensures that at least the retirement handler for this
- * @active tracker is called before returning. If the @active
- * tracker is idle, the function returns immediately.
- */
-static inline int __must_check
-i915_active_request_retire(struct i915_active_request *active,
-			   struct mutex *mutex)
+static inline void
+i915_active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
-	struct i915_request *request;
-	long ret;
-
-	request = i915_active_request_raw(active, mutex);
-	if (!request)
-		return 0;
-
-	ret = i915_request_wait(request,
-				I915_WAIT_INTERRUPTIBLE,
-				MAX_SCHEDULE_TIMEOUT);
-	if (ret < 0)
-		return ret;
+	struct i915_active_fence *active =
+		container_of(cb, typeof(*active), cb);
 
-	list_del_init(&active->link);
-	RCU_INIT_POINTER(active->request, NULL);
-
-	active->retire(active, request);
-
-	return 0;
+	RCU_INIT_POINTER(active->fence, NULL);
 }
 
 /*
@@ -358,20 +161,19 @@ i915_active_request_retire(struct i915_active_request *active,
  * synchronisation.
  */
 
-void __i915_active_init(struct drm_i915_private *i915,
-			struct i915_active *ref,
+void __i915_active_init(struct i915_active *ref,
 			int (*active)(struct i915_active *ref),
 			void (*retire)(struct i915_active *ref),
 			struct lock_class_key *key);
-#define i915_active_init(i915, ref, active, retire) do {		\
+#define i915_active_init(ref, active, retire) do {		\
 	static struct lock_class_key __key;				\
 									\
-	__i915_active_init(i915, ref, active, retire, &__key);		\
+	__i915_active_init(ref, active, retire, &__key);		\
 } while (0)
 
 int i915_active_ref(struct i915_active *ref,
 		    struct intel_timeline *tl,
-		    struct i915_request *rq);
+		    struct dma_fence *fence);
 
 void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
 
@@ -379,15 +181,12 @@ int i915_active_wait(struct i915_active *ref);
 
 int i915_request_await_active(struct i915_request *rq,
 			      struct i915_active *ref);
-int i915_request_await_active_request(struct i915_request *rq,
-				      struct i915_active_request *active);
+int i915_request_await_active_fence(struct i915_request *rq,
+				    struct i915_active_fence *active);
 
 int i915_active_acquire(struct i915_active *ref);
+bool i915_active_acquire_if_busy(struct i915_active *ref);
 void i915_active_release(struct i915_active *ref);
-void __i915_active_release_nested(struct i915_active *ref, int subclass);
-
-bool i915_active_trygrab(struct i915_active *ref);
-void i915_active_ungrab(struct i915_active *ref);
 
 static inline bool
 i915_active_is_idle(const struct i915_active *ref)
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 021167f0004d..123f2e883261 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -17,17 +17,9 @@
 
 #include "i915_utils.h"
 
-struct drm_i915_private;
-struct i915_active_request;
-struct i915_request;
-
-typedef void (*i915_active_retire_fn)(struct i915_active_request *,
-				      struct i915_request *);
-
-struct i915_active_request {
-	struct i915_request __rcu *request;
-	struct list_head link;
-	i915_active_retire_fn retire;
+struct i915_active_fence {
+	struct dma_fence __rcu *fence;
+	struct dma_fence_cb cb;
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 	/*
 	 * Incorporeal!
@@ -53,12 +45,11 @@ struct active_node;
 #define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2)
 
 struct i915_active {
-	struct drm_i915_private *i915;
+	atomic_t count;
+	struct mutex mutex;
 
 	struct active_node *cache;
 	struct rb_root tree;
-	struct mutex mutex;
-	atomic_t count;
 
 	/* Preallocated "exclusive" node */
 	struct dma_fence __rcu *excl;
@@ -66,7 +57,6 @@ struct i915_active {
 
 	unsigned long flags;
 #define I915_ACTIVE_RETIRE_SLEEPS BIT(0)
-#define I915_ACTIVE_GRAB_BIT 1
 
 	int (*active)(struct i915_active *ref);
 	void (*retire)(struct i915_active *ref);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e30542e5408d..ac8616382c69 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -894,28 +894,38 @@ wait_for_timelines(struct drm_i915_private *i915,
 
 	spin_lock_irqsave(&timelines->lock, flags);
 	list_for_each_entry(tl, &timelines->active_list, link) {
-		struct i915_request *rq;
+		struct dma_fence *fence;
 
-		rq = i915_active_request_get_unlocked(&tl->last_request);
-		if (!rq)
+		fence = i915_active_fence_get(&tl->last_request);
+		if (!fence)
 			continue;
 
 		spin_unlock_irqrestore(&timelines->lock, flags);
 
-		/*
-		 * "Race-to-idle".
-		 *
-		 * Switching to the kernel context is often used a synchronous
-		 * step prior to idling, e.g. in suspend for flushing all
-		 * current operations to memory before sleeping. These we
-		 * want to complete as quickly as possible to avoid prolonged
-		 * stalls, so allow the gpu to boost to maximum clocks.
-		 */
-		if (wait & I915_WAIT_FOR_IDLE_BOOST)
-			gen6_rps_boost(rq);
+		if (!dma_fence_is_i915(fence)) {
+			timeout = dma_fence_wait_timeout(fence,
+							 flags & I915_WAIT_INTERRUPTIBLE,
+							 timeout);
+		} else {
+			struct i915_request *rq = to_request(fence);
+
+			/*
+			 * "Race-to-idle".
+			 *
+			 * Switching to the kernel context is often used as
+			 * a synchronous step prior to idling, e.g. in suspend
+			 * for flushing all current operations to memory before
+			 * sleeping. These we want to complete as quickly as
+			 * possible to avoid prolonged stalls, so allow the gpu
+			 * to boost to maximum clocks.
+			 */
+			if (flags & I915_WAIT_FOR_IDLE_BOOST)
+				gen6_rps_boost(rq);
+
+			timeout = i915_request_wait(rq, flags, timeout);
+		}
 
-		timeout = i915_request_wait(rq, wait, timeout);
-		i915_request_put(rq);
+		dma_fence_put(fence);
 		if (timeout < 0)
 			return timeout;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c904b7885523..9482b3490a47 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1856,7 +1856,6 @@ static const struct i915_vma_ops pd_vma_ops = {
 
 static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 {
-	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
 	struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
 	struct i915_vma *vma;
 
@@ -1867,7 +1866,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 	if (!vma)
 		return ERR_PTR(-ENOMEM);
 
-	i915_active_init(i915, &vma->active, NULL, NULL);
+	i915_active_init(&vma->active, NULL, NULL);
 
 	vma->vm = i915_vm_get(&ggtt->vm);
 	vma->ops = &pd_vma_ops;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 4aff342b8944..2dd63a261b00 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1292,7 +1292,7 @@ capture_vma(struct capture_vma *next,
 	if (!c)
 		return next;
 
-	if (!i915_active_trygrab(&vma->active)) {
+	if (!i915_active_acquire_if_busy(&vma->active)) {
 		kfree(c);
 		return next;
 	}
@@ -1432,7 +1432,7 @@ gem_record_rings(struct i915_gpu_state *error, struct compress *compress)
 			*this->slot =
 				i915_error_object_create(i915, vma, compress);
 
-			i915_active_ungrab(&vma->active);
+			i915_active_release(&vma->active);
 			i915_vma_put(vma);
 
 			capture = this->next;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 754a78364a63..4ecfae143276 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -197,9 +197,8 @@ static void free_capture_list(struct i915_request *request)
 
 static bool i915_request_retire(struct i915_request *rq)
 {
-	struct i915_active_request *active, *next;
-
 	lockdep_assert_held(&rq->timeline->mutex);
+
 	if (!i915_request_completed(rq))
 		return false;
 
@@ -223,35 +222,6 @@ static bool i915_request_retire(struct i915_request *rq)
 	GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests));
 	rq->ring->head = rq->postfix;
 
-	/*
-	 * Walk through the active list, calling retire on each. This allows
-	 * objects to track their GPU activity and mark themselves as idle
-	 * when their *last* active request is completed (updating state
-	 * tracking lists for eviction, active references for GEM, etc).
-	 *
-	 * As the ->retire() may free the node, we decouple it first and
-	 * pass along the auxiliary information (to avoid dereferencing
-	 * the node after the callback).
-	 */
-	list_for_each_entry_safe(active, next, &rq->active_list, link) {
-		/*
-		 * In microbenchmarks or focusing upon time inside the kernel,
-		 * we may spend an inordinate amount of time simply handling
-		 * the retirement of requests and processing their callbacks.
-		 * Of which, this loop itself is particularly hot due to the
-		 * cache misses when jumping around the list of
-		 * i915_active_request.  So we try to keep this loop as
-		 * streamlined as possible and also prefetch the next
-		 * i915_active_request to try and hide the likely cache miss.
-		 */
-		prefetchw(next);
-
-		INIT_LIST_HEAD(&active->link);
-		RCU_INIT_POINTER(active->request, NULL);
-
-		active->retire(active, rq);
-	}
-
 	local_irq_disable();
 
 	/*
@@ -664,7 +634,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->flags = 0;
 	rq->execution_mask = ALL_ENGINES;
 
-	INIT_LIST_HEAD(&rq->active_list);
 	INIT_LIST_HEAD(&rq->execute_cb);
 
 	/*
@@ -703,7 +672,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	ce->ring->emit = rq->head;
 
 	/* Make sure we didn't add ourselves to external state before freeing */
-	GEM_BUG_ON(!list_empty(&rq->active_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
@@ -1096,8 +1064,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * precludes optimising to use semaphores serialisation of a single
 	 * timeline across engines.
 	 */
-	prev = rcu_dereference_protected(timeline->last_request.request,
-					 lockdep_is_held(&timeline->mutex));
+	prev = to_request(__i915_active_fence_set(&timeline->last_request,
+						  &rq->fence));
 	if (prev && !i915_request_completed(prev)) {
 		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
 			i915_sw_fence_await_sw_fence(&rq->submit,
@@ -1122,7 +1090,6 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * us, the timeline will hold its seqno which is later than ours.
 	 */
 	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
-	__i915_active_request_set(&timeline->last_request, rq);
 
 	return prev;
 }
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 8ac6e1226a56..3251d2bdbeea 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -211,7 +211,6 @@ struct i915_request {
 	 * on the active_list (of their final request).
 	 */
 	struct i915_capture_list *capture_list;
-	struct list_head active_list;
 
 	/** Time at which this request was emitted, in jiffies. */
 	unsigned long emitted_jiffies;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 730c01cfa36f..852a8313c8c7 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -119,8 +119,7 @@ vma_create(struct drm_i915_gem_object *obj,
 	vma->size = obj->base.size;
 	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
-	i915_active_init(vm->i915, &vma->active,
-			 __i915_vma_active, __i915_vma_retire);
+	i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire);
 
 	/* Declare ourselves safe for use inside shrinkers */
 	if (IS_ENABLED(CONFIG_LOCKDEP)) {
@@ -1002,7 +1001,7 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	err = i915_active_ref(&vma->active, rq->timeline, rq);
+	err = i915_active_ref(&vma->active, rq->timeline, &rq->fence);
 	if (unlikely(err))
 		return err;
 
@@ -1010,7 +1009,7 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 		if (intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CS))
 			i915_active_ref(&obj->frontbuffer->write,
 					rq->timeline,
-					rq);
+					&rq->fence);
 
 		dma_resv_add_excl_fence(vma->resv, &rq->fence);
 		obj->write_domain = I915_GEM_DOMAIN_RENDER;
@@ -1044,6 +1043,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 	if (ret)
 		return ret;
 
+	GEM_BUG_ON(i915_vma_is_active(vma));
 	if (i915_vma_is_pinned(vma)) {
 		vma_print_allocator(vma, "is pinned");
 		return -EBUSY;
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index d5ac9944d093..af5827aac7b2 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -68,7 +68,7 @@ static struct live_active *__live_alloc(struct drm_i915_private *i915)
 		return NULL;
 
 	kref_init(&active->ref);
-	i915_active_init(i915, &active->base, __live_active, __live_retire);
+	i915_active_init(&active->base, __live_active, __live_retire);
 
 	return active;
 }
@@ -110,7 +110,9 @@ __live_active_setup(struct drm_i915_private *i915)
 						       submit,
 						       GFP_KERNEL);
 		if (err >= 0)
-			err = i915_active_ref(&active->base, rq->timeline, rq);
+			err = i915_active_ref(&active->base,
+					      rq->timeline,
+					      &rq->fence);
 		i915_request_add(rq);
 		if (err) {
 			pr_err("Failed to track active ref!\n");
@@ -146,19 +148,13 @@ static int live_active_wait(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct live_active *active;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that we get a callback when requests retire upon waiting */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	active = __live_active_setup(i915);
-	if (IS_ERR(active)) {
-		err = PTR_ERR(active);
-		goto err;
-	}
+	if (IS_ERR(active))
+		return PTR_ERR(active);
 
 	i915_active_wait(&active->base);
 	if (!READ_ONCE(active->retired)) {
@@ -168,11 +164,9 @@ static int live_active_wait(void *arg)
 
 	__live_put(active);
 
+	mutex_lock(&i915->drm.struct_mutex);
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
-
-err:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
@@ -182,23 +176,19 @@ static int live_active_retire(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct live_active *active;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that we get a callback when requests are indirectly retired */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	active = __live_active_setup(i915);
-	if (IS_ERR(active)) {
-		err = PTR_ERR(active);
-		goto err;
-	}
+	if (IS_ERR(active))
+		return PTR_ERR(active);
 
 	/* waits for & retires all requests */
+	mutex_lock(&i915->drm.struct_mutex);
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
+	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (!READ_ONCE(active->retired)) {
 		pr_err("i915_active not retired after flushing!\n");
@@ -207,10 +197,6 @@ static int live_active_retire(void *arg)
 
 	__live_put(active);
 
-err:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	return err;
 }
 
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 18/28] drm/i915: Move idle barrier cleanup into engine-pm
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (15 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 17/28] drm/i915: Coordinate i915_active with its own mutex Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 19/28] drm/i915: Drop struct_mutex from around i915_retire_requests() Chris Wilson
                   ` (12 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

Now that we no longer need to guarantee that the active callback is
under the struct_mutex, we can lift it out of the i915_gem_park() and
into the engine parking itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c    | 19 -------------------
 drivers/gpu/drm/i915/gt/intel_engine_pm.c | 15 +++++++++++++++
 drivers/gpu/drm/i915/i915_active.c        |  1 +
 3 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 92558fa47108..6e4cc177cc7a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -11,29 +11,10 @@
 #include "i915_drv.h"
 #include "i915_globals.h"
 
-static void call_idle_barriers(struct intel_engine_cs *engine)
-{
-	struct llist_node *node, *next;
-
-	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
-		struct dma_fence_cb *cb =
-			container_of((struct list_head *)node,
-				     typeof(*cb), node);
-
-		cb->func(NULL, cb);
-	}
-}
-
 static void i915_gem_park(struct drm_i915_private *i915)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	for_each_engine(engine, i915, id)
-		call_idle_barriers(engine); /* cleanup after wedging */
-
 	i915_vma_parked(i915);
 
 	i915_globals_park();
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 65b5ca74b394..472b2259f629 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -123,6 +123,19 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	return result;
 }
 
+static void call_idle_barriers(struct intel_engine_cs *engine)
+{
+	struct llist_node *node, *next;
+
+	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
+		struct dma_fence_cb *cb =
+			container_of((struct list_head *)node,
+				     typeof(*cb), node);
+
+		cb->func(NULL, cb);
+	}
+}
+
 static int __engine_park(struct intel_wakeref *wf)
 {
 	struct intel_engine_cs *engine =
@@ -142,6 +155,8 @@ static int __engine_park(struct intel_wakeref *wf)
 
 	GEM_TRACE("%s\n", engine->name);
 
+	call_idle_barriers(engine); /* cleanup after wedging */
+
 	intel_engine_disarm_breadcrumbs(engine);
 	intel_engine_pool_park(&engine->pool);
 
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 2ecb78cb677e..aa644cc941d3 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -678,6 +678,7 @@ void i915_active_acquire_barrier(struct i915_active *ref)
 		rb_link_node(&node->node, parent, p);
 		rb_insert_color(&node->node, &ref->tree);
 
+		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
 		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
 		intel_engine_pm_put(engine);
 	}
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 19/28] drm/i915: Drop struct_mutex from around i915_retire_requests()
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (16 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 18/28] drm/i915: Move idle barrier cleanup into engine-pm Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 20/28] drm/i915: Merge wait_for_timelines with retire_request Chris Wilson
                   ` (11 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

We don't need to hold struct_mutex now for retiring requests, so drop it
from i915_retire_requests() and i915_gem_wait_for_idle(), finally
removing I915_WAIT_LOCKED for good.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gem/i915_gem_client_blt.c    |  7 +--
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 20 +--------
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        | 45 +++++++------------
 .../drm/i915/gem/selftests/i915_gem_context.c |  4 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c    |  6 +--
 .../i915/gem/selftests/i915_gem_object_blt.c  |  2 -
 drivers/gpu/drm/i915/gt/selftest_context.c    |  4 +-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  | 28 ++++--------
 drivers/gpu/drm/i915/gt/selftest_lrc.c        | 21 +++++----
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  6 +--
 .../gpu/drm/i915/gt/selftest_workarounds.c    |  6 +--
 drivers/gpu/drm/i915/i915_debugfs.c           | 42 +++++------------
 drivers/gpu/drm/i915/i915_gem.c               | 19 ++++----
 drivers/gpu/drm/i915/i915_request.h           |  7 ++-
 drivers/gpu/drm/i915/selftests/i915_active.c  |  8 +---
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  2 +-
 .../gpu/drm/i915/selftests/i915_selftest.c    |  8 +---
 .../gpu/drm/i915/selftests/igt_flush_test.c   | 30 +++++--------
 .../gpu/drm/i915/selftests/igt_flush_test.h   |  2 +-
 .../gpu/drm/i915/selftests/igt_live_test.c    |  9 +---
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  4 --
 21 files changed, 91 insertions(+), 189 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index c345d3d62df1..3502071e1391 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -155,7 +155,6 @@ static void clear_pages_dma_fence_cb(struct dma_fence *fence,
 static void clear_pages_worker(struct work_struct *work)
 {
 	struct clear_pages_work *w = container_of(work, typeof(*w), work);
-	struct drm_i915_private *i915 = w->ce->engine->i915;
 	struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
 	struct i915_vma *vma = w->sleeve->vma;
 	struct i915_request *rq;
@@ -173,11 +172,9 @@ static void clear_pages_worker(struct work_struct *work)
 	obj->read_domains = I915_GEM_GPU_DOMAINS;
 	obj->write_domain = 0;
 
-	/* XXX: we need to kill this */
-	mutex_lock(&i915->drm.struct_mutex);
 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
 	if (unlikely(err))
-		goto out_unlock;
+		goto out_signal;
 
 	batch = intel_emit_vma_fill_blt(w->ce, vma, w->value);
 	if (IS_ERR(batch)) {
@@ -229,8 +226,6 @@ static void clear_pages_worker(struct work_struct *work)
 	intel_emit_vma_release(w->ce, batch);
 out_unpin:
 	i915_vma_unpin(vma);
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 out_signal:
 	if (unlikely(err)) {
 		dma_fence_set_error(&w->dma, err);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 90aed4d163b3..bd9397669332 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1165,8 +1165,7 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 }
 
 static int
-__intel_context_reconfigure_sseu(struct intel_context *ce,
-				 struct intel_sseu sseu)
+intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
 {
 	int ret;
 
@@ -1189,23 +1188,6 @@ __intel_context_reconfigure_sseu(struct intel_context *ce,
 	return ret;
 }
 
-static int
-intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
-{
-	struct drm_i915_private *i915 = ce->engine->i915;
-	int ret;
-
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
-	ret = __intel_context_reconfigure_sseu(ce, sseu);
-
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return ret;
-}
-
 static int
 user_to_context_sseu(struct drm_i915_private *i915,
 		     const struct drm_i915_gem_context_param_sseu *user,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 6e4cc177cc7a..fec0b410d1d9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -48,11 +48,7 @@ static void retire_work_handler(struct work_struct *work)
 	struct drm_i915_private *i915 =
 		container_of(work, typeof(*i915), gem.retire_work.work);
 
-	/* Come back later if the device is busy... */
-	if (mutex_trylock(&i915->drm.struct_mutex)) {
-		i915_retire_requests(i915);
-		mutex_unlock(&i915->drm.struct_mutex);
-	}
+	i915_retire_requests(i915);
 
 	queue_delayed_work(i915->wq,
 			   &i915->gem.retire_work,
@@ -86,26 +82,23 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt)
 {
 	bool result = !intel_gt_is_wedged(gt);
 
-	do {
-		if (i915_gem_wait_for_idle(gt->i915,
-					   I915_WAIT_LOCKED |
-					   I915_WAIT_FOR_IDLE_BOOST,
-					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
-			/* XXX hide warning from gem_eio */
-			if (i915_modparams.reset) {
-				dev_err(gt->i915->drm.dev,
-					"Failed to idle engines, declaring wedged!\n");
-				GEM_TRACE_DUMP();
-			}
-
-			/*
-			 * Forcibly cancel outstanding work and leave
-			 * the gpu quiet.
-			 */
-			intel_gt_set_wedged(gt);
-			result = false;
+	if (i915_gem_wait_for_idle(gt->i915,
+				   I915_WAIT_FOR_IDLE_BOOST,
+				   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+		/* XXX hide warning from gem_eio */
+		if (i915_modparams.reset) {
+			dev_err(gt->i915->drm.dev,
+				"Failed to idle engines, declaring wedged!\n");
+			GEM_TRACE_DUMP();
 		}
-	} while (i915_retire_requests(gt->i915) && result);
+
+		/*
+		 * Forcibly cancel outstanding work and leave
+		 * the gpu quiet.
+		 */
+		intel_gt_set_wedged(gt);
+		result = false;
+	}
 
 	if (intel_gt_pm_wait_for_idle(gt))
 		result = false;
@@ -125,8 +118,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
 	flush_workqueue(i915->wq);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	/*
 	 * We have to flush all the executing contexts to main memory so
 	 * that they can saved in the hibernation image. To ensure the last
@@ -138,8 +129,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 */
 	switch_to_kernel_context_sync(&i915->gt);
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
 
 	i915_gem_drain_freed_objects(i915);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 065f5bad466b..a8220868a56a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -950,7 +950,7 @@ __sseu_test(const char *name,
 	if (ret)
 		return ret;
 
-	ret = __intel_context_reconfigure_sseu(ce, sseu);
+	ret = intel_context_reconfigure_sseu(ce, sseu);
 	if (ret)
 		goto out_spin;
 
@@ -1054,7 +1054,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 		goto out_fail;
 
 out_fail:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		ret = -EIO;
 
 	intel_context_unpin(ce);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 9c217dfe96a9..39c01bc4eb51 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -393,12 +393,8 @@ static void disable_retire_worker(struct drm_i915_private *i915)
 
 static void restore_retire_worker(struct drm_i915_private *i915)
 {
+	igt_flush_test(i915);
 	intel_gt_pm_put(&i915->gt);
-
-	mutex_lock(&i915->drm.struct_mutex);
-	igt_flush_test(i915, I915_WAIT_LOCKED);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_driver_register__shrinker(i915);
 }
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
index c21d747e7d05..5a351998a98d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
@@ -166,9 +166,7 @@ static int igt_copy_blt(void *arg)
 		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 			dst->cache_dirty = true;
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = i915_gem_object_copy_blt(src, dst, ce);
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			goto err_unpin;
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 1420533e8fd5..883739354b07 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -312,7 +312,7 @@ static int live_active_context(void *arg)
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -425,7 +425,7 @@ static int live_remote_context(void *arg)
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index e53eea1050f8..35ab703dac34 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -285,7 +285,7 @@ static void hang_fini(struct hang *h)
 
 	kernel_context_close(h->ctx);
 
-	igt_flush_test(h->gt->i915, I915_WAIT_LOCKED);
+	igt_flush_test(h->gt->i915);
 }
 
 static bool wait_until_running(struct hang *h, struct i915_request *rq)
@@ -429,16 +429,13 @@ static int igt_reset_nop(void *arg)
 			break;
 		}
 
-		err = igt_flush_test(gt->i915, 0);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	} while (time_before(jiffies, end_time));
 	pr_info("%s: %d resets\n", __func__, count);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
+	err = igt_flush_test(gt->i915);
 out:
 	mock_file_free(gt->i915, file);
 	if (intel_gt_is_wedged(gt))
@@ -533,15 +530,12 @@ static int igt_reset_nop_engine(void *arg)
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, 0);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
+	err = igt_flush_test(gt->i915);
 out:
 	mock_file_free(gt->i915, file);
 	if (intel_gt_is_wedged(gt))
@@ -647,7 +641,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, 0);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -977,9 +971,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
 		if (err)
 			break;
 
-		mutex_lock(&gt->i915->drm.struct_mutex);
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -1511,7 +1503,7 @@ static int igt_reset_queue(void *arg)
 
 		i915_request_put(prev);
 
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -1751,10 +1743,6 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 
 	err = intel_gt_live_subtests(tests, gt);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
 	i915_modparams.enable_hangcheck = saved_hangcheck;
 	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index aca1b3a9c5de..222a7375c787 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -61,7 +61,7 @@ static int live_sanitycheck(void *arg)
 		}
 
 		igt_spinner_end(&spin);
-		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+		if (igt_flush_test(i915)) {
 			err = -EIO;
 			goto err_ctx;
 		}
@@ -206,8 +206,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
 	if (err)
 		goto out;
 
-	if (i915_request_wait(head,
-			      I915_WAIT_LOCKED,
+	if (i915_request_wait(head, 0,
 			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
 		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
 		       count, n);
@@ -279,7 +278,7 @@ static int live_timeslice_preempt(void *arg)
 			if (err)
 				goto err_pin;
 
-			if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+			if (igt_flush_test(i915)) {
 				err = -EIO;
 				goto err_pin;
 			}
@@ -832,7 +831,7 @@ static int live_nopreempt(void *arg)
 			goto err_wedged;
 		}
 
-		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		if (igt_flush_test(i915))
 			goto err_wedged;
 	}
 
@@ -948,7 +947,7 @@ static int live_suppress_self_preempt(void *arg)
 			goto err_client_b;
 		}
 
-		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		if (igt_flush_test(i915))
 			goto err_wedged;
 	}
 
@@ -1109,7 +1108,7 @@ static int live_suppress_wait_preempt(void *arg)
 			for (i = 0; i < ARRAY_SIZE(client); i++)
 				igt_spinner_end(&client[i].spin);
 
-			if (igt_flush_test(i915, I915_WAIT_LOCKED))
+			if (igt_flush_test(i915))
 				goto err_wedged;
 
 			if (engine->execlists.preempt_hang.count) {
@@ -1388,7 +1387,7 @@ static int live_preempt_hang(void *arg)
 
 		igt_spinner_end(&spin_hi);
 		igt_spinner_end(&spin_lo);
-		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+		if (igt_flush_test(i915)) {
 			err = -EIO;
 			goto err_ctx_lo;
 		}
@@ -1785,7 +1784,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 		prime, div64_u64(ktime_to_ns(times[1]), prime));
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	for (nc = 0; nc < nctx; nc++) {
@@ -1930,7 +1929,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 		goto out;
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	for (n = 0; n < nsibling; n++)
@@ -2108,7 +2107,7 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
 out:
 	for (n = 0; !IS_ERR(rq[n]); n++)
 		i915_request_put(rq[n]);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	kernel_context_close(ctx);
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 321481403165..42d0bbbbc7ea 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -545,7 +545,7 @@ static int live_hwsp_engine(void *arg)
 	}
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	for (n = 0; n < count; n++) {
@@ -621,7 +621,7 @@ static int live_hwsp_alternate(void *arg)
 	}
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	for (n = 0; n < count; n++) {
@@ -747,7 +747,7 @@ static int live_hwsp_wrap(void *arg)
 	}
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	intel_timeline_unpin(tl);
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 999a98f00494..06351fefbbf3 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -676,7 +676,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 			break;
 	}
 
-	if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(ctx->i915))
 		err = -EIO;
 out_batch:
 	i915_vma_unpin_and_release(&batch, 0);
@@ -1090,7 +1090,7 @@ static int live_isolated_whitelist(void *arg)
 		kernel_context_close(client[i].ctx);
 	}
 
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	return err;
@@ -1248,7 +1248,7 @@ live_engine_reset_workarounds(void *arg)
 	igt_global_reset_unlock(&i915->gt);
 	kernel_context_close(ctx);
 
-	igt_flush_test(i915, I915_WAIT_LOCKED);
+	igt_flush_test(i915);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 27acedd89785..b3507ec9c8d6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3602,6 +3602,7 @@ static int
 i915_drop_caches_set(void *data, u64 val)
 {
 	struct drm_i915_private *i915 = data;
+	int ret;
 
 	DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
 		  val, val & DROP_ALL);
@@ -3611,40 +3612,21 @@ i915_drop_caches_set(void *data, u64 val)
 		     I915_IDLE_ENGINES_TIMEOUT))
 		intel_gt_set_wedged(&i915->gt);
 
-	/* No need to check and wait for gpu resets, only libdrm auto-restarts
-	 * on ioctls on -EAGAIN. */
-	if (val & (DROP_ACTIVE | DROP_IDLE | DROP_RETIRE | DROP_RESET_SEQNO)) {
-		int ret;
+	if (val & DROP_RETIRE)
+		i915_retire_requests(i915);
 
-		ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
+	if (val & (DROP_IDLE | DROP_ACTIVE)) {
+		ret = i915_gem_wait_for_idle(i915,
+					     I915_WAIT_INTERRUPTIBLE,
+					     MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
+	}
 
-		/*
-		 * To finish the flush of the idle_worker, we must complete
-		 * the switch-to-kernel-context, which requires a double
-		 * pass through wait_for_idle: first queues the switch,
-		 * second waits for the switch.
-		 */
-		if (ret == 0 && val & (DROP_IDLE | DROP_ACTIVE))
-			ret = i915_gem_wait_for_idle(i915,
-						     I915_WAIT_INTERRUPTIBLE |
-						     I915_WAIT_LOCKED,
-						     MAX_SCHEDULE_TIMEOUT);
-
-		if (ret == 0 && val & DROP_IDLE)
-			ret = i915_gem_wait_for_idle(i915,
-						     I915_WAIT_INTERRUPTIBLE |
-						     I915_WAIT_LOCKED,
-						     MAX_SCHEDULE_TIMEOUT);
-
-		if (val & DROP_RETIRE)
-			i915_retire_requests(i915);
-
-		mutex_unlock(&i915->drm.struct_mutex);
-
-		if (ret == 0 && val & DROP_IDLE)
-			ret = intel_gt_pm_wait_for_idle(&i915->gt);
+	if (val & DROP_IDLE) {
+		ret = intel_gt_pm_wait_for_idle(&i915->gt);
+		if (ret)
+			return ret;
 	}
 
 	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt))
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ac8616382c69..3810cd305409 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -945,19 +945,16 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 	if (!intel_gt_pm_is_awake(&i915->gt))
 		return 0;
 
-	GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
-		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
-		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
-
-	timeout = wait_for_timelines(i915, flags, timeout);
-	if (timeout < 0)
-		return timeout;
+	do {
+		timeout = wait_for_timelines(i915, flags, timeout);
+		if (timeout < 0)
+			return timeout;
 
-	if (flags & I915_WAIT_LOCKED) {
-		lockdep_assert_held(&i915->drm.struct_mutex);
+		cond_resched();
+		if (signal_pending(current))
+			return -EINTR;
 
-		i915_retire_requests(i915);
-	}
+	} while (i915_retire_requests(i915));
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 3251d2bdbeea..57a2193c64d1 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -308,10 +308,9 @@ long i915_request_wait(struct i915_request *rq,
 		       long timeout)
 	__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
-#define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
-#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
+#define I915_WAIT_PRIORITY	BIT(1) /* small priority bump for the request */
+#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
+#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
 
 static inline bool i915_request_signaled(const struct i915_request *rq)
 {
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index af5827aac7b2..ff1337e34522 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -164,10 +164,8 @@ static int live_active_wait(void *arg)
 
 	__live_put(active);
 
-	mutex_lock(&i915->drm.struct_mutex);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
 }
@@ -185,10 +183,8 @@ static int live_active_retire(void *arg)
 		return PTR_ERR(active);
 
 	/* waits for & retires all requests */
-	mutex_lock(&i915->drm.struct_mutex);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (!READ_ONCE(active->retired)) {
 		pr_err("i915_active not retired after flushing!\n");
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index ba6064147173..42139db0d69c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -521,7 +521,7 @@ static int igt_evict_contexts(void *arg)
 
 	mutex_lock(&i915->ggtt.vm.mutex);
 out_locked:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 	while (reserved) {
 		struct reserved *next = reserved->next;
diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c
index 438ea0eaa416..825a8286cbe8 100644
--- a/drivers/gpu/drm/i915/selftests/i915_selftest.c
+++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c
@@ -263,10 +263,8 @@ int __i915_live_teardown(int err, void *data)
 {
 	struct drm_i915_private *i915 = data;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	i915_gem_drain_freed_objects(i915);
 
@@ -284,10 +282,8 @@ int __intel_gt_live_teardown(int err, void *data)
 {
 	struct intel_gt *gt = data;
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	if (igt_flush_test(gt->i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(gt->i915))
 		err = -EIO;
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 
 	i915_gem_drain_freed_objects(gt->i915);
 
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index d3b5eb402d33..2a5fbe46ea9f 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -12,31 +12,25 @@
 
 #include "igt_flush_test.h"
 
-int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
+int igt_flush_test(struct drm_i915_private *i915)
 {
 	int ret = intel_gt_is_wedged(&i915->gt) ? -EIO : 0;
-	int repeat = !!(flags & I915_WAIT_LOCKED);
 
 	cond_resched();
 
-	do {
-		if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) {
-			pr_err("%pS timed out, cancelling all further testing.\n",
-			       __builtin_return_address(0));
+	i915_retire_requests(i915);
+	if (i915_gem_wait_for_idle(i915, 0, HZ / 5) == -ETIME) {
+		pr_err("%pS timed out, cancelling all further testing.\n",
+		       __builtin_return_address(0));
 
-			GEM_TRACE("%pS timed out.\n",
-				  __builtin_return_address(0));
-			GEM_TRACE_DUMP();
+		GEM_TRACE("%pS timed out.\n",
+			  __builtin_return_address(0));
+		GEM_TRACE_DUMP();
 
-			intel_gt_set_wedged(&i915->gt);
-			repeat = 0;
-			ret = -EIO;
-		}
-
-		/* Ensure we also flush after wedging. */
-		if (flags & I915_WAIT_LOCKED)
-			i915_retire_requests(i915);
-	} while (repeat--);
+		intel_gt_set_wedged(&i915->gt);
+		ret = -EIO;
+	}
+	i915_retire_requests(i915);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.h b/drivers/gpu/drm/i915/selftests/igt_flush_test.h
index 63e009927c43..7541fa74e641 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.h
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.h
@@ -9,6 +9,6 @@
 
 struct drm_i915_private;
 
-int igt_flush_test(struct drm_i915_private *i915, unsigned int flags);
+int igt_flush_test(struct drm_i915_private *i915);
 
 #endif /* IGT_FLUSH_TEST_H */
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index 3e902761cd16..04a6f88fdf64 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -19,15 +19,12 @@ int igt_live_test_begin(struct igt_live_test *t,
 	enum intel_engine_id id;
 	int err;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
 	t->i915 = i915;
 	t->func = func;
 	t->name = name;
 
 	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_INTERRUPTIBLE |
-				     I915_WAIT_LOCKED,
+				     I915_WAIT_INTERRUPTIBLE,
 				     MAX_SCHEDULE_TIMEOUT);
 	if (err) {
 		pr_err("%s(%s): failed to idle before, with err=%d!",
@@ -50,9 +47,7 @@ int igt_live_test_end(struct igt_live_test *t)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		return -EIO;
 
 	if (t->reset_global != i915_reset_count(&i915->gpu_error)) {
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 01a89c071bf5..1956006a0d5b 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -41,8 +41,6 @@ void mock_device_flush(struct drm_i915_private *i915)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
 	do {
 		for_each_engine(engine, i915, id)
 			mock_engine_flush(engine);
@@ -55,9 +53,7 @@ static void mock_device_release(struct drm_device *dev)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	flush_work(&i915->gem.idle_work);
 	i915_gem_drain_workqueue(i915);
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 20/28] drm/i915: Merge wait_for_timelines with retire_request
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (17 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 19/28] drm/i915: Drop struct_mutex from around i915_retire_requests() Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 21/28] drm/i915: Serialise the fill BLT with the vma pinning Chris Wilson
                   ` (10 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

wait_for_timelines is essentially the same loop as retiring requests
(with an extra timeout), so merge the two into one routine.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      |  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  6 +-
 .../drm/i915/gem/selftests/i915_gem_context.c |  4 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |  6 +-
 drivers/gpu/drm/i915/i915_drv.h               |  3 +-
 drivers/gpu/drm/i915/i915_gem.c               | 68 ++-----------------
 drivers/gpu/drm/i915/i915_gem_evict.c         | 12 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  2 +-
 drivers/gpu/drm/i915/i915_request.c           | 19 +++++-
 drivers/gpu/drm/i915/i915_request.h           |  3 +-
 .../gpu/drm/i915/selftests/igt_flush_test.c   |  4 +-
 .../gpu/drm/i915/selftests/igt_live_test.c    |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  2 +-
 14 files changed, 41 insertions(+), 98 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 0440dbfda295..64387e01a0e4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -437,9 +437,7 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj)
 
 	/* Attempt to reap some mmap space from dead objects */
 	do {
-		err = i915_gem_wait_for_idle(i915,
-					     I915_WAIT_INTERRUPTIBLE,
-					     MAX_SCHEDULE_TIMEOUT);
+		err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
 		if (err)
 			break;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index fec0b410d1d9..b41f5e4c0c65 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -48,7 +48,7 @@ static void retire_work_handler(struct work_struct *work)
 	struct drm_i915_private *i915 =
 		container_of(work, typeof(*i915), gem.retire_work.work);
 
-	i915_retire_requests(i915);
+	i915_retire_requests(i915, 0);
 
 	queue_delayed_work(i915->wq,
 			   &i915->gem.retire_work,
@@ -82,9 +82,7 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt)
 {
 	bool result = !intel_gt_is_wedged(gt);
 
-	if (i915_gem_wait_for_idle(gt->i915,
-				   I915_WAIT_FOR_IDLE_BOOST,
-				   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
 		/* XXX hide warning from gem_eio */
 		if (i915_modparams.reset) {
 			dev_err(gt->i915->drm.dev,
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index a8220868a56a..99c832075429 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -304,7 +304,7 @@ create_test_object(struct i915_address_space *vm,
 	int err;
 
 	/* Keep in GEM's good graces */
-	i915_retire_requests(vm->i915);
+	i915_retire_requests(vm->i915, 0);
 
 	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
 	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
@@ -924,7 +924,7 @@ __sseu_finish(const char *name,
 
 	if ((flags & TEST_IDLE) && ret == 0) {
 		ret = i915_gem_wait_for_idle(ce->engine->i915,
-					     0, MAX_SCHEDULE_TIMEOUT);
+					     MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 42d0bbbbc7ea..0955b9e63746 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -743,7 +743,7 @@ static int live_hwsp_wrap(void *arg)
 			goto out;
 		}
 
-		i915_retire_requests(i915); /* recycle HWSP */
+		i915_retire_requests(i915, 0); /* recycle HWSP */
 	}
 
 out:
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index b3507ec9c8d6..e6362245347b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3613,12 +3613,10 @@ i915_drop_caches_set(void *data, u64 val)
 		intel_gt_set_wedged(&i915->gt);
 
 	if (val & DROP_RETIRE)
-		i915_retire_requests(i915);
+		i915_retire_requests(i915, 0);
 
 	if (val & (DROP_IDLE | DROP_ACTIVE)) {
-		ret = i915_gem_wait_for_idle(i915,
-					     I915_WAIT_INTERRUPTIBLE,
-					     MAX_SCHEDULE_TIMEOUT);
+		ret = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b42651a387d9..85767b7cd00b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2322,8 +2322,7 @@ void i915_gem_driver_register(struct drm_i915_private *i915);
 void i915_gem_driver_unregister(struct drm_i915_private *i915);
 void i915_gem_driver_remove(struct drm_i915_private *dev_priv);
 void i915_gem_driver_release(struct drm_i915_private *dev_priv);
-int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
-			   unsigned int flags, long timeout);
+int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, long timeout);
 void i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3810cd305409..60d6eb546f7f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -884,79 +884,19 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 	}
 }
 
-static long
-wait_for_timelines(struct drm_i915_private *i915,
-		   unsigned int wait, long timeout)
-{
-	struct intel_gt_timelines *timelines = &i915->gt.timelines;
-	struct intel_timeline *tl;
-	unsigned long flags;
-
-	spin_lock_irqsave(&timelines->lock, flags);
-	list_for_each_entry(tl, &timelines->active_list, link) {
-		struct dma_fence *fence;
-
-		fence = i915_active_fence_get(&tl->last_request);
-		if (!fence)
-			continue;
-
-		spin_unlock_irqrestore(&timelines->lock, flags);
-
-		if (!dma_fence_is_i915(fence)) {
-			timeout = dma_fence_wait_timeout(fence,
-							 flags & I915_WAIT_INTERRUPTIBLE,
-							 timeout);
-		} else {
-			struct i915_request *rq = to_request(fence);
-
-			/*
-			 * "Race-to-idle".
-			 *
-			 * Switching to the kernel context is often used as
-			 * a synchronous step prior to idling, e.g. in suspend
-			 * for flushing all current operations to memory before
-			 * sleeping. These we want to complete as quickly as
-			 * possible to avoid prolonged stalls, so allow the gpu
-			 * to boost to maximum clocks.
-			 */
-			if (flags & I915_WAIT_FOR_IDLE_BOOST)
-				gen6_rps_boost(rq);
-
-			timeout = i915_request_wait(rq, flags, timeout);
-		}
-
-		dma_fence_put(fence);
-		if (timeout < 0)
-			return timeout;
-
-		/* restart after reacquiring the lock */
-		spin_lock_irqsave(&timelines->lock, flags);
-		tl = list_entry(&timelines->active_list, typeof(*tl), link);
-	}
-	spin_unlock_irqrestore(&timelines->lock, flags);
-
-	return timeout;
-}
-
-int i915_gem_wait_for_idle(struct drm_i915_private *i915,
-			   unsigned int flags, long timeout)
+int i915_gem_wait_for_idle(struct drm_i915_private *i915, long timeout)
 {
 	/* If the device is asleep, we have no requests outstanding */
 	if (!intel_gt_pm_is_awake(&i915->gt))
 		return 0;
 
-	do {
-		timeout = wait_for_timelines(i915, flags, timeout);
-		if (timeout < 0)
-			return timeout;
-
+	while ((timeout = i915_retire_requests(i915, timeout)) > 0) {
 		cond_resched();
 		if (signal_pending(current))
 			return -EINTR;
+	}
 
-	} while (i915_retire_requests(i915));
-
-	return 0;
+	return timeout;
 }
 
 struct i915_vma *
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 44f5b638fa43..708055a3887e 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -46,9 +46,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * the hopes that we can then remove contexts and the like only
 	 * bound by their active reference.
 	 */
-	return i915_gem_wait_for_idle(i915,
-				      I915_WAIT_INTERRUPTIBLE,
-				      MAX_SCHEDULE_TIMEOUT);
+	return i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
 }
 
 static bool
@@ -126,6 +124,8 @@ i915_gem_evict_something(struct i915_address_space *vm,
 				    min_size, alignment, cache_level,
 				    start, end, mode);
 
+	i915_retire_requests(vm->i915, 0);
+
 search_again:
 	active = NULL;
 	INIT_LIST_HEAD(&eviction_list);
@@ -265,13 +265,13 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 
 	trace_i915_gem_evict_node(vm, target, flags);
 
-	/* Retire before we search the active list. Although we have
+	/*
+	 * Retire before we search the active list. Although we have
 	 * reasonable accuracy in our retirement lists, we may have
 	 * a stray pin (preventing eviction) that can only be resolved by
 	 * retiring.
 	 */
-	if (!(flags & PIN_NONBLOCK))
-		i915_retire_requests(vm->i915);
+	i915_retire_requests(vm->i915, 0);
 
 	check_color = vm->mm.color_adjust;
 	if (check_color) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9482b3490a47..d0395675b33a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2520,7 +2520,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
 	if (unlikely(ggtt->do_idle_maps)) {
-		if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) {
+		if (i915_retire_requests(dev_priv, MAX_SCHEDULE_TIMEOUT)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 4ecfae143276..7ac03588c813 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1429,7 +1429,7 @@ long i915_request_wait(struct i915_request *rq,
 	return timeout;
 }
 
-bool i915_retire_requests(struct drm_i915_private *i915)
+long i915_retire_requests(struct drm_i915_private *i915, long timeout)
 {
 	struct intel_gt_timelines *timelines = &i915->gt.timelines;
 	struct intel_timeline *tl, *tn;
@@ -1446,6 +1446,18 @@ bool i915_retire_requests(struct drm_i915_private *i915)
 		tl->active_count++; /* pin the list element */
 		spin_unlock_irqrestore(&timelines->lock, flags);
 
+		if (timeout > 0) {
+			struct dma_fence *fence;
+
+			fence = i915_active_fence_get(&tl->last_request);
+			if (fence) {
+				timeout = dma_fence_wait_timeout(fence,
+								 true,
+								 timeout);
+				dma_fence_put(fence);
+			}
+		}
+
 		retire_requests(tl);
 
 		spin_lock_irqsave(&timelines->lock, flags);
@@ -1468,7 +1480,10 @@ bool i915_retire_requests(struct drm_i915_private *i915)
 	list_for_each_entry_safe(tl, tn, &free, link)
 		__intel_timeline_free(&tl->kref);
 
-	return !list_empty(&timelines->active_list);
+	if (list_empty(&timelines->active_list))
+		return 0;
+
+	return timeout;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 57a2193c64d1..2a5d682aa6b1 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -310,7 +310,6 @@ long i915_request_wait(struct i915_request *rq,
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
 #define I915_WAIT_PRIORITY	BIT(1) /* small priority bump for the request */
 #define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
 
 static inline bool i915_request_signaled(const struct i915_request *rq)
 {
@@ -440,6 +439,6 @@ static inline bool i915_request_has_nopreempt(const struct i915_request *rq)
 	return unlikely(rq->flags & I915_REQUEST_NOPREEMPT);
 }
 
-bool i915_retire_requests(struct drm_i915_private *i915);
+long i915_retire_requests(struct drm_i915_private *i915, long timeout);
 
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index 2a5fbe46ea9f..ed496bd6d84f 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -18,8 +18,7 @@ int igt_flush_test(struct drm_i915_private *i915)
 
 	cond_resched();
 
-	i915_retire_requests(i915);
-	if (i915_gem_wait_for_idle(i915, 0, HZ / 5) == -ETIME) {
+	if (i915_gem_wait_for_idle(i915, HZ / 5) == -ETIME) {
 		pr_err("%pS timed out, cancelling all further testing.\n",
 		       __builtin_return_address(0));
 
@@ -30,7 +29,6 @@ int igt_flush_test(struct drm_i915_private *i915)
 		intel_gt_set_wedged(&i915->gt);
 		ret = -EIO;
 	}
-	i915_retire_requests(i915);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index 04a6f88fdf64..eae90f97df6c 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -23,9 +23,7 @@ int igt_live_test_begin(struct igt_live_test *t,
 	t->func = func;
 	t->name = name;
 
-	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_INTERRUPTIBLE,
-				     MAX_SCHEDULE_TIMEOUT);
+	err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
 	if (err) {
 		pr_err("%s(%s): failed to idle before, with err=%d!",
 		       func, name, err);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 1956006a0d5b..e1878c952dfa 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -44,7 +44,7 @@ void mock_device_flush(struct drm_i915_private *i915)
 	do {
 		for_each_engine(engine, i915, id)
 			mock_engine_flush(engine);
-	} while (i915_retire_requests(i915));
+	} while (i915_retire_requests(i915, MAX_SCHEDULE_TIMEOUT));
 }
 
 static void mock_device_release(struct drm_device *dev)
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 21/28] drm/i915: Serialise the fill BLT with the vma pinning
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (18 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 20/28] drm/i915: Merge wait_for_timelines with retire_request Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 22/28] drm/i915/execlists: Always request completion before marking an error Chris Wilson
                   ` (9 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Make sure that we wait for the vma to be pinned prior to telling the GPU
to fill the pages through that vma.

However, since our async operations fight over obj->resv->excl_fence we
must manually order them. This makes it much more fragile, and gives an
outside observer the chance to see the intermediate fences. To be
discussed!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_client_blt.c    | 46 ++++++++++++++-----
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |  3 +-
 2 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 3502071e1391..bbbc10499099 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -71,10 +71,30 @@ static struct i915_sleeve *create_sleeve(struct i915_address_space *vm,
 		goto err_free;
 	}
 
+	/*
+	 * XXX fix scheduling with get_pages & clear workers
+	 *
+	 * The complication is that we end up overwriting the same
+	 * obj->resv->excl_fence for each stage of the operation. That fence
+	 * should be set on scheduling the work, and only signaled upon
+	 * completion of the entire workqueue.
+	 *
+	 * Within the workqueue, we use the fence to schedule each individual
+	 * task. Each individual task knows to use obj->resv->fence.
+	 *
+	 * To an outsider, they must wait until the end and so the
+	 * obj->resv->fence must be the composite.
+	 *
+	 * Ideas?
+	 */
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (unlikely(err))
+		goto err_free;
+
 	vma->private = sleeve;
 	vma->ops = &proxy_vma_ops;
 
-	sleeve->vma = vma;
+	sleeve->vma = i915_vma_get(vma);
 	sleeve->pages = pages;
 	sleeve->page_sizes = *page_sizes;
 
@@ -87,6 +107,13 @@ static struct i915_sleeve *create_sleeve(struct i915_address_space *vm,
 
 static void destroy_sleeve(struct i915_sleeve *sleeve)
 {
+	struct i915_vma *vma = sleeve->vma;
+
+	if (vma) {
+		i915_vma_unpin(vma);
+		i915_vma_put(vma);
+	}
+
 	kfree(sleeve);
 }
 
@@ -155,8 +182,8 @@ static void clear_pages_dma_fence_cb(struct dma_fence *fence,
 static void clear_pages_worker(struct work_struct *work)
 {
 	struct clear_pages_work *w = container_of(work, typeof(*w), work);
-	struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
-	struct i915_vma *vma = w->sleeve->vma;
+	struct i915_vma *vma = fetch_and_zero(&w->sleeve->vma);
+	struct drm_i915_gem_object *obj = vma->obj;
 	struct i915_request *rq;
 	struct i915_vma *batch;
 	int err = w->dma.error;
@@ -166,20 +193,16 @@ static void clear_pages_worker(struct work_struct *work)
 
 	if (obj->cache_dirty) {
 		if (i915_gem_object_has_struct_page(obj))
-			drm_clflush_sg(w->sleeve->pages);
+			drm_clflush_sg(vma->pages);
 		obj->cache_dirty = false;
 	}
 	obj->read_domains = I915_GEM_GPU_DOMAINS;
 	obj->write_domain = 0;
 
-	err = i915_vma_pin(vma, 0, 0, PIN_USER);
-	if (unlikely(err))
-		goto out_signal;
-
 	batch = intel_emit_vma_fill_blt(w->ce, vma, w->value);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);
-		goto out_unpin;
+		goto out_signal;
 	}
 
 	rq = intel_context_create_request(w->ce);
@@ -224,14 +247,15 @@ static void clear_pages_worker(struct work_struct *work)
 	i915_request_add(rq);
 out_batch:
 	intel_emit_vma_release(w->ce, batch);
-out_unpin:
-	i915_vma_unpin(vma);
 out_signal:
 	if (unlikely(err)) {
 		dma_fence_set_error(&w->dma, err);
 		dma_fence_signal(&w->dma);
 		dma_fence_put(&w->dma);
 	}
+
+	i915_vma_unpin(vma);
+	i915_vma_put(vma);
 }
 
 static int __i915_sw_fence_call
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index d6e2f5b3dd62..271aea4279c3 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -941,13 +941,12 @@ static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
 
 	*cs++ = MI_FLUSH;
-
 	*cs++ = MI_STORE_DWORD_INDEX;
 	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
 	*cs++ = rq->fence.seqno;
 
+	*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 22/28] drm/i915/execlists: Always request completion before marking an error
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (19 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 21/28] drm/i915: Serialise the fill BLT with the vma pinning Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 23/28] drm/i915: Only enqueue already completed requests Chris Wilson
                   ` (8 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

Due to fun and games in our preempt-to-busy, it is possible for a
request to be completed in the background. Be vigilant and avoid setting
an error on an already signaled request, as dma_fence_set_error() throws a
warning.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index c8ca75eb79df..7474681cd025 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -234,6 +234,13 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring);
 
+static void mark_eio(struct i915_request *rq)
+{
+	if (!i915_request_signaled(rq))
+		dma_fence_set_error(&rq->fence, -EIO);
+	i915_request_mark_complete(rq);
+}
+
 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
 {
 	return (i915_ggtt_offset(engine->status_page.vma) +
@@ -2498,12 +2505,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	__execlists_reset(engine, true);
 
 	/* Mark all executing requests as skipped. */
-	list_for_each_entry(rq, &engine->active.requests, sched.link) {
-		if (!i915_request_signaled(rq))
-			dma_fence_set_error(&rq->fence, -EIO);
-
-		i915_request_mark_complete(rq);
-	}
+	list_for_each_entry(rq, &engine->active.requests, sched.link)
+		mark_eio(rq);
 
 	/* Flush the queued requests to the timeline list (for retiring). */
 	while ((rb = rb_first_cached(&execlists->queue))) {
@@ -2513,8 +2516,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 		priolist_for_each_request_consume(rq, rn, p, i) {
 			list_del_init(&rq->sched.link);
 			__i915_request_submit(rq);
-			dma_fence_set_error(&rq->fence, -EIO);
-			i915_request_mark_complete(rq);
+			mark_eio(rq);
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
@@ -2533,8 +2535,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 		if (ve->request) {
 			ve->request->engine = engine;
 			__i915_request_submit(ve->request);
-			dma_fence_set_error(&ve->request->fence, -EIO);
-			i915_request_mark_complete(ve->request);
+			mark_eio(ve->request);
 			ve->base.execlists.queue_priority_hint = INT_MIN;
 			ve->request = NULL;
 		}
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 23/28] drm/i915: Only enqueue already completed requests
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (20 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 22/28] drm/i915/execlists: Always request completion before marking an error Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 24/28] drm/i915/execlists: Force preemption Chris Wilson
                   ` (7 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

If we are asked to submit a completed request, just move it onto the
active-list without modifying its payload. If we try to emit the
modified payload of a completed request, we risk racing with the
ring->head update during retirement which may advance the head past our
breadcrumb and so we generate a warning for the emission being behind
the RING_HEAD.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 7ac03588c813..31039d55b463 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -341,6 +341,9 @@ void __i915_request_submit(struct i915_request *request)
 	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&engine->active.lock);
 
+	if (i915_request_completed(request))
+		goto xfer;
+
 	if (i915_gem_context_is_banned(request->gem_context))
 		i915_request_skip(request, -EIO);
 
@@ -364,7 +367,12 @@ void __i915_request_submit(struct i915_request *request)
 	    i915_sw_fence_signaled(&request->semaphore))
 		engine->saturated |= request->sched.semaphores;
 
-	/* We may be recursing from the signal callback of another i915 fence */
+	engine->emit_fini_breadcrumb(request,
+				     request->ring->vaddr + request->postfix);
+
+	engine->serial++;
+
+xfer:	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
 	list_move_tail(&request->sched.link, &engine->active.requests);
@@ -381,11 +389,6 @@ void __i915_request_submit(struct i915_request *request)
 
 	spin_unlock(&request->lock);
 
-	engine->emit_fini_breadcrumb(request,
-				     request->ring->vaddr + request->postfix);
-
-	engine->serial++;
-
 	trace_i915_request_execute(request);
 }
 
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 24/28] drm/i915/execlists: Force preemption
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (21 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 23/28] drm/i915: Only enqueue already completed requests Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 25/28] drm/i915: Mark up "sentinel" requests Chris Wilson
                   ` (6 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

If the preempted context takes too long to relinquish control, e.g. it
is stuck inside a shader with arbitration disabled, evict that context
with an engine reset. This ensures that preemptions are reasonably
responsive, providing a tighter QoS for the more important context at
the cost of flagging unresponsive contexts more frequently (i.e. instead
of using an ~10s hangcheck, we now evict at ~100ms).  The challenge
lies in picking a timeout that can be reasonably serviced by HW for
typical workloads, balancing the existing clients against the needs for
responsiveness.

Note that coupled with timeslicing, this will lead to rapid GPU "hang"
detection with multiple active contexts vying for GPU time.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig.profile | 12 +++++++
 drivers/gpu/drm/i915/gt/intel_lrc.c  | 51 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_params.h   |  2 +-
 3 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 48df8889a88a..3184e8491333 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -25,3 +25,15 @@ config DRM_I915_SPIN_REQUEST
 	  May be 0 to disable the initial spin. In practice, we estimate
 	  the cost of enabling the interrupt (if currently disabled) to be
 	  a few microseconds.
+
+config DRM_I915_PREEMPT_TIMEOUT
+	int "Preempt timeout (ms)"
+	default 100 # milliseconds
+	help
+	  How long to wait (in milliseconds) for a preemption event to occur
+	  when submitting a new context via execlists. If the current context
+	  does not hit an arbitration point and yield to HW before the timer
+	  expires, the HW will be reset to allow the more important context
+	  to execute.
+
+	  May be 0 to disable the timeout.
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 7474681cd025..6b3b9026d840 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1005,6 +1005,21 @@ static void record_preemption(struct intel_engine_execlists *execlists)
 	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
 }
 
+static unsigned long preempt_expires(void)
+{
+	const unsigned long timeout =
+		msecs_to_jiffies_timeout(CONFIG_DRM_I915_PREEMPT_TIMEOUT);
+
+	/*
+	 * Paranoia to make sure the compiler computes the timeout before
+	 * loading 'jiffies' as jiffies is volatile and may be updated in
+	 * the background by a timer tick. All to reduce the complexity
+	 * of the addition and reduce the risk of losing a jiffie.
+	 */
+	barrier();
+	return jiffies + timeout;
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1345,6 +1360,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		execlists->switch_priority_hint =
 			switch_prio(engine, *execlists->pending);
 		execlists_submit_ports(engine);
+		if (CONFIG_DRM_I915_PREEMPT_TIMEOUT)
+			mod_timer(&execlists->timer, preempt_expires());
 	} else {
 		ring_set_paused(engine, 0);
 	}
@@ -1604,6 +1621,37 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 		execlists_dequeue(engine);
 }
 
+static noinline void preempt_reset(struct intel_engine_cs *engine)
+{
+	const unsigned int bit = I915_RESET_ENGINE + engine->id;
+	unsigned long *lock = &engine->gt->reset.flags;
+
+	if (i915_modparams.reset < 3)
+		return;
+
+	if (test_and_set_bit(bit, lock))
+		return;
+
+	/* Mark this tasklet as disabled to avoid waiting for it to complete */
+	tasklet_disable_nosync(&engine->execlists.tasklet);
+
+	intel_engine_reset(engine, "preemption time out");
+
+	tasklet_enable(&engine->execlists.tasklet);
+	clear_and_wake_up_bit(bit, lock);
+}
+
+static bool preempt_timeout(struct intel_engine_cs *const engine)
+{
+	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
+		return false;
+
+	if (!intel_engine_has_preemption(engine))
+		return false;
+
+	return !timer_pending(&engine->execlists.timer);
+}
+
 /*
  * Check the unread Context Status Buffers and manage the submission of new
  * contexts to the ELSP accordingly.
@@ -1618,6 +1666,9 @@ static void execlists_submission_tasklet(unsigned long data)
 		spin_lock_irqsave(&engine->active.lock, flags);
 		__execlists_submission_tasklet(engine);
 		spin_unlock_irqrestore(&engine->active.lock, flags);
+	} else {
+		if (preempt_timeout(engine))
+			preempt_reset(engine);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index d29ade3b7de6..56058978bb27 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -61,7 +61,7 @@ struct drm_printer;
 	param(char *, dmc_firmware_path, NULL) \
 	param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO)) \
 	param(int, edp_vswing, 0) \
-	param(int, reset, 2) \
+	param(int, reset, 3) \
 	param(unsigned int, inject_load_failure, 0) \
 	param(int, fastboot, -1) \
 	param(int, enable_dpcd_backlight, 0) \
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 25/28] drm/i915: Mark up "sentinel" requests
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (22 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 24/28] drm/i915/execlists: Force preemption Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 26/28] drm/i915/execlists: Cancel banned contexts on schedule-out Chris Wilson
                   ` (5 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

Sometimes we want to emit a terminator request, a request that flushes
the pipeline and allows no request to come after it. This can be used
for a "preempt-to-idle" to ensure that upon processing the
context-switch to that request, all other active contexts have been
flushed.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c |  6 ++++++
 drivers/gpu/drm/i915/i915_request.h | 10 ++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 6b3b9026d840..5a6f1b1ddefa 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -804,6 +804,9 @@ static bool can_merge_rq(const struct i915_request *prev,
 	GEM_BUG_ON(prev == next);
 	GEM_BUG_ON(!assert_priority_queue(prev, next));
 
+	if (i915_request_has_sentinel(prev))
+		return false;
+
 	if (!can_merge_ctx(prev->hw_context, next->hw_context))
 		return false;
 
@@ -1307,6 +1310,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				if (last->hw_context == rq->hw_context)
 					goto done;
 
+				if (i915_request_has_sentinel(last))
+					goto done;
+
 				/*
 				 * If GVT overrides us we only ever submit
 				 * port[0], leaving port[1] empty. Note that we
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 2a5d682aa6b1..49c21672da3d 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -216,8 +216,9 @@ struct i915_request {
 	unsigned long emitted_jiffies;
 
 	unsigned long flags;
-#define I915_REQUEST_WAITBOOST BIT(0)
-#define I915_REQUEST_NOPREEMPT BIT(1)
+#define I915_REQUEST_WAITBOOST	BIT(0)
+#define I915_REQUEST_NOPREEMPT	BIT(1)
+#define I915_REQUEST_SENTINEL	BIT(2)
 
 	/** timeline->request entry for this request */
 	struct list_head link;
@@ -439,6 +440,11 @@ static inline bool i915_request_has_nopreempt(const struct i915_request *rq)
 	return unlikely(rq->flags & I915_REQUEST_NOPREEMPT);
 }
 
+static inline bool i915_request_has_sentinel(const struct i915_request *rq)
+{
+	return unlikely(rq->flags & I915_REQUEST_SENTINEL);
+}
+
 long i915_retire_requests(struct drm_i915_private *i915, long timeout);
 
 #endif /* I915_REQUEST_H */
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 26/28] drm/i915/execlists: Cancel banned contexts on schedule-out
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (23 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 25/28] drm/i915: Mark up "sentinel" requests Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26  7:21 ` [PATCH 27/28] drm/i915: Cancel non-persistent contexts on close Chris Wilson
                   ` (4 subsequent siblings)
  29 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

On completion of a banned context, scrub the context image so that we do
not replay the active payload. The intent is that we skip banned
payloads on request submission so that the timeline advancement
continues on in the background. However, if we are returning to a
preempted request, i915_request_skip() is ineffective and instead we
need to patch up the context image so that it continues from the start
of the next request.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 53 +++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 5a6f1b1ddefa..7280fe32e7e6 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -233,6 +233,9 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct intel_context *ce,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring);
+static void
+__execlists_update_reg_state(struct intel_context *ce,
+			     struct intel_engine_cs *engine);
 
 static void mark_eio(struct i915_request *rq)
 {
@@ -606,6 +609,53 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 		tasklet_schedule(&ve->base.execlists.tasklet);
 }
 
+static void mark_complete(struct i915_request *rq,
+			  struct intel_engine_cs *engine)
+{
+	const struct list_head * const list = &rq->timeline->requests;
+
+	*(u32 *)rq->timeline->hwsp_seqno = rq->fence.seqno;
+	GEM_BUG_ON(!i915_request_completed(rq));
+
+	list_for_each_entry_from_reverse(rq, list, link) {
+		if (i915_request_signaled(rq))
+			break;
+
+		mark_eio(rq);
+	}
+
+	intel_engine_queue_breadcrumbs(engine);
+}
+
+static void cancel_active(struct i915_request *rq,
+			  struct intel_engine_cs *engine)
+{
+	struct intel_context * const ce = rq->hw_context;
+	u32 *regs = ce->lrc_reg_state;
+
+	if (i915_request_completed(rq))
+		return;
+
+	GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
+		  __func__, engine->name, rq->fence.context, rq->fence.seqno);
+
+	/* Scrub the context image to prevent replaying the previous batch */
+	memcpy(regs, /* skip restoring the vanilla PPHWSP */
+	       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+	       engine->context_size - PAGE_SIZE);
+	execlists_init_reg_state(regs, ce, engine, ce->ring);
+
+	/* Ring will be advanced on retire; here we need to reset the context */
+	ce->ring->head = intel_ring_wrap(ce->ring, rq->wa_tail);
+	__execlists_update_reg_state(ce, engine);
+
+	/* We've switched away, so this should be a no-op, but intent matters */
+	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+
+	/* Let everyone know that the request may now be retired */
+	mark_complete(rq, engine);
+}
+
 static inline void
 __execlists_schedule_out(struct i915_request *rq,
 			 struct intel_engine_cs * const engine)
@@ -616,6 +666,9 @@ __execlists_schedule_out(struct i915_request *rq,
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 	intel_gt_pm_put(engine->gt);
 
+	if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
+		cancel_active(rq, engine);
+
 	/*
 	 * If this is part of a virtual engine, its next request may
 	 * have been blocked waiting for access to the active context.
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 27/28] drm/i915: Cancel non-persistent contexts on close
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (24 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 26/28] drm/i915/execlists: Cancel banned contexts on schedule-out Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26 13:39   ` Bloomfield, Jon
  2019-08-26  7:21 ` [PATCH 28/28] drm/i915: Replace hangcheck by heartbeats Chris Wilson
                   ` (3 subsequent siblings)
  29 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

Normally, we rely on our hangcheck to prevent persistent batches from
hogging the GPU. However, if the user disables hangcheck, this mechanism
breaks down. Despite our insistence that this is unsafe, the users are
equally insistent that they want to use endless batches and will disable
the hangcheck mechanism. We are looking at perhaps replacing hangcheck
with a softer mechanism, that sends a pulse down the engine to check if
it is well. We can use the same preemptive pulse to flush an active
persistent context off the GPU upon context close, preventing resources
being lost and unkillable requests remaining on the GPU after process
termination. To avoid changing the ABI and accidentally breaking
existing userspace, we make the persistence of a context explicit and
enable it by default (matching current ABI). Userspace can opt out of
persistent mode (forcing requests to be cancelled when the context is
closed by process termination or explicitly) by a context parameter. To
facilitate existing use-cases of disabling hangcheck, if the modparam is
disabled (i915.enable_hangcheck=0), we disable persistence mode by
default.  (Note, one of the outcomes for supporting endless mode will be
the removal of hangchecking, at which point opting into persistent mode
will be mandatory, or maybe the default perhaps controlled by cgroups.)

Testcase: igt/gem_ctx_persistence
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   3 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 122 ++++++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 +++
 .../gpu/drm/i915/gem/i915_gem_context_types.h |   1 +
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |  54 ++++++++
 .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |  14 ++
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   2 +-
 drivers/gpu/drm/i915/i915_priolist_types.h    |   1 +
 include/uapi/drm/i915_drm.h                   |  15 +++
 9 files changed, 225 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 658b930d34a8..eaa74e000985 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -76,8 +76,9 @@ gt-y += \
 	gt/intel_breadcrumbs.o \
 	gt/intel_context.o \
 	gt/intel_engine_cs.o \
-	gt/intel_engine_pool.o \
+	gt/intel_engine_heartbeat.o \
 	gt/intel_engine_pm.o \
+	gt/intel_engine_pool.o \
 	gt/intel_engine_user.o \
 	gt/intel_gt.o \
 	gt/intel_gt_irq.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index bd9397669332..5520a896e701 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -70,6 +70,7 @@
 #include <drm/i915_drm.h>
 
 #include "gt/intel_lrc_reg.h"
+#include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_user.h"
 
 #include "i915_gem_context.h"
@@ -375,6 +376,78 @@ void i915_gem_context_release(struct kref *ref)
 		queue_work(i915->wq, &i915->contexts.free_work);
 }
 
+static inline struct i915_gem_engines *
+__context_engines_static(struct i915_gem_context *ctx)
+{
+	return rcu_dereference_protected(ctx->engines, true);
+}
+
+static void kill_context(struct i915_gem_context *ctx)
+{
+	intel_engine_mask_t tmp, active, reset;
+	struct intel_gt *gt = &ctx->i915->gt;
+	struct i915_gem_engines_iter it;
+	struct intel_engine_cs *engine;
+	struct intel_context *ce;
+
+	/*
+	 * If we are already banned, it was due to a guilty request causing
+	 * a reset and the entire context being evicted from the GPU.
+	 */
+	if (i915_gem_context_is_banned(ctx))
+		return;
+
+	i915_gem_context_set_banned(ctx);
+
+	/*
+	 * Map the user's engine back to the actual engines; one virtual
+	 * engine will be mapped to multiple engines, and using ctx->engine[]
+	 * the same engine may have multiple instances in the user's map.
+	 * However, we only care about pending requests, so only include
+	 * engines on which there are incomplete requests.
+	 */
+	active = 0;
+	for_each_gem_engine(ce, __context_engines_static(ctx), it) {
+		struct dma_fence *fence;
+
+		if (!ce->timeline)
+			continue;
+
+		fence = i915_active_fence_get(&ce->timeline->last_request);
+		if (!fence)
+			continue;
+
+		engine = to_request(fence)->engine;
+		if (HAS_EXECLISTS(gt->i915))
+			engine = intel_context_inflight(ce);
+		if (engine)
+			active |= engine->mask;
+
+		dma_fence_put(fence);
+	}
+
+	/*
+	 * Send a "high priority pulse" down the engine to cause the
+	 * current request to be momentarily preempted. (If it fails to
+	 * be preempted, it will be reset). As we have marked our context
+	 * as banned, any incomplete request, including any running, will
+	 * be skipped following the preemption.
+	 */
+	reset = 0;
+	for_each_engine_masked(engine, gt->i915, active, tmp)
+		if (intel_engine_pulse(engine))
+			reset |= engine->mask;
+
+	/*
+	 * If we are unable to send a preemptive pulse to bump
+	 * the context from the GPU, we have to resort to a full
+	 * reset. We hope the collateral damage is worth it.
+	 */
+	if (reset)
+		intel_gt_handle_error(gt, reset, 0,
+				      "context closure in %s", ctx->name);
+}
+
 static void context_close(struct i915_gem_context *ctx)
 {
 	i915_gem_context_set_closed(ctx);
@@ -400,6 +473,10 @@ static void context_close(struct i915_gem_context *ctx)
 	lut_close(ctx);
 
 	mutex_unlock(&ctx->mutex);
+
+	if (!i915_gem_context_is_persistent(ctx))
+		kill_context(ctx);
+
 	i915_gem_context_put(ctx);
 }
 
@@ -440,6 +517,21 @@ __create_context(struct drm_i915_private *i915)
 	i915_gem_context_set_bannable(ctx);
 	i915_gem_context_set_recoverable(ctx);
 
+	/*
+	 * If the user has disabled hangchecking, we can not be sure that
+	 * the batches will ever complete after the context is closed,
+	 * keeping the context and all resources pinned forever. So in this
+	 * case we opt to forcibly kill off all remaining requests on
+	 * context close.
+	 *
+	 * Note that the user may change the value of the modparam between
+	 * context creation and close, we choose to ignore this for the
+	 * sake of determinism and expect the user to set the parameter
+	 * on module load and never touch it again.
+	 */
+	if (i915_modparams.enable_hangcheck) /* cgroup hook? */
+		i915_gem_context_set_persistence(ctx);
+
 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
 
@@ -598,6 +690,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	}
 
 	i915_gem_context_clear_bannable(ctx);
+	i915_gem_context_set_persistence(ctx);
 	ctx->sched.priority = I915_USER_PRIORITY(prio);
 
 	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
@@ -1730,6 +1823,26 @@ get_engines(struct i915_gem_context *ctx,
 	return err;
 }
 
+static int
+set_persistence(struct i915_gem_context *ctx,
+		const struct drm_i915_gem_context_param *args)
+{
+	if (args->size)
+		return -EINVAL;
+
+	if (args->value) {
+		i915_gem_context_set_persistence(ctx);
+		return 0;
+	}
+
+	/* To cancel a context we use "preempt-to-idle" */
+	if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+		return -ENODEV;
+
+	i915_gem_context_clear_persistence(ctx);
+	return 0;
+}
+
 static int ctx_setparam(struct drm_i915_file_private *fpriv,
 			struct i915_gem_context *ctx,
 			struct drm_i915_gem_context_param *args)
@@ -1807,6 +1920,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
 		ret = set_engines(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_PERSISTENCE:
+		ret = set_persistence(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
@@ -2258,6 +2375,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		ret = get_engines(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_PERSISTENCE:
+		args->size = 0;
+		args->value = i915_gem_context_is_persistent(ctx);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 176978608b6f..e0f5b6c6a331 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -74,6 +74,21 @@ static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *c
 	clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
 }
 
+static inline bool i915_gem_context_is_persistent(const struct i915_gem_context *ctx)
+{
+	return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_set_persistence(struct i915_gem_context *ctx)
+{
+	set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_clear_persistence(struct i915_gem_context *ctx)
+{
+	clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
 static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx)
 {
 	return test_bit(CONTEXT_BANNED, &ctx->flags);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 260d59cc3de8..daf1ea5075a6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -137,6 +137,7 @@ struct i915_gem_context {
 #define UCONTEXT_NO_ERROR_CAPTURE	1
 #define UCONTEXT_BANNABLE		2
 #define UCONTEXT_RECOVERABLE		3
+#define UCONTEXT_PERSISTENCE		4
 
 	/**
 	 * @flags: small set of booleans
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
new file mode 100644
index 000000000000..43d1370eaa7f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -0,0 +1,54 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_request.h"
+
+#include "intel_context.h"
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
+#include "intel_engine.h"
+#include "intel_gt.h"
+
+static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
+{
+	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
+	i915_request_add_active_barriers(rq);
+}
+
+int intel_engine_pulse(struct intel_engine_cs *engine)
+{
+	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
+	struct intel_context *ce = engine->kernel_context;
+	struct i915_request *rq;
+	int err = 0;
+
+	if (!intel_engine_has_preemption(engine))
+		return -ENODEV;
+
+	if (!intel_engine_pm_get_if_awake(engine))
+		return 0;
+
+	mutex_lock(&ce->timeline->mutex);
+
+	intel_context_enter(ce);
+	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+	intel_context_exit(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_unlock;
+	}
+
+	rq->flags |= I915_REQUEST_SENTINEL;
+	idle_pulse(engine, rq);
+
+	__i915_request_commit(rq);
+	__i915_request_queue(rq, &attr);
+
+out_unlock:
+	mutex_unlock(&ce->timeline->mutex);
+	intel_engine_pm_put(engine);
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
new file mode 100644
index 000000000000..b950451b5998
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
@@ -0,0 +1,14 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_HEARTBEAT_H
+#define INTEL_ENGINE_HEARTBEAT_H
+
+struct intel_engine_cs;
+
+int intel_engine_pulse(struct intel_engine_cs *engine);
+
+#endif /* INTEL_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 472b2259f629..12a2608a8889 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -110,7 +110,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	i915_request_add_active_barriers(rq);
 
 	/* Install ourselves as a preemption barrier */
-	rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE;
+	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 	__i915_request_commit(rq);
 
 	/* Release our exclusive hold on the engine */
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index 21037a2e2038..ae8bb3cb627e 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -39,6 +39,7 @@ enum {
  * active request.
  */
 #define I915_PRIORITY_UNPREEMPTABLE INT_MAX
+#define I915_PRIORITY_BARRIER INT_MAX
 
 #define __NO_PREEMPTION (I915_PRIORITY_WAIT)
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 469dc512cca3..dbc8691d75d0 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1565,6 +1565,21 @@ struct drm_i915_gem_context_param {
  *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
  */
 #define I915_CONTEXT_PARAM_ENGINES	0xa
+
+/*
+ * I915_CONTEXT_PARAM_PERSISTENCE:
+ *
+ * Allow the context and active rendering to survive the process until
+ * completion. Persistence allows fire-and-forget clients to queue up a
+ * bunch of work, hand the output over to a display server and then quit.
+ * If the context is not marked as persistent, upon closing (either via
+ * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure
+ * or process termination), the context and any outstanding requests will be
+ * cancelled (and exported fences for cancelled requests marked as -EIO).
+ *
+ * By default, new contexts allow persistence.
+ */
+#define I915_CONTEXT_PARAM_PERSISTENCE	0xb
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 28/28] drm/i915: Replace hangcheck by heartbeats
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (25 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 27/28] drm/i915: Cancel non-persistent contexts on close Chris Wilson
@ 2019-08-26  7:21 ` Chris Wilson
  2019-08-26 14:08   ` Bloomfield, Jon
  2019-08-26  7:34 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Patchwork
                   ` (2 subsequent siblings)
  29 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  7:21 UTC (permalink / raw)
  To: intel-gfx

Replace sampling the engine state every so often with a periodic
heartbeat request to measure the health of an engine. This is coupled
with the forced-preemption to allow long running requests to survive so
long as they do not block other users.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
---
 drivers/gpu/drm/i915/Kconfig.profile          |  11 +
 drivers/gpu/drm/i915/Makefile                 |   1 -
 drivers/gpu/drm/i915/display/intel_display.c  |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |   1 -
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |   2 -
 drivers/gpu/drm/i915/gt/intel_engine.h        |  32 --
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  10 +-
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 113 +++++-
 .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |   5 +
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   5 +-
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  14 +-
 drivers/gpu/drm/i915/gt/intel_gt.c            |   1 -
 drivers/gpu/drm/i915/gt/intel_gt.h            |   4 -
 drivers/gpu/drm/i915/gt/intel_gt_pm.c         |   2 -
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |   9 -
 drivers/gpu/drm/i915/gt/intel_hangcheck.c     | 361 ------------------
 drivers/gpu/drm/i915/gt/intel_reset.c         |   3 +-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |   4 -
 drivers/gpu/drm/i915/i915_debugfs.c           |  87 -----
 drivers/gpu/drm/i915/i915_drv.c               |   3 -
 drivers/gpu/drm/i915/i915_drv.h               |   1 -
 drivers/gpu/drm/i915/i915_getparam.c          |   3 +-
 drivers/gpu/drm/i915/i915_gpu_error.c         |  33 +-
 drivers/gpu/drm/i915/i915_gpu_error.h         |   2 -
 drivers/gpu/drm/i915/i915_params.c            |   6 +-
 drivers/gpu/drm/i915/i915_priolist_types.h    |   6 +
 26 files changed, 159 insertions(+), 562 deletions(-)
 delete mode 100644 drivers/gpu/drm/i915/gt/intel_hangcheck.c

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 3184e8491333..aafb57f84169 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -37,3 +37,14 @@ config DRM_I915_PREEMPT_TIMEOUT
 	  to execute.
 
 	  May be 0 to disable the timeout.
+
+config DRM_I915_HEARTBEAT_INTERVAL
+	int "Interval between heartbeat pulses (ms)"
+	default 2500 # milliseconds
+	help
+	  While active the driver uses a periodic request, a heartbeat, to
+	  check the wellness of the GPU and to regularly flush state changes
+	  (idle barriers).
+
+	  May be 0 to disable heartbeats and therefore disable automatic GPU
+	  hang detection.
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index eaa74e000985..d7286720de83 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -84,7 +84,6 @@ gt-y += \
 	gt/intel_gt_irq.o \
 	gt/intel_gt_pm.o \
 	gt/intel_gt_pm_irq.o \
-	gt/intel_hangcheck.o \
 	gt/intel_lrc.o \
 	gt/intel_renderstate.o \
 	gt/intel_reset.o \
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 6e74c33f2ec4..e008016d864c 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -14306,7 +14306,7 @@ static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state)
 static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
 {
 	struct i915_sched_attr attr = {
-		.priority = I915_PRIORITY_DISPLAY,
+		.priority = I915_USER_PRIORITY(I915_PRIORITY_DISPLAY),
 	};
 
 	i915_gem_object_wait_priority(obj, 0, &attr);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index a78af25dce36..967c30737dc5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -429,6 +429,5 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj,
 int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 				  unsigned int flags,
 				  const struct i915_sched_attr *attr);
-#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
 
 #endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index b41f5e4c0c65..28a8f3aac402 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -127,8 +127,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 */
 	switch_to_kernel_context_sync(&i915->gt);
 
-	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
-
 	i915_gem_drain_freed_objects(i915);
 
 	intel_uc_suspend(&i915->gt.uc);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index d3c6993f4f46..da09eea0fb86 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -89,38 +89,6 @@ struct drm_printer;
 /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
  * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
  */
-enum intel_engine_hangcheck_action {
-	ENGINE_IDLE = 0,
-	ENGINE_WAIT,
-	ENGINE_ACTIVE_SEQNO,
-	ENGINE_ACTIVE_HEAD,
-	ENGINE_ACTIVE_SUBUNITS,
-	ENGINE_WAIT_KICK,
-	ENGINE_DEAD,
-};
-
-static inline const char *
-hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
-{
-	switch (a) {
-	case ENGINE_IDLE:
-		return "idle";
-	case ENGINE_WAIT:
-		return "wait";
-	case ENGINE_ACTIVE_SEQNO:
-		return "active seqno";
-	case ENGINE_ACTIVE_HEAD:
-		return "active head";
-	case ENGINE_ACTIVE_SUBUNITS:
-		return "active subunits";
-	case ENGINE_WAIT_KICK:
-		return "wait kick";
-	case ENGINE_DEAD:
-		return "dead";
-	}
-
-	return "unknown";
-}
 
 static inline unsigned int
 execlists_num_ports(const struct intel_engine_execlists * const execlists)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 17006d50b63f..16439c542a52 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -597,7 +597,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init_active(engine, ENGINE_PHYSICAL);
 	intel_engine_init_breadcrumbs(engine);
 	intel_engine_init_execlists(engine);
-	intel_engine_init_hangcheck(engine);
 	intel_engine_init_cmd_parser(engine);
 	intel_engine_init__pm(engine);
 
@@ -1375,8 +1374,13 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		drm_printf(m, "*** WEDGED ***\n");
 
 	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
-	drm_printf(m, "\tHangcheck: %d ms ago\n",
-		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
+
+	rcu_read_lock();
+	rq = READ_ONCE(engine->last_heartbeat);
+	if (rq)
+		drm_printf(m, "\tHeartbeat: %d ms ago\n",
+			   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
+	rcu_read_unlock();
 	drm_printf(m, "\tReset count: %d (global %d)\n",
 		   i915_reset_engine_count(error, engine),
 		   i915_reset_count(error));
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 43d1370eaa7f..d0fc94c71ee7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -11,6 +11,21 @@
 #include "intel_engine_pm.h"
 #include "intel_engine.h"
 #include "intel_gt.h"
+#include "intel_reset.h"
+
+/*
+ * While the engine is active, we send a periodic pulse along the engine
+ * to check on its health and to flush any idle-barriers. If that request
+ * is stuck, and we fail to preempt it, we declare the engine hung and
+ * issue a reset -- in the hope that restores progress.
+ */
+
+static long delay(void)
+{
+	const long t = msecs_to_jiffies(CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
+
+	return round_jiffies_up_relative(t);
+}
 
 static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
 {
@@ -18,6 +33,100 @@ static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
 	i915_request_add_active_barriers(rq);
 }
 
+static void heartbeat(struct work_struct *wrk)
+{
+	struct i915_sched_attr attr = {
+		.priority = I915_USER_PRIORITY(I915_PRIORITY_MIN),
+	};
+	struct intel_engine_cs *engine =
+		container_of(wrk, typeof(*engine), heartbeat.work);
+	struct intel_context *ce = engine->kernel_context;
+	struct i915_request *rq;
+
+	if (!intel_engine_pm_get_if_awake(engine))
+		return;
+
+	rq = engine->last_heartbeat;
+	if (rq && i915_request_completed(rq)) {
+		i915_request_put(rq);
+		engine->last_heartbeat = NULL;
+	}
+
+	if (intel_gt_is_wedged(engine->gt))
+		goto out;
+
+	if (engine->last_heartbeat) {
+		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
+			struct drm_printer p = drm_debug_printer(__func__);
+
+			intel_engine_dump(engine, &p,
+					  "%s heartbeat not ticking\n",
+					  engine->name);
+		}
+
+		if (engine->schedule &&
+		    rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
+			attr.priority =
+				I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT);
+			if (rq->sched.attr.priority >= attr.priority)
+				attr.priority = I915_PRIORITY_BARRIER;
+
+			local_bh_disable();
+			engine->schedule(rq, &attr);
+			local_bh_enable();
+		} else {
+			intel_gt_handle_error(engine->gt, engine->mask,
+					      I915_ERROR_CAPTURE,
+					      "stopped heartbeat on %s",
+					      engine->name);
+		}
+		goto out;
+	}
+
+	if (engine->wakeref_serial == engine->serial)
+		goto out;
+
+	mutex_lock(&ce->timeline->mutex);
+
+	intel_context_enter(ce);
+	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+	intel_context_exit(ce);
+	if (IS_ERR(rq))
+		goto unlock;
+
+	idle_pulse(engine, rq);
+	if (i915_modparams.enable_hangcheck)
+		engine->last_heartbeat = i915_request_get(rq);
+
+	__i915_request_commit(rq);
+	__i915_request_queue(rq, &attr);
+
+unlock:
+	mutex_unlock(&ce->timeline->mutex);
+out:
+	schedule_delayed_work(&engine->heartbeat, delay());
+	intel_engine_pm_put(engine);
+}
+
+void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
+{
+	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
+		return;
+
+	schedule_delayed_work(&engine->heartbeat, delay());
+}
+
+void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
+{
+	cancel_delayed_work(&engine->heartbeat);
+	i915_request_put(fetch_and_zero(&engine->last_heartbeat));
+}
+
+void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
+{
+	INIT_DELAYED_WORK(&engine->heartbeat, heartbeat);
+}
+
 int intel_engine_pulse(struct intel_engine_cs *engine)
 {
 	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
@@ -31,7 +140,8 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
 	if (!intel_engine_pm_get_if_awake(engine))
 		return 0;
 
-	mutex_lock(&ce->timeline->mutex);
+	if (!mutex_trylock(&ce->timeline->mutex))
+		goto out_rpm;
 
 	intel_context_enter(ce);
 	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
@@ -49,6 +159,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
 
 out_unlock:
 	mutex_unlock(&ce->timeline->mutex);
+out_rpm:
 	intel_engine_pm_put(engine);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
index b950451b5998..39391004554d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
@@ -9,6 +9,11 @@
 
 struct intel_engine_cs;
 
+void intel_engine_init_heartbeat(struct intel_engine_cs *engine);
+
+void intel_engine_park_heartbeat(struct intel_engine_cs *engine);
+void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine);
+
 int intel_engine_pulse(struct intel_engine_cs *engine);
 
 #endif /* INTEL_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 12a2608a8889..5e74c3b86a21 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -7,6 +7,7 @@
 #include "i915_drv.h"
 
 #include "intel_engine.h"
+#include "intel_engine_heartbeat.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_pool.h"
 #include "intel_gt.h"
@@ -33,7 +34,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
 	if (engine->unpark)
 		engine->unpark(engine);
 
-	intel_engine_init_hangcheck(engine);
+	intel_engine_unpark_heartbeat(engine);
 	return 0;
 }
 
@@ -157,6 +158,7 @@ static int __engine_park(struct intel_wakeref *wf)
 
 	call_idle_barriers(engine); /* cleanup after wedging */
 
+	intel_engine_park_heartbeat(engine);
 	intel_engine_disarm_breadcrumbs(engine);
 	intel_engine_pool_park(&engine->pool);
 
@@ -187,6 +189,7 @@ void intel_engine_init__pm(struct intel_engine_cs *engine)
 	struct intel_runtime_pm *rpm = &engine->i915->runtime_pm;
 
 	intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
+	intel_engine_init_heartbeat(engine);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 15e02cb58a67..53c649f4c8e4 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -15,6 +15,7 @@
 #include <linux/rbtree.h>
 #include <linux/timer.h>
 #include <linux/types.h>
+#include <linux/workqueue.h>
 
 #include "i915_gem.h"
 #include "i915_pmu.h"
@@ -76,14 +77,6 @@ struct intel_instdone {
 	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
 };
 
-struct intel_engine_hangcheck {
-	u64 acthd;
-	u32 last_ring;
-	u32 last_head;
-	unsigned long action_timestamp;
-	struct intel_instdone instdone;
-};
-
 struct intel_ring {
 	struct kref ref;
 	struct i915_vma *vma;
@@ -323,6 +316,9 @@ struct intel_engine_cs {
 
 	intel_engine_mask_t saturated; /* submitting semaphores too late? */
 
+	struct delayed_work heartbeat;
+	struct i915_request *last_heartbeat;
+
 	unsigned long serial;
 
 	unsigned long wakeref_serial;
@@ -473,8 +469,6 @@ struct intel_engine_cs {
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;
 
-	struct intel_engine_hangcheck hangcheck;
-
 #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index c2afffb94474..20f3181c93bd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -18,7 +18,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 	INIT_LIST_HEAD(&gt->closed_vma);
 	spin_lock_init(&gt->closed_lock);
 
-	intel_gt_init_hangcheck(gt);
 	intel_gt_init_reset(gt);
 	intel_gt_pm_init_early(gt);
 	intel_uc_init_early(&gt->uc);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 4920cb351f10..a43903b1cc63 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -39,8 +39,6 @@ void intel_gt_clear_error_registers(struct intel_gt *gt,
 void intel_gt_flush_ggtt_writes(struct intel_gt *gt);
 void intel_gt_chipset_flush(struct intel_gt *gt);
 
-void intel_gt_init_hangcheck(struct intel_gt *gt);
-
 int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size);
 void intel_gt_fini_scratch(struct intel_gt *gt);
 
@@ -55,6 +53,4 @@ static inline bool intel_gt_is_wedged(struct intel_gt *gt)
 	return __intel_reset_failed(&gt->reset);
 }
 
-void intel_gt_queue_hangcheck(struct intel_gt *gt);
-
 #endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 1363e069ec83..45fa0a58c387 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -46,8 +46,6 @@ static int __gt_unpark(struct intel_wakeref *wf)
 
 	i915_pmu_gt_unparked(i915);
 
-	intel_gt_queue_hangcheck(gt);
-
 	pm_notify(i915, INTEL_GT_UNPARK);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index dc295c196d11..0f799335a00a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -25,14 +25,6 @@ struct i915_ggtt;
 struct intel_engine_cs;
 struct intel_uncore;
 
-struct intel_hangcheck {
-	/* For hangcheck timer */
-#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
-#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
-
-	struct delayed_work work;
-};
-
 struct intel_gt {
 	struct drm_i915_private *i915;
 	struct intel_uncore *uncore;
@@ -54,7 +46,6 @@ struct intel_gt {
 	struct list_head closed_vma;
 	spinlock_t closed_lock; /* guards the list of closed_vma */
 
-	struct intel_hangcheck hangcheck;
 	struct intel_reset reset;
 
 	/**
diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
deleted file mode 100644
index 40f62f780be5..000000000000
--- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "i915_drv.h"
-#include "intel_engine.h"
-#include "intel_gt.h"
-#include "intel_reset.h"
-
-struct hangcheck {
-	u64 acthd;
-	u32 ring;
-	u32 head;
-	enum intel_engine_hangcheck_action action;
-	unsigned long action_timestamp;
-	int deadlock;
-	struct intel_instdone instdone;
-	bool wedged:1;
-	bool stalled:1;
-};
-
-static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
-{
-	u32 tmp = current_instdone | *old_instdone;
-	bool unchanged;
-
-	unchanged = tmp == *old_instdone;
-	*old_instdone |= tmp;
-
-	return unchanged;
-}
-
-static bool subunits_stuck(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
-	struct intel_instdone instdone;
-	struct intel_instdone *accu_instdone = &engine->hangcheck.instdone;
-	bool stuck;
-	int slice;
-	int subslice;
-
-	intel_engine_get_instdone(engine, &instdone);
-
-	/* There might be unstable subunit states even when
-	 * actual head is not moving. Filter out the unstable ones by
-	 * accumulating the undone -> done transitions and only
-	 * consider those as progress.
-	 */
-	stuck = instdone_unchanged(instdone.instdone,
-				   &accu_instdone->instdone);
-	stuck &= instdone_unchanged(instdone.slice_common,
-				    &accu_instdone->slice_common);
-
-	for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) {
-		stuck &= instdone_unchanged(instdone.sampler[slice][subslice],
-					    &accu_instdone->sampler[slice][subslice]);
-		stuck &= instdone_unchanged(instdone.row[slice][subslice],
-					    &accu_instdone->row[slice][subslice]);
-	}
-
-	return stuck;
-}
-
-static enum intel_engine_hangcheck_action
-head_stuck(struct intel_engine_cs *engine, u64 acthd)
-{
-	if (acthd != engine->hangcheck.acthd) {
-
-		/* Clear subunit states on head movement */
-		memset(&engine->hangcheck.instdone, 0,
-		       sizeof(engine->hangcheck.instdone));
-
-		return ENGINE_ACTIVE_HEAD;
-	}
-
-	if (!subunits_stuck(engine))
-		return ENGINE_ACTIVE_SUBUNITS;
-
-	return ENGINE_DEAD;
-}
-
-static enum intel_engine_hangcheck_action
-engine_stuck(struct intel_engine_cs *engine, u64 acthd)
-{
-	enum intel_engine_hangcheck_action ha;
-	u32 tmp;
-
-	ha = head_stuck(engine, acthd);
-	if (ha != ENGINE_DEAD)
-		return ha;
-
-	if (IS_GEN(engine->i915, 2))
-		return ENGINE_DEAD;
-
-	/* Is the chip hanging on a WAIT_FOR_EVENT?
-	 * If so we can simply poke the RB_WAIT bit
-	 * and break the hang. This should work on
-	 * all but the second generation chipsets.
-	 */
-	tmp = ENGINE_READ(engine, RING_CTL);
-	if (tmp & RING_WAIT) {
-		intel_gt_handle_error(engine->gt, engine->mask, 0,
-				      "stuck wait on %s", engine->name);
-		ENGINE_WRITE(engine, RING_CTL, tmp);
-		return ENGINE_WAIT_KICK;
-	}
-
-	return ENGINE_DEAD;
-}
-
-static void hangcheck_load_sample(struct intel_engine_cs *engine,
-				  struct hangcheck *hc)
-{
-	hc->acthd = intel_engine_get_active_head(engine);
-	hc->ring = ENGINE_READ(engine, RING_START);
-	hc->head = ENGINE_READ(engine, RING_HEAD);
-}
-
-static void hangcheck_store_sample(struct intel_engine_cs *engine,
-				   const struct hangcheck *hc)
-{
-	engine->hangcheck.acthd = hc->acthd;
-	engine->hangcheck.last_ring = hc->ring;
-	engine->hangcheck.last_head = hc->head;
-}
-
-static enum intel_engine_hangcheck_action
-hangcheck_get_action(struct intel_engine_cs *engine,
-		     const struct hangcheck *hc)
-{
-	if (intel_engine_is_idle(engine))
-		return ENGINE_IDLE;
-
-	if (engine->hangcheck.last_ring != hc->ring)
-		return ENGINE_ACTIVE_SEQNO;
-
-	if (engine->hangcheck.last_head != hc->head)
-		return ENGINE_ACTIVE_SEQNO;
-
-	return engine_stuck(engine, hc->acthd);
-}
-
-static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
-					struct hangcheck *hc)
-{
-	unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT;
-
-	hc->action = hangcheck_get_action(engine, hc);
-
-	/* We always increment the progress
-	 * if the engine is busy and still processing
-	 * the same request, so that no single request
-	 * can run indefinitely (such as a chain of
-	 * batches). The only time we do not increment
-	 * the hangcheck score on this ring, if this
-	 * engine is in a legitimate wait for another
-	 * engine. In that case the waiting engine is a
-	 * victim and we want to be sure we catch the
-	 * right culprit. Then every time we do kick
-	 * the ring, make it as a progress as the seqno
-	 * advancement might ensure and if not, it
-	 * will catch the hanging engine.
-	 */
-
-	switch (hc->action) {
-	case ENGINE_IDLE:
-	case ENGINE_ACTIVE_SEQNO:
-		/* Clear head and subunit states on seqno movement */
-		hc->acthd = 0;
-
-		memset(&engine->hangcheck.instdone, 0,
-		       sizeof(engine->hangcheck.instdone));
-
-		/* Intentional fall through */
-	case ENGINE_WAIT_KICK:
-	case ENGINE_WAIT:
-		engine->hangcheck.action_timestamp = jiffies;
-		break;
-
-	case ENGINE_ACTIVE_HEAD:
-	case ENGINE_ACTIVE_SUBUNITS:
-		/*
-		 * Seqno stuck with still active engine gets leeway,
-		 * in hopes that it is just a long shader.
-		 */
-		timeout = I915_SEQNO_DEAD_TIMEOUT;
-		break;
-
-	case ENGINE_DEAD:
-		break;
-
-	default:
-		MISSING_CASE(hc->action);
-	}
-
-	hc->stalled = time_after(jiffies,
-				 engine->hangcheck.action_timestamp + timeout);
-	hc->wedged = time_after(jiffies,
-				 engine->hangcheck.action_timestamp +
-				 I915_ENGINE_WEDGED_TIMEOUT);
-}
-
-static void hangcheck_declare_hang(struct intel_gt *gt,
-				   intel_engine_mask_t hung,
-				   intel_engine_mask_t stuck)
-{
-	struct intel_engine_cs *engine;
-	intel_engine_mask_t tmp;
-	char msg[80];
-	int len;
-
-	/* If some rings hung but others were still busy, only
-	 * blame the hanging rings in the synopsis.
-	 */
-	if (stuck != hung)
-		hung &= ~stuck;
-	len = scnprintf(msg, sizeof(msg),
-			"%s on ", stuck == hung ? "no progress" : "hang");
-	for_each_engine_masked(engine, gt->i915, hung, tmp)
-		len += scnprintf(msg + len, sizeof(msg) - len,
-				 "%s, ", engine->name);
-	msg[len-2] = '\0';
-
-	return intel_gt_handle_error(gt, hung, I915_ERROR_CAPTURE, "%s", msg);
-}
-
-/*
- * This is called when the chip hasn't reported back with completed
- * batchbuffers in a long time. We keep track per ring seqno progress and
- * if there are no progress, hangcheck score for that ring is increased.
- * Further, acthd is inspected to see if the ring is stuck. On stuck case
- * we kick the ring. If we see no progress on three subsequent calls
- * we assume chip is wedged and try to fix it by resetting the chip.
- */
-static void hangcheck_elapsed(struct work_struct *work)
-{
-	struct intel_gt *gt =
-		container_of(work, typeof(*gt), hangcheck.work.work);
-	intel_engine_mask_t hung = 0, stuck = 0, wedged = 0;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
-
-	if (!i915_modparams.enable_hangcheck)
-		return;
-
-	if (!READ_ONCE(gt->awake))
-		return;
-
-	if (intel_gt_is_wedged(gt))
-		return;
-
-	wakeref = intel_runtime_pm_get_if_in_use(&gt->i915->runtime_pm);
-	if (!wakeref)
-		return;
-
-	/* As enabling the GPU requires fairly extensive mmio access,
-	 * periodically arm the mmio checker to see if we are triggering
-	 * any invalid access.
-	 */
-	intel_uncore_arm_unclaimed_mmio_detection(gt->uncore);
-
-	for_each_engine(engine, gt->i915, id) {
-		struct hangcheck hc;
-
-		intel_engine_signal_breadcrumbs(engine);
-
-		hangcheck_load_sample(engine, &hc);
-		hangcheck_accumulate_sample(engine, &hc);
-		hangcheck_store_sample(engine, &hc);
-
-		if (hc.stalled) {
-			hung |= engine->mask;
-			if (hc.action != ENGINE_DEAD)
-				stuck |= engine->mask;
-		}
-
-		if (hc.wedged)
-			wedged |= engine->mask;
-	}
-
-	if (GEM_SHOW_DEBUG() && (hung | stuck)) {
-		struct drm_printer p = drm_debug_printer("hangcheck");
-
-		for_each_engine(engine, gt->i915, id) {
-			if (intel_engine_is_idle(engine))
-				continue;
-
-			intel_engine_dump(engine, &p, "%s\n", engine->name);
-		}
-	}
-
-	if (wedged) {
-		dev_err(gt->i915->drm.dev,
-			"GPU recovery timed out,"
-			" cancelling all in-flight rendering.\n");
-		GEM_TRACE_DUMP();
-		intel_gt_set_wedged(gt);
-	}
-
-	if (hung)
-		hangcheck_declare_hang(gt, hung, stuck);
-
-	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
-
-	/* Reset timer in case GPU hangs without another request being added */
-	intel_gt_queue_hangcheck(gt);
-}
-
-void intel_gt_queue_hangcheck(struct intel_gt *gt)
-{
-	unsigned long delay;
-
-	if (unlikely(!i915_modparams.enable_hangcheck))
-		return;
-
-	/*
-	 * Don't continually defer the hangcheck so that it is always run at
-	 * least once after work has been scheduled on any ring. Otherwise,
-	 * we will ignore a hung ring if a second ring is kept busy.
-	 */
-
-	delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES);
-	queue_delayed_work(system_long_wq, &gt->hangcheck.work, delay);
-}
-
-void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
-{
-	memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
-	engine->hangcheck.action_timestamp = jiffies;
-}
-
-void intel_gt_init_hangcheck(struct intel_gt *gt)
-{
-	INIT_DELAYED_WORK(&gt->hangcheck.work, hangcheck_elapsed);
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftest_hangcheck.c"
-#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 4825c82aefee..febab368d8cf 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -983,8 +983,6 @@ void intel_gt_reset(struct intel_gt *gt,
 	if (ret)
 		goto taint;
 
-	intel_gt_queue_hangcheck(gt);
-
 finish:
 	reset_finish(gt, awake);
 unlock:
@@ -1310,4 +1308,5 @@ void __intel_fini_wedge(struct intel_wedge_me *w)
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_reset.c"
+#include "selftest_hangcheck.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 35ab703dac34..959dde864f4a 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -1728,7 +1728,6 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 	};
 	struct intel_gt *gt = &i915->gt;
 	intel_wakeref_t wakeref;
-	bool saved_hangcheck;
 	int err;
 
 	if (!intel_has_gpu_reset(gt->i915))
@@ -1738,12 +1737,9 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 		return -EIO; /* we're long past hope of a successful reset */
 
 	wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
-	saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
-	drain_delayed_work(&gt->hangcheck.work); /* flush param */
 
 	err = intel_gt_live_subtests(tests, gt);
 
-	i915_modparams.enable_hangcheck = saved_hangcheck;
 	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
 
 	return err;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e6362245347b..f726c669ffff 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -986,92 +986,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 	return ret;
 }
 
-static void i915_instdone_info(struct drm_i915_private *dev_priv,
-			       struct seq_file *m,
-			       struct intel_instdone *instdone)
-{
-	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
-	int slice;
-	int subslice;
-
-	seq_printf(m, "\t\tINSTDONE: 0x%08x\n",
-		   instdone->instdone);
-
-	if (INTEL_GEN(dev_priv) <= 3)
-		return;
-
-	seq_printf(m, "\t\tSC_INSTDONE: 0x%08x\n",
-		   instdone->slice_common);
-
-	if (INTEL_GEN(dev_priv) <= 6)
-		return;
-
-	for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice)
-		seq_printf(m, "\t\tSAMPLER_INSTDONE[%d][%d]: 0x%08x\n",
-			   slice, subslice, instdone->sampler[slice][subslice]);
-
-	for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice)
-		seq_printf(m, "\t\tROW_INSTDONE[%d][%d]: 0x%08x\n",
-			   slice, subslice, instdone->row[slice][subslice]);
-}
-
-static int i915_hangcheck_info(struct seq_file *m, void *unused)
-{
-	struct drm_i915_private *i915 = node_to_i915(m->private);
-	struct intel_gt *gt = &i915->gt;
-	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
-	enum intel_engine_id id;
-
-	seq_printf(m, "Reset flags: %lx\n", gt->reset.flags);
-	if (test_bit(I915_WEDGED, &gt->reset.flags))
-		seq_puts(m, "\tWedged\n");
-	if (test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
-		seq_puts(m, "\tDevice (global) reset in progress\n");
-
-	if (!i915_modparams.enable_hangcheck) {
-		seq_puts(m, "Hangcheck disabled\n");
-		return 0;
-	}
-
-	if (timer_pending(&gt->hangcheck.work.timer))
-		seq_printf(m, "Hangcheck active, timer fires in %dms\n",
-			   jiffies_to_msecs(gt->hangcheck.work.timer.expires -
-					    jiffies));
-	else if (delayed_work_pending(&gt->hangcheck.work))
-		seq_puts(m, "Hangcheck active, work pending\n");
-	else
-		seq_puts(m, "Hangcheck inactive\n");
-
-	seq_printf(m, "GT active? %s\n", yesno(gt->awake));
-
-	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		for_each_engine(engine, i915, id) {
-			struct intel_instdone instdone;
-
-			seq_printf(m, "%s: %d ms ago\n",
-				   engine->name,
-				   jiffies_to_msecs(jiffies -
-						    engine->hangcheck.action_timestamp));
-
-			seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
-				   (long long)engine->hangcheck.acthd,
-				   intel_engine_get_active_head(engine));
-
-			intel_engine_get_instdone(engine, &instdone);
-
-			seq_puts(m, "\tinstdone read =\n");
-			i915_instdone_info(i915, m, &instdone);
-
-			seq_puts(m, "\tinstdone accu =\n");
-			i915_instdone_info(i915, m,
-					   &engine->hangcheck.instdone);
-		}
-	}
-
-	return 0;
-}
-
 static int ironlake_drpc_info(struct seq_file *m)
 {
 	struct drm_i915_private *i915 = node_to_i915(m->private);
@@ -4291,7 +4205,6 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_guc_stage_pool", i915_guc_stage_pool, 0},
 	{"i915_huc_load_status", i915_huc_load_status_info, 0},
 	{"i915_frequency_info", i915_frequency_info, 0},
-	{"i915_hangcheck_info", i915_hangcheck_info, 0},
 	{"i915_drpc_info", i915_drpc_info, 0},
 	{"i915_ring_freq_table", i915_ring_freq_table, 0},
 	{"i915_frontbuffer_tracking", i915_frontbuffer_tracking, 0},
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1c4576a4a5e9..0bc626db4467 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1618,10 +1618,7 @@ void i915_driver_remove(struct drm_i915_private *i915)
 
 	intel_csr_ucode_fini(i915);
 
-	/* Free error state after interrupts are fully disabled. */
-	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
 	i915_reset_error_state(i915);
-
 	i915_gem_driver_remove(i915);
 
 	intel_power_domains_driver_remove(i915);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 85767b7cd00b..36eb674f2d66 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2214,7 +2214,6 @@ extern const struct dev_pm_ops i915_pm_ops;
 int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
 void i915_driver_remove(struct drm_i915_private *i915);
 
-void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 
 static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index 5d9101376a3d..e6c351080593 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -78,8 +78,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 			return -ENODEV;
 		break;
 	case I915_PARAM_HAS_GPU_RESET:
-		value = i915_modparams.enable_hangcheck &&
-			intel_has_gpu_reset(i915);
+		value = intel_has_gpu_reset(i915);
 		if (value && intel_has_reset_engine(i915))
 			value = 2;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 2dd63a261b00..fcae93a9767e 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -534,10 +534,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 	}
 	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
 	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
-	err_printf(m, "  hangcheck timestamp: %dms (%lu%s)\n",
-		   jiffies_to_msecs(ee->hangcheck_timestamp - epoch),
-		   ee->hangcheck_timestamp,
-		   ee->hangcheck_timestamp == epoch ? "; epoch" : "");
 	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
 
 	for (n = 0; n < ee->num_ports; n++) {
@@ -676,11 +672,8 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 	ts = ktime_to_timespec64(error->uptime);
 	err_printf(m, "Uptime: %lld s %ld us\n",
 		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
-	err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ);
-	err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n",
-		   error->capture,
-		   jiffies_to_msecs(jiffies - error->capture),
-		   jiffies_to_msecs(error->capture - error->epoch));
+	err_printf(m, "Capture: %lu jiffies; %d ms ago\n",
+		   error->capture, jiffies_to_msecs(jiffies - error->capture));
 
 	for (ee = error->engine; ee; ee = ee->next)
 		err_printf(m, "Active process (on ring %s): %s [%d]\n",
@@ -736,7 +729,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 		err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
 
 	for (ee = error->engine; ee; ee = ee->next)
-		error_print_engine(m, ee, error->epoch);
+		error_print_engine(m, ee, error->capture);
 
 	for (ee = error->engine; ee; ee = ee->next) {
 		const struct drm_i915_error_object *obj;
@@ -764,7 +757,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 			for (j = 0; j < ee->num_requests; j++)
 				error_print_request(m, " ",
 						    &ee->requests[j],
-						    error->epoch);
+						    error->capture);
 		}
 
 		print_error_obj(m, ee->engine, "ringbuffer", ee->ringbuffer);
@@ -1137,8 +1130,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 	}
 
 	ee->idle = intel_engine_is_idle(engine);
-	if (!ee->idle)
-		ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
 	ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,
 						  engine);
 
@@ -1648,20 +1639,6 @@ static void capture_params(struct i915_gpu_state *error)
 	i915_params_copy(&error->params, &i915_modparams);
 }
 
-static unsigned long capture_find_epoch(const struct i915_gpu_state *error)
-{
-	const struct drm_i915_error_engine *ee;
-	unsigned long epoch = error->capture;
-
-	for (ee = error->engine; ee; ee = ee->next) {
-		if (ee->hangcheck_timestamp &&
-		    time_before(ee->hangcheck_timestamp, epoch))
-			epoch = ee->hangcheck_timestamp;
-	}
-
-	return epoch;
-}
-
 static void capture_finish(struct i915_gpu_state *error)
 {
 	struct i915_ggtt *ggtt = &error->i915->ggtt;
@@ -1713,8 +1690,6 @@ i915_capture_gpu_state(struct drm_i915_private *i915)
 	error->overlay = intel_overlay_capture_error_state(i915);
 	error->display = intel_display_capture_error_state(i915);
 
-	error->epoch = capture_find_epoch(error);
-
 	capture_finish(error);
 	compress_fini(&compress);
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index df9f57766626..0ed061ee3378 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -34,7 +34,6 @@ struct i915_gpu_state {
 	ktime_t boottime;
 	ktime_t uptime;
 	unsigned long capture;
-	unsigned long epoch;
 
 	struct drm_i915_private *i915;
 
@@ -85,7 +84,6 @@ struct i915_gpu_state {
 
 		/* Software tracked state */
 		bool idle;
-		unsigned long hangcheck_timestamp;
 		int num_requests;
 		u32 reset_count;
 
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 296452f9efe4..f48b2490ea43 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -78,9 +78,9 @@ i915_param_named(error_capture, bool, 0600,
 #endif
 
 i915_param_named_unsafe(enable_hangcheck, bool, 0600,
-	"Periodically check GPU activity for detecting hangs. "
-	"WARNING: Disabling this can cause system wide hangs. "
-	"(default: true)");
+       "Periodically check GPU activity for detecting hangs. "
+       "WARNING: Disabling this can cause system wide hangs. "
+       "(default: true)");
 
 i915_param_named_unsafe(enable_psr, int, 0600,
 	"Enable PSR "
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index ae8bb3cb627e..732aad148881 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -16,6 +16,12 @@ enum {
 	I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
 	I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
 	I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1,
+
+	/* A preemptive pulse used to monitor the health of each engine */
+	I915_PRIORITY_HEARTBEAT,
+
+	/* Interactive workload, scheduled for immediate pageflipping */
+	I915_PRIORITY_DISPLAY,
 };
 
 #define I915_USER_PRIORITY_SHIFT 2
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (26 preceding siblings ...)
  2019-08-26  7:21 ` [PATCH 28/28] drm/i915: Replace hangcheck by heartbeats Chris Wilson
@ 2019-08-26  7:34 ` Patchwork
  2019-08-26  7:57 ` ✗ Fi.CI.BAT: failure " Patchwork
  2019-08-27  9:19 ` [PATCH 01/28] " Matthew Auld
  29 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2019-08-26  7:34 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds
URL   : https://patchwork.freedesktop.org/series/65785/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
c46feb50a4b3 drm/i915/selftests: Add the usual batch vma managements to st_workarounds
d8a7398e45ac drm/i915/selftests: Remove accidental serialization between gpu_fill
6b9f45a2bc05 drm/i915/selftests: Try to recycle context allocations
d9c3cdfc9443 drm/i915/gtt: Downgrade Baytrail back to aliasing-ppgtt
9c4446789f7a drm/i915/gtt: Downgrade Cherryview back to aliasing-ppgtt
9d5ff6ed7063 drm/i915: Protect our local workers against I915_FENCE_TIMEOUT
b06fa86d555e drm/i915: Force PD reload on any PD update
6e00732fef73 drm/i915: Replace obj->pin_global with obj->frontbuffer
-:242: WARNING:PREFER_SEQ_PUTS: Prefer seq_puts to seq_printf
#242: FILE: drivers/gpu/drm/i915/i915_debugfs.c:219:
+		seq_printf(m, " (fb)");

total: 0 errors, 1 warnings, 0 checks, 171 lines checked
c4b69b94331d dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
-:14: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 0fc89b6802ba ("dma-fence: Simply wrap dma_fence_signal_locked with dma_fence_signal")'
#14: 
See also 0fc89b6802ba ("dma-fence: Simply wrap dma_fence_signal_locked

total: 1 errors, 0 warnings, 0 checks, 24 lines checked
9b66ce0ad481 drm/mm: Pack allocated/scanned boolean into a bitfield
f6fd81297067 drm/i915: Make shrink/unshrink be atomic
2288b16c1bb7 drm/i915: Only track bound elements of the GTT
7dbc1e8e8b38 drm/i915: Make i915_vma.flags atomic_t for mutex reduction
baddd23cde7f drm/i915: Mark up address spaces that may need to allocate
04fb38398216 drm/i915: Pull i915_vma_pin under the vm->mutex
067dda6f4d10 drm/i915: Push the i915_active.retire into a worker
d2aff1d0ef1a drm/i915: Coordinate i915_active with its own mutex
-:1299: CHECK:UNCOMMENTED_DEFINITION: struct mutex definition without comment
#1299: FILE: drivers/gpu/drm/i915/i915_active_types.h:49:
+	struct mutex mutex;

total: 0 errors, 0 warnings, 1 checks, 1448 lines checked
aba79af808bb drm/i915: Move idle barrier cleanup into engine-pm
31ead25c61da drm/i915: Drop struct_mutex from around i915_retire_requests()
459bf1b2292e drm/i915: Merge wait_for_timelines with retire_request
b04562bce34d drm/i915: Serialise the fill BLT with the vma pinning
bafb3c675329 drm/i915/execlists: Always request completion before marking an error
086dedbef59f drm/i915: Only enqueue already completed requests
897e5de8863a drm/i915/execlists: Force preemption
3f19a88b00ea drm/i915: Mark up "sentinel" requests
4871b0180b24 drm/i915/execlists: Cancel banned contexts on schedule-out
bf80ea3a9801 drm/i915: Cancel non-persistent contexts on close
-:271: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#271: 
new file mode 100644

-:276: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#276: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:1:
+/*

-:277: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#277: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:2:
+ * SPDX-License-Identifier: MIT

-:336: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#336: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h:1:
+/*

-:337: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#337: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 5 warnings, 0 checks, 312 lines checked
9131579e8ef2 drm/i915: Replace hangcheck by heartbeats
-:212: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'heartbeat', this function's name, in a string
#212: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:63:
+					  "%s heartbeat not ticking\n",

-:229: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'heartbeat', this function's name, in a string
#229: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:80:
+					      "stopped heartbeat on %s",

-:472: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#472: 
deleted file mode 100644

-:1145: WARNING:LEADING_SPACE: please, no spaces at the start of a line
#1145: FILE: drivers/gpu/drm/i915/i915_params.c:81:
+       "Periodically check GPU activity for detecting hangs. "$

-:1146: WARNING:LEADING_SPACE: please, no spaces at the start of a line
#1146: FILE: drivers/gpu/drm/i915/i915_params.c:82:
+       "WARNING: Disabling this can cause system wide hangs. "$

-:1147: WARNING:LEADING_SPACE: please, no spaces at the start of a line
#1147: FILE: drivers/gpu/drm/i915/i915_params.c:83:
+       "(default: true)");$

total: 0 errors, 6 warnings, 0 checks, 638 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (27 preceding siblings ...)
  2019-08-26  7:34 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Patchwork
@ 2019-08-26  7:57 ` Patchwork
  2019-08-26  9:35   ` Chris Wilson
  2019-08-26 13:33   ` Chris Wilson
  2019-08-27  9:19 ` [PATCH 01/28] " Matthew Auld
  29 siblings, 2 replies; 43+ messages in thread
From: Patchwork @ 2019-08-26  7:57 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds
URL   : https://patchwork.freedesktop.org/series/65785/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6783 -> Patchwork_14187
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_14187 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_14187, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14187/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_14187:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live_coherency:
    - fi-bwr-2160:        [PASS][1] -> [DMESG-FAIL][2] +1 similar issue
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6783/fi-bwr-2160/igt@i915_selftest@live_coherency.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14187/fi-bwr-2160/igt@i915_selftest@live_coherency.html

  * igt@i915_selftest@live_gem_contexts:
    - fi-ivb-3770:        [PASS][3] -> [DMESG-FAIL][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6783/fi-ivb-3770/igt@i915_selftest@live_gem_contexts.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14187/fi-ivb-3770/igt@i915_selftest@live_gem_contexts.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * {igt@i915_selftest@live_gt_timelines}:
    - fi-bwr-2160:        [PASS][5] -> [DMESG-FAIL][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6783/fi-bwr-2160/igt@i915_selftest@live_gt_timelines.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14187/fi-bwr-2160/igt@i915_selftest@live_gt_timelines.html

  
Known issues
------------

  Here are the changes found in Patchwork_14187 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_suspend@basic-s3:
    - fi-blb-e6850:       [PASS][7] -> [INCOMPLETE][8] ([fdo#107718])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6783/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14187/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html

  * igt@i915_selftest@live_execlists:
    - fi-skl-gvtdvm:      [PASS][9] -> [DMESG-FAIL][10] ([fdo#111108])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6783/fi-skl-gvtdvm/igt@i915_selftest@live_execlists.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14187/fi-skl-gvtdvm/igt@i915_selftest@live_execlists.html

  * igt@kms_frontbuffer_tracking@basic:
    - fi-bsw-n3050:       [PASS][11] -> [FAIL][12] ([fdo#103167])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6783/fi-bsw-n3050/igt@kms_frontbuffer_tracking@basic.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14187/fi-bsw-n3050/igt@kms_frontbuffer_tracking@basic.html

  * igt@prime_vgem@basic-fence-read:
    - fi-icl-u3:          [PASS][13] -> [DMESG-WARN][14] ([fdo#107724]) +1 similar issue
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6783/fi-icl-u3/igt@prime_vgem@basic-fence-read.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14187/fi-icl-u3/igt@prime_vgem@basic-fence-read.html

  
#### Possible fixes ####

  * igt@i915_module_load@reload-no-display:
    - fi-icl-u3:          [DMESG-WARN][15] ([fdo#107724]) -> [PASS][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6783/fi-icl-u3/igt@i915_module_load@reload-no-display.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14187/fi-icl-u3/igt@i915_module_load@reload-no-display.html

  
#### Warnings ####

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7500u:       [FAIL][17] ([fdo#111407]) -> [FAIL][18] ([fdo#111096])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6783/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14187/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#111096]: https://bugs.freedesktop.org/show_bug.cgi?id=111096
  [fdo#111108]: https://bugs.freedesktop.org/show_bug.cgi?id=111108
  [fdo#111407]: https://bugs.freedesktop.org/show_bug.cgi?id=111407


Participating hosts (55 -> 47)
------------------------------

  Missing    (8): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-icl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_6783 -> Patchwork_14187

  CI-20190529: 20190529
  CI_DRM_6783: c8d316e9005aee1ae6c9f2214da1c95d9c65fd5f @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5149: 6756ede680ee12745393360d7cc87cc0eb733ff6 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_14187: 9131579e8ef2868511d96865a87def48dda36de6 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

9131579e8ef2 drm/i915: Replace hangcheck by heartbeats
bf80ea3a9801 drm/i915: Cancel non-persistent contexts on close
4871b0180b24 drm/i915/execlists: Cancel banned contexts on schedule-out
3f19a88b00ea drm/i915: Mark up "sentinel" requests
897e5de8863a drm/i915/execlists: Force preemption
086dedbef59f drm/i915: Only enqueue already completed requests
bafb3c675329 drm/i915/execlists: Always request completion before marking an error
b04562bce34d drm/i915: Serialise the fill BLT with the vma pinning
459bf1b2292e drm/i915: Merge wait_for_timelines with retire_request
31ead25c61da drm/i915: Drop struct_mutex from around i915_retire_requests()
aba79af808bb drm/i915: Move idle barrier cleanup into engine-pm
d2aff1d0ef1a drm/i915: Coordinate i915_active with its own mutex
067dda6f4d10 drm/i915: Push the i915_active.retire into a worker
04fb38398216 drm/i915: Pull i915_vma_pin under the vm->mutex
baddd23cde7f drm/i915: Mark up address spaces that may need to allocate
7dbc1e8e8b38 drm/i915: Make i915_vma.flags atomic_t for mutex reduction
2288b16c1bb7 drm/i915: Only track bound elements of the GTT
f6fd81297067 drm/i915: Make shrink/unshrink be atomic
9b66ce0ad481 drm/mm: Pack allocated/scanned boolean into a bitfield
c4b69b94331d dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
6e00732fef73 drm/i915: Replace obj->pin_global with obj->frontbuffer
b06fa86d555e drm/i915: Force PD reload on any PD update
9d5ff6ed7063 drm/i915: Protect our local workers against I915_FENCE_TIMEOUT
9c4446789f7a drm/i915/gtt: Downgrade Cherryview back to aliasing-ppgtt
d9c3cdfc9443 drm/i915/gtt: Downgrade Baytrail back to aliasing-ppgtt
6b9f45a2bc05 drm/i915/selftests: Try to recycle context allocations
d8a7398e45ac drm/i915/selftests: Remove accidental serialization between gpu_fill
c46feb50a4b3 drm/i915/selftests: Add the usual batch vma managements to st_workarounds

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14187/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: ✗ Fi.CI.BAT: failure for series starting with [01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds
  2019-08-26  7:57 ` ✗ Fi.CI.BAT: failure " Patchwork
@ 2019-08-26  9:35   ` Chris Wilson
  2019-08-26 13:33   ` Chris Wilson
  1 sibling, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26  9:35 UTC (permalink / raw)
  To: Patchwork; +Cc: intel-gfx

Quoting Patchwork (2019-08-26 08:57:32)
>   * igt@i915_selftest@live_gem_contexts:
>     - fi-ivb-3770:        [PASS][3] -> [DMESG-FAIL][4]
>    [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6783/fi-ivb-3770/igt@i915_selftest@live_gem_contexts.html
>    [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14187/fi-ivb-3770/igt@i915_selftest@live_gem_contexts.html

ivb/hsw looked to be much more of a timing issue wrt TLB
invalidation. Adding msleep() and whacking memory barriers around the
place appeared to improve the situation, but that may have just been
wishful thinking. Short term, I think we need to switch gen7 back to
aliasing-ppgtt until the TLB mess can be resolved (all we're changing in
this series is timing, so the same incorrect page access should be
possible in current kernels just harder to hit).
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: ✗ Fi.CI.BAT: failure for series starting with [01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds
  2019-08-26  7:57 ` ✗ Fi.CI.BAT: failure " Patchwork
  2019-08-26  9:35   ` Chris Wilson
@ 2019-08-26 13:33   ` Chris Wilson
  1 sibling, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26 13:33 UTC (permalink / raw)
  To: Patchwork; +Cc: intel-gfx

Quoting Patchwork (2019-08-26 08:57:32)
>   * igt@i915_selftest@live_coherency:
>     - fi-bwr-2160:        [PASS][1] -> [DMESG-FAIL][2] +1 similar issue
>    [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6783/fi-bwr-2160/igt@i915_selftest@live_coherency.html
>    [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14187/fi-bwr-2160/igt@i915_selftest@live_coherency.html

Finally got around to checking on Crestline; this appears to be
Broadwater specific -- just like all the random failures we ignore in igt.

Without being able to locally reproduce, I think I might just quietly
ignore these.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 27/28] drm/i915: Cancel non-persistent contexts on close
  2019-08-26  7:21 ` [PATCH 27/28] drm/i915: Cancel non-persistent contexts on close Chris Wilson
@ 2019-08-26 13:39   ` Bloomfield, Jon
  2019-08-26 16:50     ` Chris Wilson
  0 siblings, 1 reply; 43+ messages in thread
From: Bloomfield, Jon @ 2019-08-26 13:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

> -----Original Message-----
> From: Chris Wilson <chris@chris-wilson.co.uk>
> Sent: Monday, August 26, 2019 12:22 AM
> To: intel-gfx@lists.freedesktop.org
> Cc: Chris Wilson <chris@chris-wilson.co.uk>; Joonas Lahtinen
> <joonas.lahtinen@linux.intel.com>; Winiarski, Michal
> <michal.winiarski@intel.com>; Bloomfield, Jon <jon.bloomfield@intel.com>
> Subject: [PATCH 27/28] drm/i915: Cancel non-persistent contexts on close
> 
> Normally, we rely on our hangcheck to prevent persistent batches from
> hogging the GPU. However, if the user disables hangcheck, this mechanism
> breaks down. Despite our insistence that this is unsafe, the users are
> equally insistent that they want to use endless batches and will disable
> the hangcheck mechanism. We are looking are perhaps replacing hangcheck

"looking at"?

> with a softer mechanism, that sends a pulse down the engine to check if
> it is well. We can use the same preemptive pulse to flush an active
> persistent context off the GPU upon context close, preventing resources
> being lost and unkillable requests remaining on the GPU after process
> termination. To avoid changing the ABI and accidentally breaking
> existing userspace, we make the persistence of a context explicit and
> enable it by default (matching current ABI). Userspace can opt out of
> persistent mode (forcing requests to be cancelled when the context is
> closed by process termination or explicitly) by a context parameter. To
> facilitate existing use-cases of disabling hangcheck, if the modparam is
> disabled (i915.enable_hangcheck=0), we disable persistence mode by
> default.  (Note, one of the outcomes for supporting endless mode will be
> the removal of hangchecking, at which point opting into persistent mode
> will be mandatory, or maybe the default perhaps controlled by cgroups.)
> 
> Testcase: igt/gem_ctx_persistence
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Jon Bloomfield <jon.bloomfield@intel.com>
> ---
>  drivers/gpu/drm/i915/Makefile                 |   3 +-
>  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 122 ++++++++++++++++++
>  drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 +++
>  .../gpu/drm/i915/gem/i915_gem_context_types.h |   1 +
>  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |  54 ++++++++
>  .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |  14 ++
>  drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   2 +-
>  drivers/gpu/drm/i915/i915_priolist_types.h    |   1 +
>  include/uapi/drm/i915_drm.h                   |  15 +++
>  9 files changed, 225 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
>  create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 658b930d34a8..eaa74e000985 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -76,8 +76,9 @@ gt-y += \
>  	gt/intel_breadcrumbs.o \
>  	gt/intel_context.o \
>  	gt/intel_engine_cs.o \
> -	gt/intel_engine_pool.o \
> +	gt/intel_engine_heartbeat.o \
>  	gt/intel_engine_pm.o \
> +	gt/intel_engine_pool.o \
>  	gt/intel_engine_user.o \
>  	gt/intel_gt.o \
>  	gt/intel_gt_irq.o \
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index bd9397669332..5520a896e701 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -70,6 +70,7 @@
>  #include <drm/i915_drm.h>
> 
>  #include "gt/intel_lrc_reg.h"
> +#include "gt/intel_engine_heartbeat.h"
>  #include "gt/intel_engine_user.h"
> 
>  #include "i915_gem_context.h"
> @@ -375,6 +376,78 @@ void i915_gem_context_release(struct kref *ref)
>  		queue_work(i915->wq, &i915->contexts.free_work);
>  }
> 
> +static inline struct i915_gem_engines *
> +__context_engines_static(struct i915_gem_context *ctx)
> +{
> +	return rcu_dereference_protected(ctx->engines, true);
> +}
> +
> +static void kill_context(struct i915_gem_context *ctx)
> +{
> +	intel_engine_mask_t tmp, active, reset;
> +	struct intel_gt *gt = &ctx->i915->gt;
> +	struct i915_gem_engines_iter it;
> +	struct intel_engine_cs *engine;
> +	struct intel_context *ce;
> +
> +	/*
> +	 * If we are already banned, it was due to a guilty request causing
> +	 * a reset and the entire context being evicted from the GPU.
> +	 */
> +	if (i915_gem_context_is_banned(ctx))
> +		return;
> +
> +	i915_gem_context_set_banned(ctx);
> +
> +	/*
> +	 * Map the user's engine back to the actual engines; one virtual
> +	 * engine will be mapped to multiple engines, and using ctx->engine[]
> +	 * the same engine may be have multiple instances in the user's map.
> +	 * However, we only care about pending requests, so only include
> +	 * engines on which there are incomplete requests.
> +	 */
> +	active = 0;
> +	for_each_gem_engine(ce, __context_engines_static(ctx), it) {
> +		struct dma_fence *fence;
> +
> +		if (!ce->timeline)
> +			continue;
> +
> +		fence = i915_active_fence_get(&ce->timeline->last_request);
> +		if (!fence)
> +			continue;
> +
> +		engine = to_request(fence)->engine;
> +		if (HAS_EXECLISTS(gt->i915))
> +			engine = intel_context_inflight(ce);
> +		if (engine)
> +			active |= engine->mask;
> +
> +		dma_fence_put(fence);
> +	}
> +
> +	/*
> +	 * Send a "high priority pulse" down the engine to cause the
> +	 * current request to be momentarily preempted. (If it fails to
> +	 * be preempted, it will be reset). As we have marked our context
> +	 * as banned, any incomplete request, including any running, will
> +	 * be skipped following the preemption.
> +	 */
> +	reset = 0;
> +	for_each_engine_masked(engine, gt->i915, active, tmp)
> +		if (intel_engine_pulse(engine))
> +			reset |= engine->mask;
> +
> +	/*
> +	 * If we are unable to send a preemptive pulse to bump
> +	 * the context from the GPU, we have to resort to a full
> +	 * reset. We hope the collateral damage is worth it.
> +	 */
> +	if (reset)
> +		intel_gt_handle_error(gt, reset, 0,
> +				      "context closure in %s", ctx->name);

This seems inconsistent with the policy not to allow non-persistence without pre-emption, since if we can't pre-empt we nuke anyway.

But this feels unsafe to me - How does intel_gt_handle_error prevent us from nuking a following context, instead of the target? Ideally we would:
	1) Unqueue any context currently behind the target context
	2) Reset engine only if the target context is running (it could complete during 1)
	3) Requeue other contexts

If the above is viable (?) we don't even need to attempt pre-emption.

> +}
> +
>  static void context_close(struct i915_gem_context *ctx)
>  {
>  	i915_gem_context_set_closed(ctx);
> @@ -400,6 +473,10 @@ static void context_close(struct i915_gem_context
> *ctx)
>  	lut_close(ctx);
> 
>  	mutex_unlock(&ctx->mutex);
> +
> +	if (!i915_gem_context_is_persistent(ctx))
> +		kill_context(ctx);
> +
>  	i915_gem_context_put(ctx);
>  }
> 
> @@ -440,6 +517,21 @@ __create_context(struct drm_i915_private *i915)
>  	i915_gem_context_set_bannable(ctx);
>  	i915_gem_context_set_recoverable(ctx);
> 
> +	/*
> +	 * If the user has disabled hangchecking, we can not be sure that
> +	 * the batches will ever complete after the context is closed,
> +	 * keep the context and all resources pinned forever. So in this
> +	 * case we opt to forcibly kill off all remaining requests on
> +	 * context close.
> +	 *
> +	 * Note that the user may chance the value of the modparam between

s/chance/change/

> +	 * context creation and close, we choose to ignore this for the
> +	 * sake of determinism and expect the user to set the parameter
> +	 * on module load and never touch it again.
> +	 */
> +	if (i915_modparams.enable_hangcheck) /* cgroup hook? */
> +		i915_gem_context_set_persistence(ctx);
> +
>  	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
>  		ctx->hang_timestamp[i] = jiffies -
> CONTEXT_FAST_HANG_JIFFIES;
> 
> @@ -598,6 +690,7 @@ i915_gem_context_create_kernel(struct
> drm_i915_private *i915, int prio)
>  	}
> 
>  	i915_gem_context_clear_bannable(ctx);
> +	i915_gem_context_set_persistence(ctx);
>  	ctx->sched.priority = I915_USER_PRIORITY(prio);
> 
>  	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
> @@ -1730,6 +1823,26 @@ get_engines(struct i915_gem_context *ctx,
>  	return err;
>  }
> 
> +static int
> +set_persistence(struct i915_gem_context *ctx,
> +		const struct drm_i915_gem_context_param *args)
> +{
> +	if (args->size)
> +		return -EINVAL;
> +
> +	if (args->value) {
> +		i915_gem_context_set_persistence(ctx);
> +		return 0;
> +	}
> +
> +	/* To cancel a context we use "preempt-to-idle" */
> +	if (!(ctx->i915->caps.scheduler &

Why do we need to give up on older devices? If you fail to preempt you reset the context anyway, so can't we just use the reset fallback path?

> I915_SCHEDULER_CAP_PREEMPTION))
> +		return -ENODEV;
> +
> +	i915_gem_context_clear_persistence(ctx);
> +	return 0;
> +}
> +
>  static int ctx_setparam(struct drm_i915_file_private *fpriv,
>  			struct i915_gem_context *ctx,
>  			struct drm_i915_gem_context_param *args)
> @@ -1807,6 +1920,10 @@ static int ctx_setparam(struct
> drm_i915_file_private *fpriv,
>  		ret = set_engines(ctx, args);
>  		break;
> 
> +	case I915_CONTEXT_PARAM_PERSISTENCE:
> +		ret = set_persistence(ctx, args);
> +		break;
> +
>  	case I915_CONTEXT_PARAM_BAN_PERIOD:
>  	default:
>  		ret = -EINVAL;
> @@ -2258,6 +2375,11 @@ int i915_gem_context_getparam_ioctl(struct
> drm_device *dev, void *data,
>  		ret = get_engines(ctx, args);
>  		break;
> 
> +	case I915_CONTEXT_PARAM_PERSISTENCE:
> +		args->size = 0;
> +		args->value = i915_gem_context_is_persistent(ctx);
> +		break;
> +
>  	case I915_CONTEXT_PARAM_BAN_PERIOD:
>  	default:
>  		ret = -EINVAL;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h
> b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> index 176978608b6f..e0f5b6c6a331 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> @@ -74,6 +74,21 @@ static inline void
> i915_gem_context_clear_recoverable(struct i915_gem_context *c
>  	clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
>  }
> 
> +static inline bool i915_gem_context_is_persistent(const struct
> i915_gem_context *ctx)
> +{
> +	return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
> +}
> +
> +static inline void i915_gem_context_set_persistence(struct i915_gem_context
> *ctx)
> +{
> +	set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
> +}
> +
> +static inline void i915_gem_context_clear_persistence(struct
> i915_gem_context *ctx)
> +{
> +	clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
> +}
> +
>  static inline bool i915_gem_context_is_banned(const struct i915_gem_context
> *ctx)
>  {
>  	return test_bit(CONTEXT_BANNED, &ctx->flags);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> index 260d59cc3de8..daf1ea5075a6 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> @@ -137,6 +137,7 @@ struct i915_gem_context {
>  #define UCONTEXT_NO_ERROR_CAPTURE	1
>  #define UCONTEXT_BANNABLE		2
>  #define UCONTEXT_RECOVERABLE		3
> +#define UCONTEXT_PERSISTENCE		4
> 
>  	/**
>  	 * @flags: small set of booleans
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> new file mode 100644
> index 000000000000..43d1370eaa7f
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> @@ -0,0 +1,54 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "i915_request.h"
> +
> +#include "intel_context.h"
> +#include "intel_engine_heartbeat.h"
> +#include "intel_engine_pm.h"
> +#include "intel_engine.h"
> +#include "intel_gt.h"
> +
> +static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
> +{
> +	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
> +	i915_request_add_active_barriers(rq);
> +}
> +
> +int intel_engine_pulse(struct intel_engine_cs *engine)
> +{
> +	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
> +	struct intel_context *ce = engine->kernel_context;
> +	struct i915_request *rq;
> +	int err = 0;
> +
> +	if (!intel_engine_has_preemption(engine))
> +		return -ENODEV;
> +
> +	if (!intel_engine_pm_get_if_awake(engine))
> +		return 0;
> +
> +	mutex_lock(&ce->timeline->mutex);
> +
> +	intel_context_enter(ce);
> +	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
> +	intel_context_exit(ce);
> +	if (IS_ERR(rq)) {
> +		err = PTR_ERR(rq);
> +		goto out_unlock;
> +	}
> +
> +	rq->flags |= I915_REQUEST_SENTINEL;
> +	idle_pulse(engine, rq);
> +
> +	__i915_request_commit(rq);
> +	__i915_request_queue(rq, &attr);
> +
> +out_unlock:
> +	mutex_unlock(&ce->timeline->mutex);
> +	intel_engine_pm_put(engine);
> +	return err;
> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
> b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
> new file mode 100644
> index 000000000000..b950451b5998
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
> @@ -0,0 +1,14 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef INTEL_ENGINE_HEARTBEAT_H
> +#define INTEL_ENGINE_HEARTBEAT_H
> +
> +struct intel_engine_cs;
> +
> +int intel_engine_pulse(struct intel_engine_cs *engine);
> +
> +#endif /* INTEL_ENGINE_HEARTBEAT_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index 472b2259f629..12a2608a8889 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -110,7 +110,7 @@ static bool switch_to_kernel_context(struct
> intel_engine_cs *engine)
>  	i915_request_add_active_barriers(rq);
> 
>  	/* Install ourselves as a preemption barrier */
> -	rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE;
> +	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
>  	__i915_request_commit(rq);
> 
>  	/* Release our exclusive hold on the engine */
> diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h
> b/drivers/gpu/drm/i915/i915_priolist_types.h
> index 21037a2e2038..ae8bb3cb627e 100644
> --- a/drivers/gpu/drm/i915/i915_priolist_types.h
> +++ b/drivers/gpu/drm/i915/i915_priolist_types.h
> @@ -39,6 +39,7 @@ enum {
>   * active request.
>   */
>  #define I915_PRIORITY_UNPREEMPTABLE INT_MAX
> +#define I915_PRIORITY_BARRIER INT_MAX
> 
>  #define __NO_PREEMPTION (I915_PRIORITY_WAIT)
> 
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 469dc512cca3..dbc8691d75d0 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1565,6 +1565,21 @@ struct drm_i915_gem_context_param {
>   *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
>   */
>  #define I915_CONTEXT_PARAM_ENGINES	0xa
> +
> +/*
> + * I915_CONTEXT_PARAM_PERSISTENCE:
> + *
> + * Allow the context and active rendering to survive the process until
> + * completion. Persistence allows fire-and-forget clients to queue up a
> + * bunch of work, hand the output over to a display server and the quit.
> + * If the context is not marked as persistent, upon closing (either via
> + * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file
> closure
> + * or process termination), the context and any outstanding requests will be
> + * cancelled (and exported fences for cancelled requests marked as -EIO).
> + *
> + * By default, new contexts allow persistence.
> + */
> +#define I915_CONTEXT_PARAM_PERSISTENCE	0xb
>  /* Must be kept compact -- no holes and well documented */
> 
>  	__u64 value;
> --
> 2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 28/28] drm/i915: Replace hangcheck by heartbeats
  2019-08-26  7:21 ` [PATCH 28/28] drm/i915: Replace hangcheck by heartbeats Chris Wilson
@ 2019-08-26 14:08   ` Bloomfield, Jon
  2019-08-26 16:56     ` Chris Wilson
  0 siblings, 1 reply; 43+ messages in thread
From: Bloomfield, Jon @ 2019-08-26 14:08 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

> -----Original Message-----
> From: Chris Wilson <chris@chris-wilson.co.uk>
> Sent: Monday, August 26, 2019 12:22 AM
> To: intel-gfx@lists.freedesktop.org
> Cc: Chris Wilson <chris@chris-wilson.co.uk>; Joonas Lahtinen
> <joonas.lahtinen@linux.intel.com>; Ursulin, Tvrtko <tvrtko.ursulin@intel.com>;
> Bloomfield, Jon <jon.bloomfield@intel.com>
> Subject: [PATCH 28/28] drm/i915: Replace hangcheck by heartbeats
> 
> Replace sampling the engine state every so often with a periodic
> heartbeat request to measure the health of an engine. This is coupled
> with the forced-preemption to allow long running requests to survive so
> long as they do not block other users.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Jon Bloomfield <jon.bloomfield@intel.com>
> ---
>  drivers/gpu/drm/i915/Kconfig.profile          |  11 +
>  drivers/gpu/drm/i915/Makefile                 |   1 -
>  drivers/gpu/drm/i915/display/intel_display.c  |   2 +-
>  drivers/gpu/drm/i915/gem/i915_gem_object.h    |   1 -
>  drivers/gpu/drm/i915/gem/i915_gem_pm.c        |   2 -
>  drivers/gpu/drm/i915/gt/intel_engine.h        |  32 --
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  10 +-
>  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 113 +++++-
>  .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |   5 +
>  drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   5 +-
>  drivers/gpu/drm/i915/gt/intel_engine_types.h  |  14 +-
>  drivers/gpu/drm/i915/gt/intel_gt.c            |   1 -
>  drivers/gpu/drm/i915/gt/intel_gt.h            |   4 -
>  drivers/gpu/drm/i915/gt/intel_gt_pm.c         |   2 -
>  drivers/gpu/drm/i915/gt/intel_gt_types.h      |   9 -
>  drivers/gpu/drm/i915/gt/intel_hangcheck.c     | 361 ------------------
>  drivers/gpu/drm/i915/gt/intel_reset.c         |   3 +-
>  drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |   4 -
>  drivers/gpu/drm/i915/i915_debugfs.c           |  87 -----
>  drivers/gpu/drm/i915/i915_drv.c               |   3 -
>  drivers/gpu/drm/i915/i915_drv.h               |   1 -
>  drivers/gpu/drm/i915/i915_getparam.c          |   3 +-
>  drivers/gpu/drm/i915/i915_gpu_error.c         |  33 +-
>  drivers/gpu/drm/i915/i915_gpu_error.h         |   2 -
>  drivers/gpu/drm/i915/i915_params.c            |   6 +-
>  drivers/gpu/drm/i915/i915_priolist_types.h    |   6 +
>  26 files changed, 159 insertions(+), 562 deletions(-)
>  delete mode 100644 drivers/gpu/drm/i915/gt/intel_hangcheck.c
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig.profile
> b/drivers/gpu/drm/i915/Kconfig.profile
> index 3184e8491333..aafb57f84169 100644
> --- a/drivers/gpu/drm/i915/Kconfig.profile
> +++ b/drivers/gpu/drm/i915/Kconfig.profile
> @@ -37,3 +37,14 @@ config DRM_I915_PREEMPT_TIMEOUT
>  	  to execute.
> 
>  	  May be 0 to disable the timeout.
> +
> +config DRM_I915_HEARTBEAT_INTERVAL
> +	int "Interval between heartbeat pulses (ms)"
> +	default 2500 # milliseconds
> +	help
> +	  While active the driver uses a periodic request, a heartbeat, to
> +	  check the wellness of the GPU and to regularly flush state changes
> +	  (idle barriers).
> +
> +	  May be 0 to disable heartbeats and therefore disable automatic GPU
> +	  hang detection.
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index eaa74e000985..d7286720de83 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -84,7 +84,6 @@ gt-y += \
>  	gt/intel_gt_irq.o \
>  	gt/intel_gt_pm.o \
>  	gt/intel_gt_pm_irq.o \
> -	gt/intel_hangcheck.o \
>  	gt/intel_lrc.o \
>  	gt/intel_renderstate.o \
>  	gt/intel_reset.o \
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c
> b/drivers/gpu/drm/i915/display/intel_display.c
> index 6e74c33f2ec4..e008016d864c 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -14306,7 +14306,7 @@ static void intel_plane_unpin_fb(struct
> intel_plane_state *old_plane_state)
>  static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
>  {
>  	struct i915_sched_attr attr = {
> -		.priority = I915_PRIORITY_DISPLAY,
> +		.priority = I915_USER_PRIORITY(I915_PRIORITY_DISPLAY),
>  	};
> 
>  	i915_gem_object_wait_priority(obj, 0, &attr);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index a78af25dce36..967c30737dc5 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -429,6 +429,5 @@ int i915_gem_object_wait(struct
> drm_i915_gem_object *obj,
>  int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
>  				  unsigned int flags,
>  				  const struct i915_sched_attr *attr);
> -#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
> 
>  #endif
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> index b41f5e4c0c65..28a8f3aac402 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> @@ -127,8 +127,6 @@ void i915_gem_suspend(struct drm_i915_private
> *i915)
>  	 */
>  	switch_to_kernel_context_sync(&i915->gt);
> 
> -	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
> -
>  	i915_gem_drain_freed_objects(i915);
> 
>  	intel_uc_suspend(&i915->gt.uc);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h
> b/drivers/gpu/drm/i915/gt/intel_engine.h
> index d3c6993f4f46..da09eea0fb86 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -89,38 +89,6 @@ struct drm_printer;
>  /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW
> to
>   * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
>   */
> -enum intel_engine_hangcheck_action {
> -	ENGINE_IDLE = 0,
> -	ENGINE_WAIT,
> -	ENGINE_ACTIVE_SEQNO,
> -	ENGINE_ACTIVE_HEAD,
> -	ENGINE_ACTIVE_SUBUNITS,
> -	ENGINE_WAIT_KICK,
> -	ENGINE_DEAD,
> -};
> -
> -static inline const char *
> -hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
> -{
> -	switch (a) {
> -	case ENGINE_IDLE:
> -		return "idle";
> -	case ENGINE_WAIT:
> -		return "wait";
> -	case ENGINE_ACTIVE_SEQNO:
> -		return "active seqno";
> -	case ENGINE_ACTIVE_HEAD:
> -		return "active head";
> -	case ENGINE_ACTIVE_SUBUNITS:
> -		return "active subunits";
> -	case ENGINE_WAIT_KICK:
> -		return "wait kick";
> -	case ENGINE_DEAD:
> -		return "dead";
> -	}
> -
> -	return "unknown";
> -}
> 
>  static inline unsigned int
>  execlists_num_ports(const struct intel_engine_execlists * const execlists)
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 17006d50b63f..16439c542a52 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -597,7 +597,6 @@ static int intel_engine_setup_common(struct
> intel_engine_cs *engine)
>  	intel_engine_init_active(engine, ENGINE_PHYSICAL);
>  	intel_engine_init_breadcrumbs(engine);
>  	intel_engine_init_execlists(engine);
> -	intel_engine_init_hangcheck(engine);
>  	intel_engine_init_cmd_parser(engine);
>  	intel_engine_init__pm(engine);
> 
> @@ -1375,8 +1374,13 @@ void intel_engine_dump(struct intel_engine_cs
> *engine,
>  		drm_printf(m, "*** WEDGED ***\n");
> 
>  	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine-
> >wakeref.count));
> -	drm_printf(m, "\tHangcheck: %d ms ago\n",
> -		   jiffies_to_msecs(jiffies - engine-
> >hangcheck.action_timestamp));
> +
> +	rcu_read_lock();
> +	rq = READ_ONCE(engine->last_heartbeat);
> +	if (rq)
> +		drm_printf(m, "\tHeartbeat: %d ms ago\n",
> +			   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
> +	rcu_read_unlock();
>  	drm_printf(m, "\tReset count: %d (global %d)\n",
>  		   i915_reset_engine_count(error, engine),
>  		   i915_reset_count(error));
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> index 43d1370eaa7f..d0fc94c71ee7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> @@ -11,6 +11,21 @@
>  #include "intel_engine_pm.h"
>  #include "intel_engine.h"
>  #include "intel_gt.h"
> +#include "intel_reset.h"
> +
> +/*
> + * While the engine is active, we send a periodic pulse along the entire

along the entire what?

> + * to check on its health and to flush any idle-barriers. If that request
> + * is stuck, and we fail to preempt it, we declare the engine hung and
> + * issue a reset -- in the hope that restores progress.
> + */
> +
> +static long delay(void)

Probably NOT the best function name in the world, ever. 'delay' could equally be a verb, so it's not self-descriptive.

> +{
> +	const long t =
> msecs_to_jiffies(CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
> +
> +	return round_jiffies_up_relative(t);
> +}
> 
>  static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
>  {
> @@ -18,6 +33,100 @@ static void idle_pulse(struct intel_engine_cs *engine,
> struct i915_request *rq)
>  	i915_request_add_active_barriers(rq);
>  }
> 
> +static void heartbeat(struct work_struct *wrk)
> +{
> +	struct i915_sched_attr attr = {
> +		.priority = I915_USER_PRIORITY(I915_PRIORITY_MIN),
> +	};
> +	struct intel_engine_cs *engine =
> +		container_of(wrk, typeof(*engine), heartbeat.work);
> +	struct intel_context *ce = engine->kernel_context;
> +	struct i915_request *rq;
> +
> +	if (!intel_engine_pm_get_if_awake(engine))
> +		return;
> +
> +	rq = engine->last_heartbeat;
> +	if (rq && i915_request_completed(rq)) {
> +		i915_request_put(rq);
> +		engine->last_heartbeat = NULL;
> +	}
> +
> +	if (intel_gt_is_wedged(engine->gt))
> +		goto out;
> +
> +	if (engine->last_heartbeat) {
> +		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
> +			struct drm_printer p = drm_debug_printer(__func__);
> +
> +			intel_engine_dump(engine, &p,
> +					  "%s heartbeat not ticking\n",
> +					  engine->name);
> +		}
> +
> +		if (engine->schedule &&
> +		    rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
> +			attr.priority =
> +
> 	I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT);
> +			if (rq->sched.attr.priority >= attr.priority)
> +				attr.priority = I915_PRIORITY_BARRIER;
> +
> +			local_bh_disable();
> +			engine->schedule(rq, &attr);
> +			local_bh_enable();
> +		} else {
> +			intel_gt_handle_error(engine->gt, engine->mask,
> +					      I915_ERROR_CAPTURE,
> +					      "stopped heartbeat on %s",
> +					      engine->name);
> +		}
> +		goto out;
> +	}
> +
> +	if (engine->wakeref_serial == engine->serial)
> +		goto out;
> +
> +	mutex_lock(&ce->timeline->mutex);
> +
> +	intel_context_enter(ce);
> +	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
> +	intel_context_exit(ce);
> +	if (IS_ERR(rq))
> +		goto unlock;
> +
> +	idle_pulse(engine, rq);
> +	if (i915_modparams.enable_hangcheck)
> +		engine->last_heartbeat = i915_request_get(rq);
> +
> +	__i915_request_commit(rq);
> +	__i915_request_queue(rq, &attr);
> +
> +unlock:
> +	mutex_unlock(&ce->timeline->mutex);
> +out:
> +	schedule_delayed_work(&engine->heartbeat, delay());

Isn't engine->heartbeat now NULL in some cases?

> +	intel_engine_pm_put(engine);
> +}
> +
> +void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
> +{
> +	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
> +		return;
> +
> +	schedule_delayed_work(&engine->heartbeat, delay());
> +}
> +
> +void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
> +{
> +	cancel_delayed_work(&engine->heartbeat);
> +	i915_request_put(fetch_and_zero(&engine->last_heartbeat));
> +}
> +
> +void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
> +{
> +	INIT_DELAYED_WORK(&engine->heartbeat, heartbeat);
> +}
> +
>  int intel_engine_pulse(struct intel_engine_cs *engine)
>  {
>  	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
> @@ -31,7 +140,8 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
>  	if (!intel_engine_pm_get_if_awake(engine))
>  		return 0;
> 
> -	mutex_lock(&ce->timeline->mutex);
> +	if (!mutex_trylock(&ce->timeline->mutex))
> +		goto out_rpm;
> 
>  	intel_context_enter(ce);
>  	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
> @@ -49,6 +159,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
> 
>  out_unlock:
>  	mutex_unlock(&ce->timeline->mutex);
> +out_rpm:
>  	intel_engine_pm_put(engine);
>  	return err;
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
> b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
> index b950451b5998..39391004554d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
> @@ -9,6 +9,11 @@
> 
>  struct intel_engine_cs;
> 
> +void intel_engine_init_heartbeat(struct intel_engine_cs *engine);
> +
> +void intel_engine_park_heartbeat(struct intel_engine_cs *engine);
> +void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine);
> +
>  int intel_engine_pulse(struct intel_engine_cs *engine);
> 
>  #endif /* INTEL_ENGINE_HEARTBEAT_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index 12a2608a8889..5e74c3b86a21 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -7,6 +7,7 @@
>  #include "i915_drv.h"
> 
>  #include "intel_engine.h"
> +#include "intel_engine_heartbeat.h"
>  #include "intel_engine_pm.h"
>  #include "intel_engine_pool.h"
>  #include "intel_gt.h"
> @@ -33,7 +34,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
>  	if (engine->unpark)
>  		engine->unpark(engine);
> 
> -	intel_engine_init_hangcheck(engine);
> +	intel_engine_unpark_heartbeat(engine);
>  	return 0;
>  }
> 
> @@ -157,6 +158,7 @@ static int __engine_park(struct intel_wakeref *wf)
> 
>  	call_idle_barriers(engine); /* cleanup after wedging */
> 
> +	intel_engine_park_heartbeat(engine);
>  	intel_engine_disarm_breadcrumbs(engine);
>  	intel_engine_pool_park(&engine->pool);
> 
> @@ -187,6 +189,7 @@ void intel_engine_init__pm(struct intel_engine_cs
> *engine)
>  	struct intel_runtime_pm *rpm = &engine->i915->runtime_pm;
> 
>  	intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
> +	intel_engine_init_heartbeat(engine);
>  }
> 
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 15e02cb58a67..53c649f4c8e4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -15,6 +15,7 @@
>  #include <linux/rbtree.h>
>  #include <linux/timer.h>
>  #include <linux/types.h>
> +#include <linux/workqueue.h>
> 
>  #include "i915_gem.h"
>  #include "i915_pmu.h"
> @@ -76,14 +77,6 @@ struct intel_instdone {
>  	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
>  };
> 
> -struct intel_engine_hangcheck {
> -	u64 acthd;
> -	u32 last_ring;
> -	u32 last_head;
> -	unsigned long action_timestamp;
> -	struct intel_instdone instdone;
> -};
> -
>  struct intel_ring {
>  	struct kref ref;
>  	struct i915_vma *vma;
> @@ -323,6 +316,9 @@ struct intel_engine_cs {
> 
>  	intel_engine_mask_t saturated; /* submitting semaphores too late? */
> 
> +	struct delayed_work heartbeat;
> +	struct i915_request *last_heartbeat;
> +
>  	unsigned long serial;
> 
>  	unsigned long wakeref_serial;
> @@ -473,8 +469,6 @@ struct intel_engine_cs {
>  	/* status_notifier: list of callbacks for context-switch changes */
>  	struct atomic_notifier_head context_status_notifier;
> 
> -	struct intel_engine_hangcheck hangcheck;
> -
>  #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
>  #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
>  #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c
> b/drivers/gpu/drm/i915/gt/intel_gt.c
> index c2afffb94474..20f3181c93bd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -18,7 +18,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct
> drm_i915_private *i915)
>  	INIT_LIST_HEAD(&gt->closed_vma);
>  	spin_lock_init(&gt->closed_lock);
> 
> -	intel_gt_init_hangcheck(gt);
>  	intel_gt_init_reset(gt);
>  	intel_gt_pm_init_early(gt);
>  	intel_uc_init_early(&gt->uc);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h
> b/drivers/gpu/drm/i915/gt/intel_gt.h
> index 4920cb351f10..a43903b1cc63 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.h
> @@ -39,8 +39,6 @@ void intel_gt_clear_error_registers(struct intel_gt *gt,
>  void intel_gt_flush_ggtt_writes(struct intel_gt *gt);
>  void intel_gt_chipset_flush(struct intel_gt *gt);
> 
> -void intel_gt_init_hangcheck(struct intel_gt *gt);
> -
>  int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size);
>  void intel_gt_fini_scratch(struct intel_gt *gt);
> 
> @@ -55,6 +53,4 @@ static inline bool intel_gt_is_wedged(struct intel_gt *gt)
>  	return __intel_reset_failed(&gt->reset);
>  }
> 
> -void intel_gt_queue_hangcheck(struct intel_gt *gt);
> -
>  #endif /* __INTEL_GT_H__ */
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index 1363e069ec83..45fa0a58c387 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -46,8 +46,6 @@ static int __gt_unpark(struct intel_wakeref *wf)
> 
>  	i915_pmu_gt_unparked(i915);
> 
> -	intel_gt_queue_hangcheck(gt);
> -
>  	pm_notify(i915, INTEL_GT_UNPARK);
> 
>  	return 0;
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index dc295c196d11..0f799335a00a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -25,14 +25,6 @@ struct i915_ggtt;
>  struct intel_engine_cs;
>  struct intel_uncore;
> 
> -struct intel_hangcheck {
> -	/* For hangcheck timer */
> -#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
> -#define DRM_I915_HANGCHECK_JIFFIES
> msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
> -
> -	struct delayed_work work;
> -};
> -
>  struct intel_gt {
>  	struct drm_i915_private *i915;
>  	struct intel_uncore *uncore;
> @@ -54,7 +46,6 @@ struct intel_gt {
>  	struct list_head closed_vma;
>  	spinlock_t closed_lock; /* guards the list of closed_vma */
> 
> -	struct intel_hangcheck hangcheck;
>  	struct intel_reset reset;
> 
>  	/**
> diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> deleted file mode 100644
> index 40f62f780be5..000000000000
> --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> +++ /dev/null
> @@ -1,361 +0,0 @@
> -/*
> - * Copyright © 2016 Intel Corporation
> - *
> - * Permission is hereby granted, free of charge, to any person obtaining a
> - * copy of this software and associated documentation files (the "Software"),
> - * to deal in the Software without restriction, including without limitation
> - * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> - * and/or sell copies of the Software, and to permit persons to whom the
> - * Software is furnished to do so, subject to the following conditions:
> - *
> - * The above copyright notice and this permission notice (including the next
> - * paragraph) shall be included in all copies or substantial portions of the
> - * Software.
> - *
> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> EVENT SHALL
> - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
> DAMAGES OR OTHER
> - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> OTHER DEALINGS
> - * IN THE SOFTWARE.
> - *
> - */
> -
> -#include "i915_drv.h"
> -#include "intel_engine.h"
> -#include "intel_gt.h"
> -#include "intel_reset.h"
> -
> -struct hangcheck {
> -	u64 acthd;
> -	u32 ring;
> -	u32 head;
> -	enum intel_engine_hangcheck_action action;
> -	unsigned long action_timestamp;
> -	int deadlock;
> -	struct intel_instdone instdone;
> -	bool wedged:1;
> -	bool stalled:1;
> -};
> -
> -static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
> -{
> -	u32 tmp = current_instdone | *old_instdone;
> -	bool unchanged;
> -
> -	unchanged = tmp == *old_instdone;
> -	*old_instdone |= tmp;
> -
> -	return unchanged;
> -}
> -
> -static bool subunits_stuck(struct intel_engine_cs *engine)
> -{
> -	struct drm_i915_private *dev_priv = engine->i915;
> -	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
> -	struct intel_instdone instdone;
> -	struct intel_instdone *accu_instdone = &engine->hangcheck.instdone;
> -	bool stuck;
> -	int slice;
> -	int subslice;
> -
> -	intel_engine_get_instdone(engine, &instdone);
> -
> -	/* There might be unstable subunit states even when
> -	 * actual head is not moving. Filter out the unstable ones by
> -	 * accumulating the undone -> done transitions and only
> -	 * consider those as progress.
> -	 */
> -	stuck = instdone_unchanged(instdone.instdone,
> -				   &accu_instdone->instdone);
> -	stuck &= instdone_unchanged(instdone.slice_common,
> -				    &accu_instdone->slice_common);
> -
> -	for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) {
> -		stuck &=
> instdone_unchanged(instdone.sampler[slice][subslice],
> -					    &accu_instdone-
> >sampler[slice][subslice]);
> -		stuck &= instdone_unchanged(instdone.row[slice][subslice],
> -					    &accu_instdone-
> >row[slice][subslice]);
> -	}
> -
> -	return stuck;
> -}
> -
> -static enum intel_engine_hangcheck_action
> -head_stuck(struct intel_engine_cs *engine, u64 acthd)
> -{
> -	if (acthd != engine->hangcheck.acthd) {
> -
> -		/* Clear subunit states on head movement */
> -		memset(&engine->hangcheck.instdone, 0,
> -		       sizeof(engine->hangcheck.instdone));
> -
> -		return ENGINE_ACTIVE_HEAD;
> -	}
> -
> -	if (!subunits_stuck(engine))
> -		return ENGINE_ACTIVE_SUBUNITS;
> -
> -	return ENGINE_DEAD;
> -}
> -
> -static enum intel_engine_hangcheck_action
> -engine_stuck(struct intel_engine_cs *engine, u64 acthd)
> -{
> -	enum intel_engine_hangcheck_action ha;
> -	u32 tmp;
> -
> -	ha = head_stuck(engine, acthd);
> -	if (ha != ENGINE_DEAD)
> -		return ha;
> -
> -	if (IS_GEN(engine->i915, 2))
> -		return ENGINE_DEAD;
> -
> -	/* Is the chip hanging on a WAIT_FOR_EVENT?
> -	 * If so we can simply poke the RB_WAIT bit
> -	 * and break the hang. This should work on
> -	 * all but the second generation chipsets.
> -	 */
> -	tmp = ENGINE_READ(engine, RING_CTL);
> -	if (tmp & RING_WAIT) {
> -		intel_gt_handle_error(engine->gt, engine->mask, 0,
> -				      "stuck wait on %s", engine->name);
> -		ENGINE_WRITE(engine, RING_CTL, tmp);
> -		return ENGINE_WAIT_KICK;
> -	}
> -
> -	return ENGINE_DEAD;
> -}
> -
> -static void hangcheck_load_sample(struct intel_engine_cs *engine,
> -				  struct hangcheck *hc)
> -{
> -	hc->acthd = intel_engine_get_active_head(engine);
> -	hc->ring = ENGINE_READ(engine, RING_START);
> -	hc->head = ENGINE_READ(engine, RING_HEAD);
> -}
> -
> -static void hangcheck_store_sample(struct intel_engine_cs *engine,
> -				   const struct hangcheck *hc)
> -{
> -	engine->hangcheck.acthd = hc->acthd;
> -	engine->hangcheck.last_ring = hc->ring;
> -	engine->hangcheck.last_head = hc->head;
> -}
> -
> -static enum intel_engine_hangcheck_action
> -hangcheck_get_action(struct intel_engine_cs *engine,
> -		     const struct hangcheck *hc)
> -{
> -	if (intel_engine_is_idle(engine))
> -		return ENGINE_IDLE;
> -
> -	if (engine->hangcheck.last_ring != hc->ring)
> -		return ENGINE_ACTIVE_SEQNO;
> -
> -	if (engine->hangcheck.last_head != hc->head)
> -		return ENGINE_ACTIVE_SEQNO;
> -
> -	return engine_stuck(engine, hc->acthd);
> -}
> -
> -static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
> -					struct hangcheck *hc)
> -{
> -	unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT;
> -
> -	hc->action = hangcheck_get_action(engine, hc);
> -
> -	/* We always increment the progress
> -	 * if the engine is busy and still processing
> -	 * the same request, so that no single request
> -	 * can run indefinitely (such as a chain of
> -	 * batches). The only time we do not increment
> -	 * the hangcheck score on this ring, if this
> -	 * engine is in a legitimate wait for another
> -	 * engine. In that case the waiting engine is a
> -	 * victim and we want to be sure we catch the
> -	 * right culprit. Then every time we do kick
> -	 * the ring, make it as a progress as the seqno
> -	 * advancement might ensure and if not, it
> -	 * will catch the hanging engine.
> -	 */
> -
> -	switch (hc->action) {
> -	case ENGINE_IDLE:
> -	case ENGINE_ACTIVE_SEQNO:
> -		/* Clear head and subunit states on seqno movement */
> -		hc->acthd = 0;
> -
> -		memset(&engine->hangcheck.instdone, 0,
> -		       sizeof(engine->hangcheck.instdone));
> -
> -		/* Intentional fall through */
> -	case ENGINE_WAIT_KICK:
> -	case ENGINE_WAIT:
> -		engine->hangcheck.action_timestamp = jiffies;
> -		break;
> -
> -	case ENGINE_ACTIVE_HEAD:
> -	case ENGINE_ACTIVE_SUBUNITS:
> -		/*
> -		 * Seqno stuck with still active engine gets leeway,
> -		 * in hopes that it is just a long shader.
> -		 */
> -		timeout = I915_SEQNO_DEAD_TIMEOUT;
> -		break;
> -
> -	case ENGINE_DEAD:
> -		break;
> -
> -	default:
> -		MISSING_CASE(hc->action);
> -	}
> -
> -	hc->stalled = time_after(jiffies,
> -				 engine->hangcheck.action_timestamp +
> timeout);
> -	hc->wedged = time_after(jiffies,
> -				 engine->hangcheck.action_timestamp +
> -				 I915_ENGINE_WEDGED_TIMEOUT);
> -}
> -
> -static void hangcheck_declare_hang(struct intel_gt *gt,
> -				   intel_engine_mask_t hung,
> -				   intel_engine_mask_t stuck)
> -{
> -	struct intel_engine_cs *engine;
> -	intel_engine_mask_t tmp;
> -	char msg[80];
> -	int len;
> -
> -	/* If some rings hung but others were still busy, only
> -	 * blame the hanging rings in the synopsis.
> -	 */
> -	if (stuck != hung)
> -		hung &= ~stuck;
> -	len = scnprintf(msg, sizeof(msg),
> -			"%s on ", stuck == hung ? "no progress" : "hang");
> -	for_each_engine_masked(engine, gt->i915, hung, tmp)
> -		len += scnprintf(msg + len, sizeof(msg) - len,
> -				 "%s, ", engine->name);
> -	msg[len-2] = '\0';
> -
> -	return intel_gt_handle_error(gt, hung, I915_ERROR_CAPTURE, "%s",
> msg);
> -}
> -
> -/*
> - * This is called when the chip hasn't reported back with completed
> - * batchbuffers in a long time. We keep track per ring seqno progress and
> - * if there are no progress, hangcheck score for that ring is increased.
> - * Further, acthd is inspected to see if the ring is stuck. On stuck case
> - * we kick the ring. If we see no progress on three subsequent calls
> - * we assume chip is wedged and try to fix it by resetting the chip.
> - */
> -static void hangcheck_elapsed(struct work_struct *work)
> -{
> -	struct intel_gt *gt =
> -		container_of(work, typeof(*gt), hangcheck.work.work);
> -	intel_engine_mask_t hung = 0, stuck = 0, wedged = 0;
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	intel_wakeref_t wakeref;
> -
> -	if (!i915_modparams.enable_hangcheck)
> -		return;
> -
> -	if (!READ_ONCE(gt->awake))
> -		return;
> -
> -	if (intel_gt_is_wedged(gt))
> -		return;
> -
> -	wakeref = intel_runtime_pm_get_if_in_use(&gt->i915->runtime_pm);
> -	if (!wakeref)
> -		return;
> -
> -	/* As enabling the GPU requires fairly extensive mmio access,
> -	 * periodically arm the mmio checker to see if we are triggering
> -	 * any invalid access.
> -	 */
> -	intel_uncore_arm_unclaimed_mmio_detection(gt->uncore);
> -
> -	for_each_engine(engine, gt->i915, id) {
> -		struct hangcheck hc;
> -
> -		intel_engine_signal_breadcrumbs(engine);
> -
> -		hangcheck_load_sample(engine, &hc);
> -		hangcheck_accumulate_sample(engine, &hc);
> -		hangcheck_store_sample(engine, &hc);
> -
> -		if (hc.stalled) {
> -			hung |= engine->mask;
> -			if (hc.action != ENGINE_DEAD)
> -				stuck |= engine->mask;
> -		}
> -
> -		if (hc.wedged)
> -			wedged |= engine->mask;
> -	}
> -
> -	if (GEM_SHOW_DEBUG() && (hung | stuck)) {
> -		struct drm_printer p = drm_debug_printer("hangcheck");
> -
> -		for_each_engine(engine, gt->i915, id) {
> -			if (intel_engine_is_idle(engine))
> -				continue;
> -
> -			intel_engine_dump(engine, &p, "%s\n", engine-
> >name);
> -		}
> -	}
> -
> -	if (wedged) {
> -		dev_err(gt->i915->drm.dev,
> -			"GPU recovery timed out,"
> -			" cancelling all in-flight rendering.\n");
> -		GEM_TRACE_DUMP();
> -		intel_gt_set_wedged(gt);
> -	}
> -
> -	if (hung)
> -		hangcheck_declare_hang(gt, hung, stuck);
> -
> -	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
> -
> -	/* Reset timer in case GPU hangs without another request being added
> */
> -	intel_gt_queue_hangcheck(gt);
> -}
> -
> -void intel_gt_queue_hangcheck(struct intel_gt *gt)
> -{
> -	unsigned long delay;
> -
> -	if (unlikely(!i915_modparams.enable_hangcheck))
> -		return;
> -
> -	/*
> -	 * Don't continually defer the hangcheck so that it is always run at
> -	 * least once after work has been scheduled on any ring. Otherwise,
> -	 * we will ignore a hung ring if a second ring is kept busy.
> -	 */
> -
> -	delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES);
> -	queue_delayed_work(system_long_wq, &gt->hangcheck.work, delay);
> -}
> -
> -void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
> -{
> -	memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
> -	engine->hangcheck.action_timestamp = jiffies;
> -}
> -
> -void intel_gt_init_hangcheck(struct intel_gt *gt)
> -{
> -	INIT_DELAYED_WORK(&gt->hangcheck.work, hangcheck_elapsed);
> -}
> -
> -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> -#include "selftest_hangcheck.c"
> -#endif
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c
> b/drivers/gpu/drm/i915/gt/intel_reset.c
> index 4825c82aefee..febab368d8cf 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -983,8 +983,6 @@ void intel_gt_reset(struct intel_gt *gt,
>  	if (ret)
>  		goto taint;
> 
> -	intel_gt_queue_hangcheck(gt);
> -
>  finish:
>  	reset_finish(gt, awake);
>  unlock:
> @@ -1310,4 +1308,5 @@ void __intel_fini_wedge(struct intel_wedge_me *w)
> 
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>  #include "selftest_reset.c"
> +#include "selftest_hangcheck.c"
>  #endif
> diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> index 35ab703dac34..959dde864f4a 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> @@ -1728,7 +1728,6 @@ int intel_hangcheck_live_selftests(struct
> drm_i915_private *i915)
>  	};
>  	struct intel_gt *gt = &i915->gt;
>  	intel_wakeref_t wakeref;
> -	bool saved_hangcheck;
>  	int err;
> 
>  	if (!intel_has_gpu_reset(gt->i915))
> @@ -1738,12 +1737,9 @@ int intel_hangcheck_live_selftests(struct
> drm_i915_private *i915)
>  		return -EIO; /* we're long past hope of a successful reset */
> 
>  	wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
> -	saved_hangcheck =
> fetch_and_zero(&i915_modparams.enable_hangcheck);
> -	drain_delayed_work(&gt->hangcheck.work); /* flush param */
> 
>  	err = intel_gt_live_subtests(tests, gt);
> 
> -	i915_modparams.enable_hangcheck = saved_hangcheck;
>  	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
> 
>  	return err;
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index e6362245347b..f726c669ffff 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -986,92 +986,6 @@ static int i915_frequency_info(struct seq_file *m, void
> *unused)
>  	return ret;
>  }
> 
> -static void i915_instdone_info(struct drm_i915_private *dev_priv,
> -			       struct seq_file *m,
> -			       struct intel_instdone *instdone)
> -{
> -	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
> -	int slice;
> -	int subslice;
> -
> -	seq_printf(m, "\t\tINSTDONE: 0x%08x\n",
> -		   instdone->instdone);
> -
> -	if (INTEL_GEN(dev_priv) <= 3)
> -		return;
> -
> -	seq_printf(m, "\t\tSC_INSTDONE: 0x%08x\n",
> -		   instdone->slice_common);
> -
> -	if (INTEL_GEN(dev_priv) <= 6)
> -		return;
> -
> -	for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice)
> -		seq_printf(m, "\t\tSAMPLER_INSTDONE[%d][%d]: 0x%08x\n",
> -			   slice, subslice, instdone->sampler[slice][subslice]);
> -
> -	for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice)
> -		seq_printf(m, "\t\tROW_INSTDONE[%d][%d]: 0x%08x\n",
> -			   slice, subslice, instdone->row[slice][subslice]);
> -}
> -
> -static int i915_hangcheck_info(struct seq_file *m, void *unused)
> -{
> -	struct drm_i915_private *i915 = node_to_i915(m->private);
> -	struct intel_gt *gt = &i915->gt;
> -	struct intel_engine_cs *engine;
> -	intel_wakeref_t wakeref;
> -	enum intel_engine_id id;
> -
> -	seq_printf(m, "Reset flags: %lx\n", gt->reset.flags);
> -	if (test_bit(I915_WEDGED, &gt->reset.flags))
> -		seq_puts(m, "\tWedged\n");
> -	if (test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
> -		seq_puts(m, "\tDevice (global) reset in progress\n");
> -
> -	if (!i915_modparams.enable_hangcheck) {
> -		seq_puts(m, "Hangcheck disabled\n");
> -		return 0;
> -	}
> -
> -	if (timer_pending(&gt->hangcheck.work.timer))
> -		seq_printf(m, "Hangcheck active, timer fires in %dms\n",
> -			   jiffies_to_msecs(gt->hangcheck.work.timer.expires -
> -					    jiffies));
> -	else if (delayed_work_pending(&gt->hangcheck.work))
> -		seq_puts(m, "Hangcheck active, work pending\n");
> -	else
> -		seq_puts(m, "Hangcheck inactive\n");
> -
> -	seq_printf(m, "GT active? %s\n", yesno(gt->awake));
> -
> -	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
> -		for_each_engine(engine, i915, id) {
> -			struct intel_instdone instdone;
> -
> -			seq_printf(m, "%s: %d ms ago\n",
> -				   engine->name,
> -				   jiffies_to_msecs(jiffies -
> -						    engine->hangcheck.action_timestamp));
> -
> -			seq_printf(m, "\tACTHD = 0x%08llx [current
> 0x%08llx]\n",
> -				   (long long)engine->hangcheck.acthd,
> -				   intel_engine_get_active_head(engine));
> -
> -			intel_engine_get_instdone(engine, &instdone);
> -
> -			seq_puts(m, "\tinstdone read =\n");
> -			i915_instdone_info(i915, m, &instdone);
> -
> -			seq_puts(m, "\tinstdone accu =\n");
> -			i915_instdone_info(i915, m,
> -					   &engine->hangcheck.instdone);
> -		}
> -	}
> -
> -	return 0;
> -}
> -
>  static int ironlake_drpc_info(struct seq_file *m)
>  {
>  	struct drm_i915_private *i915 = node_to_i915(m->private);
> @@ -4291,7 +4205,6 @@ static const struct drm_info_list i915_debugfs_list[] =
> {
>  	{"i915_guc_stage_pool", i915_guc_stage_pool, 0},
>  	{"i915_huc_load_status", i915_huc_load_status_info, 0},
>  	{"i915_frequency_info", i915_frequency_info, 0},
> -	{"i915_hangcheck_info", i915_hangcheck_info, 0},
>  	{"i915_drpc_info", i915_drpc_info, 0},
>  	{"i915_ring_freq_table", i915_ring_freq_table, 0},
>  	{"i915_frontbuffer_tracking", i915_frontbuffer_tracking, 0},
> diff --git a/drivers/gpu/drm/i915/i915_drv.c
> b/drivers/gpu/drm/i915/i915_drv.c
> index 1c4576a4a5e9..0bc626db4467 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1618,10 +1618,7 @@ void i915_driver_remove(struct drm_i915_private
> *i915)
> 
>  	intel_csr_ucode_fini(i915);
> 
> -	/* Free error state after interrupts are fully disabled. */
> -	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
>  	i915_reset_error_state(i915);
> -
>  	i915_gem_driver_remove(i915);
> 
>  	intel_power_domains_driver_remove(i915);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> index 85767b7cd00b..36eb674f2d66 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2214,7 +2214,6 @@ extern const struct dev_pm_ops i915_pm_ops;
>  int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
>  void i915_driver_remove(struct drm_i915_private *i915);
> 
> -void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
>  int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> 
>  static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_getparam.c
> b/drivers/gpu/drm/i915/i915_getparam.c
> index 5d9101376a3d..e6c351080593 100644
> --- a/drivers/gpu/drm/i915/i915_getparam.c
> +++ b/drivers/gpu/drm/i915/i915_getparam.c
> @@ -78,8 +78,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void
> *data,
>  			return -ENODEV;
>  		break;
>  	case I915_PARAM_HAS_GPU_RESET:
> -		value = i915_modparams.enable_hangcheck &&
> -			intel_has_gpu_reset(i915);
> +		value = intel_has_gpu_reset(i915);

Don't understand this tweak. We haven't really changed the essence of hangcheck, just improved it, so why do we change this get_param?

>  		if (value && intel_has_reset_engine(i915))
>  			value = 2;
>  		break;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c
> b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 2dd63a261b00..fcae93a9767e 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -534,10 +534,6 @@ static void error_print_engine(struct
> drm_i915_error_state_buf *m,
>  	}
>  	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
>  	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
> -	err_printf(m, "  hangcheck timestamp: %dms (%lu%s)\n",
> -		   jiffies_to_msecs(ee->hangcheck_timestamp - epoch),
> -		   ee->hangcheck_timestamp,
> -		   ee->hangcheck_timestamp == epoch ? "; epoch" : "");
>  	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
> 
>  	for (n = 0; n < ee->num_ports; n++) {
> @@ -676,11 +672,8 @@ static void __err_print_to_sgl(struct
> drm_i915_error_state_buf *m,
>  	ts = ktime_to_timespec64(error->uptime);
>  	err_printf(m, "Uptime: %lld s %ld us\n",
>  		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
> -	err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ);
> -	err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n",
> -		   error->capture,
> -		   jiffies_to_msecs(jiffies - error->capture),
> -		   jiffies_to_msecs(error->capture - error->epoch));
> +	err_printf(m, "Capture: %lu jiffies; %d ms ago\n",
> +		   error->capture, jiffies_to_msecs(jiffies - error->capture));
> 
>  	for (ee = error->engine; ee; ee = ee->next)
>  		err_printf(m, "Active process (on ring %s): %s [%d]\n",
> @@ -736,7 +729,7 @@ static void __err_print_to_sgl(struct
> drm_i915_error_state_buf *m,
>  		err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
> 
>  	for (ee = error->engine; ee; ee = ee->next)
> -		error_print_engine(m, ee, error->epoch);
> +		error_print_engine(m, ee, error->capture);
> 
>  	for (ee = error->engine; ee; ee = ee->next) {
>  		const struct drm_i915_error_object *obj;
> @@ -764,7 +757,7 @@ static void __err_print_to_sgl(struct
> drm_i915_error_state_buf *m,
>  			for (j = 0; j < ee->num_requests; j++)
>  				error_print_request(m, " ",
>  						    &ee->requests[j],
> -						    error->epoch);
> +						    error->capture);
>  		}
> 
>  		print_error_obj(m, ee->engine, "ringbuffer", ee->ringbuffer);
> @@ -1137,8 +1130,6 @@ static void error_record_engine_registers(struct
> i915_gpu_state *error,
>  	}
> 
>  	ee->idle = intel_engine_is_idle(engine);
> -	if (!ee->idle)
> -		ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
>  	ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,
>  						  engine);
> 
> @@ -1648,20 +1639,6 @@ static void capture_params(struct i915_gpu_state
> *error)
>  	i915_params_copy(&error->params, &i915_modparams);
>  }
> 
> -static unsigned long capture_find_epoch(const struct i915_gpu_state *error)
> -{
> -	const struct drm_i915_error_engine *ee;
> -	unsigned long epoch = error->capture;
> -
> -	for (ee = error->engine; ee; ee = ee->next) {
> -		if (ee->hangcheck_timestamp &&
> -		    time_before(ee->hangcheck_timestamp, epoch))
> -			epoch = ee->hangcheck_timestamp;
> -	}
> -
> -	return epoch;
> -}
> -
>  static void capture_finish(struct i915_gpu_state *error)
>  {
>  	struct i915_ggtt *ggtt = &error->i915->ggtt;
> @@ -1713,8 +1690,6 @@ i915_capture_gpu_state(struct drm_i915_private
> *i915)
>  	error->overlay = intel_overlay_capture_error_state(i915);
>  	error->display = intel_display_capture_error_state(i915);
> 
> -	error->epoch = capture_find_epoch(error);
> -
>  	capture_finish(error);
>  	compress_fini(&compress);
> 
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h
> b/drivers/gpu/drm/i915/i915_gpu_error.h
> index df9f57766626..0ed061ee3378 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -34,7 +34,6 @@ struct i915_gpu_state {
>  	ktime_t boottime;
>  	ktime_t uptime;
>  	unsigned long capture;
> -	unsigned long epoch;
> 
>  	struct drm_i915_private *i915;
> 
> @@ -85,7 +84,6 @@ struct i915_gpu_state {
> 
>  		/* Software tracked state */
>  		bool idle;
> -		unsigned long hangcheck_timestamp;
>  		int num_requests;
>  		u32 reset_count;
> 
> diff --git a/drivers/gpu/drm/i915/i915_params.c
> b/drivers/gpu/drm/i915/i915_params.c
> index 296452f9efe4..f48b2490ea43 100644
> --- a/drivers/gpu/drm/i915/i915_params.c
> +++ b/drivers/gpu/drm/i915/i915_params.c
> @@ -78,9 +78,9 @@ i915_param_named(error_capture, bool, 0600,
>  #endif
> 
>  i915_param_named_unsafe(enable_hangcheck, bool, 0600,
> -	"Periodically check GPU activity for detecting hangs. "
> -	"WARNING: Disabling this can cause system wide hangs. "
> -	"(default: true)");
> +       "Periodically check GPU activity for detecting hangs. "
> +       "WARNING: Disabling this can cause system wide hangs. "
> +       "(default: true)");
> 
>  i915_param_named_unsafe(enable_psr, int, 0600,
>  	"Enable PSR "
> diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h
> b/drivers/gpu/drm/i915/i915_priolist_types.h
> index ae8bb3cb627e..732aad148881 100644
> --- a/drivers/gpu/drm/i915/i915_priolist_types.h
> +++ b/drivers/gpu/drm/i915/i915_priolist_types.h
> @@ -16,6 +16,12 @@ enum {
>  	I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
>  	I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
>  	I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1,
> +
> +	/* A preemptive pulse used to monitor the health of each engine */
> +	I915_PRIORITY_HEARTBEAT,
> +
> +	/* Interactive workload, scheduled for immediate pageflipping */
> +	I915_PRIORITY_DISPLAY,
>  };
> 
>  #define I915_USER_PRIORITY_SHIFT 2
> --
> 2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 27/28] drm/i915: Cancel non-persistent contexts on close
  2019-08-26 13:39   ` Bloomfield, Jon
@ 2019-08-26 16:50     ` Chris Wilson
  0 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26 16:50 UTC (permalink / raw)
  To: Bloomfield, Jon, intel-gfx

Quoting Bloomfield, Jon (2019-08-26 14:39:55)
> > -----Original Message-----
> > From: Chris Wilson <chris@chris-wilson.co.uk>
> > +     /*
> > +      * Send a "high priority pulse" down the engine to cause the
> > +      * current request to be momentarily preempted. (If it fails to
> > +      * be preempted, it will be reset). As we have marked our context
> > +      * as banned, any incomplete request, including any running, will
> > +      * be skipped following the preemption.
> > +      */
> > +     reset = 0;
> > +     for_each_engine_masked(engine, gt->i915, active, tmp)
> > +             if (intel_engine_pulse(engine))
> > +                     reset |= engine->mask;
> > +
> > +     /*
> > +      * If we are unable to send a preemptive pulse to bump
> > +      * the context from the GPU, we have to resort to a full
> > +      * reset. We hope the collateral damage is worth it.
> > +      */
> > +     if (reset)
> > +             intel_gt_handle_error(gt, reset, 0,
> > +                                   "context closure in %s", ctx->name);
> 
> This seems inconsistent with the policy not to allow non-persistence without pre-emption, since if we can't pre-empt we nuke anyway.

So, the only way to get here is if i915.enable_hangcheck=0 on older
hardware; the user has forced us into a situation we do not like.

Having recognised that i915.enable_hangcheck=0 is a trivial way to
accidentally DoS the system (as opposed to the deliberate DoS that is the
expected behaviour), this is our mitigation.
 
> But this feels unsafe to me - How does intel_gt_handle_error prevent us from nuking a following context, instead of the target? Ideally we would:
>         1) Unqueue any context currently behind the target context
>         2) Reset engine only if the target context is running (it could complete during 1)
>         3) Requeue other contexts
> 
> If the above is viable (?) we don't even need to attempt pre-emption.

That's exactly what the safe strategy attempts before we fall back
to the reset path. (With a bit of handwaving over gen8, it could do with
a bit of refinement as it has kernel preemption, but not user
preemption.)

> > +static int
> > +set_persistence(struct i915_gem_context *ctx,
> > +             const struct drm_i915_gem_context_param *args)
> > +{
> > +     if (args->size)
> > +             return -EINVAL;
> > +
> > +     if (args->value) {
> > +             i915_gem_context_set_persistence(ctx);
> > +             return 0;
> > +     }
> > +
> > +     /* To cancel a context we use "preempt-to-idle" */
> > +     if (!(ctx->i915->caps.scheduler &
> 
> Why do we need to give up on older devices? If you fail to preempt you reset the context anyway, so can't we just use the reset fallback path?

The fallback path is to reset the entire gpu with no regard as to what
is actually running. So we only allow the context parameter if we can
safely kill the context on closure.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 28/28] drm/i915: Replace hangcheck by heartbeats
  2019-08-26 14:08   ` Bloomfield, Jon
@ 2019-08-26 16:56     ` Chris Wilson
  2019-08-26 17:51       ` Bloomfield, Jon
  0 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2019-08-26 16:56 UTC (permalink / raw)
  To: Bloomfield, Jon, intel-gfx

Quoting Bloomfield, Jon (2019-08-26 15:08:02)
> > -----Original Message-----
> > From: Chris Wilson <chris@chris-wilson.co.uk>
> > + * While the engine is active, we send a periodic pulse along the entire
> 
> along the entire what?

s/entire/engine/

> > + * to check on its health and to flush any idle-barriers. If that request
> > + * is stuck, and we fail to preempt it, we declare the engine hung and
> > + * issue a reset -- in the hope that restores progress.
> > + */
> > +
> > +static long delay(void)
> 
> Probably NOT the best function name in the world, ever. 'delay' could equally be a verb, so it's not self-descriptive.

Is it a noun or verb in this case? Or just shorthand.

> > +{
> > +     const long t =
> > msecs_to_jiffies(CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
> > +
> > +     return round_jiffies_up_relative(t);
> > +}

> > +static void heartbeat(struct work_struct *wrk)
> > +{
> > +     struct i915_sched_attr attr = {
> > +             .priority = I915_USER_PRIORITY(I915_PRIORITY_MIN),
> > +     };
> > +     struct intel_engine_cs *engine =
> > +             container_of(wrk, typeof(*engine), heartbeat.work);
> > +     struct intel_context *ce = engine->kernel_context;
> > +     struct i915_request *rq;
> > +
> > +     if (!intel_engine_pm_get_if_awake(engine))
> > +             return;
> > +
> > +     rq = engine->last_heartbeat;
> > +     if (rq && i915_request_completed(rq)) {
> > +             i915_request_put(rq);
> > +             engine->last_heartbeat = NULL;
> > +     }
> > +
> > +     if (intel_gt_is_wedged(engine->gt))
> > +             goto out;
> > +
> > +     if (engine->last_heartbeat) {
> > +             if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
> > +                     struct drm_printer p = drm_debug_printer(__func__);
> > +
> > +                     intel_engine_dump(engine, &p,
> > +                                       "%s heartbeat not ticking\n",
> > +                                       engine->name);
> > +             }
> > +
> > +             if (engine->schedule &&
> > +                 rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
> > +                     attr.priority =
> > +
> >       I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT);
> > +                     if (rq->sched.attr.priority >= attr.priority)
> > +                             attr.priority = I915_PRIORITY_BARRIER;
> > +
> > +                     local_bh_disable();
> > +                     engine->schedule(rq, &attr);
> > +                     local_bh_enable();
> > +             } else {
> > +                     intel_gt_handle_error(engine->gt, engine->mask,
> > +                                           I915_ERROR_CAPTURE,
> > +                                           "stopped heartbeat on %s",
> > +                                           engine->name);
> > +             }
> > +             goto out;
> > +     }
> > +
> > +     if (engine->wakeref_serial == engine->serial)
> > +             goto out;
> > +
> > +     mutex_lock(&ce->timeline->mutex);
> > +
> > +     intel_context_enter(ce);
> > +     rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
> > +     intel_context_exit(ce);
> > +     if (IS_ERR(rq))
> > +             goto unlock;
> > +
> > +     idle_pulse(engine, rq);
> > +     if (i915_modparams.enable_hangcheck)
> > +             engine->last_heartbeat = i915_request_get(rq);
> > +
> > +     __i915_request_commit(rq);
> > +     __i915_request_queue(rq, &attr);
> > +
> > +unlock:
> > +     mutex_unlock(&ce->timeline->mutex);
> > +out:
> > +     schedule_delayed_work(&engine->heartbeat, delay());
> 
> Isn't engine->heartbeat now NULL in some cases?

engine->heartbeat, the worker
vs
engine->last_heartbeat

Maybe,

struct intel_engine_heartbeat {
	work;
	systole;
};


> > diff --git a/drivers/gpu/drm/i915/i915_getparam.c
> > b/drivers/gpu/drm/i915/i915_getparam.c
> > index 5d9101376a3d..e6c351080593 100644
> > --- a/drivers/gpu/drm/i915/i915_getparam.c
> > +++ b/drivers/gpu/drm/i915/i915_getparam.c
> > @@ -78,8 +78,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void
> > *data,
> >                       return -ENODEV;
> >               break;
> >       case I915_PARAM_HAS_GPU_RESET:
> > -             value = i915_modparams.enable_hangcheck &&
> > -                     intel_has_gpu_reset(i915);
> > +             value = intel_has_gpu_reset(i915);
> 
> Don't understand this tweak. We haven't really changed the essence of hangcheck, just improved it, so why do we change this get_param?

I deleted the modparam in earlier patches. But anticipated you would
object...
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 28/28] drm/i915: Replace hangcheck by heartbeats
  2019-08-26 16:56     ` Chris Wilson
@ 2019-08-26 17:51       ` Bloomfield, Jon
  2019-08-26 17:57         ` Chris Wilson
  0 siblings, 1 reply; 43+ messages in thread
From: Bloomfield, Jon @ 2019-08-26 17:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

> -----Original Message-----
> From: Chris Wilson <chris@chris-wilson.co.uk>
> Sent: Monday, August 26, 2019 9:57 AM
> To: Bloomfield, Jon <jon.bloomfield@intel.com>; intel-
> gfx@lists.freedesktop.org
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>; Ursulin, Tvrtko
> <tvrtko.ursulin@intel.com>
> Subject: RE: [PATCH 28/28] drm/i915: Replace hangcheck by heartbeats
> 
> > Isn't engine->heartbeat now NULL in some cases?
> 
> engine->heartbeat, the worker
> vs
> engine->last_heartbeat
> 
Doh!

> Maybe,
> 
> struct intel_engine_heartbeat {
> 	work;
> 	systole;
> };
> 
> 
> > > diff --git a/drivers/gpu/drm/i915/i915_getparam.c
> > > b/drivers/gpu/drm/i915/i915_getparam.c
> > > index 5d9101376a3d..e6c351080593 100644
> > > --- a/drivers/gpu/drm/i915/i915_getparam.c
> > > +++ b/drivers/gpu/drm/i915/i915_getparam.c
> > > @@ -78,8 +78,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void
> > > *data,
> > >                       return -ENODEV;
> > >               break;
> > >       case I915_PARAM_HAS_GPU_RESET:
> > > -             value = i915_modparams.enable_hangcheck &&
> > > -                     intel_has_gpu_reset(i915);
> > > +             value = intel_has_gpu_reset(i915);
> >
> > Don't understand this tweak. We haven't really changed the essence of
> hangcheck, just improved it, so why do we change this get_param?
> 
> I deleted the modparam in earlier patches. But anticipated you would
> object...

Ok, I see. But then shouldn't we just be checking the new param for a non-zero timeout? That would then be equivalent.
Or, it seems fair to conclude that this never made sense, but then it really ought to be a separate patch to remove the association between HAS_GPU_RESET and hangcheck.

> -Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 28/28] drm/i915: Replace hangcheck by heartbeats
  2019-08-26 17:51       ` Bloomfield, Jon
@ 2019-08-26 17:57         ` Chris Wilson
  0 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-08-26 17:57 UTC (permalink / raw)
  To: Bloomfield, Jon, intel-gfx

Quoting Bloomfield, Jon (2019-08-26 18:51:32)
> > -----Original Message-----
> > From: Chris Wilson <chris@chris-wilson.co.uk>
> > > > diff --git a/drivers/gpu/drm/i915/i915_getparam.c
> > > > b/drivers/gpu/drm/i915/i915_getparam.c
> > > > index 5d9101376a3d..e6c351080593 100644
> > > > --- a/drivers/gpu/drm/i915/i915_getparam.c
> > > > +++ b/drivers/gpu/drm/i915/i915_getparam.c
> > > > @@ -78,8 +78,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void
> > > > *data,
> > > >                       return -ENODEV;
> > > >               break;
> > > >       case I915_PARAM_HAS_GPU_RESET:
> > > > -             value = i915_modparams.enable_hangcheck &&
> > > > -                     intel_has_gpu_reset(i915);
> > > > +             value = intel_has_gpu_reset(i915);
> > >
> > > Don't understand this tweak. We haven't really changed the essence of
> > hangcheck, just improved it, so why do we change this get_param?
> > 
> > I deleted the modparam in earlier patches. But anticipated you would
> > object...
> 
> Ok, I see. But then shouldn't we just be checking the new param for a non-zero timeout? That would then be equivalent.
> Or, it seems fair to conclude that this never made sense, but then it really ought to be a separate patch to remove the association between HAS_GPU_RESET and hangcheck.

The chunk is gone. Questions for another day. :-p
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 10/28] drm/mm: Pack allocated/scanned boolean into a bitfield
  2019-08-26  7:21 ` [PATCH 10/28] drm/mm: Pack allocated/scanned boolean into a bitfield Chris Wilson
@ 2019-08-27  7:12   ` kbuild test robot
  0 siblings, 0 replies; 43+ messages in thread
From: kbuild test robot @ 2019-08-27  7:12 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 3551 bytes --]

Hi Chris,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[cannot apply to v5.3-rc6 next-20190826]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-selftests-Add-the-usual-batch-vma-managements-to-st_workarounds/20190827-004721
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-allyesconfig (attached as .config)
compiler: gcc-7 (Debian 7.4.0-10) 7.4.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

If you fix the issue, kindly add the following tag:
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   drivers/gpu/drm/vc4/vc4_crtc.c: In function 'vc4_crtc_destroy_state':
>> drivers/gpu/drm/vc4/vc4_crtc.c:997:19: error: 'struct drm_mm_node' has no member named 'allocated'
     if (vc4_state->mm.allocated) {
                      ^
--
   drivers/gpu/drm/vc4/vc4_plane.c: In function 'vc4_plane_destroy_state':
>> drivers/gpu/drm/vc4/vc4_plane.c:181:20: error: 'struct drm_mm_node' has no member named 'allocated'
     if (vc4_state->lbm.allocated) {
                       ^
   drivers/gpu/drm/vc4/vc4_plane.c: In function 'vc4_plane_allocate_lbm':
   drivers/gpu/drm/vc4/vc4_plane.c:560:21: error: 'struct drm_mm_node' has no member named 'allocated'
     if (!vc4_state->lbm.allocated) {
                        ^
--
   drivers/gpu/drm/vc4/vc4_hvs.c: In function 'vc4_hvs_unbind':
>> drivers/gpu/drm/vc4/vc4_hvs.c:318:41: error: 'struct drm_mm_node' has no member named 'allocated'
     if (vc4->hvs->mitchell_netravali_filter.allocated)
                                            ^

vim +997 drivers/gpu/drm/vc4/vc4_crtc.c

d8dbf44f13b911 Eric Anholt 2015-12-28   990  
d8dbf44f13b911 Eric Anholt 2015-12-28   991  static void vc4_crtc_destroy_state(struct drm_crtc *crtc,
d8dbf44f13b911 Eric Anholt 2015-12-28   992  				   struct drm_crtc_state *state)
d8dbf44f13b911 Eric Anholt 2015-12-28   993  {
d8dbf44f13b911 Eric Anholt 2015-12-28   994  	struct vc4_dev *vc4 = to_vc4_dev(crtc->dev);
d8dbf44f13b911 Eric Anholt 2015-12-28   995  	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state);
d8dbf44f13b911 Eric Anholt 2015-12-28   996  
d8dbf44f13b911 Eric Anholt 2015-12-28  @997  	if (vc4_state->mm.allocated) {
d8dbf44f13b911 Eric Anholt 2015-12-28   998  		unsigned long flags;
d8dbf44f13b911 Eric Anholt 2015-12-28   999  
d8dbf44f13b911 Eric Anholt 2015-12-28  1000  		spin_lock_irqsave(&vc4->hvs->mm_lock, flags);
d8dbf44f13b911 Eric Anholt 2015-12-28  1001  		drm_mm_remove_node(&vc4_state->mm);
d8dbf44f13b911 Eric Anholt 2015-12-28  1002  		spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags);
d8dbf44f13b911 Eric Anholt 2015-12-28  1003  
d8dbf44f13b911 Eric Anholt 2015-12-28  1004  	}
d8dbf44f13b911 Eric Anholt 2015-12-28  1005  
7622b255436655 Eric Anholt 2016-10-10  1006  	drm_atomic_helper_crtc_destroy_state(crtc, state);
d8dbf44f13b911 Eric Anholt 2015-12-28  1007  }
d8dbf44f13b911 Eric Anholt 2015-12-28  1008  

:::::: The code at line 997 was first introduced by commit
:::::: d8dbf44f13b91185c618219d912b246817a8d132 drm/vc4: Make the CRTCs cooperate on allocating display lists.

:::::: TO: Eric Anholt <eric@anholt.net>
:::::: CC: Eric Anholt <eric@anholt.net>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 69533 bytes --]

[-- Attachment #3: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds
  2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
                   ` (28 preceding siblings ...)
  2019-08-26  7:57 ` ✗ Fi.CI.BAT: failure " Patchwork
@ 2019-08-27  9:19 ` Matthew Auld
  29 siblings, 0 replies; 43+ messages in thread
From: Matthew Auld @ 2019-08-27  9:19 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 26/08/2019 08:21, Chris Wilson wrote:
> To properly handle asynchronous migration of batch objects, we need to
> couple the fences on the incoming batch into the request and should not
> assume that they always start idle.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 02/28] drm/i915/selftests: Remove accidental serialization between gpu_fill
  2019-08-26  7:21 ` [PATCH 02/28] drm/i915/selftests: Remove accidental serialization between gpu_fill Chris Wilson
@ 2019-08-27 15:54   ` Matthew Auld
  0 siblings, 0 replies; 43+ messages in thread
From: Matthew Auld @ 2019-08-27 15:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development

On Mon, 26 Aug 2019 at 08:24, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Upon object creation for live_gem_contexts, we fill the object with
> known scratch and flush it out of the CPU cache. Before performing the
> GPU fill, we don't need to flush it again and so avoid serialising with
> previous fills.
>
> However, we do need some throttling on the internal interfaces if we do
> not want to run out of memory!
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 03/28] drm/i915/selftests: Try to recycle context allocations
  2019-08-26  7:21 ` [PATCH 03/28] drm/i915/selftests: Try to recycle context allocations Chris Wilson
@ 2019-08-27 16:11   ` Matthew Auld
  0 siblings, 0 replies; 43+ messages in thread
From: Matthew Auld @ 2019-08-27 16:11 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development

On Mon, 26 Aug 2019 at 08:23, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> igt_ctx_exec allocates a new context for each iteration, keeping them
> all allocated until the end. Instead, release the local ctx reference at
> the end of each iteration, allowing ourselves to reap those if under
> mempressure.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 06/28] drm/i915: Protect our local workers against I915_FENCE_TIMEOUT
  2019-08-26  7:21 ` [PATCH 06/28] drm/i915: Protect our local workers against I915_FENCE_TIMEOUT Chris Wilson
@ 2019-08-28 17:01   ` Matthew Auld
  0 siblings, 0 replies; 43+ messages in thread
From: Matthew Auld @ 2019-08-28 17:01 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development

On Mon, 26 Aug 2019 at 08:22, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Trust our own workers to not cause unnecessary delays and disable the
> automatic timeout on their asynchronous fence waits. (Along the same
> lines that we trust our own requests to complete eventually, if
> necessary by force.)
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

end of thread, other threads:[~2019-08-28 17:01 UTC | newest]

Thread overview: 43+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-26  7:21 [PATCH 01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Chris Wilson
2019-08-26  7:21 ` [PATCH 02/28] drm/i915/selftests: Remove accidental serialization between gpu_fill Chris Wilson
2019-08-27 15:54   ` Matthew Auld
2019-08-26  7:21 ` [PATCH 03/28] drm/i915/selftests: Try to recycle context allocations Chris Wilson
2019-08-27 16:11   ` Matthew Auld
2019-08-26  7:21 ` [PATCH 04/28] drm/i915/gtt: Downgrade Baytrail back to aliasing-ppgtt Chris Wilson
2019-08-26  7:21 ` [PATCH 05/28] drm/i915/gtt: Downgrade Cherryview " Chris Wilson
2019-08-26  7:21 ` [PATCH 06/28] drm/i915: Protect our local workers against I915_FENCE_TIMEOUT Chris Wilson
2019-08-28 17:01   ` Matthew Auld
2019-08-26  7:21 ` [PATCH 07/28] drm/i915: Force PD reload on any PD update Chris Wilson
2019-08-26  7:21 ` [PATCH 08/28] drm/i915: Replace obj->pin_global with obj->frontbuffer Chris Wilson
2019-08-26  7:21 ` [PATCH 09/28] dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling) Chris Wilson
2019-08-26  7:21 ` [PATCH 10/28] drm/mm: Pack allocated/scanned boolean into a bitfield Chris Wilson
2019-08-27  7:12   ` kbuild test robot
2019-08-26  7:21 ` [PATCH 11/28] drm/i915: Make shrink/unshrink be atomic Chris Wilson
2019-08-26  7:21 ` [PATCH 12/28] drm/i915: Only track bound elements of the GTT Chris Wilson
2019-08-26  7:21 ` [PATCH 13/28] drm/i915: Make i915_vma.flags atomic_t for mutex reduction Chris Wilson
2019-08-26  7:21 ` [PATCH 14/28] drm/i915: Mark up address spaces that may need to allocate Chris Wilson
2019-08-26  7:21 ` [PATCH 15/28] drm/i915: Pull i915_vma_pin under the vm->mutex Chris Wilson
2019-08-26  7:21 ` [PATCH 16/28] drm/i915: Push the i915_active.retire into a worker Chris Wilson
2019-08-26  7:21 ` [PATCH 17/28] drm/i915: Coordinate i915_active with its own mutex Chris Wilson
2019-08-26  7:21 ` [PATCH 18/28] drm/i915: Move idle barrier cleanup into engine-pm Chris Wilson
2019-08-26  7:21 ` [PATCH 19/28] drm/i915: Drop struct_mutex from around i915_retire_requests() Chris Wilson
2019-08-26  7:21 ` [PATCH 20/28] drm/i915: Merge wait_for_timelines with retire_request Chris Wilson
2019-08-26  7:21 ` [PATCH 21/28] drm/i915: Serialise the fill BLT with the vma pinning Chris Wilson
2019-08-26  7:21 ` [PATCH 22/28] drm/i915/execlists: Always request completion before marking an error Chris Wilson
2019-08-26  7:21 ` [PATCH 23/28] drm/i915: Only enqueue already completed requests Chris Wilson
2019-08-26  7:21 ` [PATCH 24/28] drm/i915/execlists: Force preemption Chris Wilson
2019-08-26  7:21 ` [PATCH 25/28] drm/i915: Mark up "sentinel" requests Chris Wilson
2019-08-26  7:21 ` [PATCH 26/28] drm/i915/execlists: Cancel banned contexts on schedule-out Chris Wilson
2019-08-26  7:21 ` [PATCH 27/28] drm/i915: Cancel non-persistent contexts on close Chris Wilson
2019-08-26 13:39   ` Bloomfield, Jon
2019-08-26 16:50     ` Chris Wilson
2019-08-26  7:21 ` [PATCH 28/28] drm/i915: Replace hangcheck by heartbeats Chris Wilson
2019-08-26 14:08   ` Bloomfield, Jon
2019-08-26 16:56     ` Chris Wilson
2019-08-26 17:51       ` Bloomfield, Jon
2019-08-26 17:57         ` Chris Wilson
2019-08-26  7:34 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/28] drm/i915/selftests: Add the usual batch vma managements to st_workarounds Patchwork
2019-08-26  7:57 ` ✗ Fi.CI.BAT: failure " Patchwork
2019-08-26  9:35   ` Chris Wilson
2019-08-26 13:33   ` Chris Wilson
2019-08-27  9:19 ` [PATCH 01/28] " Matthew Auld

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.