All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH 01/14] drm/i915/gt: Push context state allocation earlier
@ 2020-01-09  8:58 Chris Wilson
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 02/14] drm/i915/gt: Pull context activation into central intel_context_pin() Chris Wilson
                   ` (13 more replies)
  0 siblings, 14 replies; 30+ messages in thread
From: Chris Wilson @ 2020-01-09  8:58 UTC (permalink / raw)
  To: intel-gfx

Allow for knowledgeable users to preallocate the context state, and to
separate the allocation step from the pinning step during
intel_context_pin()

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c | 34 +++++++++++++++++++------
 drivers/gpu/drm/i915/gt/intel_context.h |  2 ++
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 5ea8305fd633..bc5ae18dbe3e 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -43,24 +43,42 @@ intel_context_create(struct intel_engine_cs *engine)
 	return ce;
 }
 
+int intel_context_alloc_state(struct intel_context *ce)
+{
+	int err = 0;
+
+	if (mutex_lock_interruptible(&ce->pin_mutex))
+		return -EINTR;
+
+	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+		err = ce->ops->alloc(ce);
+		if (unlikely(err))
+			goto unlock;
+
+		set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
+	}
+
+unlock:
+	mutex_unlock(&ce->pin_mutex);
+	return err;
+}
+
 int __intel_context_do_pin(struct intel_context *ce)
 {
 	int err;
 
+	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
+		err = intel_context_alloc_state(ce);
+		if (err)
+			return err;
+	}
+
 	if (mutex_lock_interruptible(&ce->pin_mutex))
 		return -EINTR;
 
 	if (likely(!atomic_read(&ce->pin_count))) {
 		intel_wakeref_t wakeref;
 
-		if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
-			err = ce->ops->alloc(ce);
-			if (unlikely(err))
-				goto err;
-
-			__set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
-		}
-
 		err = 0;
 		with_intel_runtime_pm(ce->engine->uncore->rpm, wakeref)
 			err = ce->ops->pin(ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 0f5ae4ff3b10..673f5fb2967a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -31,6 +31,8 @@ void intel_context_fini(struct intel_context *ce);
 struct intel_context *
 intel_context_create(struct intel_engine_cs *engine);
 
+int intel_context_alloc_state(struct intel_context *ce);
+
 void intel_context_free(struct intel_context *ce);
 
 /**
-- 
2.25.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Intel-gfx] [PATCH 02/14] drm/i915/gt: Pull context activation into central intel_context_pin()
  2020-01-09  8:58 [Intel-gfx] [PATCH 01/14] drm/i915/gt: Push context state allocation earlier Chris Wilson
@ 2020-01-09  8:58 ` Chris Wilson
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 03/14] drm/i915/gt: runtime-pm is no longer required for ce->ops->pin() Chris Wilson
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2020-01-09  8:58 UTC (permalink / raw)
  To: intel-gfx

While this is encroaching on midlayer territory, having already made the
state allocation a previous step in pinning, we can now pull the common
intel_context_active_acquire() into intel_context_pin() itself. This is
a prelude to make the activation a separate step inside pinning, outside
of the ce->pin_mutex

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       | 67 ++++++++++---------
 drivers/gpu/drm/i915/gt/intel_context.h       |  3 -
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 16 +----
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 16 +----
 drivers/gpu/drm/i915/gt/mock_engine.c         |  2 +-
 5 files changed, 40 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index bc5ae18dbe3e..cac80d87b2bb 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -63,6 +63,34 @@ int intel_context_alloc_state(struct intel_context *ce)
 	return err;
 }
 
+static int intel_context_active_acquire(struct intel_context *ce)
+{
+	int err;
+
+	err = i915_active_acquire(&ce->active);
+	if (err)
+		return err;
+
+	/* Preallocate tracking nodes */
+	if (!intel_context_is_barrier(ce)) {
+		err = i915_active_acquire_preallocate_barrier(&ce->active,
+							      ce->engine);
+		if (err) {
+			i915_active_release(&ce->active);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static void intel_context_active_release(struct intel_context *ce)
+{
+	/* Nodes preallocated in intel_context_active() */
+	i915_active_acquire_barrier(&ce->active);
+	i915_active_release(&ce->active);
+}
+
 int __intel_context_do_pin(struct intel_context *ce)
 {
 	int err;
@@ -79,11 +107,14 @@ int __intel_context_do_pin(struct intel_context *ce)
 	if (likely(!atomic_read(&ce->pin_count))) {
 		intel_wakeref_t wakeref;
 
-		err = 0;
+		err = intel_context_active_acquire(ce);
+		if (unlikely(err))
+			goto err;
+
 		with_intel_runtime_pm(ce->engine->uncore->rpm, wakeref)
 			err = ce->ops->pin(ce);
-		if (err)
-			goto err;
+		if (unlikely(err))
+			goto err_active;
 
 		CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n",
 			 ce->ring->head, ce->ring->tail);
@@ -97,6 +128,8 @@ int __intel_context_do_pin(struct intel_context *ce)
 	mutex_unlock(&ce->pin_mutex);
 	return 0;
 
+err_active:
+	intel_context_active_release(ce);
 err:
 	mutex_unlock(&ce->pin_mutex);
 	return err;
@@ -198,34 +231,6 @@ static int __intel_context_active(struct i915_active *active)
 	return err;
 }
 
-int intel_context_active_acquire(struct intel_context *ce)
-{
-	int err;
-
-	err = i915_active_acquire(&ce->active);
-	if (err)
-		return err;
-
-	/* Preallocate tracking nodes */
-	if (!intel_context_is_barrier(ce)) {
-		err = i915_active_acquire_preallocate_barrier(&ce->active,
-							      ce->engine);
-		if (err) {
-			i915_active_release(&ce->active);
-			return err;
-		}
-	}
-
-	return 0;
-}
-
-void intel_context_active_release(struct intel_context *ce)
-{
-	/* Nodes preallocated in intel_context_active() */
-	i915_active_acquire_barrier(&ce->active);
-	i915_active_release(&ce->active);
-}
-
 void
 intel_context_init(struct intel_context *ce,
 		   struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 673f5fb2967a..f0f49b50b968 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -118,9 +118,6 @@ static inline void intel_context_exit(struct intel_context *ce)
 		ce->ops->exit(ce);
 }
 
-int intel_context_active_acquire(struct intel_context *ce);
-void intel_context_active_release(struct intel_context *ce);
-
 static inline struct intel_context *intel_context_get(struct intel_context *ce)
 {
 	kref_get(&ce->ref);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index bd74b76c6403..28b1fbcb4370 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2566,33 +2566,21 @@ __execlists_context_pin(struct intel_context *ce,
 			struct intel_engine_cs *engine)
 {
 	void *vaddr;
-	int ret;
 
 	GEM_BUG_ON(!ce->state);
-
-	ret = intel_context_active_acquire(ce);
-	if (ret)
-		goto err;
 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
 
 	vaddr = i915_gem_object_pin_map(ce->state->obj,
 					i915_coherent_map_type(engine->i915) |
 					I915_MAP_OVERRIDE);
-	if (IS_ERR(vaddr)) {
-		ret = PTR_ERR(vaddr);
-		goto unpin_active;
-	}
+	if (IS_ERR(vaddr))
+		return PTR_ERR(vaddr);
 
 	ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 	__execlists_update_reg_state(ce, engine);
 
 	return 0;
-
-unpin_active:
-	intel_context_active_release(ce);
-err:
-	return ret;
 }
 
 static int execlists_context_pin(struct intel_context *ce)
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index eca5ab048cd9..bc44fe8e5ffa 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1329,21 +1329,7 @@ static int ring_context_alloc(struct intel_context *ce)
 
 static int ring_context_pin(struct intel_context *ce)
 {
-	int err;
-
-	err = intel_context_active_acquire(ce);
-	if (err)
-		return err;
-
-	err = __context_pin_ppgtt(ce);
-	if (err)
-		goto err_active;
-
-	return 0;
-
-err_active:
-	intel_context_active_release(ce);
-	return err;
+	return __context_pin_ppgtt(ce);
 }
 
 static void ring_context_reset(struct intel_context *ce)
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index d0e68ce9aa51..a560b7eee2cd 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -149,7 +149,7 @@ static int mock_context_alloc(struct intel_context *ce)
 
 static int mock_context_pin(struct intel_context *ce)
 {
-	return intel_context_active_acquire(ce);
+	return 0;
 }
 
 static void mock_context_reset(struct intel_context *ce)
-- 
2.25.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Intel-gfx] [PATCH 03/14] drm/i915/gt: runtime-pm is no longer required for ce->ops->pin()
  2020-01-09  8:58 [Intel-gfx] [PATCH 01/14] drm/i915/gt: Push context state allocation earlier Chris Wilson
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 02/14] drm/i915/gt: Pull context activation into central intel_context_pin() Chris Wilson
@ 2020-01-09  8:58 ` Chris Wilson
  2020-01-10 14:37   ` Mika Kuoppala
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 04/14] drm/i915: Pin the context as we work on it Chris Wilson
                   ` (11 subsequent siblings)
  13 siblings, 1 reply; 30+ messages in thread
From: Chris Wilson @ 2020-01-09  8:58 UTC (permalink / raw)
  To: intel-gfx

Now that we have moved the runtime-pm management out of
intel_context_acctive_acquire, and that itself out of ce->ops->pin(), no
explicit runtime pm wakeref is required in intel_context_pin().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_context.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index cac80d87b2bb..9796a54b4f47 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -105,14 +105,11 @@ int __intel_context_do_pin(struct intel_context *ce)
 		return -EINTR;
 
 	if (likely(!atomic_read(&ce->pin_count))) {
-		intel_wakeref_t wakeref;
-
 		err = intel_context_active_acquire(ce);
 		if (unlikely(err))
 			goto err;
 
-		with_intel_runtime_pm(ce->engine->uncore->rpm, wakeref)
-			err = ce->ops->pin(ce);
+		err = ce->ops->pin(ce);
 		if (unlikely(err))
 			goto err_active;
 
-- 
2.25.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Intel-gfx] [PATCH 04/14] drm/i915: Pin the context as we work on it
  2020-01-09  8:58 [Intel-gfx] [PATCH 01/14] drm/i915/gt: Push context state allocation earlier Chris Wilson
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 02/14] drm/i915/gt: Pull context activation into central intel_context_pin() Chris Wilson
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 03/14] drm/i915/gt: runtime-pm is no longer required for ce->ops->pin() Chris Wilson
@ 2020-01-09  8:58 ` Chris Wilson
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 05/14] drm/i915: Replace vma parking with a clock aging algorithm Chris Wilson
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2020-01-09  8:58 UTC (permalink / raw)
  To: intel-gfx

Since we now allow the intel_context_unpin() to run unserialised, we
risk our operations under the intel_context_lock_pinned() being run as
the context is unpinned (and thus invalidating our state). We can
atomically acquire the pin, testing to see if it is pinned in the
process, thus ensuring that the state remains consistent during the
course of the whole operation.

Fixes: 841350223816 ("drm/i915/gt: Drop mutex serialisation between context pin/unpin")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++++++---
 drivers/gpu/drm/i915/gt/intel_context.h     |  7 ++++++-
 drivers/gpu/drm/i915/i915_debugfs.c         | 10 ++++------
 drivers/gpu/drm/i915/i915_perf.c            | 15 +++++----------
 4 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 88f6253f5405..a2e57e62af30 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1236,12 +1236,14 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 	 * image, or into the registers directory, does not stick). Pristine
 	 * and idle contexts will be configured on pinning.
 	 */
-	if (!intel_context_is_pinned(ce))
+	if (!intel_context_pin_if_active(ce))
 		return 0;
 
 	rq = intel_engine_create_kernel_request(ce->engine);
-	if (IS_ERR(rq))
-		return PTR_ERR(rq);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		goto out_unpin;
+	}
 
 	/* Serialise with the remote context */
 	ret = intel_context_prepare_remote_request(ce, rq);
@@ -1249,6 +1251,8 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 		ret = gen8_emit_rpcs_config(rq, ce, sseu);
 
 	i915_request_add(rq);
+out_unpin:
+	intel_context_unpin(ce);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index f0f49b50b968..30bd248827d8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -78,9 +78,14 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce)
 
 int __intel_context_do_pin(struct intel_context *ce);
 
+static inline bool intel_context_pin_if_active(struct intel_context *ce)
+{
+	return atomic_inc_not_zero(&ce->pin_count);
+}
+
 static inline int intel_context_pin(struct intel_context *ce)
 {
-	if (likely(atomic_inc_not_zero(&ce->pin_count)))
+	if (likely(intel_context_pin_if_active(ce)))
 		return 0;
 
 	return __intel_context_do_pin(ce);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0ac98e39eb75..db184536acef 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -321,16 +321,15 @@ static void print_context_stats(struct seq_file *m,
 
 		for_each_gem_engine(ce,
 				    i915_gem_context_lock_engines(ctx), it) {
-			intel_context_lock_pinned(ce);
-			if (intel_context_is_pinned(ce)) {
+			if (intel_context_pin_if_active(ce)) {
 				rcu_read_lock();
 				if (ce->state)
 					per_file_stats(0,
 						       ce->state->obj, &kstats);
 				per_file_stats(0, ce->ring->vma->obj, &kstats);
 				rcu_read_unlock();
+				intel_context_unpin(ce);
 			}
-			intel_context_unlock_pinned(ce);
 		}
 		i915_gem_context_unlock_engines(ctx);
 
@@ -1513,15 +1512,14 @@ static int i915_context_status(struct seq_file *m, void *unused)
 
 		for_each_gem_engine(ce,
 				    i915_gem_context_lock_engines(ctx), it) {
-			intel_context_lock_pinned(ce);
-			if (intel_context_is_pinned(ce)) {
+			if (intel_context_pin_if_active(ce)) {
 				seq_printf(m, "%s: ", ce->engine->name);
 				if (ce->state)
 					describe_obj(m, ce->state->obj);
 				describe_ctx_ring(m, ce->ring);
 				seq_putc(m, '\n');
+				intel_context_unpin(ce);
 			}
-			intel_context_unlock_pinned(ce);
 		}
 		i915_gem_context_unlock_engines(ctx);
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 84350c7bc711..0f556d80ba36 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2159,8 +2159,6 @@ static int gen8_modify_context(struct intel_context *ce,
 	struct i915_request *rq;
 	int err;
 
-	lockdep_assert_held(&ce->pin_mutex);
-
 	rq = intel_engine_create_kernel_request(ce->engine);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
@@ -2203,17 +2201,14 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
 		if (ce->engine->class != RENDER_CLASS)
 			continue;
 
-		err = intel_context_lock_pinned(ce);
-		if (err)
-			break;
+		/* Otherwise OA settings will be set upon first use */
+		if (!intel_context_pin_if_active(ce))
+			continue;
 
 		flex->value = intel_sseu_make_rpcs(ctx->i915, &ce->sseu);
+		err = gen8_modify_context(ce, flex, count);
 
-		/* Otherwise OA settings will be set upon first use */
-		if (intel_context_is_pinned(ce))
-			err = gen8_modify_context(ce, flex, count);
-
-		intel_context_unlock_pinned(ce);
+		intel_context_unpin(ce);
 		if (err)
 			break;
 	}
-- 
2.25.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Intel-gfx] [PATCH 05/14] drm/i915: Replace vma parking with a clock aging algorithm
  2020-01-09  8:58 [Intel-gfx] [PATCH 01/14] drm/i915/gt: Push context state allocation earlier Chris Wilson
                   ` (2 preceding siblings ...)
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 04/14] drm/i915: Pin the context as we work on it Chris Wilson
@ 2020-01-09  8:58 ` Chris Wilson
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 06/14] drm/i915/gt: Always reset the timeslice after a context switch Chris Wilson
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2020-01-09  8:58 UTC (permalink / raw)
  To: intel-gfx

We cache the user's vma for a brief period of time after they close them
so that if they are immediately reopened we avoid having to unbind and
rebind them. This happens quite frequently for display servers which
only keep a client's frame open for as long as they are copying from it,
and so they open/close every vma about 30 Hz (every other frame for
double buffering).

Our current strategy is to keep the vma alive until the next global idle
point. However this cache should be purely temporal, so switch over from
using the parked notifier to using its own clock based aging algorithm:
if the closed vma is not reused within 2 clock ticks, it is destroyed.

Closes: https://gitlab.freedesktop.org/drm/intel/issues/644
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt.c            |  3 -
 drivers/gpu/drm/i915/gt/intel_gt_pm.c         |  1 -
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |  3 -
 drivers/gpu/drm/i915/i915_debugfs.c           |  3 +
 drivers/gpu/drm/i915/i915_drv.c               |  4 +-
 drivers/gpu/drm/i915/i915_drv.h               |  1 +
 drivers/gpu/drm/i915/i915_vma.c               | 83 +++++++++++++++----
 drivers/gpu/drm/i915/i915_vma.h               | 11 ++-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  2 +
 9 files changed, 86 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index da2b6e2ae692..63d70cf62ddb 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -23,9 +23,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 
 	spin_lock_init(&gt->irq_lock);
 
-	INIT_LIST_HEAD(&gt->closed_vma);
-	spin_lock_init(&gt->closed_lock);
-
 	intel_gt_init_reset(gt);
 	intel_gt_init_requests(gt);
 	intel_gt_init_timelines(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index d1c2f034296a..3302f676d12b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -80,7 +80,6 @@ static int __gt_park(struct intel_wakeref *wf)
 
 	intel_gt_park_requests(gt);
 
-	i915_vma_parked(gt);
 	i915_pmu_gt_parked(i915);
 	intel_rps_park(&gt->rps);
 	intel_rc6_park(&gt->rc6);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 96890dd12b5f..4589dea67b8f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -58,9 +58,6 @@ struct intel_gt {
 	struct intel_wakeref wakeref;
 	atomic_t user_wakeref;
 
-	struct list_head closed_vma;
-	spinlock_t closed_lock; /* guards the list of closed_vma */
-
 	struct intel_reset reset;
 
 	/**
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index db184536acef..9e18e10c125f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3587,6 +3587,9 @@ i915_drop_caches_set(void *data, u64 val)
 	if (ret)
 		return ret;
 
+	if (val & DROP_IDLE)
+		i915_vma_clock_flush(&i915->vma_clock);
+
 	fs_reclaim_acquire(GFP_KERNEL);
 	if (val & DROP_BOUND)
 		i915_gem_shrink(i915, LONG_MAX, NULL, I915_SHRINK_BOUND);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f7385abdd74b..9fde3918094f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -523,8 +523,8 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
 
 	intel_wopcm_init_early(&dev_priv->wopcm);
 
+	i915_vma_clock_init_early(&dev_priv->vma_clock);
 	intel_gt_init_early(&dev_priv->gt, dev_priv);
-
 	i915_gem_init_early(dev_priv);
 
 	/* This must be called before any calls to HAS_PCH_* */
@@ -561,6 +561,8 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
  */
 static void i915_driver_late_release(struct drm_i915_private *dev_priv)
 {
+	i915_vma_clock_flush(&dev_priv->vma_clock);
+
 	intel_irq_fini(dev_priv);
 	intel_power_domains_cleanup(dev_priv);
 	i915_gem_cleanup_early(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1025d783f494..49d314bb84c9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1241,6 +1241,7 @@ struct drm_i915_private {
 	struct intel_runtime_pm runtime_pm;
 
 	struct i915_perf perf;
+	struct i915_vma_clock vma_clock;
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
 	struct intel_gt gt;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 43d5c270bdb0..4744643edf2d 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -985,8 +985,7 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
 
 void i915_vma_close(struct i915_vma *vma)
 {
-	struct intel_gt *gt = vma->vm->gt;
-	unsigned long flags;
+	struct i915_vma_clock *clock = &vma->vm->i915->vma_clock;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
 
@@ -1002,18 +1001,20 @@ void i915_vma_close(struct i915_vma *vma)
 	 * causing us to rebind the VMA once more. This ends up being a lot
 	 * of wasted work for the steady state.
 	 */
-	spin_lock_irqsave(&gt->closed_lock, flags);
-	list_add(&vma->closed_link, &gt->closed_vma);
-	spin_unlock_irqrestore(&gt->closed_lock, flags);
+	spin_lock(&clock->lock);
+	list_add(&vma->closed_link, &clock->age[0]);
+	spin_unlock(&clock->lock);
+
+	schedule_delayed_work(&clock->work, round_jiffies_up_relative(HZ));
 }
 
 static void __i915_vma_remove_closed(struct i915_vma *vma)
 {
-	struct intel_gt *gt = vma->vm->gt;
+	struct i915_vma_clock *clock = &vma->vm->i915->vma_clock;
 
-	spin_lock_irq(&gt->closed_lock);
+	spin_lock(&clock->lock);
 	list_del_init(&vma->closed_link);
-	spin_unlock_irq(&gt->closed_lock);
+	spin_unlock(&clock->lock);
 }
 
 void i915_vma_reopen(struct i915_vma *vma)
@@ -1051,15 +1052,35 @@ void i915_vma_release(struct kref *ref)
 	i915_vma_free(vma);
 }
 
-void i915_vma_parked(struct intel_gt *gt)
+static void i915_vma_clock(struct work_struct *w)
 {
+	struct i915_vma_clock *clock =
+		container_of(w, typeof(*clock), work.work);
 	struct i915_vma *vma, *next;
 
-	spin_lock_irq(&gt->closed_lock);
-	list_for_each_entry_safe(vma, next, &gt->closed_vma, closed_link) {
+	/*
+	 * A very simple clock aging algorithm: we keep the user's closed
+	 * vma alive for a couple of timer ticks before destroying them.
+	 * This serves a shortlived cache so that frequently reused VMA
+	 * are kept alive between frames and we skip having to rebing them.
+	 *
+	 * When closed, we insert the vma into age[0]. Upon completion of
+	 * a timer tick, it is moved to age[1]. At the start of each timer
+	 * tick, we destroy all the old vma that were accumulated into age[1]
+	 * and have not been reused. All destroyed vma have therefore been
+	 * unused for more than 1 tick (at least a second), and at most 2
+	 * ticks (we expect the average to be 1.5 ticks).
+	 */
+
+	spin_lock(&clock->lock);
+
+	list_for_each_entry_safe(vma, next, &clock->age[1], closed_link) {
 		struct drm_i915_gem_object *obj = vma->obj;
 		struct i915_address_space *vm = vma->vm;
 
+		if (i915_vma_is_active(vma))
+			continue;
+
 		/* XXX All to avoid keeping a reference on i915_vma itself */
 
 		if (!kref_get_unless_zero(&obj->base.refcount))
@@ -1072,7 +1093,7 @@ void i915_vma_parked(struct intel_gt *gt)
 			obj = NULL;
 		}
 
-		spin_unlock_irq(&gt->closed_lock);
+		spin_unlock(&clock->lock);
 
 		if (obj) {
 			__i915_vma_put(vma);
@@ -1082,11 +1103,26 @@ void i915_vma_parked(struct intel_gt *gt)
 		i915_vm_close(vm);
 
 		/* Restart after dropping lock */
-		spin_lock_irq(&gt->closed_lock);
-		next = list_first_entry(&gt->closed_vma,
+		spin_lock(&clock->lock);
+		next = list_first_entry(&clock->age[1],
 					typeof(*next), closed_link);
 	}
-	spin_unlock_irq(&gt->closed_lock);
+	list_splice_tail_init(&clock->age[0], &clock->age[1]);
+
+	if (!list_empty(&clock->age[1])) {
+		/* Keep active VMA around until second tick after idling */
+		list_for_each_entry_safe(vma, next,
+					 &clock->age[1], closed_link) {
+			if (i915_vma_is_active(vma))
+				list_move_tail(&vma->closed_link,
+					       &clock->age[0]);
+		}
+
+		schedule_delayed_work(&clock->work,
+				      round_jiffies_up_relative(HZ));
+	}
+
+	spin_unlock(&clock->lock);
 }
 
 static void __i915_vma_iounmap(struct i915_vma *vma)
@@ -1277,6 +1313,23 @@ void i915_vma_make_purgeable(struct i915_vma *vma)
 	i915_gem_object_make_purgeable(vma->obj);
 }
 
+void i915_vma_clock_init_early(struct i915_vma_clock *clock)
+{
+	spin_lock_init(&clock->lock);
+	INIT_LIST_HEAD(&clock->age[0]);
+	INIT_LIST_HEAD(&clock->age[1]);
+
+	INIT_DELAYED_WORK(&clock->work, i915_vma_clock);
+}
+
+void i915_vma_clock_flush(struct i915_vma_clock *clock)
+{
+	do {
+		if (cancel_delayed_work_sync(&clock->work))
+			i915_vma_clock(&clock->work.work);
+	} while (delayed_work_pending(&clock->work));
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/i915_vma.c"
 #endif
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 02b31a62951e..7b8a18b72d81 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -351,8 +351,6 @@ i915_vma_unpin_fence(struct i915_vma *vma)
 		__i915_vma_unpin_fence(vma);
 }
 
-void i915_vma_parked(struct intel_gt *gt);
-
 #define for_each_until(cond) if (cond) break; else
 
 /**
@@ -381,4 +379,13 @@ static inline int i915_vma_sync(struct i915_vma *vma)
 	return i915_active_wait(&vma->active);
 }
 
+struct i915_vma_clock {
+	spinlock_t lock;
+	struct list_head age[2];
+	struct delayed_work work;
+};
+
+void i915_vma_clock_init_early(struct i915_vma_clock *clock);
+void i915_vma_clock_flush(struct i915_vma_clock *clock);
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 3b8986983afc..6c27f43155ea 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -57,6 +57,7 @@ static void mock_device_release(struct drm_device *dev)
 
 	mock_device_flush(i915);
 	intel_gt_driver_remove(&i915->gt);
+	i915_vma_clock_flush(&i915->vma_clock);
 
 	i915_gem_driver_release__contexts(i915);
 
@@ -164,6 +165,7 @@ struct drm_i915_private *mock_gem_device(void)
 	mock_uncore_init(&i915->uncore, i915);
 
 	i915_gem_init__mm(i915);
+	i915_vma_clock_init_early(&i915->vma_clock);
 	intel_gt_init_early(&i915->gt, i915);
 	atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */
 	i915->gt.awake = -ENODEV;
-- 
2.25.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Intel-gfx] [PATCH 06/14] drm/i915/gt: Always reset the timeslice after a context switch
  2020-01-09  8:58 [Intel-gfx] [PATCH 01/14] drm/i915/gt: Push context state allocation earlier Chris Wilson
                   ` (3 preceding siblings ...)
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 05/14] drm/i915: Replace vma parking with a clock aging algorithm Chris Wilson
@ 2020-01-09  8:58 ` Chris Wilson
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 07/14] drm/i915/gt: Yield the timeslice if waiting on a semaphore Chris Wilson
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2020-01-09  8:58 UTC (permalink / raw)
  To: intel-gfx

Currently, we reset the timer after a pre-eemption event. This has the
side-effect that the timeslice runs into the second context after the
first is completed. To be more fair, we want to reset the clock after
promotion as well.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 28b1fbcb4370..3f1f55327506 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2289,7 +2289,6 @@ static void process_csb(struct intel_engine_cs *engine)
 
 			/* Point active to the new ELSP; prevent overwriting */
 			WRITE_ONCE(execlists->active, execlists->pending);
-			set_timeslice(engine);
 
 			if (!inject_preempt_hang(execlists))
 				ring_set_paused(engine, 0);
@@ -2329,6 +2328,9 @@ static void process_csb(struct intel_engine_cs *engine)
 		}
 	} while (head != tail);
 
+	if (execlists_active(execlists))
+		set_timeslice(engine);
+
 	execlists->csb_head = head;
 
 	/*
-- 
2.25.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Intel-gfx] [PATCH 07/14] drm/i915/gt: Yield the timeslice if waiting on a semaphore
  2020-01-09  8:58 [Intel-gfx] [PATCH 01/14] drm/i915/gt: Push context state allocation earlier Chris Wilson
                   ` (4 preceding siblings ...)
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 06/14] drm/i915/gt: Always reset the timeslice after a context switch Chris Wilson
@ 2020-01-09  8:58 ` Chris Wilson
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 08/14] drm/i915: Use common priotree lists for virtual engine Chris Wilson
                   ` (7 subsequent siblings)
  13 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2020-01-09  8:58 UTC (permalink / raw)
  To: intel-gfx

If we find ourselves waiting on a MI_SEMAPHORE_WAIT, either within the
user batch or in our own preamble, the engine raises a
GT_WAIT_ON_SEMAPHORE interrupt. We can unmask that interrupt and so
respond to a semaphore wait by yielding the timeslice, if we have
another context to yield to!

The only real complication is that the interrupt is only generated for
the start of the semaphore wait, and is asynchronous to our
process_csb() -- that is, we may not have registered the timeslice before
we see the interrupt. To ensure we don't miss a potential semaphore
blocking forward progress (e.g. selftests/live_timeslice_preempt) we mark
the interrupt and apply it to the next timeslice regardless of whether it
was active at the time.

v2: We use semaphores in preempt-to-busy, within the timeslicing
implementation itself! Ergo, when we do insert a preemption due to an
expired timeslice, the new context may start with the missed semaphore
flagged by the retired context and be yielded, ad infinitum. To avoid
this, read the context id at the time of the semaphore interrupt and
only yield if that context is still active.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  9 ++++++
 drivers/gpu/drm/i915/gt/intel_gt_irq.c       | 32 +++++++++++---------
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 31 ++++++++++++++++---
 drivers/gpu/drm/i915/i915_reg.h              |  5 +++
 4 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 00287515e7af..d146d2fbd42a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -156,6 +156,15 @@ struct intel_engine_execlists {
 	 */
 	struct i915_priolist default_priolist;
 
+	/**
+	 * @yield: CCID at the time of the last semaphore-wait interrupt.
+	 *
+	 * Instead of leaving a semaphore busy-spinning on an engine, we would
+	 * like to switch to another ready context, i.e. yielding the semaphore
+	 * timeslice.
+	 */
+	u32 yield;
+
 	/**
 	 * @no_priolist: priority lists disabled
 	 */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index f796bdf1ed30..6ae64a224b02 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -24,6 +24,13 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 {
 	bool tasklet = false;
 
+	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+		WRITE_ONCE(engine->execlists.yield,
+			   ENGINE_READ_FW(engine, EXECLIST_CCID));
+		if (del_timer(&engine->execlists.timer))
+			tasklet = true;
+	}
+
 	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
 		tasklet = true;
 
@@ -210,7 +217,10 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
 
 void gen11_gt_irq_postinstall(struct intel_gt *gt)
 {
-	const u32 irqs = GT_RENDER_USER_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT;
+	const u32 irqs =
+		GT_RENDER_USER_INTERRUPT |
+		GT_CONTEXT_SWITCH_INTERRUPT |
+		GT_WAIT_SEMAPHORE_INTERRUPT;
 	struct intel_uncore *uncore = gt->uncore;
 	const u32 dmask = irqs << 16 | irqs;
 	const u32 smask = irqs << 16;
@@ -357,21 +367,15 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
 	struct intel_uncore *uncore = gt->uncore;
 
 	/* These are interrupts we'll toggle with the ring mask register */
+	const u32 irqs =
+		GT_RENDER_USER_INTERRUPT |
+		GT_CONTEXT_SWITCH_INTERRUPT |
+		GT_WAIT_SEMAPHORE_INTERRUPT;
 	u32 gt_interrupts[] = {
-		(GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
-		 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
-		 GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT |
-		 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT),
-
-		(GT_RENDER_USER_INTERRUPT << GEN8_VCS0_IRQ_SHIFT |
-		 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS0_IRQ_SHIFT |
-		 GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT |
-		 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT),
-
+		irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
+		irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
 		0,
-
-		(GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT |
-		 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT)
+		irqs << GEN8_VECS_IRQ_SHIFT,
 	};
 
 	gt->pm_ier = 0x0;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3f1f55327506..08d5087e7582 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1662,7 +1662,8 @@ static void defer_active(struct intel_engine_cs *engine)
 }
 
 static bool
-need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+need_timeslice(const struct intel_engine_cs *engine,
+	       const struct i915_request *rq)
 {
 	int hint;
 
@@ -1678,6 +1679,27 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
 	return hint >= effective_prio(rq);
 }
 
+static bool
+timeslice_expired(const struct intel_engine_cs *engine,
+		  const struct i915_request *rq)
+{
+	const struct intel_engine_execlists *el = &engine->execlists;
+
+	return (timer_expired(&el->timer) ||
+		/*
+		 * Once bitten, forever smitten!
+		 *
+		 * If the active context ever busy-waited on a semaphore,
+		 * it will be treated as a hog until the end of its timeslice.
+		 * The HW only sends an interrupt on the first miss, and we
+		 * do know if that semaphore has been signaled, or even if it
+		 * is now stuck on another semaphore. Play safe, yield if it
+		 * might be stuck -- it will be given a fresh timeslice in
+		 * the near future.
+		 */
+		upper_32_bits(rq->context->lrc_desc) == READ_ONCE(el->yield));
+}
+
 static int
 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
 {
@@ -1693,8 +1715,7 @@ timeslice(const struct intel_engine_cs *engine)
 	return READ_ONCE(engine->props.timeslice_duration_ms);
 }
 
-static unsigned long
-active_timeslice(const struct intel_engine_cs *engine)
+static unsigned long active_timeslice(const struct intel_engine_cs *engine)
 {
 	const struct i915_request *rq = *engine->execlists.active;
 
@@ -1845,7 +1866,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
 			last = NULL;
 		} else if (need_timeslice(engine, last) &&
-			   timer_expired(&engine->execlists.timer)) {
+			   timeslice_expired(engine, last)) {
 			ENGINE_TRACE(engine,
 				     "expired last=%llx:%lld, prio=%d, hint=%d\n",
 				     last->fence.context,
@@ -2111,6 +2132,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		}
 		clear_ports(port + 1, last_port - port);
 
+		WRITE_ONCE(execlists->yield, -1);
 		execlists_submit_ports(engine);
 		set_preempt_timeout(engine);
 	} else {
@@ -3960,6 +3982,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 
 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
+	engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
 }
 
 static void rcs_submission_override(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 6cc55c103f67..a4976512835f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3085,6 +3085,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define GT_BSD_CS_ERROR_INTERRUPT		(1 << 15)
 #define GT_BSD_USER_INTERRUPT			(1 << 12)
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1	(1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
+#define GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11) /* bdw+ */
 #define GT_CONTEXT_SWITCH_INTERRUPT		(1 <<  8)
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT	(1 <<  5) /* !snb */
 #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	(1 <<  4)
@@ -4036,6 +4037,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   CCID_EN			BIT(0)
 #define   CCID_EXTENDED_STATE_RESTORE	BIT(2)
 #define   CCID_EXTENDED_STATE_SAVE	BIT(3)
+
+#define EXECLIST_STATUS(base)	_MMIO((base) + 0x234)
+#define EXECLIST_CCID(base)	_MMIO((base) + 0x238)
+
 /*
  * Notes on SNB/IVB/VLV context size:
  * - Power context is saved elsewhere (LLC or stolen)
-- 
2.25.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Intel-gfx] [PATCH 08/14] drm/i915: Use common priotree lists for virtual engine
  2020-01-09  8:58 [Intel-gfx] [PATCH 01/14] drm/i915/gt: Push context state allocation earlier Chris Wilson
                   ` (5 preceding siblings ...)
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 07/14] drm/i915/gt: Yield the timeslice if waiting on a semaphore Chris Wilson
@ 2020-01-09  8:58 ` Chris Wilson
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 09/14] drm/i915/gt: Allow temporary suspension of inflight requests Chris Wilson
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2020-01-09  8:58 UTC (permalink / raw)
  To: intel-gfx

Since commit 422d7df4f090 ("drm/i915: Replace engine->timeline with a
plain list"), we used the default embedded priotree slot for the virtual
engine request queue, which means we can also use the same solitary slot
with the scheduler. However, the priolist is expected to be guarded by
the engine->active.lock, but this is not true for the virtual engine

References: 422d7df4f090 ("drm/i915: Replace engine->timeline with a plain list")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c   |  3 +++
 drivers/gpu/drm/i915/i915_request.c   |  4 +++-
 drivers/gpu/drm/i915/i915_request.h   | 16 ++++++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.c |  3 +--
 4 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 08d5087e7582..45f90cfbe2d6 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -985,6 +985,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
 			list_move(&rq->sched.link, pl);
+			set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+
 			active = rq;
 		} else {
 			struct intel_engine_cs *owner = rq->context->engine;
@@ -2499,6 +2501,7 @@ static void execlists_submit_request(struct i915_request *request)
 	spin_lock_irqsave(&engine->active.lock, flags);
 
 	queue_request(engine, &request->sched, rq_prio(request));
+	set_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
 
 	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 	GEM_BUG_ON(list_empty(&request->sched.link));
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index be185886e4fc..9ed0d3bc7249 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -408,8 +408,10 @@ bool __i915_request_submit(struct i915_request *request)
 xfer:	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
-	if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
+	if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) {
 		list_move_tail(&request->sched.link, &engine->active.requests);
+		clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
+	}
 
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
 	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 031433691a06..f3e50ec989b8 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -70,6 +70,17 @@ enum {
 	 */
 	I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS,
 
+	/*
+	 * I915_FENCE_FLAG_PQUEUE - this request is ready for execution
+	 *
+	 * Using the scheduler, when a request is ready for execution it is put
+	 * into the priority queue. We want to track its membership within that
+	 * queue so that we can easily check before rescheduling.
+	 *
+	 * See i915_request_in_priority_queue()
+	 */
+	I915_FENCE_FLAG_PQUEUE,
+
 	/*
 	 * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list
 	 *
@@ -361,6 +372,11 @@ static inline bool i915_request_is_active(const struct i915_request *rq)
 	return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
 }
 
+static inline bool i915_request_in_priority_queue(const struct i915_request *rq)
+{
+	return test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+}
+
 /**
  * Returns true if seq1 is later than seq2.
  */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index bf87c70bfdd9..4f6e4d6c590a 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -338,8 +338,7 @@ static void __i915_schedule(struct i915_sched_node *node,
 			continue;
 		}
 
-		if (!intel_engine_is_virtual(engine) &&
-		    !i915_request_is_active(node_to_request(node))) {
+		if (i915_request_in_priority_queue(node_to_request(node))) {
 			if (!cache.priolist)
 				cache.priolist =
 					i915_sched_lookup_priolist(engine,
-- 
2.25.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Intel-gfx] [PATCH 09/14] drm/i915/gt: Allow temporary suspension of inflight requests
  2020-01-09  8:58 [Intel-gfx] [PATCH 01/14] drm/i915/gt: Push context state allocation earlier Chris Wilson
                   ` (6 preceding siblings ...)
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 08/14] drm/i915: Use common priotree lists for virtual engine Chris Wilson
@ 2020-01-09  8:58 ` Chris Wilson
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 10/14] drm/i915: Start chopping up the GPU error capture Chris Wilson
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2020-01-09  8:58 UTC (permalink / raw)
  To: intel-gfx

In order to support out-of-line error capture, we need to remove the
active request from HW and put it to one side while a worker compresses
and stores all the details associated with that request. (As that
compression may take an arbitrary user-controlled amount of time, we
want to let the engine continue running on other workloads while the
hanging request is dumped.) Not only do we need to remove the active
request, but we also have to remove its context and all requests that
were dependent on it (both in flight, queued and future submission).

Finally once the capture is complete, we need to be able to resubmit the
request and its dependents and allow them to execute.

References: https://gitlab.freedesktop.org/drm/intel/issues/738
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    |   1 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   1 +
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 109 ++++++++++++++++++-
 drivers/gpu/drm/i915/gt/selftest_lrc.c       | 103 ++++++++++++++++++
 drivers/gpu/drm/i915/i915_request.h          |  22 ++++
 5 files changed, 231 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 825c94e7ca0b..f06fc6627529 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -671,6 +671,7 @@ void
 intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
 {
 	INIT_LIST_HEAD(&engine->active.requests);
+	INIT_LIST_HEAD(&engine->active.hold);
 
 	spin_lock_init(&engine->active.lock);
 	lockdep_set_subclass(&engine->active.lock, subclass);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index d146d2fbd42a..5181549246de 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -304,6 +304,7 @@ struct intel_engine_cs {
 	struct {
 		spinlock_t lock;
 		struct list_head requests;
+		struct list_head hold;
 	} active;
 
 	struct llist_head barrier_tasks;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 45f90cfbe2d6..f2791f98ea68 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2381,6 +2381,77 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 	}
 }
 
+static void __execlists_hold(struct i915_request *rq)
+{
+	struct i915_dependency *p;
+
+	if (!i915_request_completed(rq)) {
+		RQ_TRACE(rq, "on hold\n");
+		if (i915_request_is_active(rq))
+			__i915_request_unsubmit(rq);
+		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
+		i915_request_set_hold(rq);
+	}
+
+	list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+		struct i915_request *w =
+			container_of(p->waiter, typeof(*w), sched);
+
+		/* Leave semaphores spinning on the other engines */
+		if (w->engine != rq->engine)
+			continue;
+
+		__execlists_hold(w);
+	}
+}
+
+__maybe_unused
+static void execlists_hold(struct intel_engine_cs *engine,
+			   struct i915_request *rq)
+{
+	spin_lock_irq(&engine->active.lock);
+	__execlists_hold(rq);
+	spin_unlock_irq(&engine->active.lock);
+}
+
+static void __execlists_unhold(struct i915_request *rq)
+{
+	struct i915_dependency *p;
+
+	if (!i915_request_has_hold(rq))
+		return;
+
+	i915_request_clear_hold(rq);
+	list_move_tail(&rq->sched.link,
+		       i915_sched_lookup_priolist(rq->engine, rq_prio(rq)));
+	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+	RQ_TRACE(rq, "hold release\n");
+
+	list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+		struct i915_request *w =
+			container_of(p->waiter, typeof(*w), sched);
+
+		if (w->engine != rq->engine)
+			continue;
+
+		__execlists_unhold(w);
+	}
+}
+
+__maybe_unused
+static void execlists_unhold(struct intel_engine_cs *engine,
+			     struct i915_request *rq)
+{
+	spin_lock_irq(&engine->active.lock);
+	__execlists_unhold(rq);
+	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
+		engine->execlists.queue_priority_hint = rq_prio(rq);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
+	}
+	spin_unlock_irq(&engine->active.lock);
+}
+
 static noinline void preempt_reset(struct intel_engine_cs *engine)
 {
 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
@@ -2492,6 +2563,29 @@ static void submit_queue(struct intel_engine_cs *engine,
 	__submit_queue_imm(engine);
 }
 
+static bool hold_request(const struct i915_request *rq)
+{
+	struct i915_dependency *p;
+
+	list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
+		const struct i915_request *s =
+			container_of(p->signaler, typeof(*s), sched);
+
+		if (s->engine != rq->engine)
+			continue;
+
+		return i915_request_has_hold(s);
+	}
+
+	return false;
+}
+
+static bool on_hold(const struct intel_engine_cs *engine,
+		    const struct i915_request *rq)
+{
+	return !list_empty(&engine->active.hold) && hold_request(rq);
+}
+
 static void execlists_submit_request(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
@@ -2500,13 +2594,18 @@ static void execlists_submit_request(struct i915_request *request)
 	/* Will be called from irq-context when using foreign fences. */
 	spin_lock_irqsave(&engine->active.lock, flags);
 
-	queue_request(engine, &request->sched, rq_prio(request));
-	set_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
+	if (unlikely(on_hold(engine, request))) {
+		i915_request_set_hold(request);
+		list_add_tail(&request->sched.link, &engine->active.hold);
+	} else {
+		queue_request(engine, &request->sched, rq_prio(request));
+		set_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
 
-	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
-	GEM_BUG_ON(list_empty(&request->sched.link));
+		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+		GEM_BUG_ON(list_empty(&request->sched.link));
 
-	submit_queue(engine, request);
+		submit_queue(engine, request);
+	}
 
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 15cda024e3e4..78501d79c0ea 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -285,6 +285,108 @@ static int live_unlite_preempt(void *arg)
 	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
 }
 
+static int live_hold_reset(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct igt_spinner spin;
+	int err = 0;
+
+	/*
+	 * In order to support offline error capture for fast preempt reset,
+	 * we need to decouple the guilty request and ensure that it and its
+	 * descendents are not executed while the capture is in progress.
+	 */
+
+	if (!intel_has_reset_engine(gt))
+		return 0;
+
+	if (igt_spinner_init(&spin, gt))
+		return -ENOMEM;
+
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce;
+		unsigned long heartbeat;
+		struct i915_request *rq;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			break;
+		}
+
+		engine_heartbeat_disable(engine, &heartbeat);
+
+		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto out;
+		}
+		i915_request_add(rq);
+
+		if (!igt_wait_for_spinner(&spin, rq)) {
+			intel_gt_set_wedged(gt);
+			err = -ETIME;
+			goto out;
+		}
+
+		/* We have our request executing, now remove it and reset */
+
+		if (test_and_set_bit(I915_RESET_ENGINE + id,
+				     &gt->reset.flags)) {
+			spin_unlock_irq(&engine->active.lock);
+			intel_gt_set_wedged(gt);
+			err = -EBUSY;
+			goto out;
+		}
+		tasklet_disable(&engine->execlists.tasklet);
+
+		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
+		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+		execlists_hold(engine, rq);
+		GEM_BUG_ON(!i915_request_has_hold(rq));
+
+		intel_engine_reset(engine, NULL);
+		GEM_BUG_ON(rq->fence.error != -EIO);
+
+		tasklet_enable(&engine->execlists.tasklet);
+		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
+				      &gt->reset.flags);
+
+		/* Check that we do not resubmit the held request */
+		i915_request_get(rq);
+		if (!i915_request_wait(rq, 0, HZ / 5)) {
+			pr_err("%s: on hold request completed!\n",
+			       engine->name);
+			i915_request_put(rq);
+			err = -EIO;
+			goto out;
+		}
+		GEM_BUG_ON(!i915_request_has_hold(rq));
+
+		/* But is resubmitted on release */
+		execlists_unhold(engine, rq);
+		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+			pr_err("%s: held request did not complete!\n",
+			       engine->name);
+			intel_gt_set_wedged(gt);
+			err = -ETIME;
+		}
+		i915_request_put(rq);
+
+out:
+		engine_heartbeat_enable(engine, heartbeat);
+		intel_context_put(ce);
+		if (err)
+			break;
+	}
+
+	igt_spinner_fini(&spin);
+	return err;
+}
+
 static int
 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
 {
@@ -3315,6 +3417,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_sanitycheck),
 		SUBTEST(live_unlite_switch),
 		SUBTEST(live_unlite_preempt),
+		SUBTEST(live_hold_reset),
 		SUBTEST(live_timeslice_preempt),
 		SUBTEST(live_timeslice_queue),
 		SUBTEST(live_busywait_preempt),
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index f3e50ec989b8..2da8cfb3f3a1 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -89,6 +89,13 @@ enum {
 	 */
 	I915_FENCE_FLAG_SIGNAL,
 
+	/*
+	 * I915_FENCE_FLAG_HOLD - this request is currently on hold
+	 *
+	 * This request has been suspended, pending an ongoing investigation.
+	 */
+	I915_FENCE_FLAG_HOLD,
+
 	/*
 	 * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted
 	 *
@@ -499,6 +506,21 @@ static inline bool i915_request_has_sentinel(const struct i915_request *rq)
 	return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags));
 }
 
+static inline bool i915_request_has_hold(const struct i915_request *rq)
+{
+	return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags));
+}
+
+static inline void i915_request_set_hold(struct i915_request *rq)
+{
+	set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+}
+
+static inline void i915_request_clear_hold(struct i915_request *rq)
+{
+	clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+}
+
 static inline struct intel_timeline *
 i915_request_timeline(struct i915_request *rq)
 {
-- 
2.25.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Intel-gfx] [PATCH 10/14] drm/i915: Start chopping up the GPU error capture
  2020-01-09  8:58 [Intel-gfx] [PATCH 01/14] drm/i915/gt: Push context state allocation earlier Chris Wilson
                   ` (7 preceding siblings ...)
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 09/14] drm/i915/gt: Allow temporary suspension of inflight requests Chris Wilson
@ 2020-01-09  8:58 ` Chris Wilson
  2020-01-09 15:31   ` Andi Shyti
  2020-01-11 10:24   ` kbuild test robot
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 11/14] drm/i915: Drop the shadow ring state from the " Chris Wilson
                   ` (4 subsequent siblings)
  13 siblings, 2 replies; 30+ messages in thread
From: Chris Wilson @ 2020-01-09  8:58 UTC (permalink / raw)
  To: intel-gfx

In the near future, we will want to start a GPU error capture from a new
context, from inside the softirq region of a forced preemption. To do
so requires us to break up the monolithic error capture to provide new
entry points with finer control; in particular focusing on one
engine/gt, and being able to compose an error state from little pieces
of HW capture.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Andi Shyti <andi.shyti@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h       |    2 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    |    6 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c         |    3 +
 drivers/gpu/drm/i915/gt/intel_gtt.h          |    1 +
 drivers/gpu/drm/i915/gt/intel_reset.c        |    2 +-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c |    2 +-
 drivers/gpu/drm/i915/i915_debugfs.c          |   14 +-
 drivers/gpu/drm/i915/i915_drv.h              |    2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c        | 1169 ++++++++++--------
 drivers/gpu/drm/i915/i915_gpu_error.h        |  328 +++--
 drivers/gpu/drm/i915/i915_sysfs.c            |    6 +-
 11 files changed, 864 insertions(+), 671 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 71f1bcdfc92f..5df003061e44 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -202,7 +202,7 @@ void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
 u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
 
-void intel_engine_get_instdone(struct intel_engine_cs *engine,
+void intel_engine_get_instdone(const struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
 
 void intel_engine_init_execlists(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f06fc6627529..c296aaf381e7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -915,8 +915,8 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
 }
 
 static u32
-read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
-		  i915_reg_t reg)
+read_subslice_reg(const struct intel_engine_cs *engine,
+		  int slice, int subslice, i915_reg_t reg)
 {
 	struct drm_i915_private *i915 = engine->i915;
 	struct intel_uncore *uncore = engine->uncore;
@@ -960,7 +960,7 @@ read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
 }
 
 /* NB: please notice the memset */
-void intel_engine_get_instdone(struct intel_engine_cs *engine,
+void intel_engine_get_instdone(const struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone)
 {
 	struct drm_i915_private *i915 = engine->i915;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 99189cdba8a9..b8434cff9639 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -494,6 +494,7 @@ static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
 	ggtt_release_guc_top(ggtt);
 	if (drm_mm_node_allocated(&ggtt->error_capture))
 		drm_mm_remove_node(&ggtt->error_capture);
+	mutex_destroy(&ggtt->error_mutex);
 }
 
 static int init_ggtt(struct i915_ggtt *ggtt)
@@ -525,6 +526,7 @@ static int init_ggtt(struct i915_ggtt *ggtt)
 	if (ret)
 		return ret;
 
+	mutex_init(&ggtt->error_mutex);
 	if (ggtt->mappable_end) {
 		/* Reserve a mappable slot for our lockless error capture */
 		ret = drm_mm_insert_node_in_range(&ggtt->vm.mm,
@@ -715,6 +717,7 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
 
 	if (drm_mm_node_allocated(&ggtt->error_capture))
 		drm_mm_remove_node(&ggtt->error_capture);
+	mutex_destroy(&ggtt->error_mutex);
 
 	ggtt_release_guc_top(ggtt);
 	intel_vgt_deballoon(ggtt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 029363cbdf49..7da7681c20b1 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -345,6 +345,7 @@ struct i915_ggtt {
 	/* Manual runtime pm autosuspend delay for user GGTT mmaps */
 	struct intel_wakeref_auto userfault_wakeref;
 
+	struct mutex error_mutex;
 	struct drm_mm_node error_capture;
 	struct drm_mm_node uc_fw;
 };
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 76de33ae9efe..beee0cf89bce 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1230,7 +1230,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
 	engine_mask &= INTEL_INFO(gt->i915)->engine_mask;
 
 	if (flags & I915_ERROR_CAPTURE) {
-		i915_capture_error_state(gt->i915, engine_mask, msg);
+		i915_capture_error_state(gt->i915);
 		intel_gt_clear_error_registers(gt, engine_mask);
 	}
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 7c824c26b705..3e5e6c86e843 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -1498,7 +1498,7 @@ static int igt_handle_error(void *arg)
 	struct intel_engine_cs *engine = gt->engine[RCS0];
 	struct hang h;
 	struct i915_request *rq;
-	struct i915_gpu_state *error;
+	struct i915_gpu_coredump *error;
 	int err;
 
 	/* Check that we can issue a global GPU and engine reset */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 9e18e10c125f..e19eaf275c13 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -685,7 +685,7 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
 			      size_t count, loff_t *pos)
 {
-	struct i915_gpu_state *error;
+	struct i915_gpu_coredump *error;
 	ssize_t ret;
 	void *buf;
 
@@ -698,7 +698,7 @@ static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
 	if (!buf)
 		return -ENOMEM;
 
-	ret = i915_gpu_state_copy_to_buffer(error, buf, *pos, count);
+	ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count);
 	if (ret <= 0)
 		goto out;
 
@@ -714,19 +714,19 @@ static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
 
 static int gpu_state_release(struct inode *inode, struct file *file)
 {
-	i915_gpu_state_put(file->private_data);
+	i915_gpu_coredump_put(file->private_data);
 	return 0;
 }
 
 static int i915_gpu_info_open(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
-	struct i915_gpu_state *gpu;
+	struct i915_gpu_coredump *gpu;
 	intel_wakeref_t wakeref;
 
 	gpu = NULL;
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-		gpu = i915_capture_gpu_state(i915);
+		gpu = i915_gpu_coredump(i915);
 	if (IS_ERR(gpu))
 		return PTR_ERR(gpu);
 
@@ -748,7 +748,7 @@ i915_error_state_write(struct file *filp,
 		       size_t cnt,
 		       loff_t *ppos)
 {
-	struct i915_gpu_state *error = filp->private_data;
+	struct i915_gpu_coredump *error = filp->private_data;
 
 	if (!error)
 		return 0;
@@ -761,7 +761,7 @@ i915_error_state_write(struct file *filp,
 
 static int i915_error_state_open(struct inode *inode, struct file *file)
 {
-	struct i915_gpu_state *error;
+	struct i915_gpu_coredump *error;
 
 	error = i915_first_error_state(inode->i_private);
 	if (IS_ERR(error))
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 49d314bb84c9..d5526a0b1f7e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1875,7 +1875,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
 }
 
 static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
-					  struct intel_engine_cs *engine)
+					  const struct intel_engine_cs *engine)
 {
 	return atomic_read(&error->reset_engine_count[engine->uabi_class]);
 }
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index fda0977d2059..b45c128d0a17 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -41,6 +41,7 @@
 
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_lmem.h"
+#include "gt/intel_gt_pm.h"
 
 #include "i915_drv.h"
 #include "i915_gpu_error.h"
@@ -232,14 +233,13 @@ static void pool_free(struct pagevec *pv, void *addr)
 
 #ifdef CONFIG_DRM_I915_COMPRESS_ERROR
 
-struct compress {
+struct i915_vma_compress {
 	struct pagevec pool;
 	struct z_stream_s zstream;
 	void *tmp;
-	bool wc;
 };
 
-static bool compress_init(struct compress *c)
+static bool compress_init(struct i915_vma_compress *c)
 {
 	struct z_stream_s *zstream = &c->zstream;
 
@@ -261,7 +261,7 @@ static bool compress_init(struct compress *c)
 	return true;
 }
 
-static bool compress_start(struct compress *c)
+static bool compress_start(struct i915_vma_compress *c)
 {
 	struct z_stream_s *zstream = &c->zstream;
 	void *workspace = zstream->workspace;
@@ -272,8 +272,8 @@ static bool compress_start(struct compress *c)
 	return zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) == Z_OK;
 }
 
-static void *compress_next_page(struct compress *c,
-				struct drm_i915_error_object *dst)
+static void *compress_next_page(struct i915_vma_compress *c,
+				struct i915_vma_coredump *dst)
 {
 	void *page;
 
@@ -287,14 +287,15 @@ static void *compress_next_page(struct compress *c,
 	return dst->pages[dst->page_count++] = page;
 }
 
-static int compress_page(struct compress *c,
+static int compress_page(struct i915_vma_compress *c,
 			 void *src,
-			 struct drm_i915_error_object *dst)
+			 struct i915_vma_coredump *dst,
+			 bool wc)
 {
 	struct z_stream_s *zstream = &c->zstream;
 
 	zstream->next_in = src;
-	if (c->wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
+	if (wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
 		zstream->next_in = c->tmp;
 	zstream->avail_in = PAGE_SIZE;
 
@@ -318,8 +319,8 @@ static int compress_page(struct compress *c,
 	return 0;
 }
 
-static int compress_flush(struct compress *c,
-			  struct drm_i915_error_object *dst)
+static int compress_flush(struct i915_vma_compress *c,
+			  struct i915_vma_coredump *dst)
 {
 	struct z_stream_s *zstream = &c->zstream;
 
@@ -347,12 +348,12 @@ static int compress_flush(struct compress *c,
 	return 0;
 }
 
-static void compress_finish(struct compress *c)
+static void compress_finish(struct i915_vma_compress *c)
 {
 	zlib_deflateEnd(&c->zstream);
 }
 
-static void compress_fini(struct compress *c)
+static void compress_fini(struct i915_vma_compress *c)
 {
 	kfree(c->zstream.workspace);
 	if (c->tmp)
@@ -367,24 +368,24 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m)
 
 #else
 
-struct compress {
+struct i915_vma_compress {
 	struct pagevec pool;
-	bool wc;
 };
 
-static bool compress_init(struct compress *c)
+static bool compress_init(struct i915_vma_compress *c)
 {
 	return pool_init(&c->pool, ALLOW_FAIL) == 0;
 }
 
-static bool compress_start(struct compress *c)
+static bool compress_start(struct i915_vma_compress *c)
 {
 	return true;
 }
 
-static int compress_page(struct compress *c,
+static int compress_page(struct i915_vma_compress *c,
 			 void *src,
-			 struct drm_i915_error_object *dst)
+			 struct i915_vma_coredump *dst,
+			 bool wc)
 {
 	void *ptr;
 
@@ -392,24 +393,24 @@ static int compress_page(struct compress *c,
 	if (!ptr)
 		return -ENOMEM;
 
-	if (!(c->wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE)))
+	if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE)))
 		memcpy(ptr, src, PAGE_SIZE);
 	dst->pages[dst->page_count++] = ptr;
 
 	return 0;
 }
 
-static int compress_flush(struct compress *c,
-			  struct drm_i915_error_object *dst)
+static int compress_flush(struct i915_vma_compress *c,
+			  struct i915_vma_coredump *dst)
 {
 	return 0;
 }
 
-static void compress_finish(struct compress *c)
+static void compress_finish(struct i915_vma_compress *c)
 {
 }
 
-static void compress_fini(struct compress *c)
+static void compress_fini(struct i915_vma_compress *c)
 {
 	pool_fini(&c->pool);
 }
@@ -422,7 +423,7 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m)
 #endif
 
 static void error_print_instdone(struct drm_i915_error_state_buf *m,
-				 const struct drm_i915_error_engine *ee)
+				 const struct intel_engine_coredump *ee)
 {
 	const struct sseu_dev_info *sseu = &RUNTIME_INFO(m->i915)->sseu;
 	int slice;
@@ -453,40 +454,56 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m,
 
 static void error_print_request(struct drm_i915_error_state_buf *m,
 				const char *prefix,
-				const struct drm_i915_error_request *erq,
-				const unsigned long epoch)
+				const struct i915_request_coredump *erq)
 {
 	if (!erq->seqno)
 		return;
 
-	err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",
+	err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, start %08x, head %08x, tail %08x\n",
 		   prefix, erq->pid, erq->context, erq->seqno,
 		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 			    &erq->flags) ? "!" : "",
 		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
 			    &erq->flags) ? "+" : "",
 		   erq->sched_attr.priority,
-		   jiffies_to_msecs(erq->jiffies - epoch),
 		   erq->start, erq->head, erq->tail);
 }
 
 static void error_print_context(struct drm_i915_error_state_buf *m,
 				const char *header,
-				const struct drm_i915_error_context *ctx)
+				const struct i915_gem_context_coredump *ctx)
 {
 	err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n",
 		   header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
 		   ctx->guilty, ctx->active);
 }
 
+static struct i915_vma_coredump *
+__find_vma(struct i915_vma_coredump *vma, const char *name)
+{
+	while (vma) {
+		if (strcmp(vma->name, name) == 0)
+			return vma;
+		vma = vma->next;
+	}
+
+	return NULL;
+}
+
+static struct i915_vma_coredump *
+find_batch(const struct intel_engine_coredump *ee)
+{
+	return __find_vma(ee->vma, "batch");
+}
+
 static void error_print_engine(struct drm_i915_error_state_buf *m,
-			       const struct drm_i915_error_engine *ee,
-			       const unsigned long epoch)
+			       const struct intel_engine_coredump *ee)
 {
+	struct i915_vma_coredump *batch;
 	int n;
 
 	err_printf(m, "%s command stream:\n", ee->engine->name);
-	err_printf(m, "  IDLE?: %s\n", yesno(ee->idle));
+	err_printf(m, "  CCID:  0x%08x\n", ee->ccid);
 	err_printf(m, "  START: 0x%08x\n", ee->start);
 	err_printf(m, "  HEAD:  0x%08x [0x%08x]\n", ee->head, ee->rq_head);
 	err_printf(m, "  TAIL:  0x%08x [0x%08x, 0x%08x]\n",
@@ -501,9 +518,10 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 
 	error_print_instdone(m, ee);
 
-	if (ee->batchbuffer) {
-		u64 start = ee->batchbuffer->gtt_offset;
-		u64 end = start + ee->batchbuffer->gtt_size;
+	batch = find_batch(ee);
+	if (batch) {
+		u64 start = batch->gtt_offset;
+		u64 end = start + batch->gtt_size;
 
 		err_printf(m, "  batch: [0x%08x_%08x, 0x%08x_%08x]\n",
 			   upper_32_bits(start), lower_32_bits(start),
@@ -541,7 +559,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 
 	for (n = 0; n < ee->num_ports; n++) {
 		err_printf(m, "  ELSP[%d]:", n);
-		error_print_request(m, " ", &ee->execlist[n], epoch);
+		error_print_request(m, " ", &ee->execlist[n]);
 	}
 
 	error_print_context(m, "  Active context: ", &ee->context);
@@ -556,38 +574,35 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
 	va_end(args);
 }
 
-static void print_error_obj(struct drm_i915_error_state_buf *m,
+static void print_error_vma(struct drm_i915_error_state_buf *m,
 			    const struct intel_engine_cs *engine,
-			    const char *name,
-			    const struct drm_i915_error_object *obj)
+			    const struct i915_vma_coredump *vma)
 {
 	char out[ASCII85_BUFSZ];
 	int page;
 
-	if (!obj)
+	if (!vma)
 		return;
 
-	if (name) {
-		err_printf(m, "%s --- %s = 0x%08x %08x\n",
-			   engine ? engine->name : "global", name,
-			   upper_32_bits(obj->gtt_offset),
-			   lower_32_bits(obj->gtt_offset));
-	}
+	err_printf(m, "%s --- %s = 0x%08x %08x\n",
+		   engine ? engine->name : "global", vma->name,
+		   upper_32_bits(vma->gtt_offset),
+		   lower_32_bits(vma->gtt_offset));
 
-	if (obj->gtt_page_sizes > I915_GTT_PAGE_SIZE_4K)
-		err_printf(m, "gtt_page_sizes = 0x%08x\n", obj->gtt_page_sizes);
+	if (vma->gtt_page_sizes > I915_GTT_PAGE_SIZE_4K)
+		err_printf(m, "gtt_page_sizes = 0x%08x\n", vma->gtt_page_sizes);
 
 	err_compression_marker(m);
-	for (page = 0; page < obj->page_count; page++) {
+	for (page = 0; page < vma->page_count; page++) {
 		int i, len;
 
 		len = PAGE_SIZE;
-		if (page == obj->page_count - 1)
-			len -= obj->unused;
+		if (page == vma->page_count - 1)
+			len -= vma->unused;
 		len = ascii85_encode_len(len);
 
 		for (i = 0; i < len; i++)
-			err_puts(m, ascii85_encode(obj->pages[page][i], out));
+			err_puts(m, ascii85_encode(vma->pages[page][i], out));
 	}
 	err_puts(m, "\n");
 }
@@ -626,18 +641,13 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m,
 }
 
 static void err_print_uc(struct drm_i915_error_state_buf *m,
-			 const struct i915_error_uc *error_uc)
+			 const struct intel_uc_coredump *error_uc)
 {
 	struct drm_printer p = i915_error_printer(m);
-	const struct i915_gpu_state *error =
-		container_of(error_uc, typeof(*error), uc);
-
-	if (!error->device_info.has_gt_uc)
-		return;
 
 	intel_uc_fw_dump(&error_uc->guc_fw, &p);
 	intel_uc_fw_dump(&error_uc->huc_fw, &p);
-	print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log);
+	print_error_vma(m, NULL, error_uc->guc_log);
 }
 
 static void err_free_sgl(struct scatterlist *sgl)
@@ -657,12 +667,78 @@ static void err_free_sgl(struct scatterlist *sgl)
 	}
 }
 
+static void err_print_gt(struct drm_i915_error_state_buf *m,
+			 struct intel_gt_coredump *gt)
+{
+	const struct intel_engine_coredump *ee;
+	int i, j;
+
+	err_printf(m, "GT awake: %s\n", yesno(gt->awake));
+	err_printf(m, "EIR: 0x%08x\n", gt->eir);
+	err_printf(m, "IER: 0x%08x\n", gt->ier);
+	for (i = 0; i < gt->ngtier; i++)
+		err_printf(m, "GTIER[%d]: 0x%08x\n", i, gt->gtier[i]);
+	err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er);
+	err_printf(m, "FORCEWAKE: 0x%08x\n", gt->forcewake);
+	err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr);
+
+	for (i = 0; i < gt->nfence; i++)
+		err_printf(m, "  fence[%d] = %08llx\n", i, gt->fence[i]);
+
+	if (IS_GEN_RANGE(m->i915, 6, 11)) {
+		err_printf(m, "ERROR: 0x%08x\n", gt->error);
+		err_printf(m, "DONE_REG: 0x%08x\n", gt->done_reg);
+	}
+
+	if (INTEL_GEN(m->i915) >= 8)
+		err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n",
+			   gt->fault_data1, gt->fault_data0);
+
+	if (IS_GEN(m->i915, 7))
+		err_printf(m, "ERR_INT: 0x%08x\n", gt->err_int);
+
+	if (IS_GEN_RANGE(m->i915, 8, 11))
+		err_printf(m, "GTT_CACHE_EN: 0x%08x\n", gt->gtt_cache);
+
+	if (IS_GEN(m->i915, 12))
+		err_printf(m, "AUX_ERR_DBG: 0x%08x\n", gt->aux_err);
+
+	if (INTEL_GEN(m->i915) >= 12) {
+		int i;
+
+		for (i = 0; i < GEN12_SFC_DONE_MAX; i++)
+			err_printf(m, "  SFC_DONE[%d]: 0x%08x\n", i,
+				   gt->sfc_done[i]);
+
+		err_printf(m, "  GAM_DONE: 0x%08x\n", gt->gam_done);
+	}
+
+	for (ee = gt->engine; ee; ee = ee->next) {
+		const struct i915_vma_coredump *vma;
+
+		error_print_engine(m, ee);
+
+		for (vma = ee->vma; vma; vma = vma->next)
+			print_error_vma(m, ee->engine, vma);
+
+		if (ee->num_requests) {
+			err_printf(m, "%s --- %d requests\n",
+				   ee->engine->name,
+				   ee->num_requests);
+			for (j = 0; j < ee->num_requests; j++)
+				error_print_request(m, " ", &ee->requests[j]);
+		}
+	}
+
+	if (gt->uc)
+		err_print_uc(m, gt->uc);
+}
+
 static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
-			       struct i915_gpu_state *error)
+			       struct i915_gpu_coredump *error)
 {
-	const struct drm_i915_error_engine *ee;
+	const struct intel_engine_coredump *ee;
 	struct timespec64 ts;
-	int i, j;
 
 	if (*error->error_msg)
 		err_printf(m, "%s\n", error->error_msg);
@@ -682,7 +758,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 	err_printf(m, "Capture: %lu jiffies; %d ms ago\n",
 		   error->capture, jiffies_to_msecs(jiffies - error->capture));
 
-	for (ee = error->engine; ee; ee = ee->next)
+	for (ee = error->gt ? error->gt->engine : NULL; ee; ee = ee->next)
 		err_printf(m, "Active process (on ring %s): %s [%d]\n",
 			   ee->engine->name,
 			   ee->context.comm,
@@ -708,90 +784,11 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 			   CSR_VERSION_MINOR(csr->version));
 	}
 
-	err_printf(m, "GT awake: %s\n", yesno(error->awake));
 	err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock));
 	err_printf(m, "PM suspended: %s\n", yesno(error->suspended));
-	err_printf(m, "EIR: 0x%08x\n", error->eir);
-	err_printf(m, "IER: 0x%08x\n", error->ier);
-	for (i = 0; i < error->ngtier; i++)
-		err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]);
-	err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
-	err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
-	err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
-	err_printf(m, "CCID: 0x%08x\n", error->ccid);
-
-	for (i = 0; i < error->nfence; i++)
-		err_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);
-
-	if (IS_GEN_RANGE(m->i915, 6, 11)) {
-		err_printf(m, "ERROR: 0x%08x\n", error->error);
-		err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg);
-	}
-
-	if (INTEL_GEN(m->i915) >= 8)
-		err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n",
-			   error->fault_data1, error->fault_data0);
-
-	if (IS_GEN(m->i915, 7))
-		err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
-
-	if (IS_GEN_RANGE(m->i915, 8, 11))
-		err_printf(m, "GTT_CACHE_EN: 0x%08x\n", error->gtt_cache);
-
-	if (IS_GEN(m->i915, 12))
-		err_printf(m, "AUX_ERR_DBG: 0x%08x\n", error->aux_err);
-
-	if (INTEL_GEN(m->i915) >= 12) {
-		int i;
-
-		for (i = 0; i < GEN12_SFC_DONE_MAX; i++)
-			err_printf(m, "  SFC_DONE[%d]: 0x%08x\n", i,
-				   error->sfc_done[i]);
-
-		err_printf(m, "  GAM_DONE: 0x%08x\n", error->gam_done);
-	}
-
-	for (ee = error->engine; ee; ee = ee->next)
-		error_print_engine(m, ee, error->capture);
-
-	for (ee = error->engine; ee; ee = ee->next) {
-		const struct drm_i915_error_object *obj;
-
-		obj = ee->batchbuffer;
-		if (obj) {
-			err_puts(m, ee->engine->name);
-			if (ee->context.pid)
-				err_printf(m, " (submitted by %s [%d])",
-					   ee->context.comm,
-					   ee->context.pid);
-			err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
-				   upper_32_bits(obj->gtt_offset),
-				   lower_32_bits(obj->gtt_offset));
-			print_error_obj(m, ee->engine, NULL, obj);
-		}
-
-		for (j = 0; j < ee->user_bo_count; j++)
-			print_error_obj(m, ee->engine, "user", ee->user_bo[j]);
-
-		if (ee->num_requests) {
-			err_printf(m, "%s --- %d requests\n",
-				   ee->engine->name,
-				   ee->num_requests);
-			for (j = 0; j < ee->num_requests; j++)
-				error_print_request(m, " ",
-						    &ee->requests[j],
-						    error->capture);
-		}
 
-		print_error_obj(m, ee->engine, "ringbuffer", ee->ringbuffer);
-		print_error_obj(m, ee->engine, "HW Status", ee->hws_page);
-		print_error_obj(m, ee->engine, "HW context", ee->ctx);
-		print_error_obj(m, ee->engine, "WA context", ee->wa_ctx);
-		print_error_obj(m, ee->engine,
-				"WA batchbuffer", ee->wa_batchbuffer);
-		print_error_obj(m, ee->engine,
-				"NULL context", ee->default_state);
-	}
+	if (error->gt)
+		err_print_gt(m, error->gt);
 
 	if (error->overlay)
 		intel_overlay_print_error_state(m, error->overlay);
@@ -802,10 +799,9 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 	err_print_capabilities(m, &error->device_info, &error->runtime_info,
 			       &error->driver_caps);
 	err_print_params(m, &error->params);
-	err_print_uc(m, &error->uc);
 }
 
-static int err_print_to_sgl(struct i915_gpu_state *error)
+static int err_print_to_sgl(struct i915_gpu_coredump *error)
 {
 	struct drm_i915_error_state_buf m;
 
@@ -842,8 +838,8 @@ static int err_print_to_sgl(struct i915_gpu_state *error)
 	return 0;
 }
 
-ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error,
-				      char *buf, loff_t off, size_t rem)
+ssize_t i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
+					 char *buf, loff_t off, size_t rem)
 {
 	struct scatterlist *sg;
 	size_t count;
@@ -906,78 +902,82 @@ ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error,
 	return count;
 }
 
-static void i915_error_object_free(struct drm_i915_error_object *obj)
+static void i915_vma_coredump_free(struct i915_vma_coredump *vma)
 {
-	int page;
+	while (vma) {
+		struct i915_vma_coredump *next = vma->next;
+		int page;
 
-	if (obj == NULL)
-		return;
+		for (page = 0; page < vma->page_count; page++)
+			free_page((unsigned long)vma->pages[page]);
 
-	for (page = 0; page < obj->page_count; page++)
-		free_page((unsigned long)obj->pages[page]);
-
-	kfree(obj);
+		kfree(vma);
+		vma = next;
+	}
 }
 
-
-static void cleanup_params(struct i915_gpu_state *error)
+static void cleanup_params(struct i915_gpu_coredump *error)
 {
 	i915_params_free(&error->params);
 }
 
-static void cleanup_uc_state(struct i915_gpu_state *error)
+static void cleanup_uc(struct intel_uc_coredump *uc)
 {
-	struct i915_error_uc *error_uc = &error->uc;
+	kfree(uc->guc_fw.path);
+	kfree(uc->huc_fw.path);
+	i915_vma_coredump_free(uc->guc_log);
 
-	kfree(error_uc->guc_fw.path);
-	kfree(error_uc->huc_fw.path);
-	i915_error_object_free(error_uc->guc_log);
+	kfree(uc);
 }
 
-void __i915_gpu_state_free(struct kref *error_ref)
+static void cleanup_gt(struct intel_gt_coredump *gt)
 {
-	struct i915_gpu_state *error =
-		container_of(error_ref, typeof(*error), ref);
-	long i;
+	while (gt->engine) {
+		struct intel_engine_coredump *ee = gt->engine;
 
-	while (error->engine) {
-		struct drm_i915_error_engine *ee = error->engine;
+		gt->engine = ee->next;
 
-		error->engine = ee->next;
+		i915_vma_coredump_free(ee->vma);
+		kfree(ee->requests);
+		kfree(ee);
+	}
 
-		for (i = 0; i < ee->user_bo_count; i++)
-			i915_error_object_free(ee->user_bo[i]);
-		kfree(ee->user_bo);
+	if (gt->uc)
+		cleanup_uc(gt->uc);
 
-		i915_error_object_free(ee->batchbuffer);
-		i915_error_object_free(ee->wa_batchbuffer);
-		i915_error_object_free(ee->ringbuffer);
-		i915_error_object_free(ee->hws_page);
-		i915_error_object_free(ee->ctx);
-		i915_error_object_free(ee->wa_ctx);
+	kfree(gt);
+}
 
-		kfree(ee->requests);
-		kfree(ee);
+void __i915_gpu_coredump_free(struct kref *error_ref)
+{
+	struct i915_gpu_coredump *error =
+		container_of(error_ref, typeof(*error), ref);
+
+	while (error->gt) {
+		struct intel_gt_coredump *gt = error->gt;
+
+		error->gt = gt->next;
+		cleanup_gt(gt);
 	}
 
 	kfree(error->overlay);
 	kfree(error->display);
 
 	cleanup_params(error);
-	cleanup_uc_state(error);
 
 	err_free_sgl(error->sgl);
 	kfree(error);
 }
 
-static struct drm_i915_error_object *
-i915_error_object_create(struct drm_i915_private *i915,
-			 struct i915_vma *vma,
-			 struct compress *compress)
+static struct i915_vma_coredump *
+i915_vma_coredump_create(const struct intel_gt *gt,
+			 const struct i915_vma *vma,
+			 const char *name,
+			 struct i915_vma_compress *compress)
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = gt->ggtt;
 	const u64 slot = ggtt->error_capture.start;
-	struct drm_i915_error_object *dst;
+	struct i915_vma_coredump *dst;
 	unsigned long num_pages;
 	struct sgt_iter iter;
 	int ret;
@@ -998,6 +998,9 @@ i915_error_object_create(struct drm_i915_private *i915,
 		return NULL;
 	}
 
+	strcpy(dst->name, name);
+	dst->next = NULL;
+
 	dst->gtt_offset = vma->node.start;
 	dst->gtt_size = vma->node.size;
 	dst->gtt_page_sizes = vma->page_sizes.gtt;
@@ -1005,9 +1008,6 @@ i915_error_object_create(struct drm_i915_private *i915,
 	dst->page_count = 0;
 	dst->unused = 0;
 
-	compress->wc = i915_gem_object_is_lmem(vma->obj) ||
-		       drm_mm_node_allocated(&ggtt->error_capture);
-
 	ret = -EINVAL;
 	if (drm_mm_node_allocated(&ggtt->error_capture)) {
 		void __iomem *s;
@@ -1018,7 +1018,9 @@ i915_error_object_create(struct drm_i915_private *i915,
 					     I915_CACHE_NONE, 0);
 
 			s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE);
-			ret = compress_page(compress, (void  __force *)s, dst);
+			ret = compress_page(compress,
+					    (void  __force *)s, dst,
+					    true);
 			io_mapping_unmap(s);
 			if (ret)
 				break;
@@ -1031,7 +1033,9 @@ i915_error_object_create(struct drm_i915_private *i915,
 			void __iomem *s;
 
 			s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE);
-			ret = compress_page(compress, (void __force *)s, dst);
+			ret = compress_page(compress,
+					    (void __force *)s, dst,
+					    true);
 			io_mapping_unmap(s);
 			if (ret)
 				break;
@@ -1045,7 +1049,7 @@ i915_error_object_create(struct drm_i915_private *i915,
 			drm_clflush_pages(&page, 1);
 
 			s = kmap(page);
-			ret = compress_page(compress, s, dst);
+			ret = compress_page(compress, s, dst, false);
 			kunmap(page);
 
 			drm_clflush_pages(&page, 1);
@@ -1066,77 +1070,56 @@ i915_error_object_create(struct drm_i915_private *i915,
 	return dst;
 }
 
-/*
- * Generate a semi-unique error code. The code is not meant to have meaning, The
- * code's only purpose is to try to prevent false duplicated bug reports by
- * grossly estimating a GPU error state.
- *
- * TODO Ideally, hashing the batchbuffer would be a very nice way to determine
- * the hang if we could strip the GTT offset information from it.
- *
- * It's only a small step better than a random number in its current form.
- */
-static u32 i915_error_generate_code(struct i915_gpu_state *error)
-{
-	const struct drm_i915_error_engine *ee = error->engine;
-
-	/*
-	 * IPEHR would be an ideal way to detect errors, as it's the gross
-	 * measure of "the command that hung." However, has some very common
-	 * synchronization commands which almost always appear in the case
-	 * strictly a client bug. Use instdone to differentiate those some.
-	 */
-	return ee ? ee->ipehr ^ ee->instdone.instdone : 0;
-}
-
-static void gem_record_fences(struct i915_gpu_state *error)
+static void gt_record_fences(struct intel_gt_coredump *gt)
 {
-	struct drm_i915_private *dev_priv = error->i915;
-	struct intel_uncore *uncore = &dev_priv->uncore;
+	struct i915_ggtt *ggtt = gt->_gt->ggtt;
+	struct intel_uncore *uncore = gt->_gt->uncore;
 	int i;
 
-	if (INTEL_GEN(dev_priv) >= 6) {
-		for (i = 0; i < dev_priv->ggtt.num_fences; i++)
-			error->fence[i] =
+	if (INTEL_GEN(uncore->i915) >= 6) {
+		for (i = 0; i < ggtt->num_fences; i++)
+			gt->fence[i] =
 				intel_uncore_read64(uncore,
 						    FENCE_REG_GEN6_LO(i));
-	} else if (INTEL_GEN(dev_priv) >= 4) {
-		for (i = 0; i < dev_priv->ggtt.num_fences; i++)
-			error->fence[i] =
+	} else if (INTEL_GEN(uncore->i915) >= 4) {
+		for (i = 0; i < ggtt->num_fences; i++)
+			gt->fence[i] =
 				intel_uncore_read64(uncore,
 						    FENCE_REG_965_LO(i));
 	} else {
-		for (i = 0; i < dev_priv->ggtt.num_fences; i++)
-			error->fence[i] =
+		for (i = 0; i < ggtt->num_fences; i++)
+			gt->fence[i] =
 				intel_uncore_read(uncore, FENCE_REG(i));
 	}
-	error->nfence = i;
+	gt->nfence = i;
 }
 
-static void error_record_engine_registers(struct i915_gpu_state *error,
-					  struct intel_engine_cs *engine,
-					  struct drm_i915_error_engine *ee)
+static void engine_record_registers(struct intel_engine_coredump *ee)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
+	const struct intel_engine_cs *engine = ee->engine;
+	struct drm_i915_private *i915 = engine->i915;
 
-	if (INTEL_GEN(dev_priv) >= 6) {
+	if (INTEL_GEN(i915) >= 6) {
 		ee->rc_psmi = ENGINE_READ(engine, RING_PSMI_CTL);
 
-		if (INTEL_GEN(dev_priv) >= 12)
-			ee->fault_reg = I915_READ(GEN12_RING_FAULT_REG);
-		else if (INTEL_GEN(dev_priv) >= 8)
-			ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG);
+		if (INTEL_GEN(i915) >= 12)
+			ee->fault_reg = intel_uncore_read(engine->uncore,
+							  GEN12_RING_FAULT_REG);
+		else if (INTEL_GEN(i915) >= 8)
+			ee->fault_reg = intel_uncore_read(engine->uncore,
+							  GEN8_RING_FAULT_REG);
 		else
 			ee->fault_reg = GEN6_RING_FAULT_REG_READ(engine);
 	}
 
-	if (INTEL_GEN(dev_priv) >= 4) {
+	if (INTEL_GEN(i915) >= 4) {
 		ee->faddr = ENGINE_READ(engine, RING_DMA_FADD);
 		ee->ipeir = ENGINE_READ(engine, RING_IPEIR);
 		ee->ipehr = ENGINE_READ(engine, RING_IPEHR);
 		ee->instps = ENGINE_READ(engine, RING_INSTPS);
 		ee->bbaddr = ENGINE_READ(engine, RING_BBADDR);
-		if (INTEL_GEN(dev_priv) >= 8) {
+		ee->ccid = ENGINE_READ(engine, CCID);
+		if (INTEL_GEN(i915) >= 8) {
 			ee->faddr |= (u64)ENGINE_READ(engine, RING_DMA_FADD_UDW) << 32;
 			ee->bbaddr |= (u64)ENGINE_READ(engine, RING_BBADDR_UDW) << 32;
 		}
@@ -1155,13 +1138,13 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 	ee->head = ENGINE_READ(engine, RING_HEAD);
 	ee->tail = ENGINE_READ(engine, RING_TAIL);
 	ee->ctl = ENGINE_READ(engine, RING_CTL);
-	if (INTEL_GEN(dev_priv) > 2)
+	if (INTEL_GEN(i915) > 2)
 		ee->mode = ENGINE_READ(engine, RING_MI_MODE);
 
-	if (!HWS_NEEDS_PHYSICAL(dev_priv)) {
+	if (!HWS_NEEDS_PHYSICAL(i915)) {
 		i915_reg_t mmio;
 
-		if (IS_GEN(dev_priv, 7)) {
+		if (IS_GEN(i915, 7)) {
 			switch (engine->id) {
 			default:
 				MISSING_CASE(engine->id);
@@ -1186,40 +1169,40 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 			mmio = RING_HWS_PGA(engine->mmio_base);
 		}
 
-		ee->hws = I915_READ(mmio);
+		ee->hws = intel_uncore_read(engine->uncore, mmio);
 	}
 
-	ee->idle = intel_engine_is_idle(engine);
-	ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,
-						  engine);
+	ee->reset_count = i915_reset_engine_count(&i915->gpu_error, engine);
 
-	if (HAS_PPGTT(dev_priv)) {
+	if (HAS_PPGTT(i915)) {
 		int i;
 
 		ee->vm_info.gfx_mode = ENGINE_READ(engine, RING_MODE_GEN7);
 
-		if (IS_GEN(dev_priv, 6)) {
+		if (IS_GEN(i915, 6)) {
 			ee->vm_info.pp_dir_base =
 				ENGINE_READ(engine, RING_PP_DIR_BASE_READ);
-		} else if (IS_GEN(dev_priv, 7)) {
+		} else if (IS_GEN(i915, 7)) {
 			ee->vm_info.pp_dir_base =
 				ENGINE_READ(engine, RING_PP_DIR_BASE);
-		} else if (INTEL_GEN(dev_priv) >= 8) {
+		} else if (INTEL_GEN(i915) >= 8) {
 			u32 base = engine->mmio_base;
 
 			for (i = 0; i < 4; i++) {
 				ee->vm_info.pdp[i] =
-					I915_READ(GEN8_RING_PDP_UDW(base, i));
+					intel_uncore_read(engine->uncore,
+							  GEN8_RING_PDP_UDW(base, i));
 				ee->vm_info.pdp[i] <<= 32;
 				ee->vm_info.pdp[i] |=
-					I915_READ(GEN8_RING_PDP_LDW(base, i));
+					intel_uncore_read(engine->uncore,
+							  GEN8_RING_PDP_LDW(base, i));
 			}
 		}
 	}
 }
 
 static void record_request(const struct i915_request *request,
-			   struct drm_i915_error_request *erq)
+			   struct i915_request_coredump *erq)
 {
 	const struct i915_gem_context *ctx;
 
@@ -1227,7 +1210,6 @@ static void record_request(const struct i915_request *request,
 	erq->context = request->fence.context;
 	erq->seqno = request->fence.seqno;
 	erq->sched_attr = request->sched.attr;
-	erq->jiffies = request->emitted_jiffies;
 	erq->start = i915_ggtt_offset(request->ring->vma);
 	erq->head = request->head;
 	erq->tail = request->tail;
@@ -1240,9 +1222,9 @@ static void record_request(const struct i915_request *request,
 	rcu_read_unlock();
 }
 
-static void engine_record_requests(struct intel_engine_cs *engine,
+static void engine_record_requests(const struct intel_engine_cs *engine,
 				   struct i915_request *first,
-				   struct drm_i915_error_engine *ee)
+				   struct intel_engine_coredump *ee)
 {
 	struct i915_request *request;
 	int count;
@@ -1288,11 +1270,10 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 	ee->num_requests = count;
 }
 
-static void error_record_engine_execlists(const struct intel_engine_cs *engine,
-					  struct drm_i915_error_engine *ee)
+static void engine_record_execlists(struct intel_engine_coredump *ee)
 {
-	const struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct i915_request * const *port = execlists->active;
+	const struct intel_engine_execlists * const el = &ee->engine->execlists;
+	struct i915_request * const *port = el->active;
 	unsigned int n = 0;
 
 	while (*port)
@@ -1301,7 +1282,7 @@ static void error_record_engine_execlists(const struct intel_engine_cs *engine,
 	ee->num_ports = n;
 }
 
-static bool record_context(struct drm_i915_error_context *e,
+static bool record_context(struct i915_gem_context_coredump *e,
 			   const struct i915_request *rq)
 {
 	struct i915_gem_context *ctx;
@@ -1334,23 +1315,24 @@ static bool record_context(struct drm_i915_error_context *e,
 	return capture;
 }
 
-struct capture_vma {
-	struct capture_vma *next;
-	void **slot;
+struct intel_engine_capture_vma {
+	struct intel_engine_capture_vma *next;
+	struct i915_vma *vma;
+	char name[16];
 };
 
-static struct capture_vma *
-capture_vma(struct capture_vma *next,
+static struct intel_engine_capture_vma *
+capture_vma(struct intel_engine_capture_vma *next,
 	    struct i915_vma *vma,
-	    struct drm_i915_error_object **out)
+	    const char *name,
+	    gfp_t gfp)
 {
-	struct capture_vma *c;
+	struct intel_engine_capture_vma *c;
 
-	*out = NULL;
 	if (!vma)
 		return next;
 
-	c = kmalloc(sizeof(*c), ATOMIC_MAYFAIL);
+	c = kmalloc(sizeof(*c), gfp);
 	if (!c)
 		return next;
 
@@ -1359,54 +1341,31 @@ capture_vma(struct capture_vma *next,
 		return next;
 	}
 
-	c->slot = (void **)out;
-	*c->slot = i915_vma_get(vma);
+	strcpy(c->name, name);
+	c->vma = i915_vma_get(vma);
 
 	c->next = next;
 	return c;
 }
 
-static struct capture_vma *
-request_record_user_bo(struct i915_request *request,
-		       struct drm_i915_error_engine *ee,
-		       struct capture_vma *capture)
+static struct intel_engine_capture_vma *
+request_record_user(const struct i915_request *request,
+		    struct intel_engine_capture_vma *capture,
+		    gfp_t gfp)
 {
 	struct i915_capture_list *c;
-	struct drm_i915_error_object **bo;
-	long count, max;
 
-	max = 0;
 	for (c = request->capture_list; c; c = c->next)
-		max++;
-	if (!max)
-		return capture;
-
-	bo = kmalloc_array(max, sizeof(*bo), ATOMIC_MAYFAIL);
-	if (!bo) {
-		/* If we can't capture everything, try to capture something. */
-		max = min_t(long, max, PAGE_SIZE / sizeof(*bo));
-		bo = kmalloc_array(max, sizeof(*bo), ATOMIC_MAYFAIL);
-	}
-	if (!bo)
-		return capture;
-
-	count = 0;
-	for (c = request->capture_list; c; c = c->next) {
-		capture = capture_vma(capture, c->vma, &bo[count]);
-		if (++count == max)
-			break;
-	}
-
-	ee->user_bo = bo;
-	ee->user_bo_count = count;
+		capture = capture_vma(capture, c->vma, "user", gfp);
 
 	return capture;
 }
 
-static struct drm_i915_error_object *
-capture_object(struct drm_i915_private *dev_priv,
+static struct i915_vma_coredump *
+capture_object(const struct intel_gt *gt,
 	       struct drm_i915_gem_object *obj,
-	       struct compress *compress)
+	       const char *name,
+	       struct i915_vma_compress *compress)
 {
 	if (obj && i915_gem_object_has_pages(obj)) {
 		struct i915_vma fake = {
@@ -1416,127 +1375,176 @@ capture_object(struct drm_i915_private *dev_priv,
 			.obj = obj,
 		};
 
-		return i915_error_object_create(dev_priv, &fake, compress);
+		return i915_vma_coredump_create(gt, &fake, name, compress);
 	} else {
 		return NULL;
 	}
 }
 
-static void
-gem_record_rings(struct i915_gpu_state *error, struct compress *compress)
+static void add_vma(struct intel_engine_coredump *ee,
+		    struct i915_vma_coredump *vma)
 {
-	struct drm_i915_private *i915 = error->i915;
-	struct intel_engine_cs *engine;
-	struct drm_i915_error_engine *ee;
+	if (vma) {
+		vma->next = ee->vma;
+		ee->vma = vma;
+	}
+}
 
-	ee = kzalloc(sizeof(*ee), GFP_KERNEL);
+struct intel_engine_coredump *
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
+{
+	struct intel_engine_coredump *ee;
+
+	ee = kzalloc(sizeof(*ee), gfp);
 	if (!ee)
-		return;
+		return NULL;
 
-	for_each_uabi_engine(engine, i915) {
-		struct capture_vma *capture = NULL;
-		struct i915_request *request;
-		unsigned long flags;
+	ee->engine = engine;
 
-		/* Refill our page pool before entering atomic section */
-		pool_refill(&compress->pool, ALLOW_FAIL);
+	engine_record_registers(ee);
+	engine_record_execlists(ee);
 
-		spin_lock_irqsave(&engine->active.lock, flags);
-		request = intel_engine_find_active_request(engine);
-		if (!request) {
-			spin_unlock_irqrestore(&engine->active.lock, flags);
-			continue;
-		}
+	return ee;
+}
 
-		error->simulated |= record_context(&ee->context, request);
+struct intel_engine_capture_vma *
+intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
+				  struct i915_request *rq,
+				  gfp_t gfp)
+{
+	struct intel_engine_capture_vma *vma = NULL;
 
-		/*
-		 * We need to copy these to an anonymous buffer
-		 * as the simplest method to avoid being overwritten
-		 * by userspace.
-		 */
-		capture = capture_vma(capture,
-				      request->batch,
-				      &ee->batchbuffer);
+	ee->simulated |= record_context(&ee->context, rq);
 
-		if (HAS_BROKEN_CS_TLB(i915))
-			capture = capture_vma(capture,
-					      engine->gt->scratch,
-					      &ee->wa_batchbuffer);
+	/*
+	 * We need to copy these to an anonymous buffer
+	 * as the simplest method to avoid being overwritten
+	 * by userspace.
+	 */
+	vma = capture_vma(vma, rq->batch, "batch", gfp);
+	vma = request_record_user(rq, vma, gfp);
+	vma = capture_vma(vma, rq->ring->vma, "ring", gfp);
+	vma = capture_vma(vma, rq->context->state, "HW context", gfp);
+	if (HAS_BROKEN_CS_TLB(rq->i915))
+		vma = capture_vma(vma, ee->engine->gt->scratch, "WA batch", gfp);
 
-		capture = request_record_user_bo(request, ee, capture);
+	ee->cpu_ring_head = rq->ring->head;
+	ee->cpu_ring_tail = rq->ring->tail;
 
-		capture = capture_vma(capture,
-				      request->context->state,
-				      &ee->ctx);
+	ee->rq_head = rq->head;
+	ee->rq_post = rq->postfix;
+	ee->rq_tail = rq->tail;
 
-		capture = capture_vma(capture,
-				      request->ring->vma,
-				      &ee->ringbuffer);
+	return vma;
+}
 
-		ee->cpu_ring_head = request->ring->head;
-		ee->cpu_ring_tail = request->ring->tail;
+void
+intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
+			      struct intel_engine_capture_vma *capture,
+			      struct i915_vma_compress *compress)
+{
+	const struct intel_engine_cs *engine = ee->engine;
 
-		ee->rq_head = request->head;
-		ee->rq_post = request->postfix;
-		ee->rq_tail = request->tail;
+	while (capture) {
+		struct intel_engine_capture_vma *this = capture;
+		struct i915_vma *vma = this->vma;
 
-		engine_record_requests(engine, request, ee);
-		spin_unlock_irqrestore(&engine->active.lock, flags);
+		add_vma(ee,
+			i915_vma_coredump_create(engine->gt,
+						 vma, this->name,
+						 compress));
 
-		error_record_engine_registers(error, engine, ee);
-		error_record_engine_execlists(engine, ee);
+		i915_active_release(&vma->active);
+		i915_vma_put(vma);
 
-		while (capture) {
-			struct capture_vma *this = capture;
-			struct i915_vma *vma = *this->slot;
+		capture = this->next;
+		kfree(this);
+	}
 
-			*this->slot =
-				i915_error_object_create(i915, vma, compress);
+	add_vma(ee,
+		i915_vma_coredump_create(engine->gt,
+					 engine->status_page.vma,
+					 "HW Status",
+					 compress));
 
-			i915_active_release(&vma->active);
-			i915_vma_put(vma);
+	add_vma(ee,
+		i915_vma_coredump_create(engine->gt,
+					 engine->wa_ctx.vma,
+					 "WA context",
+					 compress));
 
-			capture = this->next;
-			kfree(this);
-		}
+	add_vma(ee,
+		capture_object(engine->gt,
+			       engine->default_state,
+			       "NULL context",
+			       compress));
+}
 
-		ee->hws_page =
-			i915_error_object_create(i915,
-						 engine->status_page.vma,
-						 compress);
+static struct intel_engine_coredump *
+capture_engine(struct intel_engine_cs *engine,
+	       struct i915_vma_compress *compress)
+{
+	struct intel_engine_capture_vma *capture;
+	struct intel_engine_coredump *ee;
+	struct i915_request *rq;
+	unsigned long flags;
 
-		ee->wa_ctx =
-			i915_error_object_create(i915,
-						 engine->wa_ctx.vma,
-						 compress);
+	ee = intel_engine_coredump_alloc(engine, GFP_KERNEL);
+	if (!ee)
+		return NULL;
 
-		ee->default_state =
-			capture_object(i915, engine->default_state, compress);
+	spin_lock_irqsave(&engine->active.lock, flags);
+
+	rq = intel_engine_find_active_request(engine);
+	if (!rq) {
+		spin_unlock_irqrestore(&engine->active.lock, flags);
+		kfree(ee);
+		return NULL;
+	}
 
-		ee->engine = engine;
+	capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);
+	engine_record_requests(engine, rq, ee);
 
-		ee->next = error->engine;
-		error->engine = ee;
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 
-		ee = kzalloc(sizeof(*ee), GFP_KERNEL);
-		if (!ee)
-			return;
-	}
+	intel_engine_coredump_add_vma(ee, capture, compress);
 
-	kfree(ee);
+	return ee;
 }
 
 static void
-capture_uc_state(struct i915_gpu_state *error, struct compress *compress)
+gt_record_engines(struct intel_gt_coredump *gt,
+		  struct i915_vma_compress *compress)
 {
-	struct drm_i915_private *i915 = error->i915;
-	struct i915_error_uc *error_uc = &error->uc;
-	struct intel_uc *uc = &i915->gt.uc;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 
-	/* Capturing uC state won't be useful if there is no GuC */
-	if (!error->device_info.has_gt_uc)
-		return;
+	for_each_engine(engine, gt->_gt, id) {
+		struct intel_engine_coredump *ee;
+
+		/* Refill our page pool before entering atomic section */
+		pool_refill(&compress->pool, ALLOW_FAIL);
+
+		ee = capture_engine(engine, compress);
+		if (!ee)
+			continue;
+
+		ee->next = gt->engine;
+		gt->engine = ee;
+		gt->simulated |= ee->simulated;
+	}
+}
+
+static struct intel_uc_coredump *
+gt_record_uc(struct intel_gt_coredump *gt,
+	     struct i915_vma_compress *compress)
+{
+	const struct intel_uc *uc = &gt->_gt->uc;
+	struct intel_uc_coredump *error_uc;
+
+	error_uc = kzalloc(sizeof(*error_uc), ALLOW_FAIL);
+	if (!error_uc)
+		return NULL;
 
 	memcpy(&error_uc->guc_fw, &uc->guc.fw, sizeof(uc->guc.fw));
 	memcpy(&error_uc->huc_fw, &uc->huc.fw, sizeof(uc->huc.fw));
@@ -1547,19 +1555,42 @@ capture_uc_state(struct i915_gpu_state *error, struct compress *compress)
 	 */
 	error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL);
 	error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL);
-	error_uc->guc_log = i915_error_object_create(i915,
-						     uc->guc.log.vma,
-						     compress);
+	error_uc->guc_log =
+		i915_vma_coredump_create(gt->_gt,
+					 uc->guc.log.vma, "GuC log buffer",
+					 compress);
+
+	return error_uc;
+}
+
+static void gt_capture_prepare(struct intel_gt_coredump *gt)
+{
+	struct i915_ggtt *ggtt = gt->_gt->ggtt;
+
+	mutex_lock(&ggtt->error_mutex);
+}
+
+static void gt_capture_finish(struct intel_gt_coredump *gt)
+{
+	struct i915_ggtt *ggtt = gt->_gt->ggtt;
+
+	if (drm_mm_node_allocated(&ggtt->error_capture))
+		ggtt->vm.clear_range(&ggtt->vm,
+				     ggtt->error_capture.start,
+				     PAGE_SIZE);
+
+	mutex_unlock(&ggtt->error_mutex);
 }
 
 /* Capture all registers which don't fit into another category. */
-static void capture_reg_state(struct i915_gpu_state *error)
+static void gt_record_regs(struct intel_gt_coredump *gt)
 {
-	struct drm_i915_private *i915 = error->i915;
-	struct intel_uncore *uncore = &i915->uncore;
+	struct intel_uncore *uncore = gt->_gt->uncore;
+	struct drm_i915_private *i915 = uncore->i915;
 	int i;
 
-	/* General organization
+	/*
+	 * General organization
 	 * 1. Registers specific to a single generation
 	 * 2. Registers which belong to multiple generations
 	 * 3. Feature specific registers.
@@ -1569,138 +1600,162 @@ static void capture_reg_state(struct i915_gpu_state *error)
 
 	/* 1: Registers specific to a single generation */
 	if (IS_VALLEYVIEW(i915)) {
-		error->gtier[0] = intel_uncore_read(uncore, GTIER);
-		error->ier = intel_uncore_read(uncore, VLV_IER);
-		error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV);
+		gt->gtier[0] = intel_uncore_read(uncore, GTIER);
+		gt->ier = intel_uncore_read(uncore, VLV_IER);
+		gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV);
 	}
 
 	if (IS_GEN(i915, 7))
-		error->err_int = intel_uncore_read(uncore, GEN7_ERR_INT);
+		gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT);
 
 	if (INTEL_GEN(i915) >= 12) {
-		error->fault_data0 = intel_uncore_read(uncore,
-						       GEN12_FAULT_TLB_DATA0);
-		error->fault_data1 = intel_uncore_read(uncore,
-						       GEN12_FAULT_TLB_DATA1);
+		gt->fault_data0 = intel_uncore_read(uncore,
+						    GEN12_FAULT_TLB_DATA0);
+		gt->fault_data1 = intel_uncore_read(uncore,
+						    GEN12_FAULT_TLB_DATA1);
 	} else if (INTEL_GEN(i915) >= 8) {
-		error->fault_data0 = intel_uncore_read(uncore,
-						       GEN8_FAULT_TLB_DATA0);
-		error->fault_data1 = intel_uncore_read(uncore,
-						       GEN8_FAULT_TLB_DATA1);
+		gt->fault_data0 = intel_uncore_read(uncore,
+						    GEN8_FAULT_TLB_DATA0);
+		gt->fault_data1 = intel_uncore_read(uncore,
+						    GEN8_FAULT_TLB_DATA1);
 	}
 
 	if (IS_GEN(i915, 6)) {
-		error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE);
-		error->gab_ctl = intel_uncore_read(uncore, GAB_CTL);
-		error->gfx_mode = intel_uncore_read(uncore, GFX_MODE);
+		gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE);
+		gt->gab_ctl = intel_uncore_read(uncore, GAB_CTL);
+		gt->gfx_mode = intel_uncore_read(uncore, GFX_MODE);
 	}
 
 	/* 2: Registers which belong to multiple generations */
 	if (INTEL_GEN(i915) >= 7)
-		error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
+		gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
 
 	if (INTEL_GEN(i915) >= 6) {
-		error->derrmr = intel_uncore_read(uncore, DERRMR);
+		gt->derrmr = intel_uncore_read(uncore, DERRMR);
 		if (INTEL_GEN(i915) < 12) {
-			error->error = intel_uncore_read(uncore, ERROR_GEN6);
-			error->done_reg = intel_uncore_read(uncore, DONE_REG);
+			gt->error = intel_uncore_read(uncore, ERROR_GEN6);
+			gt->done_reg = intel_uncore_read(uncore, DONE_REG);
 		}
 	}
 
-	if (INTEL_GEN(i915) >= 5)
-		error->ccid = intel_uncore_read(uncore, CCID(RENDER_RING_BASE));
-
 	/* 3: Feature specific registers */
 	if (IS_GEN_RANGE(i915, 6, 7)) {
-		error->gam_ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
-		error->gac_eco = intel_uncore_read(uncore, GAC_ECO_BITS);
+		gt->gam_ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
+		gt->gac_eco = intel_uncore_read(uncore, GAC_ECO_BITS);
 	}
 
 	if (IS_GEN_RANGE(i915, 8, 11))
-		error->gtt_cache = intel_uncore_read(uncore, HSW_GTT_CACHE_EN);
+		gt->gtt_cache = intel_uncore_read(uncore, HSW_GTT_CACHE_EN);
 
 	if (IS_GEN(i915, 12))
-		error->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG);
+		gt->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG);
 
 	if (INTEL_GEN(i915) >= 12) {
 		for (i = 0; i < GEN12_SFC_DONE_MAX; i++) {
-			error->sfc_done[i] =
+			gt->sfc_done[i] =
 				intel_uncore_read(uncore, GEN12_SFC_DONE(i));
 		}
 
-		error->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE);
+		gt->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE);
 	}
 
 	/* 4: Everything else */
 	if (INTEL_GEN(i915) >= 11) {
-		error->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
-		error->gtier[0] =
+		gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
+		gt->gtier[0] =
 			intel_uncore_read(uncore,
 					  GEN11_RENDER_COPY_INTR_ENABLE);
-		error->gtier[1] =
+		gt->gtier[1] =
 			intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE);
-		error->gtier[2] =
+		gt->gtier[2] =
 			intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE);
-		error->gtier[3] =
+		gt->gtier[3] =
 			intel_uncore_read(uncore,
 					  GEN11_GPM_WGBOXPERF_INTR_ENABLE);
-		error->gtier[4] =
+		gt->gtier[4] =
 			intel_uncore_read(uncore,
 					  GEN11_CRYPTO_RSVD_INTR_ENABLE);
-		error->gtier[5] =
+		gt->gtier[5] =
 			intel_uncore_read(uncore,
 					  GEN11_GUNIT_CSME_INTR_ENABLE);
-		error->ngtier = 6;
+		gt->ngtier = 6;
 	} else if (INTEL_GEN(i915) >= 8) {
-		error->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
+		gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
 		for (i = 0; i < 4; i++)
-			error->gtier[i] = intel_uncore_read(uncore,
-							    GEN8_GT_IER(i));
-		error->ngtier = 4;
+			gt->gtier[i] =
+				intel_uncore_read(uncore, GEN8_GT_IER(i));
+		gt->ngtier = 4;
 	} else if (HAS_PCH_SPLIT(i915)) {
-		error->ier = intel_uncore_read(uncore, DEIER);
-		error->gtier[0] = intel_uncore_read(uncore, GTIER);
-		error->ngtier = 1;
+		gt->ier = intel_uncore_read(uncore, DEIER);
+		gt->gtier[0] = intel_uncore_read(uncore, GTIER);
+		gt->ngtier = 1;
 	} else if (IS_GEN(i915, 2)) {
-		error->ier = intel_uncore_read16(uncore, GEN2_IER);
+		gt->ier = intel_uncore_read16(uncore, GEN2_IER);
 	} else if (!IS_VALLEYVIEW(i915)) {
-		error->ier = intel_uncore_read(uncore, GEN2_IER);
+		gt->ier = intel_uncore_read(uncore, GEN2_IER);
 	}
-	error->eir = intel_uncore_read(uncore, EIR);
-	error->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER);
+	gt->eir = intel_uncore_read(uncore, EIR);
+	gt->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER);
+}
+
+/*
+ * Generate a semi-unique error code. The code is not meant to have meaning, The
+ * code's only purpose is to try to prevent false duplicated bug reports by
+ * grossly estimating a GPU error state.
+ *
+ * TODO Ideally, hashing the batchbuffer would be a very nice way to determine
+ * the hang if we could strip the GTT offset information from it.
+ *
+ * It's only a small step better than a random number in its current form.
+ */
+static u32 generate_ecode(const struct intel_engine_coredump *ee)
+{
+	/*
+	 * IPEHR would be an ideal way to detect errors, as it's the gross
+	 * measure of "the command that hung." However, has some very common
+	 * synchronization commands which almost always appear in the case
+	 * strictly a client bug. Use instdone to differentiate those some.
+	 */
+	return ee ? ee->ipehr ^ ee->instdone.instdone : 0;
 }
 
-static const char *
-error_msg(struct i915_gpu_state *error,
-	  intel_engine_mask_t engines, const char *msg)
+static const char *error_msg(struct i915_gpu_coredump *error)
 {
+	struct intel_engine_coredump *first = NULL;
+	struct intel_gt_coredump *gt;
+	intel_engine_mask_t engines;
 	int len;
 
+	engines = 0;
+	for (gt = error->gt; gt; gt = gt->next) {
+		struct intel_engine_coredump *cs;
+
+		if (gt->engine && !first)
+			first = gt->engine;
+
+		for (cs = gt->engine; cs; cs = cs->next)
+			engines |= cs->engine->mask;
+	}
+
 	len = scnprintf(error->error_msg, sizeof(error->error_msg),
-			"GPU HANG: ecode %d:%x:0x%08x",
+			"GPU HANG: ecode %d:%x:%08x",
 			INTEL_GEN(error->i915), engines,
-			i915_error_generate_code(error));
-	if (error->engine) {
+			generate_ecode(first));
+	if (first) {
 		/* Just show the first executing process, more is confusing */
 		len += scnprintf(error->error_msg + len,
 				 sizeof(error->error_msg) - len,
 				 ", in %s [%d]",
-				 error->engine->context.comm,
-				 error->engine->context.pid);
+				 first->context.comm, first->context.pid);
 	}
-	if (msg)
-		len += scnprintf(error->error_msg + len,
-				 sizeof(error->error_msg) - len,
-				 ", %s", msg);
 
 	return error->error_msg;
 }
 
-static void capture_gen_state(struct i915_gpu_state *error)
+static void capture_gen(struct i915_gpu_coredump *error)
 {
 	struct drm_i915_private *i915 = error->i915;
 
-	error->awake = i915->gt.awake;
 	error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count);
 	error->suspended = i915->runtime_pm.suspended;
 
@@ -1711,6 +1766,7 @@ static void capture_gen_state(struct i915_gpu_state *error)
 	error->reset_count = i915_reset_count(&i915->gpu_error);
 	error->suspend_count = i915->suspend_count;
 
+	i915_params_copy(&error->params, &i915_modparams);
 	memcpy(&error->device_info,
 	       INTEL_INFO(i915),
 	       sizeof(error->device_info));
@@ -1720,115 +1776,135 @@ static void capture_gen_state(struct i915_gpu_state *error)
 	error->driver_caps = i915->caps;
 }
 
-static void capture_params(struct i915_gpu_state *error)
+struct i915_gpu_coredump *
+i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
 {
-	i915_params_copy(&error->params, &i915_modparams);
+	struct i915_gpu_coredump *error;
+
+	if (!i915_modparams.error_capture)
+		return NULL;
+
+	error = kzalloc(sizeof(*error), gfp);
+	if (!error)
+		return NULL;
+
+	kref_init(&error->ref);
+	error->i915 = i915;
+
+	error->time = ktime_get_real();
+	error->boottime = ktime_get_boottime();
+	error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time);
+	error->capture = jiffies;
+
+	capture_gen(error);
+
+	return error;
 }
 
-static void capture_finish(struct i915_gpu_state *error)
+#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
+
+struct intel_gt_coredump *
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
 {
-	struct i915_ggtt *ggtt = &error->i915->ggtt;
+	struct intel_gt_coredump *gc;
 
-	if (drm_mm_node_allocated(&ggtt->error_capture)) {
-		const u64 slot = ggtt->error_capture.start;
+	gc = kzalloc(sizeof(*gc), gfp);
+	if (!gc)
+		return NULL;
+
+	gc->_gt = gt;
+	gc->awake = intel_gt_pm_is_awake(gt);
+
+	gt_record_regs(gc);
+	gt_record_fences(gc);
+
+	return gc;
+}
+
+struct i915_vma_compress *
+i915_vma_capture_prepare(struct intel_gt_coredump *gt)
+{
+	struct i915_vma_compress *compress;
+
+	compress = kmalloc(sizeof(*compress), ALLOW_FAIL);
+	if (!compress)
+		return NULL;
 
-		ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE);
+	if (!compress_init(compress)) {
+		kfree(compress);
+		return NULL;
 	}
+
+	gt_capture_prepare(gt);
+
+	return compress;
 }
 
-#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
+void i915_vma_capture_finish(struct intel_gt_coredump *gt,
+			     struct i915_vma_compress *compress)
+{
+	gt_capture_finish(gt);
 
-struct i915_gpu_state *
-i915_capture_gpu_state(struct drm_i915_private *i915)
+	compress_fini(compress);
+	kfree(compress);
+}
+
+struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915)
 {
-	struct i915_gpu_state *error;
-	struct compress compress;
+	struct i915_gpu_coredump *error;
 
 	/* Check if GPU capture has been disabled */
 	error = READ_ONCE(i915->gpu_error.first_error);
 	if (IS_ERR(error))
 		return error;
 
-	error = kzalloc(sizeof(*error), ALLOW_FAIL);
-	if (!error) {
-		i915_disable_error_state(i915, -ENOMEM);
+	error = i915_gpu_coredump_alloc(i915, ALLOW_FAIL);
+	if (!error)
 		return ERR_PTR(-ENOMEM);
-	}
 
-	if (!compress_init(&compress)) {
-		kfree(error);
-		i915_disable_error_state(i915, -ENOMEM);
-		return ERR_PTR(-ENOMEM);
-	}
+	error->gt = intel_gt_coredump_alloc(&i915->gt, ALLOW_FAIL);
+	if (error->gt) {
+		struct i915_vma_compress *compress;
 
-	kref_init(&error->ref);
-	error->i915 = i915;
+		compress = i915_vma_capture_prepare(error->gt);
+		if (!compress) {
+			kfree(error->gt);
+			kfree(error);
+			return ERR_PTR(-ENOMEM);
+		}
 
-	error->time = ktime_get_real();
-	error->boottime = ktime_get_boottime();
-	error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time);
-	error->capture = jiffies;
+		gt_record_engines(error->gt, compress);
 
-	capture_params(error);
-	capture_gen_state(error);
-	capture_uc_state(error, &compress);
-	capture_reg_state(error);
-	gem_record_fences(error);
-	gem_record_rings(error, &compress);
+		if (INTEL_INFO(i915)->has_gt_uc)
+			error->gt->uc = gt_record_uc(error->gt, compress);
+
+		i915_vma_capture_finish(error->gt, compress);
+
+		error->simulated |= error->gt->simulated;
+	}
 
 	error->overlay = intel_overlay_capture_error_state(i915);
 	error->display = intel_display_capture_error_state(i915);
 
-	capture_finish(error);
-	compress_fini(&compress);
-
 	return error;
 }
 
-/**
- * i915_capture_error_state - capture an error record for later analysis
- * @i915: i915 device
- * @engine_mask: the mask of engines triggering the hang
- * @msg: a message to insert into the error capture header
- *
- * Should be called when an error is detected (either a hang or an error
- * interrupt) to capture error state from the time of the error.  Fills
- * out a structure which becomes available in debugfs for user level tools
- * to pick up.
- */
-void i915_capture_error_state(struct drm_i915_private *i915,
-			      intel_engine_mask_t engine_mask,
-			      const char *msg)
+void i915_error_state_store(struct i915_gpu_coredump *error)
 {
+	struct drm_i915_private *i915;
 	static bool warned;
-	struct i915_gpu_state *error;
-	unsigned long flags;
 
-	if (!i915_modparams.error_capture)
+	if (IS_ERR_OR_NULL(error))
 		return;
 
-	if (READ_ONCE(i915->gpu_error.first_error))
-		return;
+	i915 = error->i915;
+	dev_info(i915->drm.dev, "%s\n", error_msg(error));
 
-	error = i915_capture_gpu_state(i915);
-	if (IS_ERR(error))
+	if (error->simulated ||
+	    cmpxchg(&i915->gpu_error.first_error, NULL, error))
 		return;
 
-	dev_info(i915->drm.dev, "%s\n", error_msg(error, engine_mask, msg));
-
-	if (!error->simulated) {
-		spin_lock_irqsave(&i915->gpu_error.lock, flags);
-		if (!i915->gpu_error.first_error) {
-			i915->gpu_error.first_error = error;
-			error = NULL;
-		}
-		spin_unlock_irqrestore(&i915->gpu_error.lock, flags);
-	}
-
-	if (error) {
-		__i915_gpu_state_free(&error->ref);
-		return;
-	}
+	i915_gpu_coredump_get(error);
 
 	if (!xchg(&warned, true) &&
 	    ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) {
@@ -1841,15 +1917,38 @@ void i915_capture_error_state(struct drm_i915_private *i915,
 	}
 }
 
-struct i915_gpu_state *
+/**
+ * i915_capture_error_state - capture an error record for later analysis
+ * @i915: i915 device
+ *
+ * Should be called when an error is detected (either a hang or an error
+ * interrupt) to capture error state from the time of the error.  Fills
+ * out a structure which becomes available in debugfs for user level tools
+ * to pick up.
+ */
+void i915_capture_error_state(struct drm_i915_private *i915)
+{
+	struct i915_gpu_coredump *error;
+
+	error = i915_gpu_coredump(i915);
+	if (IS_ERR(error)) {
+		cmpxchg(&i915->gpu_error.first_error, NULL, error);
+		return;
+	}
+
+	i915_error_state_store(error);
+	i915_gpu_coredump_put(error);
+}
+
+struct i915_gpu_coredump *
 i915_first_error_state(struct drm_i915_private *i915)
 {
-	struct i915_gpu_state *error;
+	struct i915_gpu_coredump *error;
 
 	spin_lock_irq(&i915->gpu_error.lock);
 	error = i915->gpu_error.first_error;
 	if (!IS_ERR_OR_NULL(error))
-		i915_gpu_state_get(error);
+		i915_gpu_coredump_get(error);
 	spin_unlock_irq(&i915->gpu_error.lock);
 
 	return error;
@@ -1857,7 +1956,7 @@ i915_first_error_state(struct drm_i915_private *i915)
 
 void i915_reset_error_state(struct drm_i915_private *i915)
 {
-	struct i915_gpu_state *error;
+	struct i915_gpu_coredump *error;
 
 	spin_lock_irq(&i915->gpu_error.lock);
 	error = i915->gpu_error.first_error;
@@ -1866,7 +1965,7 @@ void i915_reset_error_state(struct drm_i915_private *i915)
 	spin_unlock_irq(&i915->gpu_error.lock);
 
 	if (!IS_ERR_OR_NULL(error))
-		i915_gpu_state_put(error);
+		i915_gpu_coredump_put(error);
 }
 
 void i915_disable_error_state(struct drm_i915_private *i915, int err)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 5d2c3372ff99..0df9d8c32056 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -25,43 +25,105 @@
 #include "i915_scheduler.h"
 
 struct drm_i915_private;
+struct i915_vma_compress;
+struct intel_engine_capture_vma;
 struct intel_overlay_error_state;
 struct intel_display_error_state;
 
-struct i915_gpu_state {
-	struct kref ref;
-	ktime_t time;
-	ktime_t boottime;
-	ktime_t uptime;
-	unsigned long capture;
+struct i915_vma_coredump {
+	struct i915_vma_coredump *next;
 
-	struct drm_i915_private *i915;
+	char name[20];
+
+	u64 gtt_offset;
+	u64 gtt_size;
+	u32 gtt_page_sizes;
+
+	int num_pages;
+	int page_count;
+	int unused;
+	u32 *pages[0];
+};
+
+struct i915_request_coredump {
+	unsigned long flags;
+	pid_t pid;
+	u32 context;
+	u32 seqno;
+	u32 start;
+	u32 head;
+	u32 tail;
+	struct i915_sched_attr sched_attr;
+};
+
+struct intel_engine_coredump {
+	const struct intel_engine_cs *engine;
 
-	char error_msg[128];
 	bool simulated;
-	bool awake;
-	bool wakelock;
-	bool suspended;
-	int iommu;
+	int num_requests;
 	u32 reset_count;
-	u32 suspend_count;
-	struct intel_device_info device_info;
-	struct intel_runtime_info runtime_info;
-	struct intel_driver_caps driver_caps;
-	struct i915_params params;
 
-	struct i915_error_uc {
-		struct intel_uc_fw guc_fw;
-		struct intel_uc_fw huc_fw;
-		struct drm_i915_error_object *guc_log;
-	} uc;
+	/* position of active request inside the ring */
+	u32 rq_head, rq_post, rq_tail;
+
+	/* our own tracking of ring head and tail */
+	u32 cpu_ring_head;
+	u32 cpu_ring_tail;
+
+	/* Register state */
+	u32 ccid;
+	u32 start;
+	u32 tail;
+	u32 head;
+	u32 ctl;
+	u32 mode;
+	u32 hws;
+	u32 ipeir;
+	u32 ipehr;
+	u32 bbstate;
+	u32 instpm;
+	u32 instps;
+	u64 bbaddr;
+	u64 acthd;
+	u32 fault_reg;
+	u64 faddr;
+	u32 rc_psmi; /* sleep state */
+	struct intel_instdone instdone;
+
+	struct i915_gem_context_coredump {
+		char comm[TASK_COMM_LEN];
+		pid_t pid;
+		int active;
+		int guilty;
+		struct i915_sched_attr sched_attr;
+	} context;
+
+	struct i915_vma_coredump *vma;
+
+	struct i915_request_coredump *requests, execlist[EXECLIST_MAX_PORTS];
+	unsigned int num_ports;
+
+	struct {
+		u32 gfx_mode;
+		union {
+			u64 pdp[4];
+			u32 pp_dir_base;
+		};
+	} vm_info;
+
+	struct intel_engine_coredump *next;
+};
+
+struct intel_gt_coredump {
+	const struct intel_gt *_gt;
+	bool awake;
+	bool simulated;
 
 	/* Generic register state */
 	u32 eir;
 	u32 pgtbl_er;
 	u32 ier;
 	u32 gtier[6], ngtier;
-	u32 ccid;
 	u32 derrmr;
 	u32 forcewake;
 	u32 error; /* gen6+ */
@@ -80,91 +142,45 @@ struct i915_gpu_state {
 
 	u32 nfence;
 	u64 fence[I915_MAX_NUM_FENCES];
+
+	struct intel_engine_coredump *engine;
+
+	struct intel_uc_coredump {
+		struct intel_uc_fw guc_fw;
+		struct intel_uc_fw huc_fw;
+		struct i915_vma_coredump *guc_log;
+	} *uc;
+
+	struct intel_gt_coredump *next;
+};
+
+struct i915_gpu_coredump {
+	struct kref ref;
+	ktime_t time;
+	ktime_t boottime;
+	ktime_t uptime;
+	unsigned long capture;
+
+	struct drm_i915_private *i915;
+
+	struct intel_gt_coredump *gt;
+
+	char error_msg[128];
+	bool simulated;
+	bool wakelock;
+	bool suspended;
+	int iommu;
+	u32 reset_count;
+	u32 suspend_count;
+
+	struct intel_device_info device_info;
+	struct intel_runtime_info runtime_info;
+	struct intel_driver_caps driver_caps;
+	struct i915_params params;
+
 	struct intel_overlay_error_state *overlay;
 	struct intel_display_error_state *display;
 
-	struct drm_i915_error_engine {
-		const struct intel_engine_cs *engine;
-
-		/* Software tracked state */
-		bool idle;
-		int num_requests;
-		u32 reset_count;
-
-		/* position of active request inside the ring */
-		u32 rq_head, rq_post, rq_tail;
-
-		/* our own tracking of ring head and tail */
-		u32 cpu_ring_head;
-		u32 cpu_ring_tail;
-
-		/* Register state */
-		u32 start;
-		u32 tail;
-		u32 head;
-		u32 ctl;
-		u32 mode;
-		u32 hws;
-		u32 ipeir;
-		u32 ipehr;
-		u32 bbstate;
-		u32 instpm;
-		u32 instps;
-		u64 bbaddr;
-		u64 acthd;
-		u32 fault_reg;
-		u64 faddr;
-		u32 rc_psmi; /* sleep state */
-		struct intel_instdone instdone;
-
-		struct drm_i915_error_context {
-			char comm[TASK_COMM_LEN];
-			pid_t pid;
-			int active;
-			int guilty;
-			struct i915_sched_attr sched_attr;
-		} context;
-
-		struct drm_i915_error_object {
-			u64 gtt_offset;
-			u64 gtt_size;
-			u32 gtt_page_sizes;
-			int num_pages;
-			int page_count;
-			int unused;
-			u32 *pages[0];
-		} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
-
-		struct drm_i915_error_object **user_bo;
-		long user_bo_count;
-
-		struct drm_i915_error_object *wa_ctx;
-		struct drm_i915_error_object *default_state;
-
-		struct drm_i915_error_request {
-			unsigned long flags;
-			long jiffies;
-			pid_t pid;
-			u32 context;
-			u32 seqno;
-			u32 start;
-			u32 head;
-			u32 tail;
-			struct i915_sched_attr sched_attr;
-		} *requests, execlist[EXECLIST_MAX_PORTS];
-		unsigned int num_ports;
-
-		struct {
-			u32 gfx_mode;
-			union {
-				u64 pdp[4];
-				u32 pp_dir_base;
-			};
-		} vm_info;
-
-		struct drm_i915_error_engine *next;
-	} *engine;
-
 	struct scatterlist *sgl, *fit;
 };
 
@@ -172,7 +188,7 @@ struct i915_gpu_error {
 	/* For reset and error_state handling. */
 	spinlock_t lock;
 	/* Protected by the above dev->gpu_error.lock. */
-	struct i915_gpu_state *first_error;
+	struct i915_gpu_coredump *first_error;
 
 	atomic_t pending_fb_pin;
 
@@ -200,29 +216,54 @@ struct drm_i915_error_state_buf {
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 
-struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);
-void i915_capture_error_state(struct drm_i915_private *dev_priv,
-			      intel_engine_mask_t engine_mask,
-			      const char *error_msg);
+struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915);
+void i915_capture_error_state(struct drm_i915_private *dev_priv);
+
+struct i915_gpu_coredump *
+i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
+
+struct intel_gt_coredump *
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);
+
+struct intel_engine_coredump *
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
 
-static inline struct i915_gpu_state *
-i915_gpu_state_get(struct i915_gpu_state *gpu)
+struct intel_engine_capture_vma *
+intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
+				  struct i915_request *rq,
+				  gfp_t gfp);
+
+void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
+				   struct intel_engine_capture_vma *capture,
+				   struct i915_vma_compress *compress);
+
+struct i915_vma_compress *
+i915_vma_capture_prepare(struct intel_gt_coredump *gt);
+
+void i915_vma_capture_finish(struct intel_gt_coredump *gt,
+			     struct i915_vma_compress *compress);
+
+void i915_error_state_store(struct i915_gpu_coredump *error);
+
+static inline struct i915_gpu_coredump *
+i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
 {
 	kref_get(&gpu->ref);
 	return gpu;
 }
 
-ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error,
-				      char *buf, loff_t offset, size_t count);
+ssize_t
+i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
+				 char *buf, loff_t offset, size_t count);
 
-void __i915_gpu_state_free(struct kref *kref);
-static inline void i915_gpu_state_put(struct i915_gpu_state *gpu)
+void __i915_gpu_coredump_free(struct kref *kref);
+static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
 {
 	if (gpu)
-		kref_put(&gpu->ref, __i915_gpu_state_free);
+		kref_put(&gpu->ref, __i915_gpu_coredump_free);
 }
 
-struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915);
+struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
 void i915_reset_error_state(struct drm_i915_private *i915);
 void i915_disable_error_state(struct drm_i915_private *i915, int err);
 
@@ -234,7 +275,56 @@ static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
 {
 }
 
-static inline struct i915_gpu_state *
+static inline struct i915_gpu_coredump *
+i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
+{
+	return NULL;
+}
+
+static inline struct intel_gt_coredump *
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
+{
+	return NULL;
+}
+
+static inline struct intel_engine_coredump *
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
+{
+	return NULL;
+}
+
+static inline struct intel_engine_capture_vma *
+intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
+				  struct i915_request *rq,
+				  gfp_t gfp)
+{
+	return NULL;
+}
+
+static inline void
+intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
+			      struct intel_engine_capture_vma *capture,
+			      struct i915_vma_compress *compress)
+{
+}
+
+static inline struct i915_vma_compress *
+i915_vma_compress_prepare(struct intel_gt_coredump *gt)
+{
+	return NULL;
+}
+
+void i915_vma_compress_prepare(struct i915_vma_compress *compress)
+{
+}
+
+static inline void
+i915_error_state_store(struct drm_i915_private *i915,
+		       struct i915_gpu_coredump *error)
+{
+}
+
+static inline struct i915_gpu_coredump *
 i915_first_error_state(struct drm_i915_private *i915)
 {
 	return ERR_PTR(-ENODEV);
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index ad2b1b833d7b..0cef3130db05 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -498,15 +498,15 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
 
 	struct device *kdev = kobj_to_dev(kobj);
 	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
-	struct i915_gpu_state *gpu;
+	struct i915_gpu_coredump *gpu;
 	ssize_t ret;
 
 	gpu = i915_first_error_state(i915);
 	if (IS_ERR(gpu)) {
 		ret = PTR_ERR(gpu);
 	} else if (gpu) {
-		ret = i915_gpu_state_copy_to_buffer(gpu, buf, off, count);
-		i915_gpu_state_put(gpu);
+		ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count);
+		i915_gpu_coredump_put(gpu);
 	} else {
 		const char *str = "No error state collected\n";
 		size_t len = strlen(str);
-- 
2.25.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Intel-gfx] [PATCH 11/14] drm/i915: Drop the shadow ring state from the error capture
  2020-01-09  8:58 [Intel-gfx] [PATCH 01/14] drm/i915/gt: Push context state allocation earlier Chris Wilson
                   ` (8 preceding siblings ...)
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 10/14] drm/i915: Start chopping up the GPU error capture Chris Wilson
@ 2020-01-09  8:58 ` Chris Wilson
  2020-01-09  9:04   ` Mika Kuoppala
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 12/14] drm/i915: Drop the shadow w/a batch buffer Chris Wilson
                   ` (3 subsequent siblings)
  13 siblings, 1 reply; 30+ messages in thread
From: Chris Wilson @ 2020-01-09  8:58 UTC (permalink / raw)
  To: intel-gfx

The shadow ring regs (ring->head, ring->tail) are meaningless in the
post-mortem dump as they do not related to anything on HW. Remove them
from the coredump.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 5 -----
 drivers/gpu/drm/i915/i915_gpu_error.h | 4 ----
 2 files changed, 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index b45c128d0a17..a35aad439281 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -553,8 +553,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 				   ee->vm_info.pp_dir_base);
 		}
 	}
-	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
-	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
 	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
 
 	for (n = 0; n < ee->num_ports; n++) {
@@ -1428,9 +1426,6 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
 	if (HAS_BROKEN_CS_TLB(rq->i915))
 		vma = capture_vma(vma, ee->engine->gt->scratch, "WA batch", gfp);
 
-	ee->cpu_ring_head = rq->ring->head;
-	ee->cpu_ring_tail = rq->ring->tail;
-
 	ee->rq_head = rq->head;
 	ee->rq_post = rq->postfix;
 	ee->rq_tail = rq->tail;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 0df9d8c32056..8f4579d64d8c 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -66,10 +66,6 @@ struct intel_engine_coredump {
 	/* position of active request inside the ring */
 	u32 rq_head, rq_post, rq_tail;
 
-	/* our own tracking of ring head and tail */
-	u32 cpu_ring_head;
-	u32 cpu_ring_tail;
-
 	/* Register state */
 	u32 ccid;
 	u32 start;
-- 
2.25.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Intel-gfx] [PATCH 12/14] drm/i915: Drop the shadow w/a batch buffer
  2020-01-09  8:58 [Intel-gfx] [PATCH 01/14] drm/i915/gt: Push context state allocation earlier Chris Wilson
                   ` (9 preceding siblings ...)
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 11/14] drm/i915: Drop the shadow ring state from the " Chris Wilson
@ 2020-01-09  8:58 ` Chris Wilson
  2020-01-09  9:05   ` Mika Kuoppala
  2020-01-10 11:02   ` Andi Shyti
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 13/14] drm/i915: Drop request list from error state Chris Wilson
                   ` (2 subsequent siblings)
  13 siblings, 2 replies; 30+ messages in thread
From: Chris Wilson @ 2020-01-09  8:58 UTC (permalink / raw)
  To: intel-gfx

While this is technically the batch as executed by the HW (in part at
least), it is confusing, and only used for a minority of gen.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a35aad439281..796c9ce0c494 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1423,8 +1423,6 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
 	vma = request_record_user(rq, vma, gfp);
 	vma = capture_vma(vma, rq->ring->vma, "ring", gfp);
 	vma = capture_vma(vma, rq->context->state, "HW context", gfp);
-	if (HAS_BROKEN_CS_TLB(rq->i915))
-		vma = capture_vma(vma, ee->engine->gt->scratch, "WA batch", gfp);
 
 	ee->rq_head = rq->head;
 	ee->rq_post = rq->postfix;
-- 
2.25.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Intel-gfx] [PATCH 13/14] drm/i915: Drop request list from error state
  2020-01-09  8:58 [Intel-gfx] [PATCH 01/14] drm/i915/gt: Push context state allocation earlier Chris Wilson
                   ` (10 preceding siblings ...)
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 12/14] drm/i915: Drop the shadow w/a batch buffer Chris Wilson
@ 2020-01-09  8:58 ` Chris Wilson
  2020-01-10 11:04   ` Andi Shyti
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 14/14] drm/i915/execlists: Offline error capture Chris Wilson
  2020-01-09 18:13 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [01/14] drm/i915/gt: Push context state allocation earlier Patchwork
  13 siblings, 1 reply; 30+ messages in thread
From: Chris Wilson @ 2020-01-09  8:58 UTC (permalink / raw)
  To: intel-gfx

The list of requests from after the hang tells little about the hang
itself, only how busy userspace was after the fact. As it pertains
nothing to the HW state, drop it from the error state.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 75 +++------------------------
 drivers/gpu/drm/i915/i915_gpu_error.h |  3 +-
 2 files changed, 8 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 796c9ce0c494..79b5e06b0865 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -669,7 +669,7 @@ static void err_print_gt(struct drm_i915_error_state_buf *m,
 			 struct intel_gt_coredump *gt)
 {
 	const struct intel_engine_coredump *ee;
-	int i, j;
+	int i;
 
 	err_printf(m, "GT awake: %s\n", yesno(gt->awake));
 	err_printf(m, "EIR: 0x%08x\n", gt->eir);
@@ -715,17 +715,8 @@ static void err_print_gt(struct drm_i915_error_state_buf *m,
 		const struct i915_vma_coredump *vma;
 
 		error_print_engine(m, ee);
-
 		for (vma = ee->vma; vma; vma = vma->next)
 			print_error_vma(m, ee->engine, vma);
-
-		if (ee->num_requests) {
-			err_printf(m, "%s --- %d requests\n",
-				   ee->engine->name,
-				   ee->num_requests);
-			for (j = 0; j < ee->num_requests; j++)
-				error_print_request(m, " ", &ee->requests[j]);
-		}
 	}
 
 	if (gt->uc)
@@ -936,7 +927,6 @@ static void cleanup_gt(struct intel_gt_coredump *gt)
 		gt->engine = ee->next;
 
 		i915_vma_coredump_free(ee->vma);
-		kfree(ee->requests);
 		kfree(ee);
 	}
 
@@ -1220,54 +1210,6 @@ static void record_request(const struct i915_request *request,
 	rcu_read_unlock();
 }
 
-static void engine_record_requests(const struct intel_engine_cs *engine,
-				   struct i915_request *first,
-				   struct intel_engine_coredump *ee)
-{
-	struct i915_request *request;
-	int count;
-
-	count = 0;
-	request = first;
-	list_for_each_entry_from(request, &engine->active.requests, sched.link)
-		count++;
-	if (!count)
-		return;
-
-	ee->requests = kcalloc(count, sizeof(*ee->requests), ATOMIC_MAYFAIL);
-	if (!ee->requests)
-		return;
-
-	ee->num_requests = count;
-
-	count = 0;
-	request = first;
-	list_for_each_entry_from(request,
-				 &engine->active.requests, sched.link) {
-		if (count >= ee->num_requests) {
-			/*
-			 * If the ring request list was changed in
-			 * between the point where the error request
-			 * list was created and dimensioned and this
-			 * point then just exit early to avoid crashes.
-			 *
-			 * We don't need to communicate that the
-			 * request list changed state during error
-			 * state capture and that the error state is
-			 * slightly incorrect as a consequence since we
-			 * are typically only interested in the request
-			 * list state at the point of error state
-			 * capture, not in any changes happening during
-			 * the capture.
-			 */
-			break;
-		}
-
-		record_request(request, &ee->requests[count++]);
-	}
-	ee->num_requests = count;
-}
-
 static void engine_record_execlists(struct intel_engine_coredump *ee)
 {
 	const struct intel_engine_execlists * const el = &ee->engine->execlists;
@@ -1477,7 +1419,7 @@ static struct intel_engine_coredump *
 capture_engine(struct intel_engine_cs *engine,
 	       struct i915_vma_compress *compress)
 {
-	struct intel_engine_capture_vma *capture;
+	struct intel_engine_capture_vma *capture = NULL;
 	struct intel_engine_coredump *ee;
 	struct i915_request *rq;
 	unsigned long flags;
@@ -1487,19 +1429,16 @@ capture_engine(struct intel_engine_cs *engine,
 		return NULL;
 
 	spin_lock_irqsave(&engine->active.lock, flags);
-
 	rq = intel_engine_find_active_request(engine);
-	if (!rq) {
-		spin_unlock_irqrestore(&engine->active.lock, flags);
+	if (rq)
+		capture = intel_engine_coredump_add_request(ee, rq,
+							    ATOMIC_MAYFAIL);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
+	if (!capture) {
 		kfree(ee);
 		return NULL;
 	}
 
-	capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);
-	engine_record_requests(engine, rq, ee);
-
-	spin_unlock_irqrestore(&engine->active.lock, flags);
-
 	intel_engine_coredump_add_vma(ee, capture, compress);
 
 	return ee;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 8f4579d64d8c..b87f39291c07 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -60,7 +60,6 @@ struct intel_engine_coredump {
 	const struct intel_engine_cs *engine;
 
 	bool simulated;
-	int num_requests;
 	u32 reset_count;
 
 	/* position of active request inside the ring */
@@ -96,7 +95,7 @@ struct intel_engine_coredump {
 
 	struct i915_vma_coredump *vma;
 
-	struct i915_request_coredump *requests, execlist[EXECLIST_MAX_PORTS];
+	struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
 	unsigned int num_ports;
 
 	struct {
-- 
2.25.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Intel-gfx] [PATCH 14/14] drm/i915/execlists: Offline error capture
  2020-01-09  8:58 [Intel-gfx] [PATCH 01/14] drm/i915/gt: Push context state allocation earlier Chris Wilson
                   ` (11 preceding siblings ...)
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 13/14] drm/i915: Drop request list from error state Chris Wilson
@ 2020-01-09  8:58 ` Chris Wilson
  2020-01-10 22:46     ` kbuild test robot
  2020-01-11 22:33     ` kbuild test robot
  2020-01-09 18:13 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [01/14] drm/i915/gt: Push context state allocation earlier Patchwork
  13 siblings, 2 replies; 30+ messages in thread
From: Chris Wilson @ 2020-01-09  8:58 UTC (permalink / raw)
  To: intel-gfx

Currently, we skip error capture upon forced preemption. We apply forced
preemption when there is a higher priority request that should be
running but is being blocked, and we skip inline error capture so that
the preemption request is not further delayed by a user controlled
capture -- extending the denial of service.

However, preemption reset is also used for heartbeats and regular GPU
hangs. By skipping the error capture, we remove the ability to debug GPU
hangs.

In order to capture the error without delaying the preemption request
further, we can do an out-of-line capture by removing the guilty request
from the execution queue and scheduling a work to dump that request.
When removing a request, we need to remove the entire context and all
descendants from the execution queue, so that they do not jump past.

Closes: https://gitlab.freedesktop.org/drm/intel/issues/738
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 102 +++++++++++++++++++++++++++-
 1 file changed, 100 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index f2791f98ea68..883d6d46e4e8 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2406,7 +2406,6 @@ static void __execlists_hold(struct i915_request *rq)
 	}
 }
 
-__maybe_unused
 static void execlists_hold(struct intel_engine_cs *engine,
 			   struct i915_request *rq)
 {
@@ -2439,7 +2438,12 @@ static void __execlists_unhold(struct i915_request *rq)
 	}
 }
 
-__maybe_unused
+struct execlists_capture {
+	struct work_struct work;
+	struct i915_request *rq;
+	struct i915_gpu_coredump *error;
+};
+
 static void execlists_unhold(struct intel_engine_cs *engine,
 			     struct i915_request *rq)
 {
@@ -2452,6 +2456,99 @@ static void execlists_unhold(struct intel_engine_cs *engine,
 	spin_unlock_irq(&engine->active.lock);
 }
 
+static void execlists_capture_work(struct work_struct *work)
+{
+	struct execlists_capture *cap = container_of(work, typeof(*cap), work);
+	const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
+	struct intel_engine_cs *engine = cap->rq->engine;
+	struct intel_gt_coredump *gt = cap->error->gt;
+	struct intel_engine_capture_vma *vma;
+
+	vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
+	if (vma) {
+		struct i915_vma_compress *comp;
+
+		comp = i915_vma_capture_prepare(gt);
+		if (comp) {
+			intel_engine_coredump_add_vma(gt->engine, vma, comp);
+			i915_vma_capture_finish(gt, comp);
+		}
+	}
+
+	gt->simulated = gt->engine->simulated;
+	cap->error->simulated = gt->simulated;
+
+	i915_error_state_store(cap->error);
+	i915_gpu_coredump_put(cap->error);
+
+	execlists_unhold(engine, cap->rq);
+
+	kfree(cap);
+}
+
+static struct i915_gpu_coredump *capture_regs(struct intel_engine_cs *engine)
+{
+	const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
+	struct i915_gpu_coredump *e;
+
+	e = i915_gpu_coredump_alloc(engine->i915, gfp);
+	if (!e)
+		return NULL;
+
+	e->gt = intel_gt_coredump_alloc(engine->gt, gfp);
+	if (!e->gt)
+		goto err;
+
+	e->gt->engine = intel_engine_coredump_alloc(engine, gfp);
+	if (!e->gt->engine)
+		goto err_gt;
+
+	return e;
+
+err_gt:
+	kfree(e->gt);
+err:
+	kfree(e);
+	return NULL;
+}
+
+static void execlists_capture(struct intel_engine_cs *engine)
+{
+	struct execlists_capture *cap;
+
+	if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
+		return;
+
+	cap = kmalloc(sizeof(*cap), GFP_ATOMIC);
+	if (!cap)
+		return;
+
+	cap->rq = execlists_active(&engine->execlists);
+	GEM_BUG_ON(!cap->rq);
+
+	/*
+	 * We need to _quickly_ capture the engine state before we reset.
+	 * We are inside an atomic section (softirq) here and we are delaying
+	 * the forced preemption event.
+	 */
+	cap->error = capture_regs(engine);
+	if (!cap->error) {
+		kfree(cap);
+		return;
+	}
+
+	/*
+	 * Remove the request from the execlists queue, and take ownership
+	 * of the request. We pass it to our worker who will _slowly_ compress
+	 * all the pages the _user_ requested for debugging their batch, after
+	 * which we return it to the queue for signaling.
+	 */
+	execlists_hold(engine, cap->rq);
+
+	INIT_WORK(&cap->work, execlists_capture_work);
+	schedule_work(&cap->work);
+}
+
 static noinline void preempt_reset(struct intel_engine_cs *engine)
 {
 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
@@ -2469,6 +2566,7 @@ static noinline void preempt_reset(struct intel_engine_cs *engine)
 	ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
 		     READ_ONCE(engine->props.preempt_timeout_ms),
 		     jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
+	execlists_capture(engine);
 	intel_engine_reset(engine, "preemption time out");
 
 	tasklet_enable(&engine->execlists.tasklet);
-- 
2.25.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [Intel-gfx] [PATCH 11/14] drm/i915: Drop the shadow ring state from the error capture
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 11/14] drm/i915: Drop the shadow ring state from the " Chris Wilson
@ 2020-01-09  9:04   ` Mika Kuoppala
  2020-01-09  9:14     ` Chris Wilson
  0 siblings, 1 reply; 30+ messages in thread
From: Mika Kuoppala @ 2020-01-09  9:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> The shadow ring regs (ring->head, ring->tail) are meaningless in the
> post-mortem dump as they do not related to anything on HW. Remove them
> from the coredump.

We have been dumping these just to check that our bookkeepping matches?
-Mika

>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gpu_error.c | 5 -----
>  drivers/gpu/drm/i915/i915_gpu_error.h | 4 ----
>  2 files changed, 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index b45c128d0a17..a35aad439281 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -553,8 +553,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
>  				   ee->vm_info.pp_dir_base);
>  		}
>  	}
> -	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
> -	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
>  	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
>  
>  	for (n = 0; n < ee->num_ports; n++) {
> @@ -1428,9 +1426,6 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
>  	if (HAS_BROKEN_CS_TLB(rq->i915))
>  		vma = capture_vma(vma, ee->engine->gt->scratch, "WA batch", gfp);
>  
> -	ee->cpu_ring_head = rq->ring->head;
> -	ee->cpu_ring_tail = rq->ring->tail;
> -
>  	ee->rq_head = rq->head;
>  	ee->rq_post = rq->postfix;
>  	ee->rq_tail = rq->tail;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index 0df9d8c32056..8f4579d64d8c 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -66,10 +66,6 @@ struct intel_engine_coredump {
>  	/* position of active request inside the ring */
>  	u32 rq_head, rq_post, rq_tail;
>  
> -	/* our own tracking of ring head and tail */
> -	u32 cpu_ring_head;
> -	u32 cpu_ring_tail;
> -
>  	/* Register state */
>  	u32 ccid;
>  	u32 start;
> -- 
> 2.25.0.rc1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Intel-gfx] [PATCH 12/14] drm/i915: Drop the shadow w/a batch buffer
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 12/14] drm/i915: Drop the shadow w/a batch buffer Chris Wilson
@ 2020-01-09  9:05   ` Mika Kuoppala
  2020-01-10 11:02   ` Andi Shyti
  1 sibling, 0 replies; 30+ messages in thread
From: Mika Kuoppala @ 2020-01-09  9:05 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> While this is technically the batch as executed by the HW (in part at
> least), it is confusing, and only used for a minority of gen.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gpu_error.c | 2 --
>  1 file changed, 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index a35aad439281..796c9ce0c494 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1423,8 +1423,6 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
>  	vma = request_record_user(rq, vma, gfp);
>  	vma = capture_vma(vma, rq->ring->vma, "ring", gfp);
>  	vma = capture_vma(vma, rq->context->state, "HW context", gfp);
> -	if (HAS_BROKEN_CS_TLB(rq->i915))
> -		vma = capture_vma(vma, ee->engine->gt->scratch, "WA batch", gfp);
>  
>  	ee->rq_head = rq->head;
>  	ee->rq_post = rq->postfix;
> -- 
> 2.25.0.rc1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Intel-gfx] [PATCH 11/14] drm/i915: Drop the shadow ring state from the error capture
  2020-01-09  9:04   ` Mika Kuoppala
@ 2020-01-09  9:14     ` Chris Wilson
  2020-01-10 10:41       ` Mika Kuoppala
  0 siblings, 1 reply; 30+ messages in thread
From: Chris Wilson @ 2020-01-09  9:14 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2020-01-09 09:04:31)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > The shadow ring regs (ring->head, ring->tail) are meaningless in the
> > post-mortem dump as they do not related to anything on HW. Remove them
> > from the coredump.
> 
> We have been dumping these just to check that our bookkeepping matches?

Kind off, but they never really match since the ring->head is very lazy,
and ring->tail is wherever the user got up to. We have the relevant
information from the request where we expect to be in the ring for the
error, and the HW tells us where it was executing.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Intel-gfx] [PATCH 10/14] drm/i915: Start chopping up the GPU error capture
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 10/14] drm/i915: Start chopping up the GPU error capture Chris Wilson
@ 2020-01-09 15:31   ` Andi Shyti
  2020-01-09 15:40     ` Chris Wilson
  2020-01-11 10:24   ` kbuild test robot
  1 sibling, 1 reply; 30+ messages in thread
From: Andi Shyti @ 2020-01-09 15:31 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

Hi Chris,

On Thu, Jan 09, 2020 at 08:58:35AM +0000, Chris Wilson wrote:
> In the near future, we will want to start a GPU error capture from a new
> context, from inside the softirq region of a forced preemption. To do
> so requires us to break up the monolithic error capture to provide new
> entry points with finer control; in particular focusing on one
> engine/gt, and being able to compose an error state from little pieces
> of HW capture.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Andi Shyti <andi.shyti@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_engine.h       |    2 +-
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c    |    6 +-
>  drivers/gpu/drm/i915/gt/intel_ggtt.c         |    3 +
>  drivers/gpu/drm/i915/gt/intel_gtt.h          |    1 +
>  drivers/gpu/drm/i915/gt/intel_reset.c        |    2 +-
>  drivers/gpu/drm/i915/gt/selftest_hangcheck.c |    2 +-
>  drivers/gpu/drm/i915/i915_debugfs.c          |   14 +-
>  drivers/gpu/drm/i915/i915_drv.h              |    2 +-
>  drivers/gpu/drm/i915/i915_gpu_error.c        | 1169 ++++++++++--------
>  drivers/gpu/drm/i915/i915_gpu_error.h        |  328 +++--
>  drivers/gpu/drm/i915/i915_sysfs.c            |    6 +-

don't we want to have a gt/intel_gt_error.[ch] at some point?

Andi
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Intel-gfx] [PATCH 10/14] drm/i915: Start chopping up the GPU error capture
  2020-01-09 15:31   ` Andi Shyti
@ 2020-01-09 15:40     ` Chris Wilson
  2020-01-09 15:57       ` Andi Shyti
  0 siblings, 1 reply; 30+ messages in thread
From: Chris Wilson @ 2020-01-09 15:40 UTC (permalink / raw)
  To: Andi Shyti; +Cc: intel-gfx

Quoting Andi Shyti (2020-01-09 15:31:15)
> Hi Chris,
> 
> On Thu, Jan 09, 2020 at 08:58:35AM +0000, Chris Wilson wrote:
> > In the near future, we will want to start a GPU error capture from a new
> > context, from inside the softirq region of a forced preemption. To do
> > so requires us to break up the monolithic error capture to provide new
> > entry points with finer control; in particular focusing on one
> > engine/gt, and being able to compose an error state from little pieces
> > of HW capture.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Andi Shyti <andi.shyti@intel.com>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_engine.h       |    2 +-
> >  drivers/gpu/drm/i915/gt/intel_engine_cs.c    |    6 +-
> >  drivers/gpu/drm/i915/gt/intel_ggtt.c         |    3 +
> >  drivers/gpu/drm/i915/gt/intel_gtt.h          |    1 +
> >  drivers/gpu/drm/i915/gt/intel_reset.c        |    2 +-
> >  drivers/gpu/drm/i915/gt/selftest_hangcheck.c |    2 +-
> >  drivers/gpu/drm/i915/i915_debugfs.c          |   14 +-
> >  drivers/gpu/drm/i915/i915_drv.h              |    2 +-
> >  drivers/gpu/drm/i915/i915_gpu_error.c        | 1169 ++++++++++--------
> >  drivers/gpu/drm/i915/i915_gpu_error.h        |  328 +++--
> >  drivers/gpu/drm/i915/i915_sysfs.c            |    6 +-
> 
> don't we want to have a gt/intel_gt_error.[ch] at some point?

I did give it some thought, and at the moment i915_gpu_error.c exists in
its own little bubble on the outside of the driver. That isn't to say
we couldn't keep gt/error_(engine|gt).c in the same bubble, but it was
easier to keep it where it was and hack it provide an engine capture
interface.

I think it is the direction we want to go in, but I think the first step
is make the output file structured (yaml is my pick) so that we can
rearrange, extend, remove bits and bobs without upsetting consumers. I
was very close to making the transition to yaml, but decided to bluster
through anyway since we dare not release a kernel where error capture is
disabled-by-default. They have their pitchforks at the ready.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Intel-gfx] [PATCH 10/14] drm/i915: Start chopping up the GPU error capture
  2020-01-09 15:40     ` Chris Wilson
@ 2020-01-09 15:57       ` Andi Shyti
  0 siblings, 0 replies; 30+ messages in thread
From: Andi Shyti @ 2020-01-09 15:57 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

Hi Chris,

> > > In the near future, we will want to start a GPU error capture from a new
> > > context, from inside the softirq region of a forced preemption. To do
> > > so requires us to break up the monolithic error capture to provide new
> > > entry points with finer control; in particular focusing on one
> > > engine/gt, and being able to compose an error state from little pieces
> > > of HW capture.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Andi Shyti <andi.shyti@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/gt/intel_engine.h       |    2 +-
> > >  drivers/gpu/drm/i915/gt/intel_engine_cs.c    |    6 +-
> > >  drivers/gpu/drm/i915/gt/intel_ggtt.c         |    3 +
> > >  drivers/gpu/drm/i915/gt/intel_gtt.h          |    1 +
> > >  drivers/gpu/drm/i915/gt/intel_reset.c        |    2 +-
> > >  drivers/gpu/drm/i915/gt/selftest_hangcheck.c |    2 +-
> > >  drivers/gpu/drm/i915/i915_debugfs.c          |   14 +-
> > >  drivers/gpu/drm/i915/i915_drv.h              |    2 +-
> > >  drivers/gpu/drm/i915/i915_gpu_error.c        | 1169 ++++++++++--------
> > >  drivers/gpu/drm/i915/i915_gpu_error.h        |  328 +++--
> > >  drivers/gpu/drm/i915/i915_sysfs.c            |    6 +-
> > 
> > don't we want to have a gt/intel_gt_error.[ch] at some point?
> 
> I did give it some thought, and at the moment i915_gpu_error.c exists in
> its own little bubble on the outside of the driver. That isn't to say
> we couldn't keep gt/error_(engine|gt).c in the same bubble, but it was
> easier to keep it where it was and hack it provide an engine capture
> interface.

OK, sure... the patch looked straight forward to me, anyway:

Acked-by: Andi Shyti <andi.shyti@intel.com>

Thanks,
Andi
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [01/14] drm/i915/gt: Push context state allocation earlier
  2020-01-09  8:58 [Intel-gfx] [PATCH 01/14] drm/i915/gt: Push context state allocation earlier Chris Wilson
                   ` (12 preceding siblings ...)
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 14/14] drm/i915/execlists: Offline error capture Chris Wilson
@ 2020-01-09 18:13 ` Patchwork
  13 siblings, 0 replies; 30+ messages in thread
From: Patchwork @ 2020-01-09 18:13 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/14] drm/i915/gt: Push context state allocation earlier
URL   : https://patchwork.freedesktop.org/series/71809/
State : failure

== Summary ==

Applying: drm/i915/gt: Push context state allocation earlier
Using index info to reconstruct a base tree...
M	drivers/gpu/drm/i915/gt/intel_context.c
M	drivers/gpu/drm/i915/gt/intel_context.h
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/gt/intel_context.h
Auto-merging drivers/gpu/drm/i915/gt/intel_context.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/gt/intel_context.c
error: Failed to merge in the changes.
hint: Use 'git am --show-current-patch' to see the failed patch
Patch failed at 0001 drm/i915/gt: Push context state allocation earlier
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Intel-gfx] [PATCH 11/14] drm/i915: Drop the shadow ring state from the error capture
  2020-01-09  9:14     ` Chris Wilson
@ 2020-01-10 10:41       ` Mika Kuoppala
  0 siblings, 0 replies; 30+ messages in thread
From: Mika Kuoppala @ 2020-01-10 10:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2020-01-09 09:04:31)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > The shadow ring regs (ring->head, ring->tail) are meaningless in the
>> > post-mortem dump as they do not related to anything on HW. Remove them
>> > from the coredump.
>> 
>> We have been dumping these just to check that our bookkeepping matches?
>
> Kind off, but they never really match since the ring->head is very lazy,
> and ring->tail is wherever the user got up to. We have the relevant
> information from the request where we expect to be in the ring for the
> error, and the HW tells us where it was executing.
> -Chris

Ok, based on that and that  don't remember a single case where,
from external reports, these would have provided anything of value.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Intel-gfx] [PATCH 12/14] drm/i915: Drop the shadow w/a batch buffer
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 12/14] drm/i915: Drop the shadow w/a batch buffer Chris Wilson
  2020-01-09  9:05   ` Mika Kuoppala
@ 2020-01-10 11:02   ` Andi Shyti
  1 sibling, 0 replies; 30+ messages in thread
From: Andi Shyti @ 2020-01-10 11:02 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

Hi Chris,

On Thu, Jan 09, 2020 at 08:58:37AM +0000, Chris Wilson wrote:
> While this is technically the batch as executed by the HW (in part at
> least), it is confusing, and only used for a minority of gen.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Acked-by: Andi Shyti <andi.shyti@intel.com>

Andi
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Intel-gfx] [PATCH 13/14] drm/i915: Drop request list from error state
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 13/14] drm/i915: Drop request list from error state Chris Wilson
@ 2020-01-10 11:04   ` Andi Shyti
  0 siblings, 0 replies; 30+ messages in thread
From: Andi Shyti @ 2020-01-10 11:04 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

Hi Chris,

On Thu, Jan 09, 2020 at 08:58:38AM +0000, Chris Wilson wrote:
> The list of requests from after the hang tells little about the hang
> itself, only how busy userspace was after the fact. As it pertains
> nothing to the HW state, drop it from the error state.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Acked-by: Andi Shyti <andi.shyti@intel.com>

Andi
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Intel-gfx] [PATCH 03/14] drm/i915/gt: runtime-pm is no longer required for ce->ops->pin()
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 03/14] drm/i915/gt: runtime-pm is no longer required for ce->ops->pin() Chris Wilson
@ 2020-01-10 14:37   ` Mika Kuoppala
  0 siblings, 0 replies; 30+ messages in thread
From: Mika Kuoppala @ 2020-01-10 14:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Now that we have moved the runtime-pm management out of
> intel_context_acctive_acquire, and that itself out of ce->ops->pin(),
> no

s/acctive/active

> explicit runtime pm wakeref is required in intel_context_pin().
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_context.c | 5 +----
>  1 file changed, 1 insertion(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index cac80d87b2bb..9796a54b4f47 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -105,14 +105,11 @@ int __intel_context_do_pin(struct intel_context *ce)
>  		return -EINTR;
>  
>  	if (likely(!atomic_read(&ce->pin_count))) {
> -		intel_wakeref_t wakeref;
> -
>  		err = intel_context_active_acquire(ce);
>  		if (unlikely(err))
>  			goto err;
>  
> -		with_intel_runtime_pm(ce->engine->uncore->rpm, wakeref)
> -			err = ce->ops->pin(ce);
> +		err = ce->ops->pin(ce);
>  		if (unlikely(err))
>  			goto err_active;
>  
> -- 
> 2.25.0.rc1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Intel-gfx] [PATCH 14/14] drm/i915/execlists: Offline error capture
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 14/14] drm/i915/execlists: Offline error capture Chris Wilson
@ 2020-01-10 22:46     ` kbuild test robot
  2020-01-11 22:33     ` kbuild test robot
  1 sibling, 0 replies; 30+ messages in thread
From: kbuild test robot @ 2020-01-10 22:46 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 4655 bytes --]

Hi Chris,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on next-20200109]
[cannot apply to drm-intel/for-linux-next drm-tip/drm-tip v5.5-rc5 v5.5-rc4 v5.5-rc3 v5.5-rc5]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-gt-Push-context-state-allocation-earlier/20200110-090110
base:    85cff1ab64327cee3090050b3dd6b5f1df3e5e1f
config: x86_64-randconfig-a003-20200109 (attached as .config)
compiler: gcc-5 (Ubuntu 5.5.0-12ubuntu1) 5.5.0 20171010
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from drivers/gpu/drm/i915/i915_drv.h:97:0,
                    from drivers/gpu/drm/i915/gt/intel_lrc.c:136:
   drivers/gpu/drm/i915/i915_gpu_error.h:312:6: error: conflicting types for 'i915_vma_compress_prepare'
    void i915_vma_compress_prepare(struct i915_vma_compress *compress)
         ^
   drivers/gpu/drm/i915/i915_gpu_error.h:307:1: note: previous definition of 'i915_vma_compress_prepare' was here
    i915_vma_compress_prepare(struct intel_gt_coredump *gt)
    ^
   drivers/gpu/drm/i915/gt/intel_lrc.c: In function 'execlists_capture_work':
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2471:10: error: implicit declaration of function 'i915_vma_capture_prepare' [-Werror=implicit-function-declaration]
      comp = i915_vma_capture_prepare(gt);
             ^
   drivers/gpu/drm/i915/gt/intel_lrc.c:2471:8: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
      comp = i915_vma_capture_prepare(gt);
           ^
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2474:4: error: implicit declaration of function 'i915_vma_capture_finish' [-Werror=implicit-function-declaration]
       i915_vma_capture_finish(gt, comp);
       ^
   drivers/gpu/drm/i915/gt/intel_lrc.c:2481:25: error: passing argument 1 of 'i915_error_state_store' from incompatible pointer type [-Werror=incompatible-pointer-types]
     i915_error_state_store(cap->error);
                            ^
   In file included from drivers/gpu/drm/i915/i915_drv.h:97:0,
                    from drivers/gpu/drm/i915/gt/intel_lrc.c:136:
   drivers/gpu/drm/i915/i915_gpu_error.h:317:1: note: expected 'struct drm_i915_private *' but argument is of type 'struct i915_gpu_coredump *'
    i915_error_state_store(struct drm_i915_private *i915,
    ^
   drivers/gpu/drm/i915/gt/intel_lrc.c:2481:2: error: too few arguments to function 'i915_error_state_store'
     i915_error_state_store(cap->error);
     ^
   In file included from drivers/gpu/drm/i915/i915_drv.h:97:0,
                    from drivers/gpu/drm/i915/gt/intel_lrc.c:136:
   drivers/gpu/drm/i915/i915_gpu_error.h:317:1: note: declared here
    i915_error_state_store(struct drm_i915_private *i915,
    ^
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2482:2: error: implicit declaration of function 'i915_gpu_coredump_put' [-Werror=implicit-function-declaration]
     i915_gpu_coredump_put(cap->error);
     ^
   cc1: some warnings being treated as errors

vim +/i915_vma_capture_prepare +2471 drivers/gpu/drm/i915/gt/intel_lrc.c

  2458	
  2459	static void execlists_capture_work(struct work_struct *work)
  2460	{
  2461		struct execlists_capture *cap = container_of(work, typeof(*cap), work);
  2462		const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
  2463		struct intel_engine_cs *engine = cap->rq->engine;
  2464		struct intel_gt_coredump *gt = cap->error->gt;
  2465		struct intel_engine_capture_vma *vma;
  2466	
  2467		vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
  2468		if (vma) {
  2469			struct i915_vma_compress *comp;
  2470	
> 2471			comp = i915_vma_capture_prepare(gt);
  2472			if (comp) {
  2473				intel_engine_coredump_add_vma(gt->engine, vma, comp);
> 2474				i915_vma_capture_finish(gt, comp);
  2475			}
  2476		}
  2477	
  2478		gt->simulated = gt->engine->simulated;
  2479		cap->error->simulated = gt->simulated;
  2480	
  2481		i915_error_state_store(cap->error);
> 2482		i915_gpu_coredump_put(cap->error);
  2483	
  2484		execlists_unhold(engine, cap->rq);
  2485	
  2486		kfree(cap);
  2487	}
  2488	

---
0-DAY kernel test infrastructure                 Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 34700 bytes --]

[-- Attachment #3: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Intel-gfx] [PATCH 14/14] drm/i915/execlists: Offline error capture
@ 2020-01-10 22:46     ` kbuild test robot
  0 siblings, 0 replies; 30+ messages in thread
From: kbuild test robot @ 2020-01-10 22:46 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 4756 bytes --]

Hi Chris,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on next-20200109]
[cannot apply to drm-intel/for-linux-next drm-tip/drm-tip v5.5-rc5 v5.5-rc4 v5.5-rc3 v5.5-rc5]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-gt-Push-context-state-allocation-earlier/20200110-090110
base:    85cff1ab64327cee3090050b3dd6b5f1df3e5e1f
config: x86_64-randconfig-a003-20200109 (attached as .config)
compiler: gcc-5 (Ubuntu 5.5.0-12ubuntu1) 5.5.0 20171010
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from drivers/gpu/drm/i915/i915_drv.h:97:0,
                    from drivers/gpu/drm/i915/gt/intel_lrc.c:136:
   drivers/gpu/drm/i915/i915_gpu_error.h:312:6: error: conflicting types for 'i915_vma_compress_prepare'
    void i915_vma_compress_prepare(struct i915_vma_compress *compress)
         ^
   drivers/gpu/drm/i915/i915_gpu_error.h:307:1: note: previous definition of 'i915_vma_compress_prepare' was here
    i915_vma_compress_prepare(struct intel_gt_coredump *gt)
    ^
   drivers/gpu/drm/i915/gt/intel_lrc.c: In function 'execlists_capture_work':
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2471:10: error: implicit declaration of function 'i915_vma_capture_prepare' [-Werror=implicit-function-declaration]
      comp = i915_vma_capture_prepare(gt);
             ^
   drivers/gpu/drm/i915/gt/intel_lrc.c:2471:8: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
      comp = i915_vma_capture_prepare(gt);
           ^
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2474:4: error: implicit declaration of function 'i915_vma_capture_finish' [-Werror=implicit-function-declaration]
       i915_vma_capture_finish(gt, comp);
       ^
   drivers/gpu/drm/i915/gt/intel_lrc.c:2481:25: error: passing argument 1 of 'i915_error_state_store' from incompatible pointer type [-Werror=incompatible-pointer-types]
     i915_error_state_store(cap->error);
                            ^
   In file included from drivers/gpu/drm/i915/i915_drv.h:97:0,
                    from drivers/gpu/drm/i915/gt/intel_lrc.c:136:
   drivers/gpu/drm/i915/i915_gpu_error.h:317:1: note: expected 'struct drm_i915_private *' but argument is of type 'struct i915_gpu_coredump *'
    i915_error_state_store(struct drm_i915_private *i915,
    ^
   drivers/gpu/drm/i915/gt/intel_lrc.c:2481:2: error: too few arguments to function 'i915_error_state_store'
     i915_error_state_store(cap->error);
     ^
   In file included from drivers/gpu/drm/i915/i915_drv.h:97:0,
                    from drivers/gpu/drm/i915/gt/intel_lrc.c:136:
   drivers/gpu/drm/i915/i915_gpu_error.h:317:1: note: declared here
    i915_error_state_store(struct drm_i915_private *i915,
    ^
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2482:2: error: implicit declaration of function 'i915_gpu_coredump_put' [-Werror=implicit-function-declaration]
     i915_gpu_coredump_put(cap->error);
     ^
   cc1: some warnings being treated as errors

vim +/i915_vma_capture_prepare +2471 drivers/gpu/drm/i915/gt/intel_lrc.c

  2458	
  2459	static void execlists_capture_work(struct work_struct *work)
  2460	{
  2461		struct execlists_capture *cap = container_of(work, typeof(*cap), work);
  2462		const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
  2463		struct intel_engine_cs *engine = cap->rq->engine;
  2464		struct intel_gt_coredump *gt = cap->error->gt;
  2465		struct intel_engine_capture_vma *vma;
  2466	
  2467		vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
  2468		if (vma) {
  2469			struct i915_vma_compress *comp;
  2470	
> 2471			comp = i915_vma_capture_prepare(gt);
  2472			if (comp) {
  2473				intel_engine_coredump_add_vma(gt->engine, vma, comp);
> 2474				i915_vma_capture_finish(gt, comp);
  2475			}
  2476		}
  2477	
  2478		gt->simulated = gt->engine->simulated;
  2479		cap->error->simulated = gt->simulated;
  2480	
  2481		i915_error_state_store(cap->error);
> 2482		i915_gpu_coredump_put(cap->error);
  2483	
  2484		execlists_unhold(engine, cap->rq);
  2485	
  2486		kfree(cap);
  2487	}
  2488	

---
0-DAY kernel test infrastructure                 Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org Intel Corporation

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 34700 bytes --]

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Intel-gfx] [PATCH 10/14] drm/i915: Start chopping up the GPU error capture
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 10/14] drm/i915: Start chopping up the GPU error capture Chris Wilson
  2020-01-09 15:31   ` Andi Shyti
@ 2020-01-11 10:24   ` kbuild test robot
  1 sibling, 0 replies; 30+ messages in thread
From: kbuild test robot @ 2020-01-11 10:24 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 3278 bytes --]

Hi Chris,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on next-20200109]
[cannot apply to drm-intel/for-linux-next drm-tip/drm-tip v5.5-rc5 v5.5-rc4 v5.5-rc3 v5.5-rc5]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-gt-Push-context-state-allocation-earlier/20200110-090110
base:    85cff1ab64327cee3090050b3dd6b5f1df3e5e1f
config: x86_64-randconfig-g001-20200109 (attached as .config)
compiler: gcc-7 (Debian 7.5.0-3) 7.5.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from drivers/gpu/drm/i915/i915_drv.h:97:0,
                    from drivers/gpu/drm/i915/gem/i915_gem_context.h:14,
                    from drivers/gpu/drm/i915/i915_globals.c:11:
>> drivers/gpu/drm/i915/i915_gpu_error.h:317:6: error: conflicting types for 'i915_vma_compress_prepare'
    void i915_vma_compress_prepare(struct i915_vma_compress *compress)
         ^~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/i915/i915_gpu_error.h:312:1: note: previous definition of 'i915_vma_compress_prepare' was here
    i915_vma_compress_prepare(struct intel_gt_coredump *gt)
    ^~~~~~~~~~~~~~~~~~~~~~~~~
--
   In file included from drivers/gpu/drm/i915/i915_drv.h:97:0,
                    from drivers/gpu/drm/i915/display/intel_display_types.h:46,
                    from drivers/gpu/drm/i915/gt/intel_reset.c:10:
>> drivers/gpu/drm/i915/i915_gpu_error.h:317:6: error: conflicting types for 'i915_vma_compress_prepare'
    void i915_vma_compress_prepare(struct i915_vma_compress *compress)
         ^~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/i915/i915_gpu_error.h:312:1: note: previous definition of 'i915_vma_compress_prepare' was here
    i915_vma_compress_prepare(struct intel_gt_coredump *gt)
    ^~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/i915/gt/intel_reset.c: In function 'intel_gt_handle_error':
>> drivers/gpu/drm/i915/gt/intel_reset.c:1233:3: error: too few arguments to function 'i915_capture_error_state'
      i915_capture_error_state(gt->i915);
      ^~~~~~~~~~~~~~~~~~~~~~~~
   In file included from drivers/gpu/drm/i915/i915_drv.h:97:0,
                    from drivers/gpu/drm/i915/display/intel_display_types.h:46,
                    from drivers/gpu/drm/i915/gt/intel_reset.c:10:
   drivers/gpu/drm/i915/i915_gpu_error.h:272:20: note: declared here
    static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
                       ^~~~~~~~~~~~~~~~~~~~~~~~

vim +/i915_vma_compress_prepare +317 drivers/gpu/drm/i915/i915_gpu_error.h

   316	
 > 317	void i915_vma_compress_prepare(struct i915_vma_compress *compress)
   318	{
   319	}
   320	

---
0-DAY kernel test infrastructure                 Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org Intel Corporation

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 36061 bytes --]

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Intel-gfx] [PATCH 14/14] drm/i915/execlists: Offline error capture
  2020-01-09  8:58 ` [Intel-gfx] [PATCH 14/14] drm/i915/execlists: Offline error capture Chris Wilson
@ 2020-01-11 22:33     ` kbuild test robot
  2020-01-11 22:33     ` kbuild test robot
  1 sibling, 0 replies; 30+ messages in thread
From: kbuild test robot @ 2020-01-11 22:33 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 5015 bytes --]

Hi Chris,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on next-20200109]
[cannot apply to drm-intel/for-linux-next drm-tip/drm-tip v5.5-rc5 v5.5-rc4 v5.5-rc3 v5.5-rc5]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-gt-Push-context-state-allocation-earlier/20200110-090110
base:    85cff1ab64327cee3090050b3dd6b5f1df3e5e1f
config: x86_64-randconfig-g001-20200109 (attached as .config)
compiler: gcc-7 (Debian 7.5.0-3) 7.5.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All error/warnings (new ones prefixed by >>):

   In file included from drivers/gpu/drm/i915/i915_drv.h:97:0,
                    from drivers/gpu/drm/i915/gt/intel_lrc.c:136:
   drivers/gpu/drm/i915/i915_gpu_error.h:312:6: error: conflicting types for 'i915_vma_compress_prepare'
    void i915_vma_compress_prepare(struct i915_vma_compress *compress)
         ^~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/i915/i915_gpu_error.h:307:1: note: previous definition of 'i915_vma_compress_prepare' was here
    i915_vma_compress_prepare(struct intel_gt_coredump *gt)
    ^~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/i915/gt/intel_lrc.c: In function 'execlists_capture_work':
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2471:10: error: implicit declaration of function 'i915_vma_capture_prepare'; did you mean 'i915_vma_compress_prepare'? [-Werror=implicit-function-declaration]
      comp = i915_vma_capture_prepare(gt);
             ^~~~~~~~~~~~~~~~~~~~~~~~
             i915_vma_compress_prepare
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2471:8: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
      comp = i915_vma_capture_prepare(gt);
           ^
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2474:4: error: implicit declaration of function 'i915_vma_capture_finish'; did you mean 'i915_vma_clock_flush'? [-Werror=implicit-function-declaration]
       i915_vma_capture_finish(gt, comp);
       ^~~~~~~~~~~~~~~~~~~~~~~
       i915_vma_clock_flush
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2481:25: error: passing argument 1 of 'i915_error_state_store' from incompatible pointer type [-Werror=incompatible-pointer-types]
     i915_error_state_store(cap->error);
                            ^~~
   In file included from drivers/gpu/drm/i915/i915_drv.h:97:0,
                    from drivers/gpu/drm/i915/gt/intel_lrc.c:136:
   drivers/gpu/drm/i915/i915_gpu_error.h:317:1: note: expected 'struct drm_i915_private *' but argument is of type 'struct i915_gpu_coredump *'
    i915_error_state_store(struct drm_i915_private *i915,
    ^~~~~~~~~~~~~~~~~~~~~~
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2481:2: error: too few arguments to function 'i915_error_state_store'
     i915_error_state_store(cap->error);
     ^~~~~~~~~~~~~~~~~~~~~~
   In file included from drivers/gpu/drm/i915/i915_drv.h:97:0,
                    from drivers/gpu/drm/i915/gt/intel_lrc.c:136:
   drivers/gpu/drm/i915/i915_gpu_error.h:317:1: note: declared here
    i915_error_state_store(struct drm_i915_private *i915,
    ^~~~~~~~~~~~~~~~~~~~~~
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2482:2: error: implicit declaration of function 'i915_gpu_coredump_put'; did you mean 'i915_gpu_coredump_alloc'? [-Werror=implicit-function-declaration]
     i915_gpu_coredump_put(cap->error);
     ^~~~~~~~~~~~~~~~~~~~~
     i915_gpu_coredump_alloc
   cc1: some warnings being treated as errors

vim +2471 drivers/gpu/drm/i915/gt/intel_lrc.c

  2458	
  2459	static void execlists_capture_work(struct work_struct *work)
  2460	{
  2461		struct execlists_capture *cap = container_of(work, typeof(*cap), work);
  2462		const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
  2463		struct intel_engine_cs *engine = cap->rq->engine;
  2464		struct intel_gt_coredump *gt = cap->error->gt;
  2465		struct intel_engine_capture_vma *vma;
  2466	
  2467		vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
  2468		if (vma) {
  2469			struct i915_vma_compress *comp;
  2470	
> 2471			comp = i915_vma_capture_prepare(gt);
  2472			if (comp) {
  2473				intel_engine_coredump_add_vma(gt->engine, vma, comp);
> 2474				i915_vma_capture_finish(gt, comp);
  2475			}
  2476		}
  2477	
  2478		gt->simulated = gt->engine->simulated;
  2479		cap->error->simulated = gt->simulated;
  2480	
> 2481		i915_error_state_store(cap->error);
> 2482		i915_gpu_coredump_put(cap->error);
  2483	
  2484		execlists_unhold(engine, cap->rq);
  2485	
  2486		kfree(cap);
  2487	}
  2488	

---
0-DAY kernel test infrastructure                 Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 36061 bytes --]

[-- Attachment #3: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Intel-gfx] [PATCH 14/14] drm/i915/execlists: Offline error capture
@ 2020-01-11 22:33     ` kbuild test robot
  0 siblings, 0 replies; 30+ messages in thread
From: kbuild test robot @ 2020-01-11 22:33 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 5119 bytes --]

Hi Chris,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on next-20200109]
[cannot apply to drm-intel/for-linux-next drm-tip/drm-tip v5.5-rc5 v5.5-rc4 v5.5-rc3 v5.5-rc5]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-gt-Push-context-state-allocation-earlier/20200110-090110
base:    85cff1ab64327cee3090050b3dd6b5f1df3e5e1f
config: x86_64-randconfig-g001-20200109 (attached as .config)
compiler: gcc-7 (Debian 7.5.0-3) 7.5.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All error/warnings (new ones prefixed by >>):

   In file included from drivers/gpu/drm/i915/i915_drv.h:97:0,
                    from drivers/gpu/drm/i915/gt/intel_lrc.c:136:
   drivers/gpu/drm/i915/i915_gpu_error.h:312:6: error: conflicting types for 'i915_vma_compress_prepare'
    void i915_vma_compress_prepare(struct i915_vma_compress *compress)
         ^~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/i915/i915_gpu_error.h:307:1: note: previous definition of 'i915_vma_compress_prepare' was here
    i915_vma_compress_prepare(struct intel_gt_coredump *gt)
    ^~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/i915/gt/intel_lrc.c: In function 'execlists_capture_work':
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2471:10: error: implicit declaration of function 'i915_vma_capture_prepare'; did you mean 'i915_vma_compress_prepare'? [-Werror=implicit-function-declaration]
      comp = i915_vma_capture_prepare(gt);
             ^~~~~~~~~~~~~~~~~~~~~~~~
             i915_vma_compress_prepare
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2471:8: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
      comp = i915_vma_capture_prepare(gt);
           ^
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2474:4: error: implicit declaration of function 'i915_vma_capture_finish'; did you mean 'i915_vma_clock_flush'? [-Werror=implicit-function-declaration]
       i915_vma_capture_finish(gt, comp);
       ^~~~~~~~~~~~~~~~~~~~~~~
       i915_vma_clock_flush
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2481:25: error: passing argument 1 of 'i915_error_state_store' from incompatible pointer type [-Werror=incompatible-pointer-types]
     i915_error_state_store(cap->error);
                            ^~~
   In file included from drivers/gpu/drm/i915/i915_drv.h:97:0,
                    from drivers/gpu/drm/i915/gt/intel_lrc.c:136:
   drivers/gpu/drm/i915/i915_gpu_error.h:317:1: note: expected 'struct drm_i915_private *' but argument is of type 'struct i915_gpu_coredump *'
    i915_error_state_store(struct drm_i915_private *i915,
    ^~~~~~~~~~~~~~~~~~~~~~
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2481:2: error: too few arguments to function 'i915_error_state_store'
     i915_error_state_store(cap->error);
     ^~~~~~~~~~~~~~~~~~~~~~
   In file included from drivers/gpu/drm/i915/i915_drv.h:97:0,
                    from drivers/gpu/drm/i915/gt/intel_lrc.c:136:
   drivers/gpu/drm/i915/i915_gpu_error.h:317:1: note: declared here
    i915_error_state_store(struct drm_i915_private *i915,
    ^~~~~~~~~~~~~~~~~~~~~~
>> drivers/gpu/drm/i915/gt/intel_lrc.c:2482:2: error: implicit declaration of function 'i915_gpu_coredump_put'; did you mean 'i915_gpu_coredump_alloc'? [-Werror=implicit-function-declaration]
     i915_gpu_coredump_put(cap->error);
     ^~~~~~~~~~~~~~~~~~~~~
     i915_gpu_coredump_alloc
   cc1: some warnings being treated as errors

vim +2471 drivers/gpu/drm/i915/gt/intel_lrc.c

  2458	
  2459	static void execlists_capture_work(struct work_struct *work)
  2460	{
  2461		struct execlists_capture *cap = container_of(work, typeof(*cap), work);
  2462		const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
  2463		struct intel_engine_cs *engine = cap->rq->engine;
  2464		struct intel_gt_coredump *gt = cap->error->gt;
  2465		struct intel_engine_capture_vma *vma;
  2466	
  2467		vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
  2468		if (vma) {
  2469			struct i915_vma_compress *comp;
  2470	
> 2471			comp = i915_vma_capture_prepare(gt);
  2472			if (comp) {
  2473				intel_engine_coredump_add_vma(gt->engine, vma, comp);
> 2474				i915_vma_capture_finish(gt, comp);
  2475			}
  2476		}
  2477	
  2478		gt->simulated = gt->engine->simulated;
  2479		cap->error->simulated = gt->simulated;
  2480	
> 2481		i915_error_state_store(cap->error);
> 2482		i915_gpu_coredump_put(cap->error);
  2483	
  2484		execlists_unhold(engine, cap->rq);
  2485	
  2486		kfree(cap);
  2487	}
  2488	

---
0-DAY kernel test infrastructure                 Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org Intel Corporation

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 36061 bytes --]

^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2020-01-11 22:33 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-09  8:58 [Intel-gfx] [PATCH 01/14] drm/i915/gt: Push context state allocation earlier Chris Wilson
2020-01-09  8:58 ` [Intel-gfx] [PATCH 02/14] drm/i915/gt: Pull context activation into central intel_context_pin() Chris Wilson
2020-01-09  8:58 ` [Intel-gfx] [PATCH 03/14] drm/i915/gt: runtime-pm is no longer required for ce->ops->pin() Chris Wilson
2020-01-10 14:37   ` Mika Kuoppala
2020-01-09  8:58 ` [Intel-gfx] [PATCH 04/14] drm/i915: Pin the context as we work on it Chris Wilson
2020-01-09  8:58 ` [Intel-gfx] [PATCH 05/14] drm/i915: Replace vma parking with a clock aging algorithm Chris Wilson
2020-01-09  8:58 ` [Intel-gfx] [PATCH 06/14] drm/i915/gt: Always reset the timeslice after a context switch Chris Wilson
2020-01-09  8:58 ` [Intel-gfx] [PATCH 07/14] drm/i915/gt: Yield the timeslice if waiting on a semaphore Chris Wilson
2020-01-09  8:58 ` [Intel-gfx] [PATCH 08/14] drm/i915: Use common priotree lists for virtual engine Chris Wilson
2020-01-09  8:58 ` [Intel-gfx] [PATCH 09/14] drm/i915/gt: Allow temporary suspension of inflight requests Chris Wilson
2020-01-09  8:58 ` [Intel-gfx] [PATCH 10/14] drm/i915: Start chopping up the GPU error capture Chris Wilson
2020-01-09 15:31   ` Andi Shyti
2020-01-09 15:40     ` Chris Wilson
2020-01-09 15:57       ` Andi Shyti
2020-01-11 10:24   ` kbuild test robot
2020-01-09  8:58 ` [Intel-gfx] [PATCH 11/14] drm/i915: Drop the shadow ring state from the " Chris Wilson
2020-01-09  9:04   ` Mika Kuoppala
2020-01-09  9:14     ` Chris Wilson
2020-01-10 10:41       ` Mika Kuoppala
2020-01-09  8:58 ` [Intel-gfx] [PATCH 12/14] drm/i915: Drop the shadow w/a batch buffer Chris Wilson
2020-01-09  9:05   ` Mika Kuoppala
2020-01-10 11:02   ` Andi Shyti
2020-01-09  8:58 ` [Intel-gfx] [PATCH 13/14] drm/i915: Drop request list from error state Chris Wilson
2020-01-10 11:04   ` Andi Shyti
2020-01-09  8:58 ` [Intel-gfx] [PATCH 14/14] drm/i915/execlists: Offline error capture Chris Wilson
2020-01-10 22:46   ` kbuild test robot
2020-01-10 22:46     ` kbuild test robot
2020-01-11 22:33   ` kbuild test robot
2020-01-11 22:33     ` kbuild test robot
2020-01-09 18:13 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [01/14] drm/i915/gt: Push context state allocation earlier Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.